├── datum ├── README.md ├── __init__.py ├── meta │ ├── __init__.py │ └── dataset.py ├── models │ ├── __init__.py │ ├── ssd │ │ ├── __init__.py │ │ └── ssd_dataset.py │ └── yolo │ │ ├── __init__.py │ │ ├── yolo_dataset.py │ │ └── yolo_batch_dataset.py └── utils │ ├── __init__.py │ ├── process_config.py │ └── tools.py ├── results ├── test_res_image │ ├── loss1.png │ ├── loss2.png │ └── loss3.png └── README.md ├── Others ├── __init__.py ├── vedia │ ├── __init__.py │ ├── show.py │ └── convert2voc.py ├── voc │ ├── __init__.py │ └── process_pascal_voc.py ├── satellite │ ├── __init__.py │ ├── process.py │ ├── bbox_cluster.py │ ├── clip_video.py │ └── prepare_trainsamples.py ├── lsd12 │ ├── __init__.py │ ├── label_config.py │ ├── check_dataset.py │ └── format_input.py └── README.md ├── checks ├── __init__.py ├── brain │ ├── __init__.py │ ├── ssd │ │ ├── __init__.py │ │ └── check_ssd_model.py │ └── yolo │ │ ├── __init__.py │ │ └── check_yolo_model.py ├── datasets │ ├── __init__.py │ └── check_ssd_dataset.py └── observe │ ├── __init__.py │ ├── check_median_blur.py │ ├── check_gaussian_blur.py │ ├── check_average_blur.py │ ├── check_color.py │ ├── check_parameters.py │ └── check_background.py ├── eagle ├── __init__.py ├── brain │ ├── __init__.py │ ├── solver │ │ ├── __init__.py │ │ ├── solver.py │ │ ├── yolo_solver.py │ │ ├── ssd_solver.py │ │ └── yolo_u_solver.py │ ├── ssd │ │ ├── __init__.py │ │ ├── models │ │ │ ├── __init__.py │ │ │ ├── components.py │ │ │ └── net.py │ │ ├── normalization.py │ │ ├── anchor_boxes.py │ │ └── loss.py │ ├── yolo │ │ ├── __init__.py │ │ └── net.py │ └── rotation │ │ ├── __init__.py │ │ └── yolo │ │ ├── __init__.py │ │ └── net.py ├── observe │ ├── __init__.py │ ├── base │ │ └── __init__.py │ └── augmentors │ │ ├── __init__.py │ │ ├── flip.py │ │ ├── arithmetic.py │ │ └── blur.py ├── README.md └── utils.py ├── examples ├── __init__.py ├── ssd │ ├── __init__.py │ └── vgg_trainer.py ├── unet │ ├── __init__.py │ ├── train.py │ └── predict.py └── yolo │ ├── __init__.py │ └── train.py ├── conf ├── yolo_train.cfg ├── yolo_train_server.cfg ├── yolo_unet_train_server.cfg ├── yolo_unet_train.cfg ├── ssd_train.cfg ├── ssd_train_server.cfg ├── dilated_ssd_train.cfg └── ssd_train_512.cfg └── README.md /datum/README.md: -------------------------------------------------------------------------------- 1 | ## DataSets 2 | 3 | This package implements the unified dataset-processing logic. 4 | 5 | ### Directory Structure 6 | 7 | ### Configuration 8 | -------------------------------------------------------------------------------- /results/test_res_image/loss1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/guiyang882/DL.EyeSight/HEAD/results/test_res_image/loss1.png -------------------------------------------------------------------------------- /results/test_res_image/loss2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/guiyang882/DL.EyeSight/HEAD/results/test_res_image/loss2.png -------------------------------------------------------------------------------- /results/test_res_image/loss3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/guiyang882/DL.EyeSight/HEAD/results/test_res_image/loss3.png -------------------------------------------------------------------------------- /results/README.md: -------------------------------------------------------------------------------- 1 | ## Storage for Model Results 2 | 3 | ``` 4 | results 5 | ├── ssd 6 | │   ├── 
train_model 7 | │   └── pretrain 8 | └── yolo 9 | ├── train_model 10 | └── pretrain 11 | ``` -------------------------------------------------------------------------------- /Others/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2009 IW. 2 | # All rights reserved. 3 | # 4 | # Author: liuguiyang 5 | # Date: 2018/3/5 6 | 7 | from __future__ import absolute_import 8 | from __future__ import division 9 | from __future__ import print_function -------------------------------------------------------------------------------- /checks/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2009 IW. 2 | # All rights reserved. 3 | # 4 | # Author: liuguiyang 5 | # Date: 2018/2/26 6 | 7 | from __future__ import absolute_import 8 | from __future__ import division 9 | from __future__ import print_function -------------------------------------------------------------------------------- /datum/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2009 IW. 2 | # All rights reserved. 3 | # 4 | # Author: liuguiyang 5 | # Date: 2018/3/5 6 | 7 | from __future__ import absolute_import 8 | from __future__ import division 9 | from __future__ import print_function -------------------------------------------------------------------------------- /eagle/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2009 IW. 2 | # All rights reserved. 3 | # 4 | # Author: liuguiyang 5 | # Date: 2018/2/26 6 | 7 | from __future__ import absolute_import 8 | from __future__ import division 9 | from __future__ import print_function -------------------------------------------------------------------------------- /examples/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2009 IW. 2 | # All rights reserved. 3 | # 4 | # Author: liuguiyang 5 | # Date: 2018/3/6 6 | 7 | from __future__ import absolute_import 8 | from __future__ import division 9 | from __future__ import print_function -------------------------------------------------------------------------------- /Others/vedia/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2009 IW. 2 | # All rights reserved. 3 | # 4 | # Author: liuguiyang 5 | # Date: 2018/3/12 6 | 7 | from __future__ import absolute_import 8 | from __future__ import division 9 | from __future__ import print_function -------------------------------------------------------------------------------- /Others/voc/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2009 IW. 2 | # All rights reserved. 3 | # 4 | # Author: liuguiyang 5 | # Date: 2018/3/15 6 | 7 | from __future__ import absolute_import 8 | from __future__ import division 9 | from __future__ import print_function -------------------------------------------------------------------------------- /checks/brain/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2009 IW. 2 | # All rights reserved. 
3 | # 4 | # Author: liuguiyang 5 | # Date: 2018/2/28 6 | 7 | from __future__ import absolute_import 8 | from __future__ import division 9 | from __future__ import print_function -------------------------------------------------------------------------------- /datum/meta/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2009 IW. 2 | # All rights reserved. 3 | # 4 | # Author: liuguiyang 5 | # Date: 2018/3/5 6 | 7 | from __future__ import absolute_import 8 | from __future__ import division 9 | from __future__ import print_function -------------------------------------------------------------------------------- /datum/models/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2009 IW. 2 | # All rights reserved. 3 | # 4 | # Author: liuguiyang 5 | # Date: 2018/3/8 6 | 7 | from __future__ import absolute_import 8 | from __future__ import division 9 | from __future__ import print_function -------------------------------------------------------------------------------- /datum/utils/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2009 IW. 2 | # All rights reserved. 3 | # 4 | # Author: liuguiyang 5 | # Date: 2018/3/5 6 | 7 | from __future__ import absolute_import 8 | from __future__ import division 9 | from __future__ import print_function -------------------------------------------------------------------------------- /eagle/brain/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2009 IW. 2 | # All rights reserved. 3 | # 4 | # Author: liuguiyang 5 | # Date: 2018/2/26 6 | 7 | from __future__ import absolute_import 8 | from __future__ import division 9 | from __future__ import print_function -------------------------------------------------------------------------------- /examples/ssd/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2009 IW. 2 | # All rights reserved. 3 | # 4 | # Author: liuguiyang 5 | # Date: 2018/3/6 6 | 7 | from __future__ import absolute_import 8 | from __future__ import division 9 | from __future__ import print_function -------------------------------------------------------------------------------- /examples/unet/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2009 IW. 2 | # All rights reserved. 3 | # 4 | # Author: liuguiyang 5 | # Date: 2018/3/6 6 | 7 | from __future__ import absolute_import 8 | from __future__ import division 9 | from __future__ import print_function -------------------------------------------------------------------------------- /examples/yolo/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2009 IW. 2 | # All rights reserved. 3 | # 4 | # Author: liuguiyang 5 | # Date: 2018/3/6 6 | 7 | from __future__ import absolute_import 8 | from __future__ import division 9 | from __future__ import print_function -------------------------------------------------------------------------------- /Others/satellite/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2009 IW. 2 | # All rights reserved. 
3 | # 4 | # Author: liuguiyang 5 | # Date: 2018/3/15 6 | 7 | from __future__ import absolute_import 8 | from __future__ import division 9 | from __future__ import print_function -------------------------------------------------------------------------------- /checks/brain/ssd/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2009 IW. 2 | # All rights reserved. 3 | # 4 | # Author: liuguiyang 5 | # Date: 2018/3/9 6 | 7 | from __future__ import absolute_import 8 | from __future__ import division 9 | from __future__ import print_function -------------------------------------------------------------------------------- /checks/brain/yolo/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2009 IW. 2 | # All rights reserved. 3 | # 4 | # Author: liuguiyang 5 | # Date: 2018/3/14 6 | 7 | from __future__ import absolute_import 8 | from __future__ import division 9 | from __future__ import print_function -------------------------------------------------------------------------------- /checks/datasets/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2009 IW. 2 | # All rights reserved. 3 | # 4 | # Author: liuguiyang 5 | # Date: 2018/3/11 6 | 7 | from __future__ import absolute_import 8 | from __future__ import division 9 | from __future__ import print_function -------------------------------------------------------------------------------- /checks/observe/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2009 IW. 2 | # All rights reserved. 3 | # 4 | # Author: liuguiyang 5 | # Date: 2018/2/28 6 | 7 | from __future__ import absolute_import 8 | from __future__ import division 9 | from __future__ import print_function -------------------------------------------------------------------------------- /datum/models/ssd/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2009 IW. 2 | # All rights reserved. 3 | # 4 | # Author: liuguiyang 5 | # Date: 2018/3/8 6 | 7 | from __future__ import absolute_import 8 | from __future__ import division 9 | from __future__ import print_function -------------------------------------------------------------------------------- /datum/models/yolo/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2009 IW. 2 | # All rights reserved. 3 | # 4 | # Author: liuguiyang 5 | # Date: 2018/3/8 6 | 7 | from __future__ import absolute_import 8 | from __future__ import division 9 | from __future__ import print_function -------------------------------------------------------------------------------- /eagle/brain/solver/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2009 IW. 2 | # All rights reserved. 3 | # 4 | # Author: liuguiyang 5 | # Date: 2018/3/4 6 | 7 | from __future__ import absolute_import 8 | from __future__ import division 9 | from __future__ import print_function -------------------------------------------------------------------------------- /eagle/brain/ssd/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2009 IW. 2 | # All rights reserved. 
3 | # 4 | # Author: liuguiyang 5 | # Date: 2018/3/1 6 | 7 | from __future__ import absolute_import 8 | from __future__ import division 9 | from __future__ import print_function -------------------------------------------------------------------------------- /eagle/brain/yolo/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2009 IW. 2 | # All rights reserved. 3 | # 4 | # Author: liuguiyang 5 | # Date: 2018/3/1 6 | 7 | from __future__ import absolute_import 8 | from __future__ import division 9 | from __future__ import print_function -------------------------------------------------------------------------------- /eagle/observe/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2009 IW. 2 | # All rights reserved. 3 | # 4 | # Author: liuguiyang 5 | # Date: 2018/2/26 6 | 7 | from __future__ import absolute_import 8 | from __future__ import division 9 | from __future__ import print_function -------------------------------------------------------------------------------- /eagle/brain/rotation/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2009 IW. 2 | # All rights reserved. 3 | # 4 | # Author: liuguiyang 5 | # Date: 2018/3/12 6 | 7 | from __future__ import absolute_import 8 | from __future__ import division 9 | from __future__ import print_function -------------------------------------------------------------------------------- /eagle/brain/ssd/models/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2009 IW. 2 | # All rights reserved. 3 | # 4 | # Author: liuguiyang 5 | # Date: 2018/3/1 6 | 7 | from __future__ import absolute_import 8 | from __future__ import division 9 | from __future__ import print_function -------------------------------------------------------------------------------- /eagle/observe/base/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2009 IW. 2 | # All rights reserved. 3 | # 4 | # Author: liuguiyang 5 | # Date: 2018/2/26 6 | 7 | from __future__ import absolute_import 8 | from __future__ import division 9 | from __future__ import print_function -------------------------------------------------------------------------------- /Others/lsd12/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2009 IW. 2 | # All rights reserved. 3 | # 4 | # Author: liuguiyang 5 | # Date: 2018/1/3 6 | 7 | from __future__ import absolute_import 8 | from __future__ import division 9 | from __future__ import print_function 10 | -------------------------------------------------------------------------------- /eagle/brain/rotation/yolo/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2009 IW. 2 | # All rights reserved. 3 | # 4 | # Author: liuguiyang 5 | # Date: 2018/3/12 6 | 7 | from __future__ import absolute_import 8 | from __future__ import division 9 | from __future__ import print_function -------------------------------------------------------------------------------- /eagle/observe/augmentors/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2009 IW. 2 | # All rights reserved. 
3 | # 4 | # Author: liuguiyang 5 | # Date: 2018/2/26 6 | 7 | from __future__ import absolute_import 8 | from __future__ import division 9 | from __future__ import print_function -------------------------------------------------------------------------------- /Others/README.md: -------------------------------------------------------------------------------- 1 | ## Unifying Dataset Formats 2 | Mainly covers organizing the data of the various datasets into one unified format. 3 | 4 | ### Dataset List 5 | - [x] VOC dataset 6 | - [x] VEDIA dataset 7 | - [ ] KITTI dataset 8 | - [ ] NWPU-VHR 9 | - [ ] DOTA (A Large-scale Dataset for Object Detection in Aerial Images) 10 | - [ ] RSOD-Dataset 11 | - [ ] INRIA aerial image dataset 12 | 13 | 14 | ### Dataset Descriptions 15 | -------------------------------------------------------------------------------- /conf/yolo_train.cfg: -------------------------------------------------------------------------------- 1 | [Common] 2 | image_size: 448 3 | image_width: 448 4 | image_height: 448 5 | batch_size: 32 6 | num_classes: 20 7 | max_objects_per_image: 20 8 | 9 | [DataSet] 10 | path: /Volumes/projects/DataSets/VOC/pascal_voc_2007.txt 11 | thread_num: 8 12 | 13 | [Net] 14 | weight_decay: 0.0005 15 | cell_size: 7 16 | boxes_per_cell: 2 17 | object_scale: 1 18 | noobject_scale: 0.5 19 | class_scale: 1 20 | coord_scale: 5 21 | 22 | [Solver] 23 | lr: 0.0005 24 | moment: 0.9 25 | max_iterators: 1000000 26 | pretrain_model_path: /Users/liuguiyang/github.com/DL.EyeSight/results/yolo/pretrain/yolo_tiny.ckpt 27 | train_dir: /Users/liuguiyang/github.com/DL.EyeSight/results/yolo/train_model/ -------------------------------------------------------------------------------- /conf/yolo_train_server.cfg: -------------------------------------------------------------------------------- 1 | [Common] 2 | image_size: 448 3 | image_width: 448 4 | image_height: 448 5 | batch_size: 64 6 | num_classes: 20 7 | max_objects_per_image: 20 8 | 9 | [DataSet] 10 | path: /home/ai-i-liuguiyang/repos_ssd/VOC_DATA/pascal_voc_2007.txt 11 | thread_num: 10 12 | 13 | [Net] 14 | weight_decay: 0.0005 15 | cell_size: 7 16 | boxes_per_cell: 2 17 | object_scale: 1 18 | noobject_scale: 0.5 19 | class_scale: 1 20 | coord_scale: 5 21 | 22 | [Solver] 23 | lr: 0.0005 24 | moment: 0.9 25 | max_iterators: 1000000 26 | pretrain_model_path: /home/ai-i-liuguiyang/proj/DL.EyeSight/results/yolo/pretrain/yolo_tiny.ckpt 27 | train_dir: /home/ai-i-liuguiyang/proj/DL.EyeSight/results/yolo/train_model/ -------------------------------------------------------------------------------- /datum/meta/dataset.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2009 IW. 2 | # All rights reserved. 3 | # 4 | # Author: liuguiyang 5 | # Date: 2018/3/5 6 | 7 | from __future__ import absolute_import 8 | from __future__ import division 9 | from __future__ import print_function 10 | 11 | 12 | class DataSet(object): 13 | def __init__(self, common_params, dataset_params): 14 | if not isinstance(common_params, dict): 15 | raise TypeError("common_params must be dict") 16 | if not isinstance(dataset_params, dict): 17 | raise TypeError("dataset_params must be dict") 18 | 19 | def batch(self): 20 | raise NotImplementedError
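# Editor's sketch (not part of the original file): a concrete dataset is
# expected to subclass DataSet, keep the two parameter dicts, and implement
# batch(); the helper names below are hypothetical:
#
# class ToyDataSet(DataSet):
#     def __init__(self, common_params, dataset_params):
#         super(ToyDataSet, self).__init__(common_params, dataset_params)
#         self.batch_size = int(common_params["batch_size"])
#         self.samples = load_samples(dataset_params["path"])  # hypothetical helper
#
#     def batch(self):
#         # one training batch per call, e.g. (images, labels, objects_num)
#         return next_batch(self.samples, self.batch_size)  # hypothetical helper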
-------------------------------------------------------------------------------- /eagle/brain/solver/solver.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2009 IW. 2 | # All rights reserved. 3 | # 4 | # Author: liuguiyang 5 | # Date: 2018/3/4 6 | 7 | from __future__ import absolute_import 8 | from __future__ import division 9 | from __future__ import print_function 10 | 11 | 12 | class Solver(object): 13 | def __init__(self, dataset, net, common_params, solver_params): 14 | if not isinstance(common_params, dict): 15 | raise TypeError("common_params must be dict") 16 | if not isinstance(solver_params, dict): 17 | raise TypeError("solver_params must be dict") 18 | 19 | def solve(self): 20 | raise NotImplementedError -------------------------------------------------------------------------------- /conf/yolo_unet_train_server.cfg: -------------------------------------------------------------------------------- 1 | [Common] 2 | image_size: 512 3 | image_width: 512 4 | image_height: 512 5 | batch_size: 128 6 | num_classes: 1 7 | max_objects_per_image: 30 8 | 9 | [DataSet] 10 | path: /home/ai-i-liuguiyang/datasets/CSUVideo/512x512/train.txt 11 | thread_num: 8 12 | 13 | [Net] 14 | weight_decay: 0.0005 15 | # cell_size: 9, 15 16 | cell_size: 9 17 | boxes_per_cell: 2 18 | object_scale: 1 19 | noobject_scale: 0.5 20 | class_scale: 1 21 | coord_scale: 5 22 | 23 | [Solver] 24 | lr: 0.0005 25 | moment: 0.9 26 | max_iterators: 100000 27 | pretrain_model_path: /home/ai-i-liuguiyang/github.com/DL.EyeSight/results/unet/pretrain/model.ckpt 28 | train_dir: /home/ai-i-liuguiyang/github.com/DL.EyeSight/results/unet/train_model -------------------------------------------------------------------------------- /conf/yolo_unet_train.cfg: -------------------------------------------------------------------------------- 1 | [Common] 2 | # When is_predict == True, batch_size == 1 and only the forward pass is run 3 | # When is_predict == False, the network is trained 4 | is_predict: True 5 | image_size: 512 6 | image_width: 512 7 | image_height: 512 8 | batch_size: 32 9 | num_classes: 1 10 | max_objects_per_image: 30 11 | 12 | [DataSet] 13 | path: /Volumes/projects/DataSets/CSUVideo/512x512/train.txt 14 | thread_num: 8 15 | 16 | [Net] 17 | weight_decay: 0.0005 18 | # cell_size: 9, 15 19 | cell_size: 9 20 | boxes_per_cell: 2 21 | object_scale: 1 22 | noobject_scale: 0.5 23 | class_scale: 1 24 | coord_scale: 5 25 | 26 | [Solver] 27 | lr: 0.0005 28 | moment: 0.9 29 | max_iterators: 100000 30 | pretrain_model_path: /Users/liuguiyang/github.com/DL.EyeSight/results/unet/pretrain/model.ckpt 31 | train_dir: /Users/liuguiyang/github.com/DL.EyeSight/results/unet/train_model/ -------------------------------------------------------------------------------- /Others/lsd12/label_config.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2009 IW. 2 | # All rights reserved. 3 | # 4 | # Author: liuguiyang 5 | # Date: 2018/1/3 6 | 7 | from __future__ import absolute_import 8 | from __future__ import division 9 | from __future__ import print_function 10 | 11 | sign_idx_dict = { 12 | "airplane": 1, 13 | "ship": 2, 14 | "storagetank": 3, 15 | "baseballdiamond": 4, 16 | "tenniscourt": 5, 17 | "basketballcourt": 6, 18 | "groundtrackfield": 7, 19 | "harbor": 8, 20 | "bridge": 9, 21 | "vehicle": 10, 22 | "car": 10, 23 | "vehiclecar": 10, 24 | "campingcar": 11, 25 | "van": 11, 26 | "pickup": 12, 27 | "truck": 12, 28 | "tractor": 12 29 | } 30 | 31 | idx_sign_dict = { 32 | 1: "airplane", 33 | 2: "ship", 34 | 3: "storagetank", 35 | 4: "baseballdiamond", 36 | 5: "tenniscourt", 37 | 6: "basketballcourt", 38 | 7: "groundtrackfield", 39 | 8: "harbor", 40 | 9: "bridge", 41 | 10: "vehicle", 42 | 11: "van", 43 | 12: "truck" 44 | }
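# Editor's note: several raw labels deliberately share one id, and
# idx_sign_dict maps each id back to its canonical name, e.g.:
#   sign_idx_dict["car"]                 -> 10
#   idx_sign_dict[sign_idx_dict["car"]]  -> "vehicle"
#   sign_idx_dict["tractor"]             -> 12
#   idx_sign_dict[12]                    -> "truck"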
-------------------------------------------------------------------------------- /checks/datasets/check_ssd_dataset.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2009 IW. 2 | # All rights reserved. 3 | # 4 | # Author: liuguiyang 5 | # Date: 2018/3/11 6 | 7 | from __future__ import absolute_import 8 | from __future__ import division 9 | from __future__ import print_function 10 | 11 | from optparse import OptionParser 12 | 13 | from datum.utils.process_config import process_config 14 | from datum.models.ssd.ssd_dataset import SSDDataSet 15 | 16 | 17 | parser = OptionParser() 18 | parser.add_option("-c", "--conf", 19 | dest="configure", 20 | help="configure filename") 21 | (options, args) = parser.parse_args() 22 | if options.configure: 23 | conf_file = str(options.configure) 24 | else: 25 | print('please specify --conf configure filename') 26 | exit(0) 27 | 28 | common_params, dataset_params, net_params, solver_params, box_encoder_params = \ 29 | process_config(conf_file) 30 | 31 | 32 | data_generator = SSDDataSet(common_params, dataset_params, box_encoder_params) 33 | data_generator.batch() -------------------------------------------------------------------------------- /checks/brain/yolo/check_yolo_model.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2009 IW. 2 | # All rights reserved. 3 | # 4 | # Author: liuguiyang 5 | # Date: 2018/3/14 6 | 7 | from __future__ import absolute_import 8 | from __future__ import division 9 | from __future__ import print_function 10 | 11 | import tensorflow as tf 12 | from optparse import OptionParser 13 | 14 | from datum.utils.process_config import process_config 15 | from eagle.brain.yolo.yolo_u_net import YOLOUNet 16 | 17 | 18 | parser = OptionParser() 19 | parser.add_option("-c", "--conf", 20 | dest="configure", 21 | help="configure filename") 22 | (options, args) = parser.parse_args() 23 | if options.configure: 24 | conf_file = str(options.configure) 25 | else: 26 | print('please specify --conf configure filename') 27 | exit(0) 28 | 29 | common_params, dataset_params, net_params, solver_params = \ 30 | process_config(conf_file) 31 | 32 | net = YOLOUNet(common_params, net_params) 33 | images = tf.placeholder(dtype=tf.float32, shape=(32, 512, 512, 3)) 34 | model_spec = net.inference(images) 35 | print(model_spec)
-------------------------------------------------------------------------------- /checks/brain/ssd/check_ssd_model.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2009 IW. 2 | # All rights reserved. 3 | # 4 | # Author: liuguiyang 5 | # Date: 2018/3/9 6 | 7 | from __future__ import absolute_import 8 | from __future__ import division 9 | from __future__ import print_function 10 | 11 | import tensorflow as tf 12 | from optparse import OptionParser 13 | 14 | from datum.utils.process_config import process_config 15 | from eagle.brain.ssd.models.vgg import SSDVGG 16 | 17 | 18 | parser = OptionParser() 19 | parser.add_option("-c", "--conf", 20 | dest="configure", 21 | help="configure filename") 22 | (options, args) = parser.parse_args() 23 | if options.configure: 24 | conf_file = str(options.configure) 25 | else: 26 | print('please specify --conf configure filename') 27 | exit(0) 28 | 29 | common_params, dataset_params, net_params, solver_params, box_encoder_params = \ 30 | process_config(conf_file) 31 | 32 | net = SSDVGG(common_params, net_params, box_encoder_params) 33 | images = tf.placeholder(dtype=tf.float32, shape=(32, 300, 300, 3)) 34 | model_spec = net.inference(images) 35 | print(model_spec) -------------------------------------------------------------------------------- /examples/yolo/train.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2009 IW. 2 | # All rights reserved. 3 | # 4 | # Author: liuguiyang 5 | # Date: 2018/3/6 6 | 7 | from __future__ import absolute_import 8 | from __future__ import division 9 | from __future__ import print_function 10 | 11 | from optparse import OptionParser 12 | 13 | from datum.utils.process_config import process_config 14 | from datum.models.yolo.yolo_dataset import YoloDataSet 15 | from eagle.brain.solver.yolo_solver import YoloSolver 16 | from eagle.brain.yolo.yolo_tiny_net import YoloTinyNet 17 | 18 | parser = OptionParser() 19 | parser.add_option("-c", "--conf", dest="configure", 20 | help="configure filename") 21 | (options, args) = parser.parse_args() 22 | if options.configure: 23 | conf_file = str(options.configure) 24 | else: 25 | print('please specify --conf configure filename') 26 | exit(0) 27 | 28 | common_params, dataset_params, net_params, solver_params = process_config(conf_file) 29 | dataset = YoloDataSet(common_params, dataset_params) 30 | net = YoloTinyNet(common_params, net_params) 31 | solver = YoloSolver(dataset, net, common_params, solver_params) 32 | solver.solve() -------------------------------------------------------------------------------- /eagle/README.md: -------------------------------------------------------------------------------- 1 | ## Main Modules of the Project 2 | ```shell 3 | eagle 4 | ├── brain 5 | │   ├── rotation 6 | │   │   └── yolo 7 | │   ├── solver 8 | │   │   ├── solver.py 9 | │   │   ├── ssd_solver.py 10 | │   │   └── yolo_solver.py 11 | │   ├── ssd 12 | │   │   ├── anchor_boxes.py 13 | │   │   ├── box_encode_decode_utils.py 14 | │   │   ├── loss.py 15 | │   │   ├── models 16 | │   │   │   ├── components.py 17 | │   │   │   ├── net.py 18 | │   │   │   ├── squeezenet_300.py 19 | │   │   │   ├── squeezenet_512.py 20 | │   │   │   └── vgg.py 21 | │   │   └── normalization.py 22 | │   └── yolo 23 | │   ├── net.py 24 | │   ├── yolo_net.py 25 | │   └── yolo_tiny_net.py 26 | ├── observe 27 | │   ├── augmentors 28 | │   │   ├── arithmetic.py 29 | │   │   ├── blur.py 30 | │   │   ├── color.py 31 | │   │   └── flip.py 32 | │   └── base 33 | ├── parameter.py 34 | └── utils.py 35 | 36 | eagle 37 | ├── README.md 38 | ├── brain core module of the detection algorithms 39 | │   ├── solver training/solving framework for the models 40 | │   ├── ssd files related to the SSD detection model 41 | │   │   └── models 42 | │   └── yolo files related to the YOLO detection model 43 | ├── observe data pre-processing module 44 | │   ├── augmentors image-processing (augmentation) code 45 | │   └── base base classes of the processing framework 46 | ├── parameter.py control of random parameter sampling 47 | ├── trainer processing logic that is actually invoked 48 | └── utils.py utility code shared across the project 49 | 50 | ```
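A minimal usage sketch of the observe/augmentors API, patterned on the scripts under checks/observe (the blur constructor argument follows check_average_blur.py; treat any other parameter shapes as assumptions):

```python
from skimage import data

import eagle.utils as eu
from eagle.observe.augmentors.blur import AverageBlur

# load a demo image and bring it to a fixed size
image = data.astronaut()
image = eu.imresize_single_image(image, (64, 64))

# blur with a 3x3 averaging kernel and apply it to a single image
aug = AverageBlur(k=3)
image_aug = aug.augment_image(image)
print(image_aug.shape)
```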
-------------------------------------------------------------------------------- /examples/unet/train.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2009 IW. 2 | # All rights reserved. 3 | # 4 | # Author: liuguiyang 5 | # Date: 2018/3/6 6 | 7 | from __future__ import absolute_import 8 | from __future__ import division 9 | from __future__ import print_function 10 | 11 | from optparse import OptionParser 12 | 13 | from datum.utils.process_config import process_config 14 | # from datum.models.yolo.yolo_dataset import YoloDataSet 15 | from datum.models.yolo.yolo_batch_dataset import YoloDataSet 16 | from eagle.brain.solver.yolo_u_solver import YoloUSolver 17 | from eagle.brain.yolo.yolo_u_net import YoloUNet 18 | 19 | parser = OptionParser() 20 | parser.add_option("-c", "--conf", dest="configure", 21 | help="configure filename") 22 | (options, args) = parser.parse_args() 23 | if options.configure: 24 | conf_file = str(options.configure) 25 | else: 26 | print('please specify --conf configure filename') 27 | exit(0) 28 | 29 | common_params, dataset_params, net_params, solver_params = process_config(conf_file) 30 | print("After Processing Config File!") 31 | dataset = YoloDataSet(common_params, dataset_params) 32 | print("Prepared DataSet!") 33 | net = YoloUNet(common_params, net_params) 34 | print("Building the Deep Learning Model!") 35 | solver = YoloUSolver(dataset, net, common_params, solver_params) 36 | print("Now Start Learning Best Parameters!") 37 | solver.solve() -------------------------------------------------------------------------------- /eagle/brain/rotation/yolo/net.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2009 IW. 2 | # All rights reserved. 3 | # 4 | # Author: liuguiyang 5 | # Date: 2018/3/1 6 | 7 | from __future__ import absolute_import 8 | from __future__ import division 9 | from __future__ import print_function 10 | 11 | 12 | class Net(object): 13 | def __init__(self, common_params, net_params): 14 | if not isinstance(common_params, dict): 15 | raise TypeError("common_params must be dict") 16 | if not isinstance(net_params, dict): 17 | raise TypeError("net_params must be dict") 18 | 19 | def inference(self, images): 20 | """Build the yolo model 21 | Args: 22 | images: 4-D tensor [batch_size, image_height, image_width, channels] 23 | Returns: 24 | predicts: 4-D tensor [batch_size, cell_size, cell_size, num_classes + 5 * boxes_per_cell] 25 | """ 26 | raise NotImplementedError 27 | 28 | def loss(self, predicts, labels, objects_num): 29 | """Add Loss to all the trainable variables 30 | Args: 31 | predicts: 4-D tensor [batch_size, cell_size, cell_size, 5 * boxes_per_cell] 32 | ===> (num_classes, boxes_per_cell, 4 * boxes_per_cell) 33 | labels : 3-D tensor of [batch_size, max_objects, 5] 34 | objects_num: 1-D tensor [batch_size] 35 | """ 36 | raise NotImplementedError
-------------------------------------------------------------------------------- /eagle/brain/ssd/normalization.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2009 IW. 2 | # All rights reserved. 3 | # 4 | # Author: liuguiyang 5 | # Date: 2017/12/18 6 | 7 | from __future__ import absolute_import 8 | from __future__ import division 9 | from __future__ import print_function 10 | 11 | import numpy as np 12 | 13 | import keras.backend as K 14 | from keras.engine.topology import Layer 15 | from keras.engine.topology import InputSpec 16 | 17 | 18 | class L2Normalization(Layer): 19 | def __init__(self, gamma_init=20, **kwargs): 20 | if K.image_dim_ordering() == 'tf': 21 | self.axis = 3 22 | else: 23 | self.axis = 1 24 | self.gamma_init = gamma_init 25 | super(L2Normalization, self).__init__(**kwargs) 26 | 27 | def build(self, input_shape): 28 | self.input_spec = [InputSpec(shape=input_shape)] 29 | gamma = self.gamma_init * np.ones((input_shape[self.axis],)) 30 | self.gamma = K.variable(gamma, name="{}_gamma".format(self.name)) 31 | self.trainable_weights = [self.gamma] 32 | super(L2Normalization, self).build(input_shape) 33 | 34 | def call(self, x, mask=None): 35 | output = K.l2_normalize(x, self.axis) 36 | output *= self.gamma 37 | return output 38 | 39 | def get_config(self): 40 | config = { 41 | 'gamma_init': self.gamma_init 42 | } 43 | base_config = super(L2Normalization, self).get_config() 44 | return dict(list(base_config.items()) + list(config.items())) 45 | -------------------------------------------------------------------------------- /conf/ssd_train.cfg: -------------------------------------------------------------------------------- 1 | [Common] 2 | model_name: VGG 3 | image_size: 300 4 | image_width: 300 5 | image_height: 300 6 | image_channel: 3 7 | num_classes: 1 8 | batch_size: 10 9 | is_predict: False 10 | 11 | [DataSet] 12 | # Each record in the dataset is stored as [image_path, xmin, ymin, xmax, ymax, class_id] 13 | path: /Volumes/projects/DataSets/CSUVideo/300x300/train_samples.txt 14 | # Whether to add a background class; background defaults to class 0 and is added automatically, all other labels are shifted up by one 15 | is_need_bg: True 16 | # The class list must match the classes used in the path file 17 | classes: ["airplane"] 18 | # Field layout of the records in the path file 19 | box_output_format: ["xmin", "xmax", "ymin", "ymax", "class_id"] 20 | # Number of processes used for data pre-processing 21 | thread_num: 8 22 | # When resizing the original image causes an aspect-ratio mismatch: 23 | # within the given range the image is resized directly, otherwise it is cropped first and then resized 24 | upper_resize_rate: 0.2 25 | lower_resize_rate: 0.2 26 | 27 | [BoxEncoder] 28 | # the spatial dimensions of the model's predictor layers to create the anchor boxes. 29 | predictor_sizes: [[37, 37], [18, 18], [9, 9], [5, 5], [3, 3], [1, 1]] 30 | scales: [0.1, 0.2, 0.37, 0.54, 0.71, 0.88, 1.05] 31 | aspect_ratios_per_layer: [[0.5, 1.0, 2.0], [0.333333, 0.5, 1.0, 2.0, 3.0], [0.333333, 0.5, 1.0, 2.0, 3.0], [0.333333, 0.5, 1.0, 2.0, 3.0], [0.5, 1.0, 2.0], [0.5, 1.0, 2.0]] 32 | two_boxes_for_ar1: True 33 | variances: [0.1, 0.1, 0.2, 0.2] 34 | coords: centroids 35 | normalize_coords: True 36 | pos_iou_threshold: 0.5 37 | neg_iou_threshold: 0.2 38 | 39 | [Net] 40 | neg_pos_ratio=3 41 | n_neg_min=0 42 | loss_alpha=1.0 43 | 44 | [Solver] 45 | lr: 0.0001 46 | beta_1=0.9 47 | beta_2=0.999 48 | epsilon=1e-08 49 | decay=5e-04 50 | max_iterators: 10000 51 | #pretrain_model_path: /Volumes/projects/github.com/Object.Tracking.Video/trainer/weights/ssd300_weights_epoch-00_loss-2.3397_val_loss-3.6407.h5 52 | pretrain_model_path: None 53 | train_dir: /Users/liuguiyang/github.com/DL.EyeSight/results/ssd/train_model/
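# Editor's note (derived from the [BoxEncoder] values above, not an original
# option): layers with 3 aspect ratios predict 4 boxes per cell (two_boxes_for_ar1
# adds a second box for ratio 1.0) and layers with 5 ratios predict 6, giving
#   37*37*4 + 18*18*6 + 9*9*6 + 5*5*6 + 3*3*4 + 1*1*4 = 8096 anchor boxes in total.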
-------------------------------------------------------------------------------- /checks/observe/check_median_blur.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2009 IW. 2 | # All rights reserved. 3 | # 4 | # Author: liuguiyang 5 | # Date: 2018/2/28 6 | 7 | from __future__ import absolute_import 8 | from __future__ import division 9 | from __future__ import print_function 10 | 11 | import cv2 12 | import numpy as np 13 | from skimage import data 14 | 15 | import eagle.utils as eu 16 | from eagle.observe.augmentors.blur import MedianBlur 17 | 18 | TIME_PER_STEP = 5000 19 | NB_AUGS_PER_IMAGE = 10 20 | 21 | def main(): 22 | image = data.astronaut() 23 | image = eu.imresize_single_image(image, (64, 64)) 24 | print("image shape:", image.shape) 25 | print("Press any key or wait %d ms to proceed to the next image." % (TIME_PER_STEP,)) 26 | 27 | k = [ 28 | 1, 29 | 3, 30 | 5, 31 | 7, 32 | (3, 3), 33 | (1, 11) 34 | ] 35 | 36 | cv2.namedWindow("aug", cv2.WINDOW_NORMAL) 37 | cv2.resizeWindow("aug", 64*NB_AUGS_PER_IMAGE, 64) 38 | #cv2.imshow("aug", image[..., ::-1]) 39 | #cv2.waitKey(TIME_PER_STEP) 40 | 41 | for ki in k: 42 | aug = MedianBlur(k=ki) 43 | img_aug = [aug.augment_image(image) for _ in range(NB_AUGS_PER_IMAGE)] 44 | img_aug = np.hstack(img_aug) 45 | print("dtype", img_aug.dtype, "averages", np.average(img_aug, axis=tuple(range(0, img_aug.ndim-1)))) 46 | #print("dtype", img_aug.dtype, "averages", img_aug.mean(axis=range(1, img_aug.ndim))) 47 | 48 | # title = "k=%s" % (str(ki),) 49 | # img_aug = ia.draw_text(img_aug, x=5, y=5, text=title) 50 | 51 | cv2.imshow("aug", img_aug[..., ::-1]) # here with rgb2bgr 52 | cv2.waitKey(TIME_PER_STEP) 53 | 54 | if __name__ == "__main__": 55 | main() -------------------------------------------------------------------------------- /checks/observe/check_gaussian_blur.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2009 IW. 2 | # All rights reserved. 3 | # 4 | # Author: liuguiyang 5 | # Date: 2018/2/28 6 | 7 | from __future__ import absolute_import 8 | from __future__ import division 9 | from __future__ import print_function 10 | 11 | import cv2 12 | import numpy as np 13 | from skimage import data 14 | 15 | import eagle.utils as eu 16 | # Editor's note: the original file imported MedianBlur here, apparently a copy-paste slip from check_median_blur.py; GaussianBlur (assumed to exist in blur.py alongside MedianBlur/AverageBlur) matches this check's intent 17 | from eagle.observe.augmentors.blur import GaussianBlur 18 | 19 | TIME_PER_STEP = 5000 20 | NB_AUGS_PER_IMAGE = 10 21 | 22 | 23 | def main(): 24 | image = data.astronaut() 25 | image = eu.imresize_single_image(image, (128, 128)) 26 | print("image shape:", image.shape) 27 | print("Press any key or wait %d ms to proceed to the next image." % (TIME_PER_STEP,)) 28 | 29 | k = [ 30 | 1, 31 | 3, 32 | 5, 33 | 7, 34 | (3, 3), 35 | (1, 11) 36 | ] 37 | 38 | cv2.namedWindow("aug", cv2.WINDOW_NORMAL) 39 | cv2.resizeWindow("aug", 128*NB_AUGS_PER_IMAGE, 128) 40 | #cv2.imshow("aug", image[..., ::-1]) 41 | #cv2.waitKey(TIME_PER_STEP) 42 | 43 | for ki in k: 44 | aug = GaussianBlur(ki)  # was MedianBlur(k=ki); see editor's note above 45 | img_aug = [aug.augment_image(image) for _ in range(NB_AUGS_PER_IMAGE)] 46 | img_aug = np.hstack(img_aug) 47 | print("dtype", img_aug.dtype, "averages", np.average(img_aug, axis=tuple(range(0, img_aug.ndim-1)))) 48 | #print("dtype", img_aug.dtype, "averages", img_aug.mean(axis=range(1, img_aug.ndim))) 49 | 50 | # title = "k=%s" % (str(ki),) 51 | # img_aug = ia.draw_text(img_aug, x=5, y=5, text=title) 52 | 53 | cv2.imshow("aug", img_aug[..., ::-1]) # here with rgb2bgr 54 | cv2.waitKey(TIME_PER_STEP) 55 | 56 | if __name__ == "__main__": 57 | main()
-------------------------------------------------------------------------------- /conf/ssd_train_server.cfg: -------------------------------------------------------------------------------- 1 | [Common] 2 | image_size: 300 3 | image_width: 300 4 | image_height: 300 5 | image_channel: 3 6 | num_classes: 20 7 | batch_size: 64 8 | 9 | [DataSet] 10 | # Each record in the dataset is stored as [image_path, xmin, ymin, xmax, ymax, class_id] 11 | path: /home/ai-i-liuguiyang/datasets/VOC/total.txt 12 | # Whether to add a background class; background defaults to class 0 and is added automatically, all other labels are shifted up by one 13 | is_need_bg: True 14 | # The class list must match the classes used in the path file 15 | classes: ["aeroplane", "bicycle", "bird", "boat", "bottle", "bus", "car", "cat", "chair", "cow", "diningtable", "dog", "horse", "motorbike", "person", "pottedplant", "sheep", "sofa", "train", "tvmonitor"] 16 | # Field layout of the records in the path file 17 | box_output_format: ["xmin", "xmax", "ymin", "ymax", "class_id"] 18 | # Number of processes used for data pre-processing 19 | thread_num: 10 20 | # When resizing the original image causes an aspect-ratio mismatch: 21 | # within the given range the image is resized directly, otherwise it is cropped first and then resized 22 | upper_resize_rate: 0.2 23 | lower_resize_rate: 0.2 24 | 25 | [BoxEncoder] 26 | # the spatial dimensions of the model's predictor layers to create the anchor boxes. 27 | predictor_sizes: [[37, 37], [18, 18], [9, 9], [5, 5], [3, 3], [1, 1]] 28 | scales: [0.1, 0.2, 0.37, 0.54, 0.71, 0.88, 1.05] 29 | aspect_ratios_per_layer: [[0.5, 1.0, 2.0], [0.333333, 0.5, 1.0, 2.0, 3.0], [0.333333, 0.5, 1.0, 2.0, 3.0], [0.333333, 0.5, 1.0, 2.0, 3.0], [0.5, 1.0, 2.0], [0.5, 1.0, 2.0]] 30 | two_boxes_for_ar1: True 31 | variances: [0.1, 0.1, 0.2, 0.2] 32 | coords: centroids 33 | normalize_coords: True 34 | pos_iou_threshold: 0.5 35 | neg_iou_threshold: 0.2 36 | 37 | [Net] 38 | neg_pos_ratio=3 39 | n_neg_min=0 40 | loss_alpha=1.0 41 | 42 | [Solver] 43 | lr: 0.0001 44 | beta_1=0.9 45 | beta_2=0.999 46 | epsilon=1e-08 47 | decay=5e-04 48 | max_iterators: 100000 49 | pretrain_model_path: /home/ai-i-liuguiyang/github.com/DL.EyeSight/results/ssd/pretrain/model.ckpt-64000 50 | train_dir: /home/ai-i-liuguiyang/github.com/DL.EyeSight/results/ssd/train_model/ -------------------------------------------------------------------------------- /examples/ssd/vgg_trainer.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2009 IW. 2 | # All rights reserved.
3 | # 4 | # Author: liuguiyang 5 | # Date: 2017/12/18 6 | 7 | from __future__ import absolute_import 8 | from __future__ import division 9 | from __future__ import print_function 10 | 11 | from optparse import OptionParser 12 | 13 | import os, sys 14 | abs_path = os.path.abspath(__file__) 15 | proj_root = "/".join(abs_path.split("/")[:-3]) 16 | sys.path.insert(0, proj_root) 17 | 18 | from importlib import reload 19 | reload(sys) 20 | 21 | from datum.utils.process_config import process_config 22 | from datum.models.ssd.ssd_dataset import SSDDataSet 23 | from eagle.brain.ssd.models.vgg import SSDVGG 24 | from eagle.brain.ssd.models.vgg_dilated import SSDVGGDilated 25 | from eagle.brain.solver.ssd_solver import SSDSolver 26 | 27 | parser = OptionParser() 28 | parser.add_option("-c", "--conf", 29 | dest="configure", 30 | help="configure filename") 31 | (options, args) = parser.parse_args() 32 | if options.configure: 33 | conf_file = str(options.configure) 34 | else: 35 | print('please specify --conf configure filename') 36 | exit(0) 37 | 38 | common_params, dataset_params, net_params, solver_params, box_encoder_params = \ 39 | process_config(conf_file) 40 | 41 | data_generator = SSDDataSet(common_params, dataset_params, box_encoder_params) 42 | model_name = common_params.get("model_name", "VGG") 43 | if model_name == "VGG": 44 | net = SSDVGG(common_params, net_params, box_encoder_params) 45 | elif model_name == "VGG-Dilated": 46 | net = SSDVGGDilated(common_params, net_params, box_encoder_params) 47 | else: 48 | raise ValueError("model_name is not fitted !", model_name) 49 | solver = SSDSolver(data_generator, net, common_params, solver_params) 50 | solver.solve() 51 | -------------------------------------------------------------------------------- /checks/observe/check_average_blur.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2009 IW. 2 | # All rights reserved. 3 | # 4 | # Author: liuguiyang 5 | # Date: 2018/2/28 6 | 7 | from __future__ import absolute_import 8 | from __future__ import division 9 | from __future__ import print_function 10 | 11 | import cv2 12 | import numpy as np 13 | from skimage import data 14 | 15 | import eagle.utils as eu 16 | from eagle.observe.augmentors.blur import AverageBlur 17 | 18 | 19 | TIME_PER_STEP = 5000 20 | NB_AUGS_PER_IMAGE = 10 21 | 22 | 23 | def main(): 24 | image = data.astronaut() 25 | image = eu.imresize_single_image(image, (64, 64)) 26 | print("image shape:", image.shape) 27 | print("Press any key or wait %d ms to proceed to the next image." 
% (TIME_PER_STEP,)) 28 | 29 | k = [ 30 | 1, 31 | 2, 32 | 4, 33 | 8, 34 | 16, 35 | (8, 8), 36 | (1, 8), 37 | ((1, 1), (8, 8)), 38 | ((1, 16), (1, 16)), 39 | ((1, 16), 1) 40 | ] 41 | 42 | cv2.namedWindow("aug", cv2.WINDOW_NORMAL) 43 | cv2.resizeWindow("aug", 64*NB_AUGS_PER_IMAGE, 64) 44 | #cv2.imshow("aug", image[..., ::-1]) 45 | #cv2.waitKey(TIME_PER_STEP) 46 | 47 | for ki in k: 48 | aug = AverageBlur(k=ki) 49 | img_aug = [aug.augment_image(image) for _ in range(NB_AUGS_PER_IMAGE)] 50 | img_aug = np.hstack(img_aug) 51 | print("dtype", img_aug.dtype, "averages", np.average(img_aug, axis=tuple(range(0, img_aug.ndim-1)))) 52 | #print("dtype", img_aug.dtype, "averages", img_aug.mean(axis=range(1, img_aug.ndim))) 53 | 54 | # title = "k=%s" % (str(ki),) 55 | # img_aug = ia.draw_text(img_aug, x=5, y=5, text=title) 56 | 57 | cv2.imshow("aug", img_aug[..., ::-1]) # here with rgb2bgr 58 | cv2.waitKey(TIME_PER_STEP) 59 | 60 | if __name__ == "__main__": 61 | main() -------------------------------------------------------------------------------- /conf/dilated_ssd_train.cfg: -------------------------------------------------------------------------------- 1 | [Common] 2 | model_name: VGG-Dilated 3 | image_size: 300 4 | image_width: 300 5 | image_height: 300 6 | image_channel: 3 7 | num_classes: 20 8 | batch_size: 10 9 | is_predict: False 10 | 11 | [DataSet] 12 | # Each record in the dataset is stored as [image_path, xmin, ymin, xmax, ymax, class_id] 13 | path: /Volumes/projects/DataSets/VOC/pascal_voc_2007.txt 14 | # Whether to add a background class; background defaults to class 0 and is added automatically, all other labels are shifted up by one 15 | is_need_bg: True 16 | # The class list must match the classes used in the path file 17 | classes: ["aeroplane", "bicycle", "bird", "boat", "bottle", "bus", "car", "cat", "chair", "cow", "diningtable", "dog", "horse", "motorbike", "person", "pottedplant", "sheep", "sofa", "train", "tvmonitor"] 18 | # Field layout of the records in the path file 19 | box_output_format: ["xmin", "xmax", "ymin", "ymax", "class_id"] 20 | # Number of processes used for data pre-processing 21 | thread_num: 8 22 | # When resizing the original image causes an aspect-ratio mismatch: 23 | # within the given range the image is resized directly, otherwise it is cropped first and then resized 24 | upper_resize_rate: 0.2 25 | lower_resize_rate: 0.2 26 | 27 | [BoxEncoder] 28 | # the spatial dimensions of the model's predictor layers to create the anchor boxes. 29 | predictor_sizes: [[37, 37], [18, 18], [9, 9], [5, 5], [3, 3], [1, 1]] 30 | scales: [0.1, 0.2, 0.37, 0.54, 0.71, 0.88, 1.05] 31 | aspect_ratios_per_layer: [[0.5, 1.0, 2.0], [0.333333, 0.5, 1.0, 2.0, 3.0], [0.333333, 0.5, 1.0, 2.0, 3.0], [0.333333, 0.5, 1.0, 2.0, 3.0], [0.5, 1.0, 2.0], [0.5, 1.0, 2.0]] 32 | two_boxes_for_ar1: True 33 | variances: [0.1, 0.1, 0.2, 0.2] 34 | coords: centroids 35 | normalize_coords: True 36 | pos_iou_threshold: 0.5 37 | neg_iou_threshold: 0.2 38 | 39 | [Net] 40 | neg_pos_ratio=3 41 | n_neg_min=0 42 | loss_alpha=1.0 43 | 44 | [Solver] 45 | lr: 0.0001 46 | beta_1=0.9 47 | beta_2=0.999 48 | epsilon=1e-08 49 | decay=5e-04 50 | max_iterators: 1000000 51 | #pretrain_model_path: /Volumes/projects/github.com/Object.Tracking.Video/trainer/weights/ssd300_weights_epoch-00_loss-2.3397_val_loss-3.6407.h5 52 | pretrain_model_path: None 53 | train_dir: /Users/liuguiyang/github.com/DL.EyeSight/results/yolo/train_model/
-------------------------------------------------------------------------------- /conf/ssd_train_512.cfg: -------------------------------------------------------------------------------- 1 | [Common] 2 | model_name: VGG-Dilated 3 | image_size: 512 4 | image_width: 512 5 | image_height: 512 6 | image_channel: 3 7 | num_classes: 12 8 | batch_size: 1 9 | is_predict: False 10 | 11 | [DataSet] 12 | # Each record in the dataset is stored as [image_path, xmin, ymin, xmax, ymax, class_id] 13 | # Target labels in the dataset start at 0, in the same order as the indices in classes 14 | path: /Volumes/projects/DataSets/LSD12/train_data_list.txt 15 | # Whether to add a background class; background defaults to class 0 and is added automatically, all other labels are shifted up by one 16 | is_need_bg: True 17 | # The class list must match the classes used in the path file 18 | classes: ["airplane", "ship", "storagetank", "baseballdiamond", "tenniscourt", "basketballcourt", "groundtrackfield", "harbor", "bridge", "vehicle", "van", "truck"] 19 | # Field layout of the records in the path file 20 | box_output_format: ["xmin", "xmax", "ymin", "ymax", "class_id"] 21 | # Number of processes used for data pre-processing 22 | thread_num: 8 23 | # When resizing the original image causes an aspect-ratio mismatch: 24 | # within the given range the image is resized directly, otherwise it is cropped first and then resized 25 | upper_resize_rate: 0.2 26 | lower_resize_rate: 0.2 27 | 28 | [BoxEncoder] 29 | # the spatial dimensions of the model's predictor layers to create the anchor boxes. 30 | predictor_sizes: [[64, 64], [32, 32], [16, 16], [7, 7], [3, 3], [1, 1]] 31 | scales: [0.1, 0.2, 0.37, 0.54, 0.71, 0.88, 1.05] 32 | aspect_ratios_per_layer: [[0.5, 1.0, 2.0], [0.333333, 0.5, 1.0, 2.0, 3.0], [0.333333, 0.5, 1.0, 2.0, 3.0], [0.333333, 0.5, 1.0, 2.0, 3.0], [0.5, 1.0, 2.0], [0.5, 1.0, 2.0]] 33 | two_boxes_for_ar1: True 34 | variances: [0.1, 0.1, 0.2, 0.2] 35 | coords: centroids 36 | normalize_coords: True 37 | pos_iou_threshold: 0.5 38 | neg_iou_threshold: 0.2 39 | 40 | [Net] 41 | neg_pos_ratio=3 42 | n_neg_min=0 43 | loss_alpha=1.0 44 | 45 | [Solver] 46 | lr: 0.0001 47 | beta_1=0.9 48 | beta_2=0.999 49 | epsilon=1e-08 50 | decay=5e-04 51 | max_iterators: 1000000 52 | #pretrain_model_path: /Volumes/projects/github.com/Object.Tracking.Video/trainer/weights/ssd300_weights_epoch-00_loss-2.3397_val_loss-3.6407.h5 53 | pretrain_model_path: None 54 | train_dir: /Users/liuguiyang/github.com/DL.EyeSight/results/dilated/train_model/ -------------------------------------------------------------------------------- /eagle/brain/ssd/models/components.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2009 IW. 2 | # All rights reserved.
3 | # 4 | # Author: liuguiyang 5 | # Date: 2018/3/7 6 | 7 | from __future__ import absolute_import 8 | from __future__ import division 9 | from __future__ import print_function 10 | 11 | from keras.layers import Activation, Conv2D, Concatenate 12 | from keras.layers import BatchNormalization 13 | 14 | def _fire(x, filters, name="fire"): 15 | sq_filters, ex1_filters, ex2_filters = filters 16 | squeeze = Conv2D(sq_filters, (1, 1), activation="relu", padding="same", kernel_initializer="he_normal", name=name+"/squeeze1x1")(x) 17 | expand1 = Conv2D(ex1_filters, (1, 1), activation="relu", padding="same", kernel_initializer="he_normal", name=name+"/expand1x1")(squeeze) 18 | expand2 = Conv2D(ex2_filters, (3, 3), activation="relu", padding="same", kernel_initializer="he_normal", name=name+"/expand3x3")(squeeze) 19 | x = Concatenate(axis=-1, name=name+"/concate")([expand1, expand2]) 20 | return x 21 | 22 | def _fire_with_bn(x, filters, name="fire"): 23 | sq_filters, ex1_filters, ex2_filters = filters 24 | squeeze = Conv2D(sq_filters, (1, 1), activation="relu", padding="same", kernel_initializer="he_normal", name=name+"/squeeze1x1")(x) 25 | expand1 = Activation(activation="relu", name=name+"/relu_expand1x1")(BatchNormalization(name=name+"/expand1x1/bn")(Conv2D(ex1_filters, (1, 1), strides=(1, 1), padding="same", kernel_initializer="he_normal", name=name+"/expand1x1")(squeeze))) 26 | expand2 = Activation(activation="relu", name=name+"/relu_expand3x3")(BatchNormalization(name=name+"/expand3x3/bn")(Conv2D(ex2_filters, (3, 3), strides=(1, 1), padding="same", kernel_initializer="he_normal", name=name+"/expand3x3")(squeeze))) 27 | x = Concatenate(axis=-1, name=name+"/concate")([expand1, expand2]) 28 | return x 29 | 30 | def _conv2D_with_bn(x, n_filters, k_size, k_stride, name, pad="same"): 31 | x = Conv2D(n_filters, k_size, strides=(k_stride, k_stride), padding=pad, kernel_initializer="he_normal", name=name+"/conv")(x) 32 | x = BatchNormalization(name=name+"/bn")(x) 33 | x = Activation(activation="relu", name=name+"/relu")(x) 34 | return x 35 | -------------------------------------------------------------------------------- /examples/unet/predict.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2009 IW. 2 | # All rights reserved. 
3 | # 4 | # Author: liuguiyang 5 | # Date: 2018/3/6 6 | 7 | from __future__ import absolute_import 8 | from __future__ import division 9 | from __future__ import print_function 10 | 11 | import cv2 12 | import numpy as np 13 | from optparse import OptionParser 14 | 15 | from datum.utils.process_config import process_config 16 | # from datum.models.yolo.yolo_dataset import YoloDataSet 17 | from datum.models.yolo.yolo_batch_dataset import YoloDataSet 18 | from eagle.brain.solver.yolo_u_solver import YoloUSolver 19 | from eagle.brain.yolo.yolo_u_net import YoloUNet 20 | 21 | parser = OptionParser() 22 | parser.add_option("-c", "--conf", dest="configure", 23 | help="configure filename") 24 | (options, args) = parser.parse_args() 25 | if options.configure: 26 | conf_file = str(options.configure) 27 | else: 28 | print('please specify --conf configure filename') 29 | exit(0) 30 | 31 | common_params, dataset_params, net_params, solver_params = process_config(conf_file) 32 | print("After Processing Config File!") 33 | # dataset = YoloDataSet(common_params, dataset_params) 34 | # print("Prepared DataSet!") 35 | net = YoloUNet(common_params, net_params) 36 | print("Building the Deep Learning Model!") 37 | solver = YoloUSolver(None, net, common_params, solver_params) 38 | print("Now Start Predicting with the Trained Parameters!") 39 | image_path = "/Volumes/projects/DataSets/CSUVideo/512x512/large_tunisia_total/JPEGImages/000011_1428_408_1940_920_35.jpg" 40 | 41 | img_width, img_height = 512, 512 42 | single_image = cv2.imread(image_path) 43 | resized_img = cv2.resize(single_image, (img_height, img_width)) 44 | np_img = cv2.cvtColor(resized_img, cv2.COLOR_BGR2RGB) 45 | np_img = np_img.astype(np.float32) 46 | np_img = np_img / 255.0 * 2 - 1 47 | np_img = np.reshape(np_img, (1, img_height, img_width, 3)) 48 | 49 | (xmin, ymin, xmax, ymax, class_num) = solver.model_predict(np_img) 50 | 51 | cv2.rectangle(resized_img, (int(xmin), int(ymin)), 52 | (int(xmax), int(ymax)), (0, 0, 255)) 53 | # cv2.imwrite('cat_out.jpg', resized_img) 54 | cv2.imshow('cat_out.jpg', resized_img) 55 | cv2.waitKey() -------------------------------------------------------------------------------- /Others/lsd12/check_dataset.py: -------------------------------------------------------------------------------- 1 | import cv2 2 | 3 | from datum.utils.tools import extract_target_from_xml 4 | from Others.lsd12.label_config import sign_idx_dict, idx_sign_dict 5 | 6 | 7 | dataset_dir = "/Volumes/projects/DataSets/LSD12/" 8 | 9 | 10 | def get_true_id_label(label_name): 11 | """ 12 | :return: label_id, label_name 13 | """ 14 | return sign_idx_dict[label_name], idx_sign_dict[sign_idx_dict[label_name]] 15 | 16 | 17 | def disp_image(): 18 | with open(dataset_dir + "total.txt", "r") as reader: 19 | for line in reader.readlines(): 20 | line = line.strip() 21 | image_name = line + ".jpg" 22 | anno_name = line + ".xml" 23 | image = cv2.imread(dataset_dir + "JPEGImages/" + image_name) 24 | anno_list = extract_target_from_xml(dataset_dir + "Annotations/" + anno_name) 25 | for item in anno_list: 26 | label_id, label_name = get_true_id_label(item[-1]) 27 | item[-1] = label_name 28 | xmin, ymin, xmax, ymax = item[:4] 29 | cv2.rectangle(image, (xmin, ymin), (xmax, ymax), (255, 0, 0), 2) 30 | cv2.imshow("src", image) 31 | cv2.waitKey() 32 | 33 | 34 | def convert_standard(): 35 | output_path = dataset_dir + "train_data_list.txt" 36 | write_handler = open(output_path, "w") 37 | 38 | with open(dataset_dir + "train.txt", "r") as reader: 39 | for line in
reader.readlines(): 40 | line = line.strip() 41 | image_path = dataset_dir + "JPEGImages/" + line + ".jpg" 42 | anno_path = dataset_dir + "Annotations/" + line + ".xml" 43 | anno_list = extract_target_from_xml(anno_path) 44 | anno_str_list = [] 45 | for item in anno_list: 46 | label_id, label_name = get_true_id_label(item[-1]) 47 | item[-1] = label_id - 1 48 | item = [str(cell) for cell in item] 49 | anno_str_list.append(" ".join(item)) 50 | anno_info = " ".join(anno_str_list) 51 | write_handler.write(image_path + " " + anno_info + "\n") 52 | write_handler.close() 53 | print("saved the converted data to ", output_path) 54 | 55 | 56 | if __name__ == '__main__': 57 | convert_standard() 58 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Object Detection in Images and Videos 2 | The main goal of this project is to integrate the object-detection models commonly used in deep learning and to apply them to detection in both images and videos! 3 | 4 | ## Development Environment 5 | ```shell 6 | OS: Ubuntu 16.04 7 | Python: Python 3.6.0 8 | Tensorflow: 1.4.1 version 9 | Opencv: 3.2.0 version for python 10 | ``` 11 | 12 | ## Implemented Models 13 | * SSD+VGG 14 | * SSD+Res 15 | * SSD+Inception 16 | * SSD+SqueezeNet 17 | * SSD+Deconvolution 18 | * YOLO 19 | 20 | ## Usage 21 | 1. Prepare the dataset first 22 | ```shell 23 | cat /Volumes/projects/DataSets/VOC2007/voc_train.txt 24 | image_path01 xmin ymin xmax ymax class_id xmin ymin xmax ymax class_id 25 | image_path02 xmin ymin xmax ymax class_id xmin ymin xmax ymax class_id 26 | image_path03 xmin ymin xmax ymax class_id xmin ymin xmax ymax class_id 27 | 28 | PS: class_id starts at 0 and follows the same label order as in the cfg file 29 | ``` 30 | 2. Edit the configuration file 31 | The configuration file lives in the project root: **conf/ssd_train.cfg** 32 | It contains a number of options that may need adjusting. 33 | 34 | 3. Run the program 35 | Change into the examples/ssd directory 36 | ``` 37 | python vgg_trainer.py -c ../../conf/ssd_train.cfg 38 | ```
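A minimal sketch (not part of the repository) of how one line of this training list can be parsed, assuming the whitespace-separated layout shown above:

```python
def parse_train_line(line):
    """Split one list line into (image_path, [[xmin, ymin, xmax, ymax, class_id], ...])."""
    fields = line.strip().split()
    image_path, rest = fields[0], fields[1:]
    # every 5 consecutive fields describe one ground-truth box
    boxes = [list(map(int, rest[i:i + 5])) for i in range(0, len(rest), 5)]
    return image_path, boxes
```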
62 |
63 | 64 |
65 |
67 |
68 | 69 |
70 |
72 |
73 | 74 |
75 |
78 | 79 | 80 | ## 联系我 81 | * New Issues 82 | * Send me E-mail: liuguiyangnwpu@163.com 83 | -------------------------------------------------------------------------------- /Others/satellite/process.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2009 IW. 2 | # All rights reserved. 3 | # 4 | # Author: liuguiyang 5 | # Date: 2018/3/15 6 | 7 | from __future__ import absolute_import 8 | from __future__ import division 9 | from __future__ import print_function 10 | 11 | import os 12 | import codecs 13 | import xml.etree.ElementTree as ET 14 | 15 | 16 | data_dir = "/Volumes/projects/DataSets/CSUVideo/512x512" 17 | image_sets = ["large_000013363_total", "large_000014631_total", 18 | "large_minneapolis_1_total", "large_tunisia_total"] 19 | 20 | def parse_xml(xml_file): 21 | """ 22 | Args: 23 | xml_file: the input xml file path 24 | 25 | Returns: 26 | image_path: string 27 | labels: list of [xmin, ymin, xmax, ymax, class] 28 | """ 29 | tree = ET.parse(xml_file) 30 | root = tree.getroot() 31 | labels = [] 32 | 33 | for item in root: 34 | if item.tag == 'object': 35 | obj_num = 1 36 | bndbox = item.find("bndbox") 37 | xmin = int(float(bndbox.find("xmin").text)) 38 | ymin = int(float(bndbox.find("ymin").text)) 39 | xmax = int(float(bndbox.find("xmax").text)) 40 | ymax = int(float(bndbox.find("ymax").text)) 41 | labels.append([xmin, ymin, xmax, ymax, obj_num]) 42 | 43 | return labels 44 | 45 | 46 | def convert_list2str(labels): 47 | return ",".join([",".join(list(map(str, item))) for item in labels]) 48 | 49 | 50 | for dataset in image_sets: 51 | anno_prefix = "/".join([data_dir, dataset, "Annotations"]) 52 | image_prefix = "/".join([data_dir, dataset, "JPEGImages"]) 53 | with codecs.open(data_dir + "/" + dataset + ".txt", "w", "utf8") as writer: 54 | for anno_name in os.listdir(anno_prefix): 55 | if anno_name.startswith("."): 56 | continue 57 | anno_path = "/".join([anno_prefix, anno_name]) 58 | image_name = anno_name.replace("xml", "jpg") 59 | image_path = "/".join([image_prefix, image_name]) 60 | if not os.path.isfile(image_path): 61 | print("{} not found !".format(image_path)) 62 | labels = parse_xml(anno_path) 63 | anno_info = convert_list2str(labels) 64 | writer.write("{},{}\n".format(image_path, anno_info)) 65 | -------------------------------------------------------------------------------- /Others/vedia/show.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2009 IW. 2 | # All rights reserved. 
#
# Author: liuguiyang
# Date: 2018/3/12

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

# Visualize the annotations (which carry rotation angles) on the images

import os
import codecs

import cv2
import numpy as np

data_prefix = "/Volumes/projects/DataSets/VEDIA/"
images_dir = ["512/Vehicules512/", "1024/Vehicules1024/"]
annotations_filepath = ["512/Annotations512/annotation512.txt",
                        "1024/Annotations1024/annotation1024.txt"]


def show_image():
    for img_dir, anno_file in zip(images_dir, annotations_filepath):
        abs_img_dir = data_prefix + img_dir
        abs_anno_path = data_prefix + anno_file
        if not os.path.isfile(abs_anno_path):
            raise ValueError("{} file not found !".format(abs_anno_path))
        images_dict = {}
        with codecs.open(abs_anno_path, "r", "utf8") as reader:
            for line in reader:
                line = line.strip().split(' ')
                name_prefix = line[0] + "_co.png"
                image_path = abs_img_dir + name_prefix
                if image_path in images_dict.keys():
                    images_dict[image_path].append(line)
                else:
                    images_dict[image_path] = [line]
        for img_path in images_dict.keys():
            if not os.path.isfile(img_path):
                raise IOError("{} image path not found !".format(img_path))
            image = cv2.imread(img_path)

            for line in images_dict[img_path]:
                center_x, center_y = float(line[1]), float(line[2])
                rotate_theta = float(line[3])
                # line[4:8] are the corner x-coordinates, line[8:12] the
                # corner y-coordinates; transpose yields four (x, y) points
                points = np.array(list(map(float, line[4:12])),
                                  np.int32).reshape((2, -1)).T
                fully_contain = int(line[-2])
                occluded = int(line[-1])

                points = points.reshape((-1, 1, 2))
                cv2.polylines(image, [points], True, color=(255, 0, 0))

            cv2.imshow("src", image)
            cv2.waitKey()


if __name__ == '__main__':
    show_image()
-------------------------------------------------------------------------------- /checks/observe/check_color.py: --------------------------------------------------------------------------------
# Copyright (c) 2009 IW.
# All rights reserved.
#
# Author: liuguiyang
# Date: 2018/2/28

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import cv2
import numpy as np
from skimage import data

import eagle.utils as eu
from eagle.observe.augmentors.flip import Fliplr
from eagle.observe.augmentors.arithmetic import Add
from eagle.observe.augmentors.color import WithChannels, WithColorspace

TIME_PER_STEP = 10000


def main_WithChannels():
    image = data.astronaut()
    print("image shape:", image.shape)
    print("Press any key or wait %d ms to proceed to the next image."
% (TIME_PER_STEP,)) 27 | 28 | children_all = [ 29 | ("hflip", Fliplr(1)), 30 | ("add", Add(50)) 31 | ] 32 | 33 | channels_all = [ 34 | None, 35 | 0, 36 | [], 37 | [0], 38 | [0, 1], 39 | [1, 2], 40 | [0, 1, 2] 41 | ] 42 | 43 | cv2.namedWindow("aug", cv2.WINDOW_NORMAL) 44 | cv2.imshow("aug", image[..., ::-1]) 45 | cv2.waitKey(TIME_PER_STEP) 46 | 47 | for children_title, children in children_all: 48 | for channels in channels_all: 49 | aug = WithChannels(channels=channels, children=children) 50 | img_aug = aug.augment_image(image) 51 | print("dtype", img_aug.dtype, "averages", np.average(img_aug, axis=tuple(range(0, img_aug.ndim-1)))) 52 | #print("dtype", img_aug.dtype, "averages", img_aug.mean(axis=range(1, img_aug.ndim))) 53 | 54 | # title = "children=%s | channels=%s" % (children_title, channels) 55 | # img_aug = ia.draw_text(img_aug, x=5, y=5, text=title) 56 | 57 | cv2.imshow("aug", img_aug[..., ::-1]) # here with rgb2bgr 58 | cv2.waitKey(TIME_PER_STEP) 59 | 60 | 61 | def main_WithColorspace(): 62 | image = data.astronaut() 63 | print("image shape:", image.shape) 64 | 65 | aug = WithColorspace( 66 | from_colorspace="RGB", 67 | to_colorspace="HSV", 68 | children=WithChannels(0, Add(50)) 69 | ) 70 | 71 | aug_no_colorspace = WithChannels(0, Add(50)) 72 | 73 | img_show = np.hstack([ 74 | image, 75 | aug.augment_image(image), 76 | aug_no_colorspace.augment_image(image) 77 | ]) 78 | 79 | cv2.namedWindow("aug", cv2.WINDOW_NORMAL) 80 | cv2.imshow("aug", img_show[..., ::-1]) 81 | cv2.waitKey(TIME_PER_STEP) 82 | 83 | if __name__ == "__main__": 84 | # main_WithChannels() 85 | main_WithColorspace() 86 | -------------------------------------------------------------------------------- /Others/satellite/bbox_cluster.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2009 IW. 2 | # All rights reserved. 
#
# Author: liuguiyang
# Date: 2018/3/15

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

# Cluster the target bounding boxes with a clustering method
import os
import numpy as np

import matplotlib.pyplot as plt
from matplotlib.font_manager import FontProperties

from Others.satellite.process import parse_xml

# print(plt.rcParams.keys())
# font = FontProperties(fname='/Library/Fonts/ufonts.com_fangsong.ttf')
font = FontProperties(fname='/Users/liuguiyang/Library/Fonts/仿宋_GB2312.ttf')

data_dir = "/Volumes/projects/DataSets/CSUVideo/source"
# namesets = ["large_000013363_total", "large_000014631_total",
#             "large_minneapolis_1_total", "large_tunisia_total"]
namesets = ["large_000013363_total"]


datum = []
for name in namesets:
    anno_prefix = "/".join([data_dir, name, "Annotations"])
    for anno_name in os.listdir(anno_prefix):
        if anno_name.startswith("."):
            continue
        anno_path = "/".join([anno_prefix, anno_name])
        # [xmin, ymin, xmax, ymax, class_id]
        labels = parse_xml(anno_path)
        if len(labels) == 0:
            continue
        datum.extend(labels)
datum = np.array(datum, np.int32)

datum_width = datum[:, 2] - datum[:, 0]
datum_height = datum[:, 3] - datum[:, 1]
datum_ratio = datum_width / datum_height

print(datum_width.shape)
print(datum_height.shape)
print(datum_ratio.shape)

# frequency histogram of the target widths
d = {}
for i in datum_width:
    d.setdefault(i, 0)
    d[i] += 1
x_w = d.keys()
y_w = d.values()

# frequency histogram of the target heights
d = {}
for i in datum_height:
    d.setdefault(i, 0)
    d[i] += 1
x_h = d.keys()
y_h = d.values()

select_1 = plt.scatter(x_w, y_w, marker="o", label=u'目标宽的分布')  # "distribution of target widths"
select_2 = plt.scatter(x_h, y_h, marker="*", label=u'目标高的分布')  # "distribution of target heights"
plt.legend(handles=[select_1, select_2], prop=font)

plt.title(u"目标尺寸分布图", fontproperties=font)  # "target size distribution"
plt.xlabel(u"尺寸/像素", fontproperties=font)      # "size / pixels"
plt.ylabel(u"数量/个", fontproperties=font)        # "count"
plt.savefig("h_w_distribution.png", dpi=300)
# plt.show()
# datum_width = datum_width.reshape((datum_width.shape[0], 1))
# datum_height = datum_height.reshape((datum_height.shape[0], 1))
# d = np.concatenate([datum_width, datum_height], axis=1)
# plt.scatter(d[:, 0], d[:, 1])
# plt.show()
# kmeans = KMeans(n_clusters=3, random_state=0).fit(datum_width)
# print(kmeans.labels_)
# print(kmeans.cluster_centers_)
-------------------------------------------------------------------------------- /datum/utils/process_config.py: --------------------------------------------------------------------------------
# Copyright (c) 2009 IW.
# All rights reserved.
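#
# process_config parses an INI-style training configuration into parameter
# dicts. A minimal sketch of the expected layout follows — the section names
# come from the parsing code below, the option names are those read by the
# dataset/solver classes elsewhere in this repository, and the values are
# purely illustrative:
#
#   [Common]
#   image_size = 448
#   batch_size = 16
#   max_objects_per_image = 20
#
#   [DataSet]
#   path = /path/to/train_list.txt
#   thread_num = 4
#
#   [Net]
#   ; options depend on the chosen network
#
#   [Solver]
#   lr = 0.0001
#   moment = 0.9
#   train_dir = models/train
#   max_iterators = 100000
#   pretrain_model_path = None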
#
# Author: liuguiyang
# Date: 2018/3/5

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

from configparser import ConfigParser


def process_config(conf_file):
    """Process a configure file to generate CommonParams, DataSetParams,
    NetParams and SolverParams.

    Args:
        conf_file: configure file path
    Returns:
        CommonParams, DataSetParams, NetParams, SolverParams
        (plus BoxEncoderParams when a [BoxEncoder] section is present)
    """
    common_params = {}
    dataset_params = {}
    net_params = {}
    solver_params = {}
    box_encoder_params = {}

    # configure_parser
    config = ConfigParser()
    config.read(conf_file, encoding="utf8")

    # sections and options
    for section in config.sections():
        # construct common_params
        if section == 'Common':
            for option in config.options(section):
                common_params[option] = config.get(section, option)
        # construct dataset_params
        if section == 'DataSet':
            for option in config.options(section):
                dataset_params[option] = config.get(section, option)
        # construct net_params
        if section == 'Net':
            for option in config.options(section):
                net_params[option] = config.get(section, option)
        # construct solver_params
        if section == 'Solver':
            for option in config.options(section):
                solver_params[option] = config.get(section, option)

        # construct box_encoder_params
        if section == 'BoxEncoder':
            for option in config.options(section):
                box_encoder_params[option] = config.get(section, option)

    # Determine whether the current task is prediction or training
    if "is_predict" in common_params.keys():
        if common_params["is_predict"] == "True":
            common_params["is_predict"] = True
            common_params["batch_size"] = 1
        else:
            common_params["is_predict"] = False

    if len(box_encoder_params) == 0:
        return common_params, dataset_params, net_params, solver_params

    return common_params, dataset_params, net_params, solver_params, box_encoder_params


if __name__ == '__main__':
    common_params, dataset_params, net_params, solver_params = process_config(
        "../../conf/yolo_unet_train.cfg")
    print(common_params)
    print(dataset_params)
    # import json
    # print(json.loads(dataset_params["classes"]))
    # print(net_params["aspect_ratios"])
    # print(json.loads(net_params["aspect_ratios"]))
-------------------------------------------------------------------------------- /eagle/observe/augmentors/flip.py: --------------------------------------------------------------------------------
# Copyright (c) 2009 IW.
# All rights reserved.
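#
# Fliplr / Flipud mirror images (and their keypoints) horizontally or
# vertically, each with probability p. A hedged usage sketch — the
# augment_image call mirrors how the check scripts in this repository drive
# augmentors, and `img` stands for any HxWxC uint8 image array:
#
#   aug = Fliplr(0.5)                    # flip roughly half of the inputs
#   img_flipped = aug.augment_image(img)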
3 | # 4 | # Author: liuguiyang 5 | # Date: 2018/2/28 6 | 7 | from __future__ import absolute_import 8 | from __future__ import division 9 | from __future__ import print_function 10 | 11 | import numpy as np 12 | 13 | import eagle.utils as eu 14 | from eagle.observe.base.meta import Augmentor 15 | from eagle.parameter import StochasticParameter, Binomial 16 | 17 | 18 | class Fliplr(Augmentor): 19 | def __init__(self, p=0, name=None, deterministic=False, random_state=None): 20 | super(Fliplr, self).__init__( 21 | name=name, deterministic=deterministic, random_state=random_state) 22 | 23 | if eu.is_single_number(p): 24 | self.p = Binomial(p) 25 | elif isinstance(p, StochasticParameter): 26 | self.p = p 27 | else: 28 | raise Exception("Expected p type StochasticParameter") 29 | 30 | def _augment_images(self, images, random_state, parents, hooks): 31 | nb_images = len(images) 32 | samples = self.p.draw_samples((nb_images,), random_state=random_state) 33 | for i in range(nb_images): 34 | if samples[i] == 1: 35 | images[i] = np.fliplr(images[i]) 36 | return images 37 | 38 | def _augment_keypoints(self, 39 | keypoints_on_images, random_state, parents, hooks): 40 | nb_images = len(keypoints_on_images) 41 | samples = self.p.draw_samples((nb_images, ), random_state=random_state) 42 | for i, kps_oi in enumerate(keypoints_on_images): 43 | if samples[i] == 1: 44 | width = kps_oi.shape[1] 45 | for kp in kps_oi.keypoints: 46 | kp.x = (width - 1) - kp.x 47 | return keypoints_on_images 48 | 49 | def get_parameters(self): 50 | return [self.p] 51 | 52 | 53 | class Flipud(Augmentor): 54 | def __init__(self, p=0, name=None, deterministic=False, random_state=None): 55 | super(Flipud, self).__init__( 56 | name=name, deterministic=deterministic, random_state=random_state) 57 | if eu.is_single_number(p): 58 | self.p = Binomial(p) 59 | elif isinstance(p, StochasticParameter): 60 | self.p = p 61 | else: 62 | raise Exception("Expected p type StochasticParameter") 63 | 64 | def _augment_images(self, images, random_state, parents, hooks): 65 | nb_images = len(images) 66 | samples = self.p.draw_samples((nb_images,), random_state=random_state) 67 | for i in range(nb_images): 68 | if samples[i] == 1: 69 | images[i] = np.flipud(images[i]) 70 | return images 71 | 72 | def _augment_keypoints(self, 73 | keypoints_on_images, random_state, parents, hooks): 74 | nb_images = len(keypoints_on_images) 75 | samples = self.p.draw_samples((nb_images,), random_state=random_state) 76 | for i, kps_oi in enumerate(keypoints_on_images): 77 | if samples[i] == 1: 78 | height = kps_oi.shape[0] 79 | for kp in kps_oi.keypoints: 80 | kp.y = (height - 1) - kp.y 81 | return keypoints_on_images 82 | 83 | def get_parameters(self): 84 | return [self.p] 85 | -------------------------------------------------------------------------------- /Others/voc/process_pascal_voc.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2009 IW. 2 | # All rights reserved. 
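#
# Converts Pascal VOC XML annotations into one flat text line per image, in
# the "image_path xmin ymin xmax ymax class_id ..." layout produced by
# convert_to_string below. An illustrative (made-up) output line:
#
#   /Volumes/projects/DataSets/VOC/VOCdevkit/VOC2007/JPEGImages/000005.jpg 263 211 324 339 8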
#
# Author: liuguiyang
# Date: 2018/3/5

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import os
import xml.etree.ElementTree as ET


classes_name = [
    "aeroplane", "bicycle", "bird", "boat", "bottle",
    "bus", "car", "cat", "chair", "cow", "diningtable",
    "dog", "horse", "motorbike", "person", "pottedplant",
    "sheep", "sofa", "train", "tvmonitor"
]

classes_num = {
    'aeroplane': 0, 'bicycle': 1, 'bird': 2, 'boat': 3, 'bottle': 4,
    'bus': 5, 'car': 6, 'cat': 7, 'chair': 8, 'cow': 9, 'diningtable': 10,
    'dog': 11, 'horse': 12, 'motorbike': 13, 'person': 14, 'pottedplant': 15,
    'sheep': 16, 'sofa': 17, 'train': 18, 'tvmonitor': 19
}

DATA_ROOT = "/Volumes/projects/DataSets/VOC"
DATA_PATH = os.path.join(DATA_ROOT, "VOCdevkit/")
OUTPUT_PATH = os.path.join(DATA_ROOT, "pascal_voc_{}.txt")


def parse_xml(xml_file, year=2007):
    """
    Args:
        xml_file: the input xml file path

    Returns:
        image_path: string
        labels: list of [xmin, ymin, xmax, ymax, class]
    """
    tree = ET.parse(xml_file)
    root = tree.getroot()
    image_path = ''
    labels = []

    for item in root:
        if item.tag == 'filename':
            if year == 2007:
                image_path = os.path.join(
                    DATA_PATH, 'VOC2007/JPEGImages', item.text)
            if year == 2012:
                image_path = os.path.join(
                    DATA_PATH, 'VOC2012/JPEGImages', item.text)
        elif item.tag == 'object':
            obj_name = item[0].text
            obj_num = classes_num[obj_name]
            bndbox = item.find("bndbox")
            xmin = int(float(bndbox.find("xmin").text))
            ymin = int(float(bndbox.find("ymin").text))
            xmax = int(float(bndbox.find("xmax").text))
            ymax = int(float(bndbox.find("ymax").text))
            labels.append([xmin, ymin, xmax, ymax, obj_num])

    return image_path, labels


def convert_to_string(image_path, labels):
    out_string = ''
    out_string += image_path
    for label in labels:
        for i in label:
            out_string += ' ' + str(i)
    out_string += '\n'

    return out_string


def run_main(year=2007):
    print("Start formatting voc {} data !".format(year))
    out_file = open(OUTPUT_PATH.format(year), "w")
    if year == 2007:
        xml_dir = os.path.join(DATA_PATH, "VOC2007/Annotations/")
    if year == 2012:
        xml_dir = os.path.join(DATA_PATH, "VOC2012/Annotations/")

    xml_list = os.listdir(xml_dir)

    xml_list = [xml_dir + tmp for tmp in xml_list]
    for xml in xml_list:
        if not os.path.isfile(xml):
            print("{} is not an xml file path, skipping.".format(xml))
            continue  # skip directory entries that are not files
        image_path, labels = parse_xml(xml, year=year)
        record = convert_to_string(image_path, labels)
        out_file.write(record)
    out_file.close()

if __name__ == '__main__':
    run_main(year=2007)
    run_main(year=2012)
-------------------------------------------------------------------------------- /checks/observe/check_parameters.py: --------------------------------------------------------------------------------
# Copyright (c) 2009 IW.
# All rights reserved.
#
# Author: liuguiyang
# Date: 2018/2/28

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function


import numpy as np
import matplotlib.pyplot as plt

from eagle.parameter import (
    Binomial, Choice, DiscreteUniform, Normal, Uniform, Deterministic, Clip,
    Discretize, Multiply, Add, Divide, Power, Absolute
)


def main():
    params = [
        ("Binomial(0.1)", Binomial(0.1)),
        ("Choice", Choice([0, 1, 2])),
        ("Choice with p", Choice([0, 1, 2], p=[0.1, 0.2, 0.7])),
        ("DiscreteUniform(0, 10)", DiscreteUniform(0, 10)),
        ("Normal(0, 1)", Normal(0, 1)),
        ("Normal(1, 1)", Normal(1, 1)),
        ("Normal(0, 2)", Normal(0, 2)),
        ("Normal(Choice([-1, 1]), 2)", Normal(Choice([-1, 1]), 2)),
        ("Discretize(Normal(0, 1.0))", Discretize(Normal(0, 1.0))),
        ("Uniform(0, 10)", Uniform(0, 10)),
        ("Deterministic(1)", Deterministic(1)),
        ("Clip(Normal(0, 1), 0, None)", Clip(Normal(0, 1), minval=0, maxval=None)),
        ("Multiply(Uniform(0, 10), 2)", Multiply(Uniform(0, 10), 2)),
        ("Add(Uniform(0, 10), 5)", Add(Uniform(0, 10), 5)),
        ("Absolute(Normal(0, 1))", Absolute(Normal(0, 1)))
    ]

    params_arithmetic = [
        ("Normal(0, 1.0)", Normal(0.0, 1.0)),
        ("Normal(0, 1.0) + 5", Normal(0.0, 1.0) + 5),
        ("5 + Normal(0, 1.0)", 5 + Normal(0.0, 1.0)),
        ("5 + Normal(0, 1.0)", Add(5, Normal(0.0, 1.0), elementwise=True)),
        ("Normal(0, 1.0) * 10", Normal(0.0, 1.0) * 10),
        ("10 * Normal(0, 1.0)", 10 * Normal(0.0, 1.0)),
        ("10 * Normal(0, 1.0)", Multiply(10, Normal(0.0, 1.0), elementwise=True)),
        ("Normal(0, 1.0) / 10", Normal(0.0, 1.0) / 10),
        ("10 / Normal(0, 1.0)", 10 / Normal(0.0, 1.0)),
        ("10 / Normal(0, 1.0)", Divide(10, Normal(0.0, 1.0), elementwise=True)),
        ("Normal(0, 1.0) ** 2", Normal(0.0, 1.0) ** 2),
        ("2 ** Normal(0, 1.0)", 2 ** Normal(0.0, 1.0)),
        ("2 ** Normal(0, 1.0)", Power(2, Normal(0.0, 1.0), elementwise=True))
    ]

    params_noise = [
        # ("SimplexNoise", SimplexNoise()),
        # ("Sigmoid(SimplexNoise)", Sigmoid(SimplexNoise())),
        # ("SimplexNoise(linear)", SimplexNoise(upscale_method="linear")),
        # ("SimplexNoise(nearest)", SimplexNoise(upscale_method="nearest")),
        # ("FrequencyNoise((-4, 4))", FrequencyNoise(exponent=(-4, 4))),
        # ("FrequencyNoise(-2)", FrequencyNoise(exponent=-2)),
        # ("FrequencyNoise(2)", FrequencyNoise(exponent=2))
    ]

    images_params = [param.draw_distribution_graph() for (title, param) in params]
    images_arithmetic = [param.draw_distribution_graph() for (title, param) in params_arithmetic]

    show_multi_array(images_params)
    show_multi_array(images_arithmetic)


def show_multi_array(image_arrays):
    n = len(image_arrays)
    h, w, c = image_arrays[0].shape
    print("arrays num: {}, single image shape: {}".format(n, image_arrays[0].shape))

    if n == 1:
        plt.imshow(image_arrays[0])
        plt.show()
        return

    # grid_n x grid_n is the smallest square grid that holds all n images
    if int(np.sqrt(n)) ** 2 < n:
        grid_n = int(np.sqrt(n)) + 1
    else:
        grid_n = int(np.sqrt(n))

    large_image = np.zeros((h*grid_n, w*grid_n, c), dtype=image_arrays[0].dtype)
    for i, img in enumerate(image_arrays):
        x1, y1 = (i % grid_n)*w, (i // grid_n)*h
        x2, y2 = (i % grid_n + 1)*w, (i // grid_n + 1)*h
        large_image[y1:y2, x1:x2] = img
    plt.imshow(large_image)
    plt.show()


if __name__ == "__main__":
    main()
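# A quick interactive sanity check of the parameter API exercised above —
# a hedged sketch: the draw_samples/draw_sample signatures are assumed from
# their use in eagle/observe/augmentors/flip.py and arithmetic.py, and the
# outputs are only illustrative:
#
#   >>> from eagle.parameter import Binomial, Normal
#   >>> Binomial(0.5).draw_samples((8,))   # eight 0/1 coin flips
#   >>> Normal(0, 1).draw_sample()         # a single float sample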
-------------------------------------------------------------------------------- /checks/observe/check_background.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2009 IW. 2 | # All rights reserved. 3 | # 4 | # Author: liuguiyang 5 | # Date: 2018/3/1 6 | 7 | from __future__ import absolute_import 8 | from __future__ import division 9 | from __future__ import print_function 10 | 11 | import numpy as np 12 | from skimage import data 13 | import matplotlib.pyplot as plt 14 | 15 | import eagle.utils as eu 16 | from eagle.observe.base.meta import Sequential 17 | from eagle.observe.augmentors.flip import Fliplr, Flipud 18 | from eagle.observe.base.basetype import KeyPoint, KeyPointsOnImage 19 | from eagle.observe.base.basebatch import Batch, BatchLoader, BackgroundAugmentor 20 | 21 | 22 | def main(): 23 | augseq = Sequential([ 24 | Fliplr(0.5), 25 | Flipud(0.5) 26 | ]) 27 | 28 | print("------------------") 29 | print("augseq.augment_batches(batches, background=True)") 30 | print("------------------") 31 | batches = list(load_images()) 32 | batches_aug = augseq.augment_batches(batches, background=True) 33 | images_aug = [] 34 | keypoints_aug = [] 35 | for batch_aug in batches_aug: 36 | images_aug.append(batch_aug.images_aug) 37 | keypoints_aug.append(batch_aug.keypoints_aug) 38 | grid = draw_grid(images_aug, keypoints_aug) 39 | print(grid.shape) 40 | plt.imshow(grid) 41 | plt.show() 42 | 43 | print("------------------") 44 | print("augseq.augment_batches(batches, background=True) -> only images") 45 | print("------------------") 46 | batches = list(load_images()) 47 | batches = [batch.images for batch in batches] 48 | batches_aug = augseq.augment_batches(batches, background=True) 49 | images_aug = [] 50 | keypoints_aug = None 51 | for batch_aug in batches_aug: 52 | images_aug.append(batch_aug) 53 | plt.imshow(draw_grid(images_aug, keypoints_aug)) 54 | plt.show() 55 | 56 | print("------------------") 57 | print("BackgroundAugmenter") 58 | print("------------------") 59 | batch_loader = BatchLoader(load_images) 60 | bg_augmenter = BackgroundAugmentor(batch_loader, augseq) 61 | images_aug = [] 62 | keypoints_aug = [] 63 | while True: 64 | print("Next batch...") 65 | batch = bg_augmenter.get_batch() 66 | if batch is None: 67 | print("Finished.") 68 | break 69 | images_aug.append(batch.images_aug) 70 | keypoints_aug.append(batch.keypoints_aug) 71 | plt.imshow(draw_grid(images_aug, keypoints_aug)) 72 | plt.show() 73 | 74 | 75 | def load_images(): 76 | batch_size = 4 77 | astronaut = data.astronaut() 78 | astronaut = eu.imresize_single_image(astronaut, (64, 64)) 79 | kps = KeyPointsOnImage([KeyPoint(x=15, y=25)], shape=astronaut.shape) 80 | counter = 0 81 | for i in range(10): 82 | batch_images = [] 83 | batch_kps = [] 84 | for b in range(batch_size): 85 | batch_images.append(astronaut) 86 | batch_kps.append(kps) 87 | counter += 1 88 | batch = Batch( 89 | images=np.array(batch_images, dtype=np.uint8), 90 | keypoints=batch_kps 91 | ) 92 | yield batch 93 | 94 | 95 | def draw_grid(images_aug, keypoints_aug): 96 | if keypoints_aug is None: 97 | keypoints_aug = [] 98 | for bidx in range(len(images_aug)): 99 | keypoints_aug.append([None for _ in images_aug[bidx]]) 100 | 101 | images_kps_batches = [] 102 | for bidx in range(len(images_aug)): 103 | images_kps_batch = [] 104 | for image, kps in zip(images_aug[bidx], keypoints_aug[bidx]): 105 | if kps is None: 106 | image_kps = image 107 | else: 108 | image_kps = kps.draw_on_image(image, size=5, color=[255, 
0, 0])
            images_kps_batch.append(image_kps)
        images_kps_batches.extend(images_kps_batch)

    grid = eu.draw_grid(images_kps_batches, cols=len(images_aug[0]))
    return grid

if __name__ == "__main__":
    main()
-------------------------------------------------------------------------------- /Others/satellite/clip_video.py: --------------------------------------------------------------------------------
# encoding: utf-8
"""
@contact: liuguiyang15@mails.ucas.edu.cn
@file: clip_video.py
@time: 2018/5/2 16:57
"""

# Crop the Jilin-1 video data to reduce the image size, removing the unused
# regions first

import os
import cv2
from sklearn.utils import shuffle

from datum.utils.tools import extract_target_from_xml


video_names = [
    "large_000013363_total", "large_000014631_total",
    "large_minneapolis_1_total", "large_tunisia_total"]
# video_names = ["large_tunisia_total"]
root_dir_path = "/Volumes/projects/DataSets/CSUVideo/"
src_dir_path = root_dir_path + "吉林一号视频逐帧/"    # "Jilin-1 video, frame by frame"
clip_save_dir_path = root_dir_path + "标注结果图/"    # "annotated result images"
clip_spec_infos = {
    # "large_000013363_total": {
    #     'xmin': 750, 'ymin': 0,
    #     'xmax': 3750, 'ymax': 2700
    # },
    "large_000013363_total": {
        'xmin': 0, 'ymin': 0,
        'xmax': 4096, 'ymax': 3072
    },
    # "large_000014631_total": {
    #     'xmin': 0, 'ymin': 500,
    #     'xmax': 3400, 'ymax': 3050
    # },
    "large_000014631_total": {
        'xmin': 0, 'ymin': 0,
        'xmax': 4096, 'ymax': 3072
    },
    "large_minneapolis_1_total": {
        'xmin': 0, 'ymin': 0,
        'xmax': 4096, 'ymax': 2160
    },
    "large_tunisia_total": {
        'xmin': 0, 'ymin': 0,
        'xmax': 4096, 'ymax': 2160
    }
}


def clipping_video(is_show=False, is_save_anno=True, is_save_image=False, is_save_anno_image=False):
    for video_name in video_names:
        xmin, ymin = clip_spec_infos[video_name]["xmin"], clip_spec_infos[video_name]["ymin"]
        xmax, ymax = clip_spec_infos[video_name]["xmax"], clip_spec_infos[video_name]["ymax"]

        video_image_dir_path = src_dir_path + video_name + "/JPEGImages/"
        anno_image_dir_path = src_dir_path + video_name + "/Annotations/"
        if is_show:
            cv2.namedWindow("src", cv2.WINDOW_NORMAL)
        N = len(os.listdir(video_image_dir_path))
        for image_id in range(1, N+1):
            image_path = video_image_dir_path + "%06d.jpg" % image_id
            anno_path = anno_image_dir_path + "%06d.xml" % image_id
            if not os.path.exists(anno_path):
                print(anno_path)
                continue
            anno_lists = extract_target_from_xml(anno_path)
            print(anno_path, len(anno_lists))

            image = cv2.imread(image_path)
            image = image[ymin:ymax, xmin:xmax]
            if is_save_image:
                cv2.imwrite(
                    clip_save_dir_path + video_name + "/JPEGImages/%06d.jpg" % image_id,
                    image,
                    [int(cv2.IMWRITE_JPEG_QUALITY), 100])

            for anno in anno_lists:
                a_xmin, a_ymin, a_xmax, a_ymax = anno[:4]
                x1 = a_xmin - xmin
                y1 = a_ymin - ymin
                x2 = a_xmax - xmin
                y2 = a_ymax - ymin
                cv2.rectangle(image, (x1, y1), (x2, y2), (255, 0, 0), 2)

            if is_save_anno_image:
                cv2.imwrite(
                    clip_save_dir_path + video_name + "/JPEGImages/%06d.jpg" % image_id,
                    image,
                    [int(cv2.IMWRITE_JPEG_QUALITY), 100])

            if is_show:
                cv2.imshow("src", image)
                ch = cv2.waitKey(0)
                if ch == ord('q'):
                    return

            # Save each target's position inside the cropped image
            if is_save_anno:
                save_new_anno_file = clip_save_dir_path + video_name + "/Annotations/%06d.txt" % image_id
                with open(save_new_anno_file, "w") as writer:
                    writer.write("x1,y1,x2,y2,label\n")
                    for item in anno_lists:
                        writer.write("{},{},{},{},{}\n".format(*item))


# Random sampling: keep a fraction of the images (0.1 * N below, i.e. 10%)
# for model training
def shuffle_samples():
    for video_name in video_names:
        image_dir_path = clip_save_dir_path + video_name + "/JPEGImages/"
        anno_dir_path = clip_save_dir_path + video_name + "/Annotations/"

        images_list = os.listdir(image_dir_path)
        N = len(images_list)
        selected_list = shuffle(images_list)[0:int(0.1 * N)]
        for item in images_list:
            if item not in selected_list:
                anno_name = item.split(".")[0] + ".txt"
                os.remove(image_dir_path + item)
                os.remove(anno_dir_path + anno_name)


def crop_image_by_window():
    pass


if __name__ == '__main__':
    clipping_video(is_show=False, is_save_anno=True, is_save_image=False, is_save_anno_image=True)
    # shuffle_samples()
    pass
-------------------------------------------------------------------------------- /Others/satellite/prepare_trainsamples.py: --------------------------------------------------------------------------------
# encoding: utf-8
"""
@contact: liuguiyang15@mails.ucas.edu.cn
@file: prepare_trainsamples.py
@time: 2018/5/17 13:05
"""

# Organize the Jilin-1 satellite data into the required format
import os
import random
import cv2


train_video = ["large_000014631_total", "large_minneapolis_1_total", "large_tunisia_total"]
anno_dir_prefix = "/Volumes/projects/DataSets/CSUVideo/video_with_annotation/"
image_dir_prefix = "/Volumes/projects/DataSets/CSUVideo/src_video_frame/"
save_dir_prefix = "/Volumes/projects/DataSets/CSUVideo/300x300/"

SUB_IMG_WID, SUB_IMG_HEI, SUB_OVERLAP = 300, 300, 80


def twoboxes_overlap(box1, box2):
    # intersection area of two [x1, y1, x2, y2] boxes (0 when disjoint)
    x1 = max(box1[0], box2[0])
    y1 = max(box1[1], box2[1])
    x2 = min(box1[2], box2[2])
    y2 = min(box1[3], box2[3])
    if x2 <= x1 or y2 <= y1:
        return 0
    return (x2 - x1) * (y2 - y1)


def crop_image(image_path, anno_path, video_name, image_name):
    target_annos = []
    with open(anno_path, "r") as reader:
        cnt = 0
        for line in reader:
            cnt += 1
            if cnt == 1:
                continue  # skip the "x1,y1,x2,y2,label" header line
            line = list(map(int, line.strip().split(",")[:-1]))
            target_annos.append(line)

    def select_subimage_anno(w, h):
        select_box = []
        for box in target_annos:
            x1, y1, x2, y2 = box
            x11, y11 = x1 - w, y1 - h
            x22, y22 = x2 - w, y2 - h
            gx1, gy1 = w, h
            gx2, gy2 = w + SUB_IMG_WID, h + SUB_IMG_HEI
            overlap_area = twoboxes_overlap(box, [gx1, gy1, gx2, gy2])
            if overlap_area <= 0:
                continue
            new_box = [max(0, x11), max(0, y11), min(x22, SUB_IMG_WID), min(y22, SUB_IMG_HEI)]
            if overlap_area / ((x22 - x11) * (y22 - y11)) >= 0.7:
                select_box.append(new_box)
        return select_box

    image_data = cv2.imread(image_path)
    H, W = image_data.shape[:2]
    cnt = 0
    for h in range(0, H, SUB_IMG_HEI-SUB_OVERLAP):
        for w in range(0, W, SUB_IMG_WID-SUB_OVERLAP):
            if h + SUB_IMG_HEI >= H:
                h = H - SUB_IMG_HEI
            if w + SUB_IMG_WID >= W:
                w = W - SUB_IMG_WID
            cnt += 1
            sub_image = image_data[h:h+SUB_IMG_HEI, w:w+SUB_IMG_WID]
            select_annos = select_subimage_anno(w, h)
            if len(select_annos) == 0:
                continue
            # print(len(select_annos),
select_annos) 74 | # for box in select_annos: 75 | # x1, y1, x2, y2 = box 76 | # cv2.rectangle(sub_image, (x1, y1), (x2, y2), (0, 0, 255), 2) 77 | # cv2.imshow("src", sub_image) 78 | # cv2.waitKey() 79 | image_name = image_name.split(".")[0] 80 | if not os.path.isdir(save_dir_prefix + video_name + "/JPEGImages/"): 81 | os.makedirs(save_dir_prefix + video_name + "/JPEGImages/") 82 | if not os.path.isdir(save_dir_prefix + video_name + "/Annotations/"): 83 | os.makedirs(save_dir_prefix + video_name + "/Annotations/") 84 | save_image_path = save_dir_prefix + video_name + "/JPEGImages/{}_{}_{}.jpg".format(image_name, w, h) 85 | save_anno_path = save_dir_prefix + video_name + "/Annotations/{}_{}_{}.txt".format(image_name, w, h) 86 | cv2.imwrite(save_image_path, sub_image, [int(cv2.IMWRITE_JPEG_QUALITY), 100]) 87 | with open(save_anno_path, "w") as writer: 88 | for box in select_annos: 89 | writer.write(",".join(map(str, box)) + "\n") 90 | 91 | # for video_name in train_video: 92 | # image_dir_path = image_dir_prefix + video_name + "/JPEGImages/" 93 | # anno_dir_path = anno_dir_prefix + video_name + "/Annotations/" 94 | # anno_list = os.listdir(anno_dir_path) 95 | # random.shuffle(anno_list) 96 | # anno_list = random.sample(anno_list, int(len(anno_list) * 0.06)) 97 | # for anno_name in anno_list: 98 | # anno_path = anno_dir_path + anno_name 99 | # image_path = image_dir_path + anno_name.replace("txt", "jpg") 100 | # print(anno_path) 101 | # print(image_path) 102 | # crop_image(image_path, anno_path, video_name, anno_name) 103 | 104 | 105 | train_sample_path = save_dir_prefix + "train_samples.txt" 106 | writer = open(train_sample_path, "w") 107 | for video_name in train_video: 108 | image_dir_path = save_dir_prefix + video_name + "/JPEGImages/" 109 | anno_dir_path = save_dir_prefix + video_name + "/Annotations/" 110 | anno_list = os.listdir(anno_dir_path) 111 | for anno_name in anno_list: 112 | anno_path = anno_dir_path + anno_name 113 | image_path = image_dir_path + anno_name.replace("txt", "jpg") 114 | anno_detail = "" 115 | with open(anno_path, "r") as reader: 116 | anno_info = [] 117 | for line in reader: 118 | line = line.strip().split(",") + ["0"] 119 | anno_info.append(" ".join(line)) 120 | anno_detail = " ".join(anno_info) 121 | writer.write("{} {}\n".format(image_path, anno_detail)) 122 | -------------------------------------------------------------------------------- /datum/models/yolo/yolo_dataset.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2009 IW. 2 | # All rights reserved. 
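#
# Single-sample queue variant of the YOLO text dataset: one producer thread
# fills a record queue and several consumer threads decode the records into
# an image/label queue. See yolo_batch_dataset.py for the batch-level
# variant that the training entry points in this repository import.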
3 | # 4 | # Author: liuguiyang 5 | # Date: 2018/3/5 6 | 7 | from __future__ import absolute_import 8 | from __future__ import division 9 | from __future__ import print_function 10 | 11 | import random 12 | import cv2 13 | import numpy as np 14 | from queue import Queue 15 | from threading import Thread 16 | 17 | from datum.meta.dataset import DataSet 18 | 19 | 20 | class YoloDataSet(DataSet): 21 | """TextDataSet 22 | process text input file dataset 23 | text file format: 24 | image_path xmin1 ymin1 xmax1 ymax1 class1 xmin2 ymin2 xmax2 ymax2 class2 25 | """ 26 | 27 | def __init__(self, common_params, dataset_params): 28 | super(YoloDataSet, self).__init__(common_params, dataset_params) 29 | 30 | # process params 31 | self.data_path = str(dataset_params['path']) 32 | self.width = int(common_params['image_size']) 33 | self.height = int(common_params['image_size']) 34 | self.batch_size = int(common_params['batch_size']) 35 | self.thread_num = int(dataset_params['thread_num']) 36 | self.max_objects = int(common_params['max_objects_per_image']) 37 | 38 | # record and image_label queue 39 | self.record_queue = Queue(maxsize=10000) 40 | self.image_label_queue = Queue(maxsize=5000) 41 | 42 | self.record_list = [] 43 | 44 | # filling the record_list 45 | input_file = open(self.data_path, 'r') 46 | 47 | for line in input_file: 48 | line = line.strip() 49 | if ',' in line: 50 | ss = line.split(',') 51 | else: 52 | ss = line.split(' ') 53 | ss[1:] = [float(num) for num in ss[1:]] 54 | self.record_list.append(ss) 55 | 56 | self.record_point = 0 57 | self.record_number = len(self.record_list) 58 | 59 | self.num_batch_per_epoch = int(self.record_number / self.batch_size) 60 | 61 | t_record_producer = Thread(target=self.record_producer) 62 | t_record_producer.daemon = True 63 | t_record_producer.start() 64 | 65 | for i in range(self.thread_num): 66 | t = Thread(target=self.record_customer) 67 | t.daemon = True 68 | t.start() 69 | 70 | def record_producer(self): 71 | while True: 72 | if self.record_point % self.record_number == 0: 73 | random.shuffle(self.record_list) 74 | self.record_point = 0 75 | self.record_queue.put(self.record_list[self.record_point]) 76 | self.record_point += 1 77 | 78 | def record_customer(self): 79 | while True: 80 | item = self.record_queue.get() 81 | out = self.record_process(item) 82 | self.image_label_queue.put(out) 83 | 84 | def record_process(self, record): 85 | """record process 86 | Args: record 87 | Returns: 88 | image: 3-D ndarray 89 | labels: 2-D list [self.max_objects, 5] (xcenter, ycenter, w, h, class_num) 90 | object_num: total object number int 91 | """ 92 | image = cv2.imread(record[0]) 93 | image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB) 94 | h = image.shape[0] 95 | w = image.shape[1] 96 | 97 | width_rate = self.width * 1.0 / w 98 | height_rate = self.height * 1.0 / h 99 | 100 | image = cv2.resize(image, (self.height, self.width)) 101 | 102 | labels = [[0, 0, 0, 0, 0]] * self.max_objects 103 | i = 1 104 | object_num = 0 105 | while i < len(record): 106 | xmin = record[i] 107 | ymin = record[i + 1] 108 | xmax = record[i + 2] 109 | ymax = record[i + 3] 110 | class_num = record[i + 4] 111 | 112 | xcenter = (xmin + xmax) * 1.0 / 2 * width_rate 113 | ycenter = (ymin + ymax) * 1.0 / 2 * height_rate 114 | 115 | box_w = (xmax - xmin) * width_rate 116 | box_h = (ymax - ymin) * height_rate 117 | 118 | labels[object_num] = [xcenter, ycenter, box_w, box_h, class_num] 119 | object_num += 1 120 | i += 5 121 | if object_num >= self.max_objects: 122 | break 123 | return [image, 
labels, object_num] 124 | 125 | def batch(self): 126 | """get batch 127 | Returns: 128 | images: 4-D ndarray [batch_size, height, width, 3] 129 | labels: 3-D ndarray [batch_size, max_objects, 5] 130 | objects_num: 1-D ndarray [batch_size] 131 | """ 132 | images = [] 133 | labels = [] 134 | objects_num = [] 135 | for i in range(self.batch_size): 136 | image, label, object_num = self.image_label_queue.get() 137 | images.append(image) 138 | labels.append(label) 139 | objects_num.append(object_num) 140 | images = np.asarray(images, dtype=np.float32) 141 | images = images / 255 * 2 - 1 142 | labels = np.asarray(labels, dtype=np.float32) 143 | objects_num = np.asarray(objects_num, dtype=np.int32) 144 | return images, labels, objects_num 145 | -------------------------------------------------------------------------------- /eagle/brain/solver/yolo_solver.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2009 IW. 2 | # All rights reserved. 3 | # 4 | # Author: liuguiyang 5 | # Date: 2018/3/4 6 | 7 | from __future__ import absolute_import 8 | from __future__ import division 9 | from __future__ import print_function 10 | 11 | import sys 12 | import time 13 | from datetime import datetime 14 | 15 | import numpy as np 16 | import tensorflow as tf 17 | 18 | from eagle.brain.solver.solver import Solver 19 | 20 | 21 | class YoloSolver(Solver): 22 | def __init__(self, dataset, net, common_params, solver_params): 23 | super(YoloSolver, self).__init__(dataset, net, common_params, solver_params) 24 | 25 | # process params 26 | self.width = int(common_params['image_size']) 27 | self.height = int(common_params['image_size']) 28 | self.batch_size = int(common_params['batch_size']) 29 | self.max_objects = int(common_params['max_objects_per_image']) 30 | 31 | self.moment = float(solver_params['moment']) 32 | self.learning_rate = float(solver_params['lr']) 33 | self.train_dir = str(solver_params['train_dir']) 34 | self.max_iterators = int(solver_params['max_iterators']) 35 | self.pretrain_path = str(solver_params['pretrain_model_path']) 36 | 37 | self.dataset = dataset 38 | self.net = net 39 | 40 | # construct graph 41 | self.construct_graph() 42 | 43 | def _train(self): 44 | """Train model 45 | 46 | Create an optimizer and apply to all trainable variables. 
47 | 48 | Args: 49 | total_loss: Total loss from net.loss() 50 | global_step: Integer Variable counting the number of training steps 51 | processed 52 | Returns: 53 | train_op: op for training 54 | """ 55 | 56 | opt = tf.train.MomentumOptimizer(self.learning_rate, self.moment) 57 | grads = opt.compute_gradients(self.total_loss) 58 | 59 | apply_gradient_op = opt.apply_gradients(grads, 60 | global_step=self.global_step) 61 | 62 | return apply_gradient_op 63 | 64 | def construct_graph(self): 65 | # construct graph 66 | self.global_step = tf.Variable(0, trainable=False) 67 | self.images = tf.placeholder(tf.float32, ( 68 | self.batch_size, self.height, self.width, 3)) 69 | self.labels = tf.placeholder(tf.float32, 70 | (self.batch_size, self.max_objects, 5)) 71 | self.objects_num = tf.placeholder(tf.int32, (self.batch_size)) 72 | 73 | self.predicts = self.net.inference(self.images) 74 | self.total_loss, self.nilboy = self.net.loss(self.predicts, self.labels, 75 | self.objects_num) 76 | 77 | tf.summary.scalar('loss', self.total_loss) 78 | self.train_op = self._train() 79 | 80 | def solve(self): 81 | saver_pretrain = tf.train.Saver(self.net.pretrained_collection) 82 | saver_train = tf.train.Saver(self.net.trainable_collection, max_to_keep=3) 83 | 84 | init = tf.global_variables_initializer() 85 | 86 | summary_op = tf.summary.merge_all() 87 | 88 | sess = tf.Session() 89 | 90 | sess.run(init) 91 | saver_pretrain.restore(sess, self.pretrain_path) 92 | 93 | summary_writer = tf.summary.FileWriter(self.train_dir, sess.graph) 94 | 95 | for step in range(self.max_iterators): 96 | start_time = time.time() 97 | np_images, np_labels, np_objects_num = self.dataset.batch() 98 | 99 | _, loss_value, nilboy = sess.run( 100 | [self.train_op, self.total_loss, self.nilboy], 101 | feed_dict={self.images: np_images, self.labels: np_labels, 102 | self.objects_num: np_objects_num}) 103 | # loss_value, nilboy = sess.run([self.total_loss, self.nilboy], feed_dict={self.images: np_images, self.labels: np_labels, self.objects_num: np_objects_num}) 104 | 105 | 106 | duration = time.time() - start_time 107 | 108 | assert not np.isnan(loss_value), 'Model diverged with loss = NaN' 109 | 110 | if step % 10 == 0: 111 | num_examples_per_step = self.dataset.batch_size 112 | examples_per_sec = num_examples_per_step / duration 113 | sec_per_batch = float(duration) 114 | 115 | format_str = ('%s: step %d, loss = %.2f ' 116 | '(%.1f examples/sec; %.3f sec/batch)') 117 | print(format_str % (datetime.now(), step, loss_value, 118 | examples_per_sec, sec_per_batch)) 119 | sys.stdout.flush() 120 | if step % 1000 == 0: 121 | summary_str = sess.run(summary_op, 122 | feed_dict={self.images: np_images, 123 | self.labels: np_labels, 124 | self.objects_num: np_objects_num}) 125 | summary_writer.add_summary(summary_str, step) 126 | if step % 5000 == 0: 127 | saver_train.save(sess, 128 | self.train_dir + '/model.ckpt', 129 | global_step=step) 130 | sess.close() 131 | -------------------------------------------------------------------------------- /Others/vedia/convert2voc.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2009 IW. 2 | # All rights reserved. 
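#
# Converts VEDIA annotations (target center, rotation angle and four corner
# points per line) into VOC-style XML files, one <object> per target;
# occluded targets are skipped and the images are copied alongside the XML.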
#
# Author: liuguiyang
# Date: 2018/4/3

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import os
import codecs
from shutil import copyfile
import numpy as np

from xml.dom.minidom import parseString
from lxml.etree import Element, SubElement, tostring
import pprint
import cv2

# label_id_map = {
#     ""
# }

def format_voc_string(filename, anno_infos):
    # anno_cell in anno_infos
    ## anno_cell is dict{"label": "car", "p1":[x1, y1], "p2":[x2, y2]}

    node_root = Element('annotation')

    node_filename = SubElement(node_root, 'filename')
    node_filename.text = filename

    image = cv2.imread(filename)
    height, width = image.shape[:2]  # image.shape is (height, width, channels)

    node_size = SubElement(node_root, 'size')
    node_width = SubElement(node_size, 'width')
    node_width.text = str(width)

    node_height = SubElement(node_size, 'height')
    node_height.text = str(height)

    node_depth = SubElement(node_size, 'depth')
    node_depth.text = '3'

    for anno_cell in anno_infos:
        node_object = SubElement(node_root, 'object')
        node_name = SubElement(node_object, 'name')
        node_name.text = anno_cell["label"]

        node_difficult = SubElement(node_object, 'difficult')
        node_difficult.text = '0'

        node_bndbox = SubElement(node_object, 'bndbox')
        node_x1 = SubElement(node_bndbox, 'x1')
        node_x1.text = str(anno_cell["p1"][0])
        node_y1 = SubElement(node_bndbox, 'y1')
        node_y1.text = str(anno_cell["p1"][1])

        node_x2 = SubElement(node_bndbox, 'x2')
        node_x2.text = str(anno_cell["p2"][0])
        node_y2 = SubElement(node_bndbox, 'y2')
        node_y2.text = str(anno_cell["p2"][1])

        node_x3 = SubElement(node_bndbox, 'x3')
        node_x3.text = str(anno_cell["p3"][0])
        node_y3 = SubElement(node_bndbox, 'y3')
        node_y3.text = str(anno_cell["p3"][1])

        node_x4 = SubElement(node_bndbox, 'x4')
        node_x4.text = str(anno_cell["p4"][0])
        node_y4 = SubElement(node_bndbox, 'y4')
        node_y4.text = str(anno_cell["p4"][1])

    xml = tostring(node_root, pretty_print=True)
    # dom = parseString(xml)
    return xml

data_prefix = "/Volumes/projects/DataSets/VEDIA/"
images_dir = ["512/Vehicules512/", "1024/Vehicules1024/"]
annotations_filepath = ["512/Annotations512/annotation512.txt"]

def convert():
    save_dir_prefix = "/Volumes/projects/DataSets/VEDIA/VOCFORMAT/"

    label_set = set()
    for img_dir, anno_file in zip(images_dir, annotations_filepath):
        abs_img_dir = data_prefix + img_dir
        abs_anno_path = data_prefix + anno_file
        if not os.path.isfile(abs_anno_path):
            raise ValueError("{} file not found !".format(abs_anno_path))
        images_dict = {}
        with codecs.open(abs_anno_path, "r", "utf8") as reader:
            for line in reader:
                line = line.strip().split(' ')
                name_prefix = line[0] + "_co.png"
                image_path = abs_img_dir + name_prefix
                images_dict.setdefault(image_path, [])
                images_dict[image_path].append(line)

        for img_path in images_dict.keys():
            if not os.path.isfile(img_path):
                raise IOError("{} image path not found !".format(img_path))

            anno_infos = list()
            for line in images_dict[img_path]:
                anno_cell = dict()
                center_x, center_y = float(line[1]), float(line[2])
                rotate_theta = float(line[3])
points = np.array(list(map(float, line[4:12])), 112 | np.int32).reshape((2, -1)).T 113 | fully_contain = int(line[-2]) 114 | occluded = int(line[-1]) 115 | if occluded: 116 | continue 117 | # print(points.shape) 118 | label = line[-3] 119 | label_set.add(label) 120 | anno_cell["label"] = label 121 | anno_cell["p1"] = points[0, :] 122 | anno_cell["p2"] = points[1, :] 123 | anno_cell["p3"] = points[2, :] 124 | anno_cell["p4"] = points[3, :] 125 | anno_infos.append(anno_cell) 126 | if len(anno_infos) == 0: 127 | print(img_path) 128 | continue 129 | 130 | # copy file to dest 131 | image_name = img_path.split("/")[-1] 132 | copyfile(img_path, save_dir_prefix + "JPEGImages/" + image_name) 133 | voc_xml = format_voc_string(img_path, anno_infos) 134 | anno_name = img_path.split("/")[-1].replace("png", "xml") 135 | anno_file_path = save_dir_prefix + "Annotations/" + anno_name 136 | with open(anno_file_path, "wb") as writer: 137 | writer.write(voc_xml) 138 | # print(voc_xml) 139 | # return 140 | print(label_set) 141 | 142 | if __name__ == '__main__': 143 | convert() -------------------------------------------------------------------------------- /eagle/brain/solver/ssd_solver.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2009 IW. 2 | # All rights reserved. 3 | # 4 | # Author: liuguiyang 5 | # Date: 2018/3/4 6 | 7 | from __future__ import absolute_import 8 | from __future__ import division 9 | from __future__ import print_function 10 | 11 | import sys 12 | import time 13 | from datetime import datetime 14 | 15 | import numpy as np 16 | import tensorflow as tf 17 | 18 | from eagle.brain.solver.solver import Solver 19 | 20 | 21 | class SSDSolver(Solver): 22 | def __init__(self, dataset, net, common_params, solver_params): 23 | super(SSDSolver, self).__init__( 24 | dataset, net, common_params, solver_params) 25 | 26 | # process params 27 | self.width = int(common_params['image_size']) 28 | self.height = int(common_params['image_size']) 29 | self.batch_size = int(common_params['batch_size']) 30 | 31 | self.decay = float(solver_params['decay']) 32 | self.beta_1 = float(solver_params['beta_1']) 33 | self.beta_2 = float(solver_params['beta_2']) 34 | self.epsilon = float(solver_params['epsilon']) 35 | self.learning_rate = float(solver_params['lr']) 36 | self.train_dir = str(solver_params['train_dir']) 37 | self.max_iterators = int(solver_params['max_iterators']) 38 | self.pretrain_path = str(solver_params['pretrain_model_path']) 39 | 40 | self.dataset = dataset 41 | self.net = net 42 | 43 | # construct graph 44 | self.build_model() 45 | 46 | def _train(self): 47 | opt = tf.train.AdamOptimizer( 48 | learning_rate=self.learning_rate, 49 | beta1=self.beta_1, 50 | beta2=self.beta_2, 51 | epsilon=self.epsilon) 52 | grads = opt.compute_gradients(self.total_loss) 53 | apply_gradient_op = opt.apply_gradients(grads, 54 | global_step=self.global_step) 55 | return apply_gradient_op 56 | 57 | def build_model(self): 58 | self.global_step = tf.Variable(0, trainable=False) 59 | self.images = tf.placeholder( 60 | tf.float32, 61 | shape=(self.batch_size, self.height, self.width, 3)) 62 | model_spec = self.net.inference(self.images) 63 | self.predicts = model_spec["predictions"] 64 | predict_shape = model_spec["predictions"].get_shape().as_list() 65 | boxes_num = predict_shape[1] 66 | encode_length = predict_shape[2] 67 | 68 | ''' 69 | Input Image (300, 300, 3): 70 | [32, 37, 37, 4, 8] ---> (cx, cy, w, h, variances) 71 | [32, 18, 18, 6, 8] 72 | [32, 9, 9, 6, 8] 73 | 
    [32, 5, 5, 6, 8]
    [32, 3, 3, 4, 8]
    [32, 1, 1, 4, 8]
    ==> 37^2*4 + 18^2*6 + 9^2*6 + 5^2*6 + 3^2*4 + 1^2*4 = 8096
    '''

        self.labels = tf.placeholder(
            tf.float32,
            shape=(self.batch_size, boxes_num, encode_length))

        self.total_loss = self.net.loss(y_true=self.labels,
                                        y_pred=self.predicts)

        tf.summary.scalar('loss', self.total_loss)
        self.train_op = self._train()

    def solve(self):
        saver = tf.train.Saver(max_to_keep=3)

        init = tf.global_variables_initializer()
        summary_op = tf.summary.merge_all()

        sess = tf.Session()
        sess.run(init)
        if self.pretrain_path != "None":
            saver.restore(sess, self.pretrain_path)

        summary_writer = tf.summary.FileWriter(self.train_dir, sess.graph)

        for step in range(self.max_iterators):
            start_time = time.time()
            np_images, np_labels = self.dataset.batch()

            _, loss_value = sess.run(
                [self.train_op, self.total_loss],
                feed_dict={
                    self.images: np_images,
                    self.labels: np_labels
                })

            duration = time.time() - start_time

            assert not np.isnan(loss_value), 'Model diverged with loss = NaN'

            if step % 10 == 0:
                num_examples_per_step = self.dataset.batch_size
                examples_per_sec = num_examples_per_step / duration
                sec_per_batch = float(duration)

                format_str = ('%s: step %d, loss = %.2f '
                              '(%.1f examples/sec; %.3f sec/batch)')
                print(format_str % (datetime.now(), step, loss_value,
                                    examples_per_sec, sec_per_batch))
                sys.stdout.flush()
            if step % 1000 == 0:
                summary_str = sess.run(summary_op,
                                       feed_dict={
                                           self.images: np_images,
                                           self.labels: np_labels
                                       })
                summary_writer.add_summary(summary_str, step)
            if step % 2000 == 0:
                saver.save(sess,
                           self.train_dir + '/model.ckpt', global_step=step)
        saver.save(sess, self.train_dir + '/model.ckpt', global_step=step)
        sess.close()
-------------------------------------------------------------------------------- /eagle/observe/augmentors/arithmetic.py: --------------------------------------------------------------------------------
# Copyright (c) 2009 IW.
# All rights reserved.
#
# Author: liuguiyang
# Date: 2018/2/28

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import cv2
import numpy as np
from scipy import ndimage

import eagle.utils as eu
from eagle.observe.base.meta import Augmentor
from eagle.parameter import StochasticParameter
from eagle.parameter import Deterministic, DiscreteUniform, Binomial


class Add(Augmentor):
    """
    Add a value to all pixels in an image.

    Parameters
    ----------
    value : int or iterable of two ints or StochasticParameter, optional(default=0)
        Value to add to all pixels.
        * If an int, then that value will be used for all images.
        * If a tuple (a, b), then a value from the discrete range [a .. b]
          will be used.
        * If a StochasticParameter, then a value will be sampled per image
          from that parameter.

    per_channel : bool or float, optional(default=False)
        Whether to use the same value for all channels (False)
        or to sample a new value for each channel (True).
39 | If this value is a float p, then for p percent of all images 40 | `per_channel` will be treated as True, otherwise as False. 41 | 42 | name : string, optional(default=None) 43 | See `Augmenter.__init__()` 44 | 45 | deterministic : bool, optional(default=False) 46 | See `Augmenter.__init__()` 47 | 48 | random_state : int or np.random.RandomState or None, optional(default=None) 49 | See `Augmenter.__init__()` 50 | 51 | Examples 52 | -------- 53 | >>> aug = iaa.Add(10) 54 | 55 | always adds a value of 10 to all pixels in the image. 56 | 57 | >>> aug = iaa.Add((-10, 10)) 58 | 59 | adds a value from the discrete range [-10 .. 10] to all pixels of 60 | the input images. The exact value is sampled per image. 61 | 62 | >>> aug = iaa.Add((-10, 10), per_channel=True) 63 | 64 | adds a value from the discrete range [-10 .. 10] to all pixels of 65 | the input images. The exact value is sampled per image AND channel, 66 | i.e. to a red-channel it might add 5 while subtracting 7 from the 67 | blue channel of the same image. 68 | 69 | >>> aug = iaa.Add((-10, 10), per_channel=0.5) 70 | 71 | same as previous example, but the `per_channel` feature is only active 72 | for 50 percent of all images. 73 | 74 | """ 75 | 76 | def __init__(self, value=0, per_channel=False, name=None, 77 | deterministic=False, random_state=None): 78 | super(Add, self).__init__(name=name, deterministic=deterministic, random_state=random_state) 79 | 80 | if eu.is_single_integer(value): 81 | eu.do_assert(-255 <= value <= 255, 82 | "Expected value to have range [-255, 255], got value %d." % (value,)) 83 | self.value = Deterministic(value) 84 | elif eu.is_iterable(value): 85 | eu.do_assert(len(value) == 2, 86 | "Expected tuple/list with 2 entries, got %d entries." % (len(value),)) 87 | self.value = DiscreteUniform(value[0], value[1]) 88 | elif isinstance(value, StochasticParameter): 89 | self.value = value 90 | else: 91 | raise Exception("Expected float or int, tuple/list with 2 entries or StochasticParameter. Got %s." % (type(value),)) 92 | 93 | if per_channel in [True, False, 0, 1, 0.0, 1.0]: 94 | self.per_channel = Deterministic(int(per_channel)) 95 | elif eu.is_single_number(per_channel): 96 | eu.do_assert(0 <= per_channel <= 1.0, 97 | "Expected bool, or number in range [0, 1.0] for per_channel, got %s." 
% (type(per_channel),)) 98 | self.per_channel = Binomial(per_channel) 99 | else: 100 | raise Exception("Expected per_channel to be boolean or number or StochasticParameter") 101 | 102 | def _augment_images(self, images, random_state, parents, hooks): 103 | input_dtypes = eu.copy_dtypes_for_restore(images) 104 | 105 | result = images 106 | nb_images = len(images) 107 | seeds = random_state.randint(0, 10**6, (nb_images,)) 108 | for i in range(nb_images): 109 | image = images[i].astype(np.int32) 110 | rs_image = eu.new_random_state(seeds[i]) 111 | per_channel = self.per_channel.draw_sample(random_state=rs_image) 112 | if per_channel == 1: 113 | nb_channels = image.shape[2] 114 | samples = self.value.draw_samples((nb_channels,), random_state=rs_image) 115 | for c, sample in enumerate(samples): 116 | # TODO make value range more flexible 117 | eu.do_assert(-255 <= sample <= 255) 118 | image[..., c] += sample 119 | else: 120 | sample = self.value.draw_sample(random_state=rs_image) 121 | # TODO make value range more flexible 122 | eu.do_assert(-255 <= sample <= 255) 123 | image += sample 124 | result[i] = image 125 | 126 | # TODO make value range more flexible 127 | eu.clip_augmented_images_(result, 0, 255) 128 | eu.restore_augmented_images_dtypes_(result, input_dtypes) 129 | 130 | return result 131 | 132 | def _augment_keypoints(self, keypoints_on_images, random_state, parents, hooks): 133 | return keypoints_on_images 134 | 135 | def get_parameters(self): 136 | return [self.value] 137 | -------------------------------------------------------------------------------- /datum/models/yolo/yolo_batch_dataset.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2009 IW. 2 | # All rights reserved. 3 | # 4 | # Author: liuguiyang 5 | # Date: 2018/3/5 6 | 7 | from __future__ import absolute_import 8 | from __future__ import division 9 | from __future__ import print_function 10 | 11 | import random 12 | import cv2 13 | import numpy as np 14 | from queue import Queue 15 | from threading import Thread, Lock 16 | 17 | from datum.meta.dataset import DataSet 18 | 19 | 20 | class YoloDataSet(DataSet): 21 | """TextDataSet 22 | process text input file dataset 23 | text file format: 24 | image_path xmin1 ymin1 xmax1 ymax1 class1 xmin2 ymin2 xmax2 ymax2 class2 25 | """ 26 | 27 | def __init__(self, common_params, dataset_params): 28 | super(YoloDataSet, self).__init__(common_params, dataset_params) 29 | 30 | # process params 31 | self.data_path = str(dataset_params['path']) 32 | self.width = int(common_params['image_size']) 33 | self.height = int(common_params['image_size']) 34 | self.batch_size = int(common_params['batch_size']) 35 | self.thread_num = int(dataset_params['thread_num']) 36 | self.max_objects = int(common_params['max_objects_per_image']) 37 | 38 | # record and image_label queue 39 | self.image_label_queue = Queue(maxsize=100) 40 | 41 | self.record_list = [] 42 | 43 | # filling the record_list 44 | input_file = open(self.data_path, 'r') 45 | 46 | for line in input_file: 47 | line = line.strip() 48 | if ',' in line: 49 | ss = line.split(',') 50 | else: 51 | ss = line.split(' ') 52 | ss[1:] = [float(num) for num in ss[1:]] 53 | self.record_list.append(ss) 54 | 55 | self.record_point = 0 56 | self.record_number = len(self.record_list) 57 | self.record_number_lock = Lock() 58 | 59 | for i in range(self.thread_num): 60 | t_record_producer = Thread(target=self.record_producer) 61 | t_record_producer.daemon = True 62 | t_record_producer.start() 63 | 64 | # 
for i in range(self.thread_num): 65 | # t = Thread(target=self.record_customer) 66 | # t.daemon = True 67 | # t.start() 68 | 69 | def record_producer(self): 70 | def update_shuffle(): 71 | if self.record_point % self.record_number == 0: 72 | random.shuffle(self.record_list) 73 | self.record_point = 0 74 | 75 | while True: 76 | outs = list() 77 | while len(outs) < self.batch_size: 78 | item = self.record_list[self.record_point] 79 | out = self.record_process(item) 80 | outs.append(out) 81 | self.record_number_lock.acquire() 82 | self.record_point += 1 83 | update_shuffle() 84 | self.record_number_lock.release() 85 | 86 | self.image_label_queue.put(outs) 87 | 88 | # def record_customer(self): 89 | # while True: 90 | # item = self.record_queue.get() 91 | # out = self.record_process(item) 92 | # self.image_label_queue.put(out) 93 | 94 | def record_process(self, record): 95 | """record process 96 | Args: record 97 | Returns: 98 | image: 3-D ndarray 99 | labels: 2-D list [self.max_objects, 5] (xcenter, ycenter, w, h, class_num) 100 | object_num: total object number int 101 | """ 102 | image = cv2.imread(record[0]) 103 | image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB) 104 | h = image.shape[0] 105 | w = image.shape[1] 106 | 107 | width_rate = self.width * 1.0 / w 108 | height_rate = self.height * 1.0 / h 109 | 110 | image = cv2.resize(image, (self.height, self.width)) 111 | 112 | labels = [[0, 0, 0, 0, 0]] * self.max_objects 113 | i = 1 114 | object_num = 0 115 | while i < len(record): 116 | xmin = record[i] 117 | ymin = record[i + 1] 118 | xmax = record[i + 2] 119 | ymax = record[i + 3] 120 | class_num = record[i + 4] 121 | 122 | xcenter = (xmin + xmax) * 1.0 / 2 * width_rate 123 | ycenter = (ymin + ymax) * 1.0 / 2 * height_rate 124 | 125 | box_w = (xmax - xmin) * width_rate 126 | box_h = (ymax - ymin) * height_rate 127 | 128 | labels[object_num] = [xcenter, ycenter, box_w, box_h, class_num] 129 | object_num += 1 130 | i += 5 131 | if object_num >= self.max_objects: 132 | break 133 | return [image, labels, object_num] 134 | 135 | def batch(self): 136 | """get batch 137 | Returns: 138 | images: 4-D ndarray [batch_size, height, width, 3] 139 | labels: 3-D ndarray [batch_size, max_objects, 5] 140 | objects_num: 1-D ndarray [batch_size] 141 | """ 142 | images = [] 143 | labels = [] 144 | objects_num = [] 145 | outs = self.image_label_queue.get() 146 | for i in range(self.batch_size): 147 | image, label, object_num = outs[i][:] 148 | images.append(image) 149 | labels.append(label) 150 | objects_num.append(object_num) 151 | 152 | # for i in range(self.batch_size): 153 | # image, label, object_num = self.image_label_queue.get() 154 | # images.append(image) 155 | # labels.append(label) 156 | # objects_num.append(object_num) 157 | images = np.asarray(images, dtype=np.float32) 158 | images = images / 255 * 2 - 1 159 | labels = np.asarray(labels, dtype=np.float32) 160 | objects_num = np.asarray(objects_num, dtype=np.int32) 161 | return images, labels, objects_num 162 | -------------------------------------------------------------------------------- /Others/lsd12/format_input.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2009 IW. 2 | # All rights reserved. 
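To make the label encoding in `record_process` of yolo_batch_dataset.py above concrete, here is the scaling arithmetic for a single box (all numbers are illustrative; `image_size` is assumed to be 448):

```
# Illustrative only: a 1000x500 source image resized to 448x448.
width_rate, height_rate = 448 / 1000, 448 / 500    # 0.448, 0.896
xmin, ymin, xmax, ymax, class_num = 100, 100, 300, 200, 2
xcenter = (xmin + xmax) / 2 * width_rate           # 89.6
ycenter = (ymin + ymax) / 2 * height_rate          # 134.4
box_w = (xmax - xmin) * width_rate                 # 89.6
box_h = (ymax - ymin) * height_rate                # 89.6
label = [xcenter, ycenter, box_w, box_h, class_num]
```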
3 | # 4 | # Author: liuguiyang 5 | # Date: 2018/1/3 6 | 7 | from __future__ import absolute_import 8 | from __future__ import division 9 | from __future__ import print_function 10 | 11 | import os 12 | import sys 13 | 14 | p1 = "/".join(os.path.abspath(__file__).split("/")[:-3]) 15 | sys.path.insert(0, p1) 16 | p1 = "/".join(os.path.abspath(__file__).split("/")[:-2]) 17 | sys.path.insert(0, p1) 18 | p1 = "/".join(os.path.abspath(__file__).split("/")[:-1]) 19 | sys.path.insert(0, p1) 20 | 21 | from importlib import reload 22 | reload(sys) 23 | 24 | import cv2 25 | 26 | from datum.utils import tools 27 | from Others.lsd12 import label_config as config 28 | 29 | 30 | nwpu_voc_dir = "/Volumes/projects/repos/RSI/NWPUVHR10/NWPUVOCFORMAT/" 31 | nwpu_voc_image_dir = nwpu_voc_dir + "JPEGImages/" 32 | nwpu_voc_anno_dir = nwpu_voc_dir + "Annotations/" 33 | 34 | vedia_voc_dir = "/Volumes/projects/repos/RSI/VEDAI/VEDIAVOCFORAMT/" 35 | vedia_voc_image_dir = vedia_voc_dir + "JPEGImages/" 36 | vedia_voc_anno_dir = vedia_voc_dir + "Annotations/" 37 | 38 | lsd_voc_dir = "/Volumes/projects/repos/RSI/LSD10/" 39 | lsd_voc_image_dir = lsd_voc_dir + "JPEGImages/" 40 | lsd_voc_anno_dir = lsd_voc_dir + "Annotations/" 41 | 42 | 43 | # 先确定每个原始数据集中的训练集和测试集 44 | def split_dataset(): 45 | nwpu_img_list = os.listdir(nwpu_voc_image_dir) 46 | vedia_img_list = os.listdir(vedia_voc_image_dir) 47 | test_nwpu_img_list = tools.rand_selected_file(nwpu_img_list) 48 | test_vedia_img_list = tools.rand_selected_file(vedia_img_list) 49 | with open(nwpu_voc_dir+"test.txt", "w") as test_nwpu_writer: 50 | for item in test_nwpu_img_list: 51 | test_nwpu_writer.write("{}\n".format(item)) 52 | with open(nwpu_voc_dir+"train.txt", "w") as train_nwpu_writer: 53 | for item in nwpu_img_list: 54 | if item not in test_nwpu_img_list: 55 | train_nwpu_writer.write("{}\n".format(item)) 56 | with open(vedia_voc_dir+"test.txt", "w") as test_vedia_writer: 57 | for item in test_vedia_img_list: 58 | test_vedia_writer.write("{}\n".format(item)) 59 | with open(vedia_voc_dir+"train.txt", "w") as train_vedia_writer: 60 | for item in vedia_img_list: 61 | if item not in test_vedia_img_list: 62 | train_vedia_writer.write("{}\n".format(item)) 63 | 64 | 65 | # 更新数据集中的label信息 66 | def flush_dataset(): 67 | for anno_name in os.listdir(lsd_voc_anno_dir): 68 | abs_anno_path = lsd_voc_anno_dir + anno_name 69 | print(abs_anno_path) 70 | anno_targets = tools.extract_target_from_xml(abs_anno_path) 71 | new_anno_targets = list() 72 | for anno_info in anno_targets: 73 | label_name = anno_info[-1] 74 | label_id = config.sign_idx_dict[label_name] 75 | label_name = config.idx_sign_dict[label_id] 76 | new_anno_info = anno_info[:-1] + [label_name] 77 | new_anno_targets.append(new_anno_info) 78 | src_image = cv2.imread( 79 | lsd_voc_image_dir+anno_name.replace("xml", "jpg")) 80 | xml_obj = tools.fetch_xml_format( 81 | src_image, anno_name.replace("xml", "jpg"), new_anno_targets) 82 | with open(lsd_voc_anno_dir+anno_name, "w") as writer: 83 | writer.write(xml_obj) 84 | 85 | # 获取标准的目标的label 86 | def get_true_label_name(label_name): 87 | label_id = config.sign_idx_dict[label_name] 88 | label_name = config.idx_sign_dict[label_id] 89 | return label_name 90 | 91 | 92 | # 将图像中非指定尺度数据进行标准化 93 | def format_corp_images(): 94 | for anno_name in os.listdir(lsd_voc_anno_dir): 95 | abs_anno_path = lsd_voc_anno_dir + anno_name 96 | abs_img_path = lsd_voc_image_dir + anno_name.replace("xml", "jpg") 97 | image_name = anno_name.replace("xml", "jpg") 98 | src_image = cv2.imread(abs_img_path) 99 | 
if src_image.shape == (512, 512, 3): 100 | continue 101 | 102 | h, w = src_image.shape[:2] 103 | if h <= 512 and w <= 512: 104 | continue 105 | 106 | print(abs_img_path) 107 | anno_targets = tools.extract_target_from_xml(abs_anno_path) 108 | new_anno_targets = list() 109 | for anno_info in anno_targets: 110 | label_name = get_true_label_name(anno_info[-1]) 111 | new_anno_info = anno_info[:-1] + [label_name] 112 | new_anno_targets.append(new_anno_info) 113 | crop_list, anno_list = tools.crop_samples(src_image, new_anno_targets) 114 | 115 | for i in range(len(crop_list)): 116 | x0, y0, x1, y1 = crop_list[i] 117 | # roi = im[y1:y2, x1:x2] opencv中类似NUMPY的裁剪 118 | sub_img = src_image[y0:y1, x0:x1] 119 | f_name = image_name[:-4] + "_%d_%d_%d_%d_%d.jpg" % (x0, y0, x1, y1, i) 120 | cv2.imwrite(lsd_voc_image_dir + f_name, sub_img, [int(cv2.IMWRITE_JPEG_QUALITY), 100]) 121 | a_name = image_name[:-4]+ "_%d_%d_%d_%d_%d.xml" % (x0, y0, x1, y1, i) 122 | xml_obj = tools.fetch_xml_format(src_image, f_name, anno_list[i], "LSD12") 123 | with open(lsd_voc_anno_dir + a_name, "w") as writer: 124 | writer.write(xml_obj) 125 | 126 | os.remove(abs_img_path) 127 | os.remove(abs_anno_path) 128 | 129 | # 根据图像文件列表,对数据集进行切分 130 | from sklearn.model_selection import train_test_split 131 | 132 | def split_train_valid_test(): 133 | save_dir = "/Volumes/projects/repos/RSI/LSD10/" 134 | file_path = save_dir + "total.txt" 135 | image_list = list() 136 | with open(file_path, "r") as h: 137 | for line in h: 138 | line = line.strip() 139 | image_list.append(line) 140 | X_train, X_test = train_test_split(image_list, test_size=0.3, random_state=42) 141 | print(len(X_train), len(X_test)) 142 | X_train, X_valid = train_test_split(X_train, test_size=0.2, random_state=42) 143 | print(len(X_train), len(X_valid)) 144 | with open(save_dir+"train.txt", "w") as h1: 145 | for line in X_train: 146 | h1.write("{}\n".format(line)) 147 | with open(save_dir+"valid.txt", "w") as h2: 148 | for line in X_valid: 149 | h2.write("{}\n".format(line)) 150 | with open(save_dir+"test.txt", "w") as h3: 151 | for line in X_test: 152 | h3.write("{}\n".format(line)) 153 | 154 | 155 | if __name__ == '__main__': 156 | split_train_valid_test() 157 | # Others.show_targets(lsd_voc_image_dir, lsd_voc_anno_dir) 158 | pass 159 | -------------------------------------------------------------------------------- /eagle/brain/ssd/models/net.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2009 IW. 2 | # All rights reserved. 3 | # 4 | # Author: liuguiyang 5 | # Date: 2018/3/8 6 | 7 | from __future__ import absolute_import 8 | from __future__ import division 9 | from __future__ import print_function 10 | 11 | import tensorflow as tf 12 | 13 | 14 | class Net(object): 15 | def __init__(self, common_params, net_params): 16 | if not isinstance(common_params, dict): 17 | raise TypeError("common_params must be dict") 18 | if not isinstance(net_params, dict): 19 | raise TypeError("net_params must be dict") 20 | 21 | # pretrained variable collection 22 | self.pretrained_collection = [] 23 | # trainable variable collection 24 | self.trainable_collection = [] 25 | 26 | def _variable_on_cpu(self, name, shape, initializer, pretrain=True, 27 | train=True): 28 | """Helper to create a Variable stored on CPU memory. 
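Stepping back to `split_train_valid_test` in format_input.py above: the two-stage split composes to fixed overall fractions, which a few lines verify (the list size is illustrative):

```
# 30% is held out for test, then 20% of the remainder for validation:
n = 1000
n_test = int(n * 0.3)               # 300
n_valid = int((n - n_test) * 0.2)   # 140
n_train = n - n_test - n_valid      # 560 -> roughly 56% / 14% / 30% overall
```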
29 | 30 | Args: 31 | name: name of the Variable 32 | shape: list of ints 33 | initializer: initializer of Variable 34 | 35 | Returns: 36 | Variable Tensor 37 | """ 38 | # with tf.device('/cpu:0'): 39 | # var = tf.get_variable(name, shape, initializer=initializer, 40 | # dtype=tf.float32) 41 | # if pretrain: 42 | # self.pretrained_collection.append(var) 43 | # if train: 44 | # self.trainable_collection.append(var) 45 | 46 | var = tf.get_variable(name, shape, initializer=initializer, 47 | dtype=tf.float32) 48 | if pretrain: 49 | self.pretrained_collection.append(var) 50 | if train: 51 | self.trainable_collection.append(var) 52 | return var 53 | 54 | def _variable_with_weight_decay(self, name, shape, stddev, wd, 55 | pretrain=True, train=True): 56 | """Helper to create an initialized Variable with weight decay. 57 | 58 | Note that the Variable is initialized with truncated normal distribution 59 | A weight decay is added only if one is specified. 60 | 61 | Args: 62 | name: name of the variable 63 | shape: list of ints 64 | stddev: standard devision of a truncated Gaussian 65 | wd: add L2Loss weight decay multiplied by this float. If None, weight 66 | decay is not added for this Variable. 67 | 68 | Returns: 69 | Variable Tensor 70 | """ 71 | var = self._variable_on_cpu(name, shape, 72 | tf.truncated_normal_initializer( 73 | stddev=stddev, dtype=tf.float32), 74 | pretrain, train) 75 | if wd is not None: 76 | weight_decay = tf.multiply(tf.nn.l2_loss(var), wd, 77 | name='weight_loss') 78 | tf.add_to_collection('losses', weight_decay) 79 | return var 80 | 81 | def conv2d(self, scope, input, kernel_size, stride=1, pretrain=True, 82 | train=True): 83 | """convolutional layer 84 | 85 | Args: 86 | input: 4-D tensor [batch_size, height, width, depth] 87 | scope: variable_scope name 88 | kernel_size: [k_height, k_width, in_channel, out_channel] 89 | stride: int32 90 | Return: 91 | output: 4-D tensor [batch_size, height/stride, width/stride, out_channels] 92 | """ 93 | with tf.variable_scope(scope) as scope: 94 | kernel = self._variable_with_weight_decay('weights', 95 | shape=kernel_size, 96 | stddev=5e-2, 97 | wd=self.weight_decay, 98 | pretrain=pretrain, 99 | train=train) 100 | conv = tf.nn.conv2d(input, kernel, [1, stride, stride, 1], 101 | padding='SAME') 102 | biases = self._variable_on_cpu('biases', kernel_size[3:], 103 | tf.constant_initializer(0.0), 104 | pretrain, train) 105 | bias = tf.nn.bias_add(conv, biases) 106 | conv1 = self.leaky_relu(bias) 107 | 108 | return conv1 109 | 110 | def max_pool(self, input, kernel_size, stride): 111 | """max_pool layer 112 | 113 | Args: 114 | input: 4-D tensor [batch_zie, height, width, depth] 115 | kernel_size: [k_height, k_width] 116 | stride: int32 117 | Return: 118 | output: 4-D tensor [batch_size, height/stride, width/stride, depth] 119 | """ 120 | return tf.nn.max_pool(input, 121 | ksize=[1, kernel_size[0], kernel_size[1], 1], 122 | strides=[1, stride, stride, 1], 123 | padding='SAME') 124 | 125 | def local(self, scope, input, in_dimension, out_dimension, leaky=True, 126 | pretrain=True, train=True): 127 | """Fully connection layer 128 | 129 | Args: 130 | scope: variable_scope name 131 | input: [batch_size, ???] 
132 | out_dimension: int32 133 | Return: 134 | output: 2-D tensor [batch_size, out_dimension] 135 | """ 136 | with tf.variable_scope(scope) as scope: 137 | reshape = tf.reshape(input, [tf.shape(input)[0], -1]) 138 | 139 | weights = self._variable_with_weight_decay('weights', 140 | shape=[in_dimension, 141 | out_dimension], 142 | stddev=0.04, 143 | wd=self.weight_decay, 144 | pretrain=pretrain, 145 | train=train) 146 | biases = self._variable_on_cpu('biases', [out_dimension], 147 | tf.constant_initializer(0.0), 148 | pretrain, train) 149 | local = tf.matmul(reshape, weights) + biases 150 | 151 | if leaky: 152 | local = self.leaky_relu(local) 153 | else: 154 | local = tf.identity(local, name=scope.name) 155 | 156 | return local 157 | 158 | def leaky_relu(self, x, alpha=0.1, dtype=tf.float32): 159 | """leaky relu 160 | if x > 0: 161 | return x 162 | else: 163 | return alpha * x 164 | Args: 165 | x : Tensor 166 | alpha: float 167 | Return: 168 | y : Tensor 169 | """ 170 | x = tf.cast(x, dtype=dtype) 171 | bool_mask = (x > 0) 172 | mask = tf.cast(bool_mask, dtype=dtype) 173 | return 1.0 * mask * x + alpha * (1 - mask) * x 174 | 175 | def inference(self, images): 176 | """Build the yolo model 177 | 178 | Args: 179 | images: 4-D tensor [batch_size, image_height, image_width, channels] 180 | Returns: 181 | predicts: 4-D tensor [batch_size, cell_size, cell_size, num_classes + 5 * boxes_per_cell] 182 | """ 183 | raise NotImplementedError 184 | 185 | def loss(self, y_true, y_pred): 186 | raise NotImplementedError 187 | -------------------------------------------------------------------------------- /eagle/brain/solver/yolo_u_solver.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2009 IW. 2 | # All rights reserved. 3 | # 4 | # Author: liuguiyang 5 | # Date: 2018/3/4 6 | 7 | from __future__ import absolute_import 8 | from __future__ import division 9 | from __future__ import print_function 10 | 11 | import sys 12 | import time 13 | from datetime import datetime 14 | 15 | import numpy as np 16 | import tensorflow as tf 17 | 18 | from eagle.brain.solver.solver import Solver 19 | 20 | 21 | class YoloUSolver(Solver): 22 | def __init__(self, dataset, net, common_params, solver_params): 23 | super(YoloUSolver, self).__init__(dataset, net, common_params, solver_params) 24 | 25 | # process params 26 | self.width = int(common_params['image_size']) 27 | self.height = int(common_params['image_size']) 28 | self.batch_size = int(common_params['batch_size']) 29 | self.max_objects = int(common_params['max_objects_per_image']) 30 | 31 | self.moment = float(solver_params['moment']) 32 | self.learning_rate = float(solver_params['lr']) 33 | self.train_dir = str(solver_params['train_dir']) 34 | self.max_iterators = int(solver_params['max_iterators']) 35 | self.pretrain_path = str(solver_params['pretrain_model_path']) 36 | 37 | self.dataset = dataset 38 | self.net = net 39 | 40 | # construct graph 41 | self.construct_graph() 42 | 43 | def _train(self): 44 | """Train model 45 | 46 | Create an optimizer and apply to all trainable variables. 
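The mask-based `leaky_relu` above is equivalent to `max(x, alpha * x)` for positive `alpha`; a standalone NumPy check:

```
import numpy as np

x = np.array([-2.0, 0.0, 3.0], dtype=np.float32)
mask = (x > 0).astype(np.float32)                # same mask trick as leaky_relu
y = 1.0 * mask * x + 0.1 * (1 - mask) * x        # alpha = 0.1
assert np.allclose(y, np.maximum(x, 0.1 * x))    # [-0.2, 0.0, 3.0]
```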
47 | 48 | Args: 49 | total_loss: Total loss from net.loss() 50 | global_step: Integer Variable counting the number of training steps 51 | processed 52 | Returns: 53 | train_op: op for training 54 | """ 55 | 56 | opt = tf.train.MomentumOptimizer(self.learning_rate, self.moment) 57 | grads = opt.compute_gradients(self.total_loss) 58 | 59 | apply_gradient_op = opt.apply_gradients(grads, 60 | global_step=self.global_step) 61 | 62 | return apply_gradient_op 63 | 64 | def construct_graph(self): 65 | # construct graph 66 | self.global_step = tf.Variable(0, trainable=False) 67 | self.images = tf.placeholder(tf.float32, ( 68 | self.batch_size, self.height, self.width, 3)) 69 | self.labels = tf.placeholder(tf.float32, 70 | (self.batch_size, self.max_objects, 5)) 71 | self.objects_num = tf.placeholder(tf.int32, (self.batch_size)) 72 | 73 | self.predicts = self.net.inference(self.images) 74 | 75 | self.net.set_cell_size(grid_size=9) 76 | total_loss_g9, nilboy_g9 = self.net.loss( 77 | self.predicts["predicts_g9"], self.labels, self.objects_num) 78 | self.net.set_cell_size(grid_size=15) 79 | total_loss_g15, nilboy_g15 = self.net.loss( 80 | self.predicts["predicts_g15"], self.labels, self.objects_num) 81 | 82 | # self.nilboy_g9 = nilboy_g9 83 | # self.nilboy_g15 = nilboy_g15 84 | self.total_loss = 0.5 * (total_loss_g9 + total_loss_g15) 85 | tf.summary.scalar('loss', self.total_loss) 86 | self.train_op = self._train() 87 | 88 | def solve(self): 89 | saver_pretrain = tf.train.Saver(max_to_keep=3) 90 | saver_train = tf.train.Saver(max_to_keep=3) 91 | 92 | init = tf.global_variables_initializer() 93 | 94 | summary_op = tf.summary.merge_all() 95 | 96 | sess = tf.Session() 97 | 98 | sess.run(init) 99 | if self.pretrain_path != "None": 100 | saver_pretrain.restore(sess, self.pretrain_path) 101 | 102 | summary_writer = tf.summary.FileWriter(self.train_dir, sess.graph) 103 | 104 | for step in range(self.max_iterators): 105 | start_time = time.time() 106 | np_images, np_labels, np_objects_num = self.dataset.batch() 107 | 108 | _, loss_value = sess.run( 109 | [self.train_op, self.total_loss], 110 | feed_dict={ 111 | self.images: np_images, 112 | self.labels: np_labels, 113 | self.objects_num: np_objects_num 114 | }) 115 | 116 | duration = time.time() - start_time 117 | 118 | assert not np.isnan(loss_value), 'Model diverged with loss = NaN' 119 | 120 | if step % 1 == 0: 121 | num_examples_per_step = self.dataset.batch_size 122 | examples_per_sec = num_examples_per_step / duration 123 | sec_per_batch = float(duration) 124 | 125 | format_str = ('%s: step %d, loss = %.2f ' 126 | '(%.1f examples/sec; %.3f sec/batch)') 127 | print(format_str % (datetime.now(), step, loss_value, 128 | examples_per_sec, sec_per_batch)) 129 | sys.stdout.flush() 130 | if step % 1000 == 0: 131 | summary_str = sess.run( 132 | summary_op, 133 | feed_dict={ 134 | self.images: np_images, 135 | self.labels: np_labels, 136 | self.objects_num: np_objects_num 137 | }) 138 | summary_writer.add_summary(summary_str, step) 139 | if step % 5000 == 0: 140 | saver_train.save(sess, 141 | self.train_dir + '/model.ckpt') 142 | sess.close() 143 | 144 | def process_predicts(self, predicts, cell_size): 145 | p_classes = predicts[0, :, :, 0:1] 146 | C = predicts[0, :, :, 1:3] 147 | coordinate = predicts[0, :, :, 3:] 148 | 149 | p_classes = np.reshape(p_classes, (cell_size, cell_size, 1, 1)) 150 | C = np.reshape(C, (cell_size, cell_size, 2, 1)) 151 | 152 | P = C * p_classes 153 | 154 | # print P[5,1, 0, :] 155 | 156 | index = np.argmax(P) 157 | index = 
np.unravel_index(index, P.shape) 158 | class_num = index[3] 159 | 160 | coordinate = np.reshape(coordinate, (cell_size, cell_size, 2, 4)) 161 | max_coordinate = coordinate[index[0], index[1], index[2], :] 162 | 163 | xcenter = max_coordinate[0] 164 | ycenter = max_coordinate[1] 165 | w = max_coordinate[2] 166 | h = max_coordinate[3] 167 | 168 | xcenter = (index[1] + xcenter) * (self.width / cell_size) 169 | ycenter = (index[0] + ycenter) * (self.height / cell_size) 170 | 171 | w = w * self.width 172 | h = h * self.height 173 | 174 | xmin = xcenter - w / 2.0 175 | ymin = ycenter - h / 2.0 176 | 177 | xmax = xmin + w 178 | ymax = ymin + h 179 | 180 | xmin = max(0, xmin) 181 | xmax = max(0, xmax) 182 | return xmin, ymin, xmax, ymax, class_num 183 | 184 | def model_predict(self, single_image): 185 | saver_pretrain = tf.train.Saver(max_to_keep=3) 186 | 187 | init = tf.global_variables_initializer() 188 | sess = tf.Session() 189 | sess.run(init) 190 | 191 | if self.pretrain_path != "None": 192 | saver_pretrain.restore(sess, self.pretrain_path) 193 | 194 | start_time = time.time() 195 | 196 | predics_info = sess.run( 197 | self.predicts, 198 | feed_dict={ 199 | self.images: single_image 200 | }) 201 | 202 | duration = time.time() - start_time 203 | 204 | xmin, ymin, xmax, ymax, class_num = self.process_predicts( 205 | predics_info["predicts_g9"], cell_size=9) 206 | # xmin, ymin, xmax, ymax, class_num = self.process_predicts( 207 | # predics_info["predicts_g15"]) 208 | sess.close() 209 | 210 | return (xmin, ymin, xmax, ymax, class_num) 211 | -------------------------------------------------------------------------------- /eagle/brain/yolo/net.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2009 IW. 2 | # All rights reserved. 3 | # 4 | # Author: liuguiyang 5 | # Date: 2018/3/1 6 | 7 | from __future__ import absolute_import 8 | from __future__ import division 9 | from __future__ import print_function 10 | 11 | import tensorflow as tf 12 | 13 | 14 | class Net(object): 15 | """Base Net class 16 | """ 17 | 18 | def __init__(self, common_params, net_params): 19 | if not isinstance(common_params, dict): 20 | raise TypeError("common_params must be dict") 21 | if not isinstance(net_params, dict): 22 | raise TypeError("net_params must be dict") 23 | 24 | # pretrained variable collection 25 | self.pretrained_collection = [] 26 | # trainable variable collection 27 | self.trainable_collection = [] 28 | 29 | def _variable_on_cpu(self, name, shape, initializer, pretrain=True, 30 | train=True): 31 | """Helper to create a Variable stored on CPU memory. 32 | 33 | Args: 34 | name: name of the Variable 35 | shape: list of ints 36 | initializer: initializer of Variable 37 | 38 | Returns: 39 | Variable Tensor 40 | """ 41 | # with tf.device('/cpu:0'): 42 | # var = tf.get_variable(name, shape, initializer=initializer, 43 | # dtype=tf.float32) 44 | # if pretrain: 45 | # self.pretrained_collection.append(var) 46 | # if train: 47 | # self.trainable_collection.append(var) 48 | 49 | var = tf.get_variable(name, shape, initializer=initializer, 50 | dtype=tf.float32) 51 | if pretrain: 52 | self.pretrained_collection.append(var) 53 | if train: 54 | self.trainable_collection.append(var) 55 | return var 56 | 57 | def _variable_with_weight_decay(self, name, shape, stddev, wd, 58 | pretrain=True, train=True): 59 | """Helper to create an initialized Variable with weight decay. 
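For the decoding done in `process_predicts` of yolo_u_solver.py above, the cell-relative center is shifted by the cell index and scaled by the cell size; with illustrative numbers (448x448 input, `cell_size=9`):

```
# Illustrative decode: argmax landed in cell (row=2, col=4) of the 9x9 grid.
width = height = 448
cell = width / 9                          # ~49.78 px per cell
row, col = 2, 4                           # index[0], index[1]
x_rel, y_rel, w_rel, h_rel = 0.5, 0.5, 0.2, 0.3
xcenter = (col + x_rel) * cell            # 224.0
ycenter = (row + y_rel) * cell            # ~124.4
w, h = w_rel * width, h_rel * height      # 89.6, 134.4
xmin, ymin = xcenter - w / 2, ycenter - h / 2
```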
60 | 61 | Note that the Variable is initialized with truncated normal distribution 62 | A weight decay is added only if one is specified. 63 | 64 | Args: 65 | name: name of the variable 66 | shape: list of ints 67 | stddev: standard devision of a truncated Gaussian 68 | wd: add L2Loss weight decay multiplied by this float. If None, weight 69 | decay is not added for this Variable. 70 | 71 | Returns: 72 | Variable Tensor 73 | """ 74 | var = self._variable_on_cpu(name, shape, 75 | tf.truncated_normal_initializer( 76 | stddev=stddev, dtype=tf.float32), 77 | pretrain, train) 78 | if wd is not None: 79 | weight_decay = tf.multiply(tf.nn.l2_loss(var), wd, 80 | name='weight_loss') 81 | tf.add_to_collection('losses', weight_decay) 82 | return var 83 | 84 | def conv2d(self, scope, input, kernel_size, stride=1, pretrain=True, 85 | train=True): 86 | """convolutional layer 87 | 88 | Args: 89 | input: 4-D tensor [batch_size, height, width, depth] 90 | scope: variable_scope name 91 | kernel_size: [k_height, k_width, in_channel, out_channel] 92 | stride: int32 93 | Return: 94 | output: 4-D tensor [batch_size, height/stride, width/stride, out_channels] 95 | """ 96 | with tf.variable_scope(scope) as scope: 97 | kernel = self._variable_with_weight_decay('weights', 98 | shape=kernel_size, 99 | stddev=5e-2, 100 | wd=self.weight_decay, 101 | pretrain=pretrain, 102 | train=train) 103 | conv = tf.nn.conv2d(input, kernel, [1, stride, stride, 1], 104 | padding='SAME') 105 | biases = self._variable_on_cpu('biases', kernel_size[3:], 106 | tf.constant_initializer(0.0), 107 | pretrain, train) 108 | bias = tf.nn.bias_add(conv, biases) 109 | conv1 = self.leaky_relu(bias) 110 | 111 | return conv1 112 | 113 | def max_pool(self, input, kernel_size, stride): 114 | """max_pool layer 115 | 116 | Args: 117 | input: 4-D tensor [batch_zie, height, width, depth] 118 | kernel_size: [k_height, k_width] 119 | stride: int32 120 | Return: 121 | output: 4-D tensor [batch_size, height/stride, width/stride, depth] 122 | """ 123 | return tf.nn.max_pool(input, 124 | ksize=[1, kernel_size[0], kernel_size[1], 1], 125 | strides=[1, stride, stride, 1], 126 | padding='SAME') 127 | 128 | def local(self, scope, input, in_dimension, out_dimension, leaky=True, 129 | pretrain=True, train=True): 130 | """Fully connection layer 131 | 132 | Args: 133 | scope: variable_scope name 134 | input: [batch_size, ???] 
135 | out_dimension: int32 136 | Return: 137 | output: 2-D tensor [batch_size, out_dimension] 138 | """ 139 | with tf.variable_scope(scope) as scope: 140 | reshape = tf.reshape(input, [tf.shape(input)[0], -1]) 141 | 142 | weights = self._variable_with_weight_decay('weights', 143 | shape=[in_dimension, 144 | out_dimension], 145 | stddev=0.04, 146 | wd=self.weight_decay, 147 | pretrain=pretrain, 148 | train=train) 149 | biases = self._variable_on_cpu('biases', [out_dimension], 150 | tf.constant_initializer(0.0), 151 | pretrain, train) 152 | local = tf.matmul(reshape, weights) + biases 153 | 154 | if leaky: 155 | local = self.leaky_relu(local) 156 | else: 157 | local = tf.identity(local, name=scope.name) 158 | 159 | return local 160 | 161 | def leaky_relu(self, x, alpha=0.1, dtype=tf.float32): 162 | """leaky relu 163 | if x > 0: 164 | return x 165 | else: 166 | return alpha * x 167 | Args: 168 | x : Tensor 169 | alpha: float 170 | Return: 171 | y : Tensor 172 | """ 173 | x = tf.cast(x, dtype=dtype) 174 | bool_mask = (x > 0) 175 | mask = tf.cast(bool_mask, dtype=dtype) 176 | return 1.0 * mask * x + alpha * (1 - mask) * x 177 | 178 | def inference(self, images): 179 | """Build the yolo model 180 | 181 | Args: 182 | images: 4-D tensor [batch_size, image_height, image_width, channels] 183 | Returns: 184 | predicts: 4-D tensor [batch_size, cell_size, cell_size, num_classes + 5 * boxes_per_cell] 185 | """ 186 | raise NotImplementedError 187 | 188 | def loss(self, predicts, labels, objects_num): 189 | """Add Loss to all the trainable variables 190 | 191 | Args: 192 | predicts: 4-D tensor [batch_size, cell_size, cell_size, 5 * boxes_per_cell] 193 | ===> (num_classes, boxes_per_cell, 4 * boxes_per_cell) 194 | labels : 3-D tensor of [batch_size, max_objects, 5] 195 | objects_num: 1-D tensor [batch_size] 196 | """ 197 | raise NotImplementedError 198 | -------------------------------------------------------------------------------- /datum/utils/tools.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2009 IW. 2 | # All rights reserved. 
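Both `Net` base classes leave `inference` and `loss` abstract, and their `conv2d`/`local` helpers read `self.weight_decay` even though `__init__` never sets it, so a subclass has to define it. A minimal hypothetical subclass (the class name and the `weight_decay` config key are illustrative, not part of the repo):

```
import tensorflow as tf

class TinyNet(Net):  # hypothetical subclass, for illustration only
    def __init__(self, common_params, net_params):
        super(TinyNet, self).__init__(common_params, net_params)
        # conv2d()/local() reference self.weight_decay, so it must be set here
        self.weight_decay = float(net_params['weight_decay'])

    def inference(self, images):
        conv1 = self.conv2d('conv1', images, kernel_size=[3, 3, 3, 16])
        return self.max_pool(conv1, kernel_size=[2, 2], stride=2)
```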
3 | # 4 | # Author: liuguiyang 5 | # Date: 2018/1/3 6 | 7 | from __future__ import absolute_import 8 | from __future__ import division 9 | from __future__ import print_function 10 | 11 | import os 12 | import re 13 | import cv2 14 | 15 | import numpy as np 16 | import xml.dom.minidom 17 | import random 18 | from lxml.etree import Element, SubElement, tostring 19 | 20 | # Extract the annotation data that corresponds to an image (note: the is_label_text=True branch relies on a `config` label map, e.g. a label_config module, which is not imported in this module) 21 | def fetch_anno_targets_info(abs_anno_path, is_label_text=False): 22 | if not os.path.exists(abs_anno_path): 23 | raise IOError("No such annotation file!") 24 | with open(abs_anno_path, "r") as anno_reader: 25 | total_annos = list() 26 | for line in anno_reader: 27 | line = line.strip() 28 | sub_anno = re.split("\(|\,|\)", line) 29 | a = [int(item) for item in sub_anno if len(item)] 30 | if len(a) == 5: 31 | if is_label_text: 32 | total_annos.append(a[:4]+[config.idx_sign_dict[a[-1]]]) 33 | else: 34 | total_annos.append(a) 35 | return total_annos 36 | 37 | def fetch_xml_format(src_img_data, f_name, anno_list, dataset): 38 | img_height, img_width, img_channle = src_img_data.shape 39 | 40 | node_root = Element('annotation') 41 | node_folder = SubElement(node_root, 'folder') 42 | node_folder.text = dataset 43 | node_filename = SubElement(node_root, 'filename') 44 | node_filename.text = f_name 45 | 46 | node_size = SubElement(node_root, 'size') 47 | node_width = SubElement(node_size, 'width') 48 | node_width.text = str(img_width) 49 | node_height = SubElement(node_size, 'height') 50 | node_height.text = str(img_height) 51 | node_depth = SubElement(node_size, 'depth') 52 | node_depth.text = str(img_channle) 53 | 54 | for anno_target in anno_list: 55 | node_object = SubElement(node_root, 'object') 56 | node_name = SubElement(node_object, 'name') 57 | node_name.text = anno_target[-1] 58 | node_difficult = SubElement(node_object, 'difficult') 59 | node_difficult.text = '0' 60 | node_bndbox = SubElement(node_object, 'bndbox') 61 | node_xmin = SubElement(node_bndbox, 'xmin') 62 | node_xmin.text = str(1 if anno_target[0]<0 else anno_target[0]) 63 | node_ymin = SubElement(node_bndbox, 'ymin') 64 | node_ymin.text = str(1 if anno_target[1]<0 else anno_target[1]) 65 | node_xmax = SubElement(node_bndbox, 'xmax') 66 | node_xmax.text = str(img_width-1 if anno_target[2]>=img_width else anno_target[2]) 67 | node_ymax = SubElement(node_bndbox, 'ymax') 68 | node_ymax.text = str(img_height-1 if anno_target[3]>=img_height else anno_target[3]) 69 | xml_obj = tostring(node_root, pretty_print=True) 70 | xml_obj = xml_obj.decode("utf8") 71 | return xml_obj 72 | 73 | # Given an annotation file, locate the position of every target it describes 74 | def extract_target_from_xml(filename): 75 | if not os.path.exists(filename): 76 | raise IOError(filename + " does not exist!") 77 | # Open the XML document with the minidom parser 78 | DOMTree = xml.dom.minidom.parse(filename) 79 | collection = DOMTree.documentElement 80 | # Fetch all the targets in the collection 81 | targets = collection.getElementsByTagName("object") 82 | res = [] 83 | for target in targets: 84 | target_name = target.getElementsByTagName('name')[0].childNodes[0].data 85 | bndbox = target.getElementsByTagName("bndbox")[0] 86 | xmin = bndbox.getElementsByTagName("xmin")[0].childNodes[0].data 87 | ymin = bndbox.getElementsByTagName("ymin")[0].childNodes[0].data 88 | xmax = bndbox.getElementsByTagName("xmax")[0].childNodes[0].data 89 | ymax = bndbox.getElementsByTagName("ymax")[0].childNodes[0].data 90 | res.append([int(xmin), int(ymin), int(xmax), int(ymax), target_name]) 91 | return res 92 | 93 | # Display the multiple targets annotated in the raw data 94 | def show_targets(img_dir, anno_dir): 95 | for img_name in 
os.listdir(img_dir): 96 | if img_name.startswith("._"): 97 | continue 98 | abs_img_path = img_dir+img_name 99 | abs_anno_path = anno_dir+img_name.replace("jpg", "xml") 100 | target_annos = extract_target_from_xml(abs_anno_path) 101 | image = cv2.imread(abs_img_path) 102 | for target_info in target_annos: 103 | xmin, ymin, xmax, ymax = target_info[:4] 104 | cv2.rectangle(image, (xmin, ymin), (xmax, ymax), (255, 0, 0), 2) 105 | cv2.imshow("src", image) 106 | cv2.waitKey() 107 | 108 | # 采用蓄水池采样算法对序列进行采样 109 | def rand_selected_file(file_list, K_ratio=2/7): 110 | K = int(len(file_list) * K_ratio) 111 | res = list() 112 | for i in range(0, len(file_list)): 113 | if i < K: 114 | res.append(file_list[i]) 115 | else: 116 | M = random.randint(0, i) 117 | if M < K: 118 | res[M] = file_list[i] 119 | return res 120 | 121 | def calc_rgb_mean(): 122 | r_list, g_list, b_list = list(), list(), list() 123 | with open("/Volumes/projects/repos/RSI/LSD10/total.txt", "r") as reader: 124 | for line in reader.readlines(): 125 | line = line.strip() 126 | src_img = cv2.imread(line) 127 | b, g, r = cv2.split(src_img) 128 | b_list.append(np.mean(b)) 129 | g_list.append(np.mean(g)) 130 | r_list.append(np.mean(r)) 131 | print(np.mean(r_list)) 132 | print(np.mean(g_list)) 133 | print(np.mean(b_list)) 134 | """ 135 | 104.480289006 136 | 107.307103097 137 | 95.8043901467 138 | """ 139 | 140 | # 从样本中裁剪出制定的大小的候选样本,这其中必须要包含相应的目标 141 | def crop_samples(src_image, anno_targets, SSD_IMG_W=512, SSD_IMG_H=512): 142 | 143 | def _crop_valid(area, anno_targets): 144 | anno_res = [] 145 | for info in anno_targets: 146 | if ((info[0] >= area[0] and info[1] >= area[1]) and 147 | (info[2] <= area[2] and info[3] <= area[3])): 148 | anno_res.append( 149 | [info[0] - area[0], info[1] - area[1], 150 | info[2] - area[0], info[3] - area[1], 151 | info[-1]]) 152 | if (info[0] >= area[0] and info[1] >= area[1] and 153 | info[0] < area[2] and info[1] < area[3] and 154 | (not (info[2] <= area[2] and info[3] <= area[3]))): 155 | base = (info[2] - info[0]) * (info[3] - info[1]) 156 | x_max_min = min(info[2], area[2]) 157 | y_max_min = min(info[3], area[3]) 158 | new_square = (x_max_min - info[0]) * (y_max_min - info[1]) 159 | if new_square / base >= 0.8: 160 | anno_res.append( 161 | [info[0] - area[0], info[1] - area[1], 162 | x_max_min - area[0], y_max_min - area[1], 163 | info[-1]]) 164 | return anno_res 165 | 166 | def _random_crop_for_target(): 167 | img_height, img_width = src_image.shape[:2] 168 | crop_list, anno_list = [], [] 169 | for idx in range(0, len(anno_targets)): 170 | c_x = (anno_targets[idx][0] + anno_targets[idx][2]) // 2 171 | c_y = (anno_targets[idx][1] + anno_targets[idx][3]) // 2 172 | 173 | u_x = random.randint(max(0, c_x - SSD_IMG_W // 2), anno_targets[idx][0]) 174 | u_y = random.randint(max(0, c_y - SSD_IMG_H // 2), anno_targets[idx][1]) 175 | 176 | area = [u_x, u_y, u_x + SSD_IMG_W, u_y + SSD_IMG_H] 177 | # 检测当前的候选框中是否包含了目标,并算出目标在给定图像的位置 178 | trans_targets = _crop_valid(area, anno_targets) 179 | if trans_targets: 180 | crop_list.append(area) 181 | anno_list.append(trans_targets) 182 | return crop_list, anno_list 183 | 184 | def _align_crop_for_target(): 185 | h, w = src_image.shape[:2] 186 | crop_list, anno_list = [], [] 187 | for lx in range(0, max(1, w-SSD_IMG_W+1), SSD_IMG_W//5): 188 | for ly in range(0, max(1, h-SSD_IMG_H+1), SSD_IMG_H//5): 189 | u_x, u_y = lx, ly 190 | # if lx + SSD_IMG_W > w: 191 | # u_x = w - SSD_IMG_W 192 | # if ly + SSD_IMG_H > h: 193 | # u_y = h - SSD_IMG_H 194 | area = [u_x, u_y, u_x + 
SSD_IMG_W, u_y + SSD_IMG_H] 195 | trans_targets = list() 196 | trans_targets = _crop_valid(area, anno_targets) 197 | if trans_targets: 198 | crop_list.append(area) 199 | anno_list.append(trans_targets) 200 | return crop_list, anno_list 201 | 202 | crop_list, anno_list = _align_crop_for_target() 203 | return crop_list, anno_list 204 | 205 | 206 | if __name__ == '__main__': 207 | a = fetch_anno_targets_info( 208 | "/Volumes/projects/repos/RSI/NWPUVHR10/sub_annotation/001.txt") 209 | print(a) -------------------------------------------------------------------------------- /eagle/observe/augmentors/blur.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2009 IW. 2 | # All rights reserved. 3 | # 4 | # Author: liuguiyang 5 | # Date: 2018/2/28 6 | 7 | from __future__ import absolute_import 8 | from __future__ import division 9 | from __future__ import print_function 10 | 11 | 12 | import cv2 13 | import numpy as np 14 | from scipy import ndimage 15 | 16 | import eagle.utils as eu 17 | from eagle.observe.base.meta import Augmentor 18 | from eagle.parameter import StochasticParameter 19 | from eagle.parameter import Deterministic, DiscreteUniform, Uniform 20 | 21 | 22 | class GaussianBlur(Augmentor): 23 | """ 24 | Augmenter to blur images using gaussian kernels. 25 | 26 | Examples 27 | -------- 28 | >>> aug = iaa.GaussianBlur(sigma=1.5) 29 | 30 | blurs all images using a gaussian kernel with standard deviation 1.5. 31 | 32 | >>> aug = iaa.GaussianBlur(sigma=(0.0, 3.0)) 33 | 34 | blurs images using a gaussian kernel with a random standard deviation 35 | from the range 0.0 <= x <= 3.0. The value is sampled per image. 36 | """ 37 | 38 | def __init__(self, sigma=0, name=None, deterministic=False, random_state=None): 39 | super(GaussianBlur, self).__init__( 40 | name=name, deterministic=deterministic, random_state=random_state) 41 | 42 | if eu.is_single_number(sigma): 43 | self.sigma = Deterministic(sigma) 44 | elif eu.is_iterable(sigma): 45 | eu.do_assert(len(sigma) == 2, 46 | "Expected tuple/list with 2 entries, got %d entries." % (len(sigma),)) 47 | self.sigma = Uniform(sigma[0], sigma[1]) 48 | elif isinstance(sigma, StochasticParameter): 49 | self.sigma = sigma 50 | else: 51 | raise Exception("Expected float, int, tuple/list with 2 entries or StochasticParameter. Got %s." % (type(sigma),)) 52 | 53 | self.eps = 0.001 # epsilon value to estimate whether sigma is above 0 54 | 55 | def _augment_images(self, images, random_state, parents, hooks): 56 | result = images 57 | nb_images = len(images) 58 | samples = self.sigma.draw_samples((nb_images,), random_state=random_state) 59 | for i in range(nb_images): 60 | nb_channels = images[i].shape[2] 61 | sig = samples[i] 62 | if sig > 0 + self.eps: 63 | # note that while gaussian_filter can be applied to all channels 64 | # at the same time, that should not be done here, because then 65 | # the blurring would also happen across channels (e.g. red 66 | # values might be mixed with blue values in RGB) 67 | for channel in range(nb_channels): 68 | result[i][:, :, channel] = ndimage.gaussian_filter(result[i][:, :, channel], sig) 69 | return result 70 | 71 | def _augment_keypoints(self, keypoints_on_images, random_state, parents, hooks): 72 | return keypoints_on_images 73 | 74 | def get_parameters(self): 75 | return [self.sigma] 76 | 77 | 78 | class AverageBlur(Augmentor): 79 | """ 80 | Blur an image by computing simple means over neighbourhoods. 
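A usage sketch for the `GaussianBlur` defined above; note the docstring examples use the `iaa.` prefix from imgaug, but the class lives in this module, and the `augment_images` entry point is assumed to be provided by the `Augmentor` base class:

```
import numpy as np
from eagle.observe.augmentors.blur import GaussianBlur

aug = GaussianBlur(sigma=(0.0, 3.0))   # sigma drawn per image from U(0, 3)
images = np.random.randint(0, 256, size=(4, 64, 64, 3), dtype=np.uint8)
blurred = aug.augment_images(images)   # assumed base-class entry point
```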
81 | 82 | Examples 83 | -------- 84 | >>> aug = iaa.AverageBlur(k=5) 85 | 86 | Blurs all images using a kernel size of 5x5. 87 | 88 | >>> aug = iaa.AverageBlur(k=(2, 5)) 89 | 90 | Blurs images using a varying kernel size per image, which is sampled 91 | from the interval [2..5]. 92 | 93 | >>> aug = iaa.AverageBlur(k=((5, 7), (1, 3))) 94 | 95 | Blurs images using a varying kernel size per image, whose height 96 | is sampled from the interval [5..7] and whose width is sampled 97 | from [1..3]. 98 | """ 99 | 100 | def __init__(self, k=1, name=None, deterministic=False, random_state=None): 101 | super(AverageBlur, self).__init__( 102 | name=name, deterministic=deterministic, random_state=random_state) 103 | 104 | self.mode = "single" 105 | if eu.is_single_number(k): 106 | self.k = Deterministic(int(k)) 107 | elif eu.is_iterable(k): 108 | eu.do_assert(len(k) == 2) 109 | if all([eu.is_single_number(ki) for ki in k]): 110 | self.k = DiscreteUniform(int(k[0]), int(k[1])) 111 | elif all([isinstance(ki, StochasticParameter) for ki in k]): 112 | self.mode = "two" 113 | self.k = (k[0], k[1]) 114 | else: 115 | k_tuple = [None, None] 116 | if eu.is_single_number(k[0]): 117 | k_tuple[0] = Deterministic(int(k[0])) 118 | elif eu.is_iterable(k[0]) and all( 119 | [eu.is_single_number(ki) for ki in k[0]]): 120 | k_tuple[0] = DiscreteUniform(int(k[0][0]), int(k[0][1])) 121 | else: 122 | raise Exception("k[0] expected to be int or tuple of two ints, got %s" % (type(k[0]),)) 123 | 124 | if eu.is_single_number(k[1]): 125 | k_tuple[1] = Deterministic(int(k[1])) 126 | elif eu.is_iterable(k[1]) and all( 127 | [eu.is_single_number(ki) for ki in k[1]]): 128 | k_tuple[1] = DiscreteUniform(int(k[1][0]), int(k[1][1])) 129 | else: 130 | raise Exception("k[1] expected to be int or tuple of two ints, got %s" % (type(k[1]),)) 131 | 132 | self.mode = "two" 133 | self.k = k_tuple 134 | elif isinstance(k, StochasticParameter): 135 | self.k = k 136 | else: 137 | raise Exception("Expected int, tuple/list with 2 entries or StochasticParameter. Got %s." % (type(k),)) 138 | 139 | def _augment_images(self, images, random_state, parents, hooks): 140 | result = images 141 | nb_images = len(images) 142 | if self.mode == "single": 143 | samples = self.k.draw_samples((nb_images,), random_state=random_state) 144 | samples = (samples, samples) 145 | else: 146 | samples = ( 147 | self.k[0].draw_samples((nb_images,), random_state=random_state), 148 | self.k[1].draw_samples((nb_images,), random_state=random_state), 149 | ) 150 | for i in range(nb_images): 151 | kh, kw = samples[0][i], samples[1][i] 152 | #print(images.shape, result.shape, result[i].shape) 153 | kernel_impossible = (kh == 0 or kw == 0) 154 | kernel_does_nothing = (kh == 1 and kw == 1) 155 | if not kernel_impossible and not kernel_does_nothing: 156 | image_aug = cv2.blur(result[i], (kh, kw)) 157 | # cv2.blur() removes channel axis for single-channel images 158 | if image_aug.ndim == 2: 159 | image_aug = image_aug[..., np.newaxis] 160 | result[i] = image_aug 161 | return result 162 | 163 | def _augment_keypoints(self, keypoints_on_images, random_state, parents, hooks): 164 | return keypoints_on_images 165 | 166 | def get_parameters(self): 167 | return [self.k] 168 | 169 | 170 | class MedianBlur(Augmentor): 171 | """ 172 | Blur an image by computing median values over neighbourhoods. 173 | 174 | Median blurring can be used to remove small dirt from images. 175 | At larger kernel sizes, its effects have some similarity with Superpixels. 
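For the asymmetric form of `AverageBlur`, kernel height and width are sampled independently per image. One caveat worth flagging: OpenCV's `cv2.blur` interprets `ksize` as `(width, height)`, so the `(kh, kw)` tuple passed in `_augment_images` appears to swap the sampled axes (harmless in the symmetric modes, where both come from the same distribution). Usage sketch, with the same assumed `augment_images` entry point:

```
import numpy as np
from eagle.observe.augmentors.blur import AverageBlur

aug = AverageBlur(k=((5, 7), (1, 3)))   # height ~ U{5..7}, width ~ U{1..3}
images = np.random.randint(0, 256, size=(2, 64, 64, 3), dtype=np.uint8)
out = aug.augment_images(images)        # assumed base-class entry point
```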
176 | 177 | Examples 178 | -------- 179 | >>> aug = iaa.MedianBlur(k=5) 180 | 181 | blurs all images using a kernel size of 5x5. 182 | 183 | >>> aug = iaa.MedianBlur(k=(3, 7)) 184 | 185 | blurs images using a varying kernel size per image, which is 186 | an odd value sampled from the interval [3..7], i.e. 3 or 5 or 7. 187 | """ 188 | 189 | def __init__(self, k=1, name=None, deterministic=False, random_state=None): 190 | super(MedianBlur, self).__init__( 191 | name=name, deterministic=deterministic, random_state=random_state) 192 | 193 | if eu.is_single_number(k): 194 | eu.do_assert(k % 2 != 0, 195 | "Expected k to be odd, got %d. Add or subtract 1." % (int(k),)) 196 | self.k = Deterministic(int(k)) 197 | elif eu.is_iterable(k): 198 | eu.do_assert(len(k) == 2) 199 | eu.do_assert(all([eu.is_single_number(ki) for ki in k])) 200 | eu.do_assert(k[0] % 2 != 0, 201 | "Expected k[0] to be odd, got %d. Add or subtract 1." % (int(k[0]),)) 202 | eu.do_assert(k[1] % 2 != 0, 203 | "Expected k[1] to be odd, got %d. Add or subtract 1." % (int(k[1]),)) 204 | self.k = DiscreteUniform(int(k[0]), int(k[1])) 205 | elif isinstance(k, StochasticParameter): 206 | self.k = k 207 | else: 208 | raise Exception("Expected int, tuple/list with 2 entries or StochasticParameter. Got %s." % (type(k),)) 209 | 210 | def _augment_images(self, images, random_state, parents, hooks): 211 | result = images 212 | nb_images = len(images) 213 | samples = self.k.draw_samples((nb_images,), random_state=random_state) 214 | for i in range(nb_images): 215 | ki = samples[i] 216 | if ki > 1: 217 | ki = ki + 1 if ki % 2 == 0 else ki 218 | image_aug = cv2.medianBlur(result[i], ki) 219 | # cv2.medianBlur() removes channel axis for single-channel 220 | # images 221 | if image_aug.ndim == 2: 222 | image_aug = image_aug[..., np.newaxis] 223 | result[i] = image_aug 224 | return result 225 | 226 | def _augment_keypoints(self, keypoints_on_images, random_state, parents, hooks): 227 | return keypoints_on_images 228 | 229 | def get_parameters(self): 230 | return [self.k] 231 | -------------------------------------------------------------------------------- /datum/models/ssd/ssd_dataset.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2009 IW. 2 | # All rights reserved. 
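Because `DiscreteUniform(3, 7)` can also draw the even values 4 and 6, `_augment_images` in `MedianBlur` above bumps even draws up by one, so `MedianBlur(k=(3, 7))` effectively uses kernels of 3, 5 or 7, with 5 and 7 slightly more likely than 3:

```
# Effective kernel sizes for MedianBlur(k=(3, 7)):
draws = [3, 4, 5, 6, 7]                                  # raw DiscreteUniform draws
effective = [k + 1 if k % 2 == 0 else k for k in draws]  # even values bumped to odd
assert effective == [3, 5, 5, 7, 7]
```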
3 | # 4 | # Author: liuguiyang 5 | # Date: 2018/3/7 6 | 7 | from __future__ import absolute_import 8 | from __future__ import division 9 | from __future__ import print_function 10 | 11 | import json 12 | import random 13 | from queue import Queue 14 | from threading import Thread 15 | 16 | import cv2 17 | import numpy as np 18 | 19 | from datum.meta.dataset import DataSet 20 | from datum.models.ssd.box_encoder import BoxEncoder 21 | 22 | 23 | class SSDDataSet(DataSet): 24 | """TextDataSet 25 | process text input file dataset 26 | text file format: 27 | image_path xmin1 ymin1 xmax1 ymax1 class1 xmin2 ymin2 xmax2 ymax2 class2 28 | """ 29 | 30 | def __init__(self, common_params, dataset_params, box_encoder_params): 31 | super(SSDDataSet, self).__init__(common_params, dataset_params) 32 | 33 | # process params 34 | self.width = int(common_params['image_size']) 35 | self.height = int(common_params['image_size']) 36 | self.channel = int(common_params["image_channel"]) 37 | self.batch_size = int(common_params['batch_size']) 38 | self.num_classes = int(common_params["num_classes"]) 39 | 40 | self.data_path = str(dataset_params['path']) 41 | self.thread_num = int(dataset_params['thread_num']) 42 | self.classes = json.loads(dataset_params["classes"]) 43 | self.box_output_format = json.loads(dataset_params["box_output_format"]) 44 | self.is_need_bg = True if dataset_params["is_need_bg"] == "True" else False 45 | 46 | self.upper_resize_rate = float(dataset_params["upper_resize_rate"]) 47 | self.lower_resize_rate = float(dataset_params["lower_resize_rate"]) 48 | 49 | self.box_encoder = BoxEncoder(common_params, box_encoder_params) 50 | 51 | # record and image_label queue 52 | self.record_queue = Queue(maxsize=10000) 53 | self.image_label_queue = Queue(maxsize=2000) 54 | 55 | self.record_list = [] 56 | 57 | # filling the record_list 58 | input_file = open(self.data_path, 'r') 59 | 60 | for line in input_file: 61 | line = line.strip() 62 | ss = line.split(' ') 63 | ss[1:] = [float(num) for num in ss[1:]] 64 | # 文件中存储的类别都是从0开始的,如果需要在处理前添加background这个类别 65 | # 需要将background这个设置为0,其他的类别编号自动+1 66 | if self.is_need_bg: 67 | self.classes.insert(0, "background") 68 | step_len = len(self.box_output_format) 69 | start_class_idx = self.box_output_format.index("class_id") + 1 70 | for i in range(start_class_idx, len(ss), step_len): 71 | ss[i] += 1 72 | self.record_list.append(ss) 73 | 74 | self.record_point = 0 75 | self.record_number = len(self.record_list) 76 | 77 | self.num_batch_per_epoch = int(self.record_number / self.batch_size) 78 | 79 | t_record_producer = Thread(target=self.record_producer) 80 | t_record_producer.daemon = True 81 | t_record_producer.start() 82 | 83 | for i in range(self.thread_num): 84 | t = Thread(target=self.record_customer) 85 | t.daemon = True 86 | t.start() 87 | 88 | def record_producer(self): 89 | while True: 90 | if self.record_point % self.record_number == 0: 91 | random.shuffle(self.record_list) 92 | self.record_point = 0 93 | self.record_queue.put(self.record_list[self.record_point]) 94 | self.record_point += 1 95 | 96 | def record_customer(self): 97 | while True: 98 | item = self.record_queue.get() 99 | out = self.record_process(item) 100 | if out is not None: 101 | # 在归整完数据之后,要对object_label中使用BoxEncoder的调用 102 | image, gt_labels = out[:] 103 | # gt_labels from 104 | # [xmin, ymin, xmax, ymax] --> [xmin, xmax, ymin, ymax] 105 | # print(gt_labels) 106 | for cell in gt_labels: 107 | cell[1], cell[2] = cell[2], cell[1] 108 | # print(gt_labels) 109 | y_true_encoded = 
self.box_encoder.encode_y_sample(gt_labels) 110 | self.image_label_queue.put([image, y_true_encoded]) 111 | 112 | def record_process(self, record): 113 | """对于每个样本的数据具体该如何处理 114 | Args: record --> [image_path, xmin, ymin, xmax, ymax, class_id] 115 | Returns: 116 | image: 3-D ndarray 117 | labels: 2-D list [[xmin, ymin, xmax, ymax, class_id]] 118 | """ 119 | image = cv2.imread(record[0]) 120 | image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB) 121 | h = image.shape[0] 122 | w = image.shape[1] 123 | 124 | real_rate = w / h 125 | target_rate = self.width / self.height 126 | 127 | if (target_rate - self.lower_resize_rate 128 | <= real_rate <= target_rate + self.upper_resize_rate): 129 | width_rate = self.width * 1.0 / w 130 | height_rate = self.height * 1.0 / h 131 | 132 | image = cv2.resize(image, (self.height, self.width)) 133 | labels = [] 134 | i = 1 135 | while i < len(record): 136 | xmin = record[i] 137 | ymin = record[i + 1] 138 | xmax = record[i + 2] 139 | ymax = record[i + 3] 140 | class_id = record[i + 4] 141 | labels.append([xmin * width_rate, ymin * height_rate, 142 | xmax * width_rate, ymax * height_rate, 143 | class_id]) 144 | i += 5 145 | return [image, labels] 146 | elif real_rate > target_rate + self.upper_resize_rate: 147 | # 当前的图像不满足直接resize的比例,需要按照最短边进行一定比例进行裁减 148 | h0 = h 149 | w0 = np.ceil(h0 * (target_rate + self.upper_resize_rate)).astype(np.int32) 150 | # we should crop from (0, 0) 151 | image = image[:, 0:w0] 152 | image = cv2.resize(image, (self.height, self.width)) 153 | width_rate = self.width * 1.0 / w0 154 | height_rate = self.height * 1.0 / h0 155 | 156 | # 处理原始目标区域在裁减之后的图像中的实际位置 157 | labels = [] 158 | i = 1 159 | while i < len(record): 160 | xmin = record[i] 161 | ymin = record[i + 1] 162 | xmax = record[i + 2] 163 | ymax = record[i + 3] 164 | class_id = record[i + 4] 165 | if xmin < w0 - 1 and xmax <= w0 - 1: 166 | labels.append([xmin * width_rate, ymin * height_rate, 167 | xmax * width_rate, ymax * height_rate, 168 | class_id]) 169 | elif xmin < w0 - 1 and xmax > w0 - 1: 170 | if (w0 - 1 - xmin) / (xmax - xmin) >= 0.6: 171 | labels.append([xmin * width_rate, ymin * height_rate, 172 | w0-1, ymax * height_rate, 173 | class_id]) 174 | else: 175 | pass 176 | else: 177 | pass 178 | i += 5 179 | # 若没有目标符合变换要求,就将这个数据丢弃 180 | if len(labels) != 0: 181 | return [image, labels] 182 | else: 183 | return None 184 | elif real_rate < target_rate - self.lower_resize_rate: 185 | w0 = w 186 | h0 = np.ceil(w0 / (target_rate - self.lower_resize_rate)).astype(np.int32) 187 | # we should crop from (0, 0) 188 | image = image[0:h0, :] 189 | image = cv2.resize(image, (self.height, self.width)) 190 | width_rate = self.width * 1.0 / w0 191 | height_rate = self.height * 1.0 / h0 192 | 193 | # 处理原始目标区域在裁减之后的图像中的实际位置 194 | labels = [] 195 | i = 1 196 | while i < len(record): 197 | xmin = record[i] 198 | ymin = record[i + 1] 199 | xmax = record[i + 2] 200 | ymax = record[i + 3] 201 | class_id = record[i + 4] 202 | if ymin < h0 - 1 and ymax <= h0 - 1: 203 | labels.append([xmin * width_rate, ymin * height_rate, 204 | xmax * width_rate, ymax * height_rate, 205 | class_id]) 206 | elif ymin < h0 - 1 < ymax: 207 | if (h0 - 1 - ymin) / (ymax - ymin) >= 0.6: 208 | labels.append([xmin * width_rate, ymin * height_rate, 209 | xmax * width_rate, h0 - 1, 210 | class_id]) 211 | else: 212 | pass 213 | else: 214 | pass 215 | i += 5 216 | # 若没有目标符合变换要求,就将这个数据丢弃 217 | if len(labels) != 0: 218 | return [image, labels] 219 | else: 220 | return None 221 | else: 222 | pass 223 | 224 | def batch(self): 225 | 
"""get batch 226 | Returns: 227 | images: 4-D ndarray [batch_size, height, width, 3] 228 | labels: (batch_size, #boxes, #classes + 4 + 4 + 4) 229 | """ 230 | images = [] 231 | labels = [] 232 | for i in range(self.batch_size): 233 | image, label = self.image_label_queue.get() 234 | images.append(image) 235 | labels.append(label) 236 | images = np.asarray(images, dtype=np.float32) 237 | images = images / 255 * 2 - 1 238 | labels = np.concatenate(labels, axis=0) 239 | # labels = np.asarray(labels, dtype=np.float32) 240 | return images, labels 241 | -------------------------------------------------------------------------------- /eagle/brain/ssd/anchor_boxes.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2009 IW. 2 | # All rights reserved. 3 | # 4 | # Author: liuguiyang 5 | # Date: 2017/12/18 6 | 7 | from __future__ import absolute_import 8 | from __future__ import division 9 | from __future__ import print_function 10 | 11 | import numpy as np 12 | import tensorflow as tf 13 | 14 | # import keras.backend as K 15 | from keras.engine.topology import Layer 16 | from keras.engine.topology import InputSpec 17 | 18 | from eagle.brain.ssd.box_encode_decode_utils import convert_coordinates 19 | 20 | 21 | class AnchorBoxes(Layer): 22 | ''' 23 | Input shape: 24 | 4D tensor of shape 25 | `(batch, channels, height, width)` if dim_ordering == 'th' 26 | `(batch, height, width, channels)` if dim_ordering == 'tf' 27 | Output shape: 28 | 5D tensor of shape `(batch, height, width, n_boxes, 8)`. 29 | The last axis contains the four anchor box coordinates and the four variance values for each box. 30 | ''' 31 | def __init__(self, 32 | img_height, img_width, 33 | this_scale, next_scale, 34 | aspect_ratios=[0.5, 1.0, 2.0], 35 | two_boxes_for_ar1=True, 36 | variances=[1.0, 1.0, 1.0, 1.0], 37 | coords='centroids', normalize_coords=False, **kwargs): 38 | ''' 39 | this_scale (float): A float in [0, 1], the scaling factor for the size of the generated anchor boxes 40 | as a fraction of the shorter side of the input image. 41 | next_scale (float): A float in [0, 1], the next larger scaling factor. Only relevant if 42 | `self.two_boxes_for_ar1 == True`. 43 | aspect_ratios (list, optional): The list of aspect ratios for which default boxes are to be 44 | generated for this layer. Defaults to [0.5, 1.0, 2.0]. 45 | two_boxes_for_ar1 (bool, optional): Only relevant if `aspect_ratios` contains 1. 46 | If `True`, two default boxes will be generated for aspect ratio 1. The first will be generated 47 | using the scaling factor for the respective layer, the second one will be generated using 48 | geometric mean of said scaling factor and next bigger scaling factor. Defaults to `True`. 49 | variances (list, optional): A list of 4 floats >0 with scaling factors (actually it's not factors but divisors 50 | to be precise) for the encoded predicted box coordinates. A variance value of 1.0 would apply 51 | no scaling at all to the predictions, while values in (0,1) upscale the encoded predictions and values greater 52 | than 1.0 downscale the encoded predictions. If you want to reproduce the configuration of the original SSD, 53 | set this to `[0.1, 0.1, 0.2, 0.2]`, provided the coordinate Others is 'centroids'. Defaults to `[1.0, 1.0, 1.0, 1.0]`. 54 | coords (str, optional): The box coordinate Others to be used. Can be either 'centroids' for the Others 55 | `(cx, cy, w, h)` (box center coordinates, width, and height) or 'minmax' for the Others 56 | `(xmin, xmax, ymin, ymax)`. 
57 |         normalize_coords (bool, optional): Set to `True` if the model uses relative instead of absolute coordinates,
58 |             i.e. if the model predicts box coordinates within [0, 1] instead of absolute coordinates. Defaults to `False`.
59 |         '''
60 |         if (this_scale < 0) or (this_scale > 1) or (next_scale < 0) or (next_scale > 1):
61 |             raise ValueError("this_scale and next_scale must both be in [0, 1]")
62 | 
63 |         self.img_height = img_height
64 |         self.img_width = img_width
65 |         self.this_scale = this_scale
66 |         self.next_scale = next_scale
67 |         self.aspect_ratios = aspect_ratios
68 |         self.two_boxes_for_ar1 = two_boxes_for_ar1
69 |         self.variances = variances
70 |         self.coords = coords
71 |         self.normalize_coords = normalize_coords
72 | 
73 |         # Compute the number of boxes per cell
74 |         if (1 in aspect_ratios) and two_boxes_for_ar1:
75 |             self.n_boxes = len(aspect_ratios) + 1
76 |         else:
77 |             self.n_boxes = len(aspect_ratios)
78 | 
79 |         super(AnchorBoxes, self).__init__(**kwargs)
80 | 
81 |     def build(self, input_shape):
82 |         self.input_spec = [InputSpec(shape=input_shape)]
83 |         super(AnchorBoxes, self).build(input_shape)
84 | 
85 |     def call(self, x, mask=None):
86 |         # Compute box width and height for each aspect ratio
87 |         # The shorter side of the image will be used to compute `w` and `h` using `scale` and `aspect_ratios`.
88 |         self.aspect_ratios = np.sort(self.aspect_ratios)
89 |         size = min(self.img_height, self.img_width)
90 |         # Compute the box widths and heights for all aspect ratios
91 |         wh_list = []
92 |         for ar in self.aspect_ratios:
93 |             if (ar == 1) and self.two_boxes_for_ar1:
94 |                 # Compute the regular default box for aspect ratio 1 and...
95 |                 w = self.this_scale * size * np.sqrt(ar)
96 |                 h = self.this_scale * size / np.sqrt(ar)
97 |                 wh_list.append((w, h))
98 |                 # ...also compute one slightly larger version using the geometric mean of this scale value and the next
99 |                 w = np.sqrt(self.this_scale * self.next_scale) * size * np.sqrt(ar)
100 |                 h = np.sqrt(self.this_scale * self.next_scale) * size / np.sqrt(ar)
101 |                 wh_list.append((w, h))
102 |             else:
103 |                 w = self.this_scale * size * np.sqrt(ar)
104 |                 h = self.this_scale * size / np.sqrt(ar)
105 |                 wh_list.append((w, h))
106 |         wh_list = np.array(wh_list)
107 | 
108 |         # We need the shape of the input tensor
109 |         batch_size, feature_map_height, feature_map_width, feature_map_channels = x.get_shape().as_list()
110 | 
111 |         # Compute the grid of box center points. They are identical for all aspect ratios.
112 |         cell_width = self.img_width / feature_map_width
113 |         cell_height = self.img_height / feature_map_height
114 |         cx = np.linspace(cell_width/2, self.img_width-cell_width/2, feature_map_width)
115 |         cy = np.linspace(cell_height/2, self.img_height-cell_height/2, feature_map_height)
116 |         cx_grid, cy_grid = np.meshgrid(cx, cy)
117 |         # This is necessary for np.tile() to do what we want further down
118 |         cx_grid = np.expand_dims(cx_grid, -1)
119 |         # This is necessary for np.tile() to do what we want further down
120 |         cy_grid = np.expand_dims(cy_grid, -1)
121 | 
122 |         # Create a 4D tensor template of shape `(feature_map_height, feature_map_width, n_boxes, 4)`
123 |         # where the last dimension will contain `(cx, cy, w, h)`
124 |         boxes_tensor = np.zeros((feature_map_height, feature_map_width, self.n_boxes, 4))
125 | 
126 |         boxes_tensor[:, :, :, 0] = np.tile(cx_grid, (1, 1, self.n_boxes))  # Set cx
127 |         boxes_tensor[:, :, :, 1] = np.tile(cy_grid, (1, 1, self.n_boxes))  # Set cy
128 |         boxes_tensor[:, :, :, 2] = wh_list[:, 0]  # Set w
129 |         boxes_tensor[:, :, :, 3] = wh_list[:, 1]  # Set h
130 | 
131 |         # Convert `(cx, cy, w, h)` to `(xmin, xmax, ymin, ymax)`
132 |         boxes_tensor = convert_coordinates(
133 |             boxes_tensor, start_index=0, conversion='centroids2minmax')
134 | 
135 |         # If `normalize_coords` is enabled, normalize the coordinates to be within [0, 1]
136 |         if self.normalize_coords:
137 |             boxes_tensor[:, :, :, :2] /= self.img_width
138 |             boxes_tensor[:, :, :, 2:] /= self.img_height
139 | 
140 |         if self.coords == 'centroids':
141 |             # TODO: Implement box limiting directly for `(cx, cy, w, h)` so that we don't have to unnecessarily convert back and forth
142 |             # Convert `(xmin, xmax, ymin, ymax)` back to `(cx, cy, w, h)`
143 |             boxes_tensor = convert_coordinates(
144 |                 boxes_tensor, start_index=0, conversion='minmax2centroids')
145 | 
146 |         # 4: Create a tensor to contain the variances and append it to `boxes_tensor`. This tensor has the same shape
147 |         # as `boxes_tensor` and simply contains the same 4 variance values for every position in the last axis.
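        # (Illustrative aside: with the original SSD setting `variances = [0.1, 0.1, 0.2, 0.2]`,
        # every anchor position would get exactly those four values appended after its four
        # coordinates; with the all-ones default above, the encoded offsets are left unscaled.)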
148 |         # Has shape `(feature_map_height, feature_map_width, n_boxes, 4)`
149 |         variances_tensor = np.zeros_like(boxes_tensor)
150 |         # Long live broadcasting
151 |         variances_tensor += self.variances
152 |         # Now `boxes_tensor` becomes a tensor of shape `(feature_map_height, feature_map_width, n_boxes, 8)`
153 |         boxes_tensor = np.concatenate((boxes_tensor, variances_tensor), axis=-1)
154 | 
155 |         # Now prepend one dimension to `boxes_tensor` to account for the batch size and tile it along the batch dimension.
156 |         # The result will be a 5D tensor of shape `(batch_size, feature_map_height, feature_map_width, n_boxes, 8)`
157 |         boxes_tensor = np.expand_dims(boxes_tensor, axis=0)
158 |         boxes_tensor = tf.tile(
159 |             tf.constant(boxes_tensor, dtype='float32'),
160 |             tf.stack([tf.shape(x)[0], 1, 1, 1, 1]))  # use the dynamic batch size; the static batch dim may be None
161 | 
162 |         return boxes_tensor
163 | 
164 |     def compute_output_shape(self, input_shape):
165 |         batch_size, feature_map_height, feature_map_width, feature_map_channels = input_shape
166 |         return (batch_size, feature_map_height, feature_map_width, self.n_boxes, 8)
167 | 
168 |     def get_config(self):
169 |         config = {
170 |             'img_height': self.img_height,
171 |             'img_width': self.img_width,
172 |             'this_scale': self.this_scale,
173 |             'next_scale': self.next_scale,
174 |             'aspect_ratios': list(self.aspect_ratios),
175 |             'two_boxes_for_ar1': self.two_boxes_for_ar1,
176 |             'variances': list(self.variances),
177 |             'coords': self.coords,
178 |             'normalize_coords': self.normalize_coords
179 |         }
180 |         base_config = super(AnchorBoxes, self).get_config()
181 |         return dict(list(base_config.items()) + list(config.items()))
182 | 
--------------------------------------------------------------------------------
/eagle/utils.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) 2009 IW.
2 | # All rights reserved.
3 | #
4 | # Author: liuguiyang
5 | # Date: 2018/2/26
6 | 
7 | from __future__ import absolute_import
8 | from __future__ import division
9 | from __future__ import print_function
10 | 
11 | import sys
12 | import math
13 | import numbers
14 | import numpy as np
15 | 
16 | import cv2
17 | from scipy import misc
18 | 
19 | 
20 | CURRENT_RANDOM_STATE = np.random.RandomState(42)
21 | 
22 | 
23 | def seed(seedval):
24 |     CURRENT_RANDOM_STATE.seed(seedval)
25 | 
26 | 
27 | def current_random_state():
28 |     return CURRENT_RANDOM_STATE
29 | 
30 | 
31 | def new_random_state(seed=None, fully_random=False):
32 |     if seed is None:
33 |         if not fully_random:
34 |             seed = CURRENT_RANDOM_STATE.randint(0, 10 ** 6, 1)[0]
35 |     return np.random.RandomState(seed)  # seed stays None when fully_random, so NumPy seeds from OS entropy
36 | 
37 | 
38 | def dummy_random_state():
39 |     return np.random.RandomState(1)
40 | 
41 | 
42 | def copy_random_state(random_state, force_copy=False):
43 |     if random_state == np.random and not force_copy:
44 |         return random_state
45 |     else:
46 |         rs_copy = dummy_random_state()
47 |         orig_state = random_state.get_state()
48 |         rs_copy.set_state(orig_state)
49 |         return rs_copy
50 | 
51 | 
52 | def forward_random_state(random_state):
53 |     random_state.uniform()
54 | 
55 | 
56 | def do_assert(condition, message="Assertion Failed"):
57 |     if not condition:
58 |         raise AssertionError(str(message))
59 | 
60 | 
61 | def is_np_array(val):
62 |     return isinstance(val, np.ndarray)
63 | 
64 | 
65 | def is_iterable(val):
66 |     return isinstance(val, (tuple, list))
67 | 
68 | 
69 | def is_callable(val):
70 |     if sys.version_info[0] == 3 and sys.version_info[1] <= 2:
71 |         return hasattr(val, '__call__')
72 |     else:
73 |         return callable(val)
74 | 
75 | 
76 | def is_string(val):
77 |     return isinstance(val, str)
78 | 
79 | 
80 | def is_single_integer(val):
81 |     return isinstance(val, numbers.Integral)
82 | 
83 | 
84 | def is_single_float(val):
85 |     return isinstance(val, numbers.Real) and not is_single_integer(val)
86 | 
87 | 
88 | def is_single_number(val):
89 |     return isinstance(val, numbers.Real) or isinstance(val, numbers.Integral)
90 | 
91 | 
92 | def is_integer_array(val):
93 |     return is_np_array(val) and issubclass(val.dtype.type, numbers.Integral)
94 | 
95 | 
96 | def copy_dtypes_for_restore(images):
97 |     return images.dtype if is_np_array(images) else [image.dtype for image in images]
98 | 
99 | def restore_augmented_images_dtypes_(images, orig_dtypes):
100 |     if is_np_array(images):
101 |         return images.astype(orig_dtypes)  # astype copies; a dtype change cannot happen fully in place
102 |     else:
103 |         images[:] = [image.astype(dtype) for image, dtype in zip(images, orig_dtypes)]
104 |         return images  # return the result so the copying wrapper below can hand it back
105 | 
106 | def restore_augmented_images_dtypes(images, orig_dtypes):
107 |     if is_np_array(images):
108 |         images = np.copy(images)
109 |     else:
110 |         images = [np.copy(image) for image in images]
111 |     return restore_augmented_images_dtypes_(images, orig_dtypes)
112 | 
113 | def clip_augmented_images_(images, minval, maxval):
114 |     if is_np_array(images):
115 |         np.clip(images, minval, maxval, out=images)
116 |     else:
117 |         images[:] = [np.clip(image, minval, maxval) for image in images]
118 |     return images  # return the result so the copying wrapper below can hand it back
119 | 
120 | def clip_augmented_images(images, minval, maxval):
121 |     if is_np_array(images):
122 |         images = np.copy(images)
123 |     else:
124 |         images = [np.copy(image) for image in images]
125 |     return clip_augmented_images_(images, minval, maxval)
126 | 
127 | # --------------------------------------------------------------------------------
128 | # Basic image utility functions
129 | # --------------------------------------------------------------------------------
130 | 
131 | def imresize_many_images(images, sizes=None, interpolation=None):
132 |     """
133 |     Resize many images to a specified size.
134 | 
135 |     Parameters
136 |     ----------
137 |     images : (N,H,W,C) ndarray
138 |         Array of the images to resize.
139 |         Expected to usually be of dtype uint8.
140 | 
141 |     sizes : iterable of two ints
142 |         The new size in (height, width)
143 |         format.
144 | 
145 |     interpolation : None or string or int, optional(default=None)
146 |         The interpolation to use during resize.
147 |         If int, then expected to be one of:
148 |             * cv2.INTER_NEAREST (nearest neighbour interpolation)
149 |             * cv2.INTER_LINEAR (linear interpolation)
150 |             * cv2.INTER_AREA (area interpolation)
151 |             * cv2.INTER_CUBIC (cubic interpolation)
152 |         If string, then expected to be one of:
153 |             * "nearest" (identical to cv2.INTER_NEAREST)
154 |             * "linear" (identical to cv2.INTER_LINEAR)
155 |             * "area" (identical to cv2.INTER_AREA)
156 |             * "cubic" (identical to cv2.INTER_CUBIC)
157 |         If None, the interpolation will be chosen automatically. For size
158 |         increases, area interpolation will be picked and for size decreases,
159 |         linear interpolation will be picked.
160 | 
161 |     Returns
162 |     -------
163 |     result : (N,H',W',C) ndarray
164 |         Array of the resized images.
165 | 
166 |     """
167 |     s = images.shape
168 |     do_assert(len(s) == 4, s)
169 |     nb_images = s[0]
170 |     im_height, im_width = s[1], s[2]
171 |     nb_channels = s[3]
172 |     height, width = sizes[0], sizes[1]
173 | 
174 |     if height == im_height and width == im_width:
175 |         return np.copy(images)
176 | 
177 |     ip = interpolation
178 |     do_assert(ip is None or ip in ["nearest", "linear", "area", "cubic", cv2.INTER_NEAREST, cv2.INTER_LINEAR, cv2.INTER_AREA, cv2.INTER_CUBIC])
179 |     if ip is None:
180 |         if height > im_height or width > im_width:
181 |             ip = cv2.INTER_AREA
182 |         else:
183 |             ip = cv2.INTER_LINEAR
184 |     elif ip in ["nearest", cv2.INTER_NEAREST]:
185 |         ip = cv2.INTER_NEAREST
186 |     elif ip in ["linear", cv2.INTER_LINEAR]:
187 |         ip = cv2.INTER_LINEAR
188 |     elif ip in ["area", cv2.INTER_AREA]:
189 |         ip = cv2.INTER_AREA
190 |     elif ip in ["cubic", cv2.INTER_CUBIC]:
191 |         ip = cv2.INTER_CUBIC
192 |     else:
193 |         raise Exception("Invalid interpolation order")
194 | 
195 |     result = np.zeros((nb_images, height, width, nb_channels), dtype=np.uint8)
196 |     for img_idx in range(nb_images):
197 |         # TODO fallback to scipy here if image isn't uint8
198 |         result_img = cv2.resize(images[img_idx], (width, height), interpolation=ip)
199 |         if len(result_img.shape) == 2:
200 |             result_img = result_img[:, :, np.newaxis]
201 |         result[img_idx] = result_img
202 |     return result
203 | 
204 | 
205 | def imresize_single_image(image, sizes, interpolation=None):
206 |     """
207 |     Resizes a single image.
208 | 
209 |     Parameters
210 |     ----------
211 |     image : (H,W,C) ndarray or (H,W) ndarray
212 |         Array of the image to resize.
213 |         Expected to usually be of dtype uint8.
214 | 
215 |     sizes : iterable of two ints
216 |         See `imresize_many_images()`.
217 | 
218 |     interpolation : None or string or int, optional(default=None)
219 |         See `imresize_many_images()`.
220 | 
221 |     Returns
222 |     -------
223 |     out : (H',W',C) ndarray or (H',W') ndarray
224 |         The resized image.
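        Example (illustrative): `imresize_single_image(img, (64, 128))` on an
        (H, W, C) uint8 input returns an array of shape (64, 128, C).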
225 | 
226 |     """
227 |     grayscale = False
228 |     if image.ndim == 2:
229 |         grayscale = True
230 |         image = image[:, :, np.newaxis]
231 |     do_assert(len(image.shape) == 3, image.shape)
232 |     rs = imresize_many_images(image[np.newaxis, :, :, :], sizes, interpolation=interpolation)
233 |     if grayscale:
234 |         return np.squeeze(rs[0, :, :, 0])
235 |     else:
236 |         return rs[0, ...]
237 | 
238 | 
239 | def draw_grid(images, rows=None, cols=None):
240 |     """
241 |     Converts multiple input images into a single image showing them in a grid.
242 | 
243 |     Parameters
244 |     ----------
245 |     images : (N,H,W,3) ndarray or iterable of (H,W,3) array
246 |         The input images to convert to a grid.
247 |         Expected to be RGB and have dtype uint8.
248 | 
249 |     rows : None or int, optional(default=None)
250 |         The number of rows to show in the grid.
251 |         If None, it will be automatically derived.
252 | 
253 |     cols : None or int, optional(default=None)
254 |         The number of cols to show in the grid.
255 |         If None, it will be automatically derived.
256 | 
257 |     Returns
258 |     -------
259 |     grid : (H',W',3) ndarray
260 |         Image of the generated grid.
261 | 
262 |     """
263 |     if is_np_array(images):
264 |         do_assert(images.ndim == 4)
265 |     else:
266 |         do_assert(is_iterable(images) and is_np_array(images[0]) and images[0].ndim == 3)
267 | 
268 |     nb_images = len(images)
269 |     do_assert(nb_images > 0)
270 |     cell_height = max([image.shape[0] for image in images])
271 |     cell_width = max([image.shape[1] for image in images])
272 |     channels = set([image.shape[2] for image in images])
273 |     do_assert(len(channels) == 1, "All images are expected to have the same number of channels, but got channel set %s with length %d instead." % (str(channels), len(channels)))
274 |     nb_channels = list(channels)[0]
275 |     if rows is None and cols is None:
276 |         rows = cols = int(math.ceil(math.sqrt(nb_images)))
277 |     elif rows is not None:
278 |         cols = int(math.ceil(nb_images / rows))
279 |     elif cols is not None:
280 |         rows = int(math.ceil(nb_images / cols))
281 |     do_assert(rows * cols >= nb_images)
282 | 
283 |     width = cell_width * cols
284 |     height = cell_height * rows
285 |     grid = np.zeros((height, width, nb_channels), dtype=np.uint8)
286 |     cell_idx = 0
287 |     for row_idx in range(rows):
288 |         for col_idx in range(cols):
289 |             if cell_idx < nb_images:
290 |                 image = images[cell_idx]
291 |                 cell_y1 = cell_height * row_idx
292 |                 cell_y2 = cell_y1 + image.shape[0]
293 |                 cell_x1 = cell_width * col_idx
294 |                 cell_x2 = cell_x1 + image.shape[1]
295 |                 grid[cell_y1:cell_y2, cell_x1:cell_x2, :] = image
296 |                 cell_idx += 1
297 | 
298 |     return grid
299 | 
300 | def show_grid(images, rows=None, cols=None):
301 |     """
302 |     Converts the input images to a grid image and shows it in a new window.
303 | 
304 |     This function wraps around scipy.misc.imshow(), which requires the
305 |     `see` command to work. On Windows systems, this tends to not be
306 |     the case. (Note: scipy.misc.imshow has been removed from recent SciPy releases, so this helper needs an older SciPy.)
307 | 
308 |     Parameters
309 |     ----------
310 |     images : (N,H,W,3) ndarray or iterable of (H,W,3) array
311 |         See `draw_grid()`.
312 | 
313 |     rows : None or int, optional(default=None)
314 |         See `draw_grid()`.
315 | 
316 |     cols : None or int, optional(default=None)
317 |         See `draw_grid()`.
318 | 
319 |     """
320 |     grid = draw_grid(images, rows=rows, cols=cols)
321 |     misc.imshow(grid)
--------------------------------------------------------------------------------
/eagle/brain/ssd/loss.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) 2009 IW.
2 | # All rights reserved.
3 | #
4 | # Author: liuguiyang
5 | # Date: 2017/12/18
6 | 
7 | from __future__ import absolute_import
8 | from __future__ import division
9 | from __future__ import print_function
10 | 
11 | import tensorflow as tf
12 | 
13 | 
14 | class Loss:
15 | 
16 |     def __init__(self, neg_pos_ratio=3, n_neg_min=0, alpha=1.0):
17 |         """
18 |         Arguments:
19 |             neg_pos_ratio (int, optional): The maximum ratio of negative (i.e. background)
20 |                 to positive ground truth boxes to include in the loss computation.
21 |                 There are no actual background ground truth boxes of course, but `y_true`
22 |                 contains anchor boxes labeled with the background class. Since
23 |                 the number of background boxes in `y_true` will usually exceed
24 |                 the number of positive boxes by far, it is necessary to balance
25 |                 their influence on the loss. Defaults to 3 following the paper.
26 |             n_neg_min (int, optional): The minimum number of negative ground truth boxes to
27 |                 enter the loss computation *per batch*. This argument can be used to make
28 |                 sure that the model learns from a minimum number of negatives in batches
29 |                 in which there are very few, or even none at all, positive ground truth
30 |                 boxes. It defaults to 0 and if used, it should be set to a value that
31 |                 stands in reasonable proportion to the batch size used for training.
32 |             alpha (float, optional): A factor to weight the localization loss in the
33 |                 computation of the total loss. Defaults to 1.0 following the paper.
34 |         """
35 |         self.neg_pos_ratio = neg_pos_ratio
36 |         self.n_neg_min = n_neg_min
37 |         self.alpha = alpha
38 | 
39 |     def smooth_L1_loss(self, y_true, y_pred):
40 |         '''
41 |         Compute smooth L1 loss, see references.
42 |         Arguments:
43 |             y_true (nD tensor): A TensorFlow tensor of any shape containing the ground truth data.
44 |                 In this context, the expected tensor has shape `(batch_size, #boxes, 4)` and
45 |                 contains the ground truth bounding box coordinates, where the last dimension
46 |                 contains `(xmin, xmax, ymin, ymax)`.
47 |             y_pred (nD tensor): A TensorFlow tensor of identical structure to `y_true` containing
48 |                 the predicted data, in this context the predicted bounding box coordinates.
49 |         Returns:
50 |             The smooth L1 loss, an (n-1)-D TensorFlow tensor. In this context, a 2D tensor
51 |             of shape (batch, n_boxes_total).
52 |         References:
53 |             https://arxiv.org/abs/1504.08083
54 |         '''
55 |         absolute_loss = tf.abs(y_true - y_pred)
56 |         square_loss = 0.5 * (y_true - y_pred)**2
57 |         l1_loss = tf.where(tf.less(absolute_loss, 1.0), square_loss, absolute_loss - 0.5)
58 |         return tf.reduce_sum(l1_loss, axis=-1)
59 | 
60 |     def log_loss(self, y_true, y_pred):
61 |         '''
62 |         Compute the softmax log loss.
63 |         Arguments:
64 |             y_true (nD tensor): A TensorFlow tensor of any shape containing the ground truth data.
65 |                 In this context, the expected tensor has shape (batch_size, #boxes, #classes)
66 |                 and contains the ground truth bounding box categories.
67 |             y_pred (nD tensor): A TensorFlow tensor of identical structure to `y_true` containing
68 |                 the predicted data, in this context the predicted bounding box categories.
69 |         Returns:
70 |             The softmax log loss, an (n-1)-D TensorFlow tensor. In this context, a 2D tensor
71 |             of shape (batch, n_boxes_total).
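            Example (illustrative): for a one-hot target `[0, 1, 0]` and a predicted
            distribution `[0.2, 0.7, 0.1]`, the loss for that box is `-log(0.7) ≈ 0.357`.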
72 |         '''
73 |         # Make sure that `y_pred` doesn't contain any zeros (which would break the log function)
74 |         y_pred = tf.maximum(y_pred, 1e-15)
75 |         # Compute the log loss
76 |         log_loss = -tf.reduce_sum(y_true * tf.log(y_pred), axis=-1)
77 |         return log_loss
78 | 
79 |     def compute_loss(self, y_true, y_pred):
80 |         '''
81 |         Compute the loss of the SSD model prediction against the ground truth.
82 |         Arguments:
83 |             y_true (array): A Numpy array of shape `(batch_size, #boxes, #classes + 12)`,
84 |                 where `#boxes` is the total number of boxes that the model predicts
85 |                 per image. Be careful to make sure that the index of each given
86 |                 box in `y_true` is the same as the index for the corresponding
87 |                 box in `y_pred`. The last axis must have length `#classes + 12` and contain
88 |                 `[classes one-hot encoded, 4 ground truth box coordinate offsets, 8 arbitrary entries]`
89 |                 in this order, including the background class. The last eight entries of the
90 |                 last axis are not used by this function and therefore their contents are
91 |                 irrelevant; they only exist so that `y_true` has the same shape as `y_pred`,
92 |                 where the last four entries of the last axis contain the anchor box
93 |                 coordinates, which are needed during inference. Important: Boxes that
94 |                 you want the cost function to ignore need to have a one-hot
95 |                 class vector of all zeros.
96 |             y_pred (Keras tensor): The model prediction. The shape is identical
97 |                 to that of `y_true`, i.e. `(batch_size, #boxes, #classes + 12)`.
98 |                 The last axis must contain entries in the format
99 |                 `[classes one-hot encoded, 4 predicted box coordinate offsets, 8 arbitrary entries]`.
100 |         Returns:
101 |             A scalar, the total multitask loss for classification and localization.
102 |         '''
103 |         neg_pos_ratio = tf.constant(self.neg_pos_ratio)  # local tensors, so repeated calls don't re-wrap the attributes
104 |         n_neg_min = tf.constant(self.n_neg_min)
105 |         alpha = tf.constant(self.alpha)
106 | 
107 |         # Output dtype: tf.int32
108 |         batch_size = tf.shape(y_pred)[0]
109 |         # Output dtype: tf.int32, note that `n_boxes` in this context denotes the total number of boxes per image,
110 |         # not the number of boxes per cell
111 |         n_boxes = tf.shape(y_pred)[1]
112 | 
113 |         # 1: Compute the losses for class and box predictions for every box
114 | 
115 |         # Output shape: (batch_size, n_boxes)
116 |         classification_loss = tf.to_float(
117 |             self.log_loss(
118 |                 y_true[:, :, :-12],
119 |                 y_pred[:, :, :-12])
120 |         )
121 |         # Output shape: (batch_size, n_boxes)
122 |         localization_loss = tf.to_float(
123 |             self.smooth_L1_loss(
124 |                 y_true[:, :, -12:-8],
125 |                 y_pred[:, :, -12:-8])
126 |         )
127 | 
128 |         # 2: Compute the classification losses for the positive and negative targets
129 | 
130 |         # Create masks for the positive and negative ground truth classes
131 |         # Tensor of shape (batch_size, n_boxes)
132 |         negatives = y_true[:, :, 0]
133 |         # Tensor of shape (batch_size, n_boxes)
134 |         positives = tf.to_float(tf.reduce_max(y_true[:, :, 1:-12], axis=-1))
135 | 
136 |         # Count the number of positive boxes (classes 1 to n) in y_true across the whole batch
137 |         n_positive = tf.reduce_sum(positives)
138 | 
139 |         # Now mask all negative boxes and sum up the losses for the positive boxes PER batch item
140 |         # (Keras loss functions must output one scalar loss value PER batch item, rather than just
141 |         # one scalar for the entire batch, that's why we're not summing across all axes)
142 |         pos_class_loss = tf.reduce_sum(classification_loss * positives, axis=-1)  # Tensor of shape (batch_size,)
143 | 
144 |         # Compute the classification loss for the negative default boxes (if there are any)
145 | 
146 |         # First, compute the classification loss for all negative boxes
147 |         neg_class_loss_all = classification_loss * negatives  # Tensor of shape (batch_size, n_boxes)
148 |         # The number of non-zero loss entries in `neg_class_loss_all`
149 |         n_neg_losses = tf.count_nonzero(neg_class_loss_all, dtype=tf.int32)
150 |         # What's the point of `n_neg_losses`?
151 |         # For the next step, which will be to compute which negative boxes enter the classification loss,
152 |         # we don't just want to know how many negative ground truth boxes there are,
153 |         # but for how many of those there actually is a positive (i.e. non-zero) loss.
154 |         # This is necessary because `tf.nn.top_k()` in the function below will pick the top k boxes with
155 |         # the highest losses no matter what, even if it receives a vector where all losses are zero.
156 |         # In the unlikely event that all negative classification losses ARE actually zero though,
157 |         # this behavior might lead to `tf.nn.top_k()` returning the indices of positive boxes,
158 |         # leading to an incorrect negative classification loss computation, and hence an incorrect overall loss computation.
159 |         # We therefore need to make sure that `n_negative_keep`, which assumes the role of the `k` argument in `tf.nn.top_k()`,
160 |         # is at most the number of negative boxes for which there is a positive classification loss.
161 | 
162 |         # Compute the number of negative examples we want to account for in the loss
163 |         # We'll keep at most `self.neg_pos_ratio` times the number of positives in `y_true`, but at least `self.n_neg_min` (unless `n_neg_losses` is smaller)
164 |         n_negative_keep = tf.minimum(tf.maximum(neg_pos_ratio * tf.to_int32(n_positive), n_neg_min), n_neg_losses)
165 | 
166 |         # In the unlikely case when either (1) there are no negative ground truth boxes at all
167 |         # or (2) the classification loss for all negative boxes is zero, return zero as the `neg_class_loss`
168 |         def f1():
169 |             return tf.zeros([batch_size])
170 |         # Otherwise compute the negative loss
171 |         def f2():
172 |             # Now we'll identify the top-k (where k == `n_negative_keep`) boxes with the highest confidence loss that
173 |             # belong to the background class in the ground truth data. Note that this doesn't necessarily mean that the model
174 |             # predicted the wrong class for those boxes, it just means that the loss for those boxes is the highest.
175 | 
176 |             # To do this, we reshape `neg_class_loss_all` to 1D...
177 |             neg_class_loss_all_1D = tf.reshape(neg_class_loss_all, [-1])  # Tensor of shape (batch_size * n_boxes,)
178 |             # ...and then we get the indices for the `n_negative_keep` boxes with the highest loss out of those...
179 |             values, indices = tf.nn.top_k(neg_class_loss_all_1D, n_negative_keep, False)  # We don't need sorting
180 |             # ...and with these indices we'll create a mask...
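            # (Illustrative: with 6 boxes in total and indices [4, 1], the
            # scatter_nd call below yields the mask [0, 1, 0, 0, 1, 0].)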
181 |             # Tensor of shape (batch_size * n_boxes,)
182 |             negatives_keep = tf.scatter_nd(
183 |                 indices=tf.expand_dims(indices, axis=1),
184 |                 updates=tf.ones_like(indices, dtype=tf.int32),
185 |                 shape=tf.shape(neg_class_loss_all_1D))
186 |             # Tensor of shape (batch_size, n_boxes)
187 |             negatives_keep = tf.to_float(tf.reshape(negatives_keep, [batch_size, n_boxes]))
188 |             # ...and use it to keep only those boxes and mask all other classification losses
189 |             # Tensor of shape (batch_size,)
190 |             neg_class_loss = tf.reduce_sum(classification_loss * negatives_keep, axis=-1)
191 |             return neg_class_loss
192 | 
193 |         neg_class_loss = tf.cond(tf.equal(n_neg_losses, tf.constant(0)), f1, f2)
194 | 
195 |         # Tensor of shape (batch_size,)
196 |         class_loss = pos_class_loss + neg_class_loss
197 | 
198 |         # 3: Compute the localization loss for the positive targets
199 |         # We don't penalize localization loss for negative predicted boxes (obviously: there are no ground truth boxes they would correspond to)
200 | 
201 |         # Tensor of shape (batch_size,)
202 |         loc_loss = tf.reduce_sum(localization_loss * positives, axis=-1)
203 | 
204 |         # 4: Compute the total loss
205 | 
206 |         total_loss = (class_loss + alpha * loc_loss) / tf.maximum(1.0, n_positive)  # In case `n_positive == 0`
207 |         # Keras has the annoying habit of dividing the loss by the batch size, which is unhelpful in our case
208 |         # because the relevant criterion to average our loss over is the number of positive boxes in the batch
209 |         # (by which we're dividing in the line above), not the batch size. So in order to revert Keras' averaging
210 |         # over the batch size, we'll have to multiply by it.
211 |         total_loss *= tf.to_float(batch_size)
212 | 
213 |         return tf.reduce_mean(total_loss, axis=-1)
214 | 
--------------------------------------------------------------------------------
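For orientation, a minimal, hedged usage sketch of the `Loss` class above under the TF1-style API this file targets. The shapes and values are illustrative placeholders, not taken from any config in this repository:

```python
import numpy as np
import tensorflow as tf

from eagle.brain.ssd.loss import Loss

# One image, three boxes, two classes (background + one object class).
# Per box, the last 12 entries are [4 gt offsets + 8 anchor entries];
# compute_loss() ignores the trailing 8.
n_classes, n_boxes = 2, 3
y_true = np.zeros((1, n_boxes, n_classes + 12), dtype=np.float32)
y_true[0, 0, 1] = 1.0   # box 0 is a positive of class 1
y_true[0, 1:, 0] = 1.0  # the remaining boxes are background
y_pred = np.full_like(y_true, 0.5)  # uninformative scores and offsets

loss_op = Loss(neg_pos_ratio=3).compute_loss(
    tf.constant(y_true), tf.constant(y_pred))
with tf.Session() as sess:
    print(sess.run(loss_op))  # a single scalar multitask loss value
```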