├── datum ├── README.md ├── __init__.py ├── meta │ ├── __init__.py │ └── dataset.py ├── models │ ├── __init__.py │ ├── ssd │ │ ├── __init__.py │ │ └── ssd_dataset.py │ └── yolo │ │ ├── __init__.py │ │ ├── yolo_dataset.py │ │ └── yolo_batch_dataset.py └── utils │ ├── __init__.py │ ├── process_config.py │ └── tools.py ├── results ├── test_res_image │ ├── loss1.png │ ├── loss2.png │ └── loss3.png └── README.md ├── Others ├── __init__.py ├── vedia │ ├── __init__.py │ ├── show.py │ └── convert2voc.py ├── voc │ ├── __init__.py │ └── process_pascal_voc.py ├── satellite │ ├── __init__.py │ ├── process.py │ ├── bbox_cluster.py │ ├── clip_video.py │ └── prepare_trainsamples.py ├── lsd12 │ ├── __init__.py │ ├── label_config.py │ ├── check_dataset.py │ └── format_input.py └── README.md ├── checks ├── __init__.py ├── brain │ ├── __init__.py │ ├── ssd │ │ ├── __init__.py │ │ └── check_ssd_model.py │ └── yolo │ │ ├── __init__.py │ │ └── check_yolo_model.py ├── datasets │ ├── __init__.py │ └── check_ssd_dataset.py └── observe │ ├── __init__.py │ ├── check_median_blur.py │ ├── check_gaussian_blur.py │ ├── check_average_blur.py │ ├── check_color.py │ ├── check_parameters.py │ └── check_background.py ├── eagle ├── __init__.py ├── brain │ ├── __init__.py │ ├── solver │ │ ├── __init__.py │ │ ├── solver.py │ │ ├── yolo_solver.py │ │ ├── ssd_solver.py │ │ └── yolo_u_solver.py │ ├── ssd │ │ ├── __init__.py │ │ ├── models │ │ │ ├── __init__.py │ │ │ ├── components.py │ │ │ └── net.py │ │ ├── normalization.py │ │ ├── anchor_boxes.py │ │ └── loss.py │ ├── yolo │ │ ├── __init__.py │ │ └── net.py │ └── rotation │ │ ├── __init__.py │ │ └── yolo │ │ ├── __init__.py │ │ └── net.py ├── observe │ ├── __init__.py │ ├── base │ │ └── __init__.py │ └── augmentors │ │ ├── __init__.py │ │ ├── flip.py │ │ ├── arithmetic.py │ │ └── blur.py ├── README.md └── utils.py ├── examples ├── __init__.py ├── ssd │ ├── __init__.py │ └── vgg_trainer.py ├── unet │ ├── __init__.py │ ├── train.py │ └── predict.py └── yolo │ ├── __init__.py │ └── train.py ├── conf ├── yolo_train.cfg ├── yolo_train_server.cfg ├── yolo_unet_train_server.cfg ├── yolo_unet_train.cfg ├── ssd_train.cfg ├── ssd_train_server.cfg ├── dilated_ssd_train.cfg └── ssd_train_512.cfg └── README.md /datum/README.md: -------------------------------------------------------------------------------- 1 | ## DataSets 2 | 3 | This package implements the unified dataset-processing logic. 4 | 5 | ### Directory Structure 6 | 7 | ### Configuration 8 | -------------------------------------------------------------------------------- /results/test_res_image/loss1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/guiyang882/DL.EyeSight/HEAD/results/test_res_image/loss1.png -------------------------------------------------------------------------------- /results/test_res_image/loss2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/guiyang882/DL.EyeSight/HEAD/results/test_res_image/loss2.png -------------------------------------------------------------------------------- /results/test_res_image/loss3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/guiyang882/DL.EyeSight/HEAD/results/test_res_image/loss3.png -------------------------------------------------------------------------------- /results/README.md: -------------------------------------------------------------------------------- 1 | ## Storage for Model Results 2 | 3 | ``` 4 | results 5 | ├── ssd 6 | │   ├── 
train_model 7 | │   └── pretrain 8 | └── yolo 9 | ├── train_model 10 | └── pretrain 11 | ``` -------------------------------------------------------------------------------- /Others/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2009 IW. 2 | # All rights reserved. 3 | # 4 | # Author: liuguiyang 5 | # Date: 2018/3/5 6 | 7 | from __future__ import absolute_import 8 | from __future__ import division 9 | from __future__ import print_function -------------------------------------------------------------------------------- /checks/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2009 IW. 2 | # All rights reserved. 3 | # 4 | # Author: liuguiyang 5 | # Date: 2018/2/26 6 | 7 | from __future__ import absolute_import 8 | from __future__ import division 9 | from __future__ import print_function -------------------------------------------------------------------------------- /datum/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2009 IW. 2 | # All rights reserved. 3 | # 4 | # Author: liuguiyang 5 | # Date: 2018/3/5 6 | 7 | from __future__ import absolute_import 8 | from __future__ import division 9 | from __future__ import print_function -------------------------------------------------------------------------------- /eagle/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2009 IW. 2 | # All rights reserved. 3 | # 4 | # Author: liuguiyang 5 | # Date: 2018/2/26 6 | 7 | from __future__ import absolute_import 8 | from __future__ import division 9 | from __future__ import print_function -------------------------------------------------------------------------------- /examples/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2009 IW. 2 | # All rights reserved. 3 | # 4 | # Author: liuguiyang 5 | # Date: 2018/3/6 6 | 7 | from __future__ import absolute_import 8 | from __future__ import division 9 | from __future__ import print_function -------------------------------------------------------------------------------- /Others/vedia/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2009 IW. 2 | # All rights reserved. 3 | # 4 | # Author: liuguiyang 5 | # Date: 2018/3/12 6 | 7 | from __future__ import absolute_import 8 | from __future__ import division 9 | from __future__ import print_function -------------------------------------------------------------------------------- /Others/voc/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2009 IW. 2 | # All rights reserved. 3 | # 4 | # Author: liuguiyang 5 | # Date: 2018/3/15 6 | 7 | from __future__ import absolute_import 8 | from __future__ import division 9 | from __future__ import print_function -------------------------------------------------------------------------------- /checks/brain/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2009 IW. 2 | # All rights reserved. 
3 | # 4 | # Author: liuguiyang 5 | # Date: 2018/2/28 6 | 7 | from __future__ import absolute_import 8 | from __future__ import division 9 | from __future__ import print_function -------------------------------------------------------------------------------- /datum/meta/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2009 IW. 2 | # All rights reserved. 3 | # 4 | # Author: liuguiyang 5 | # Date: 2018/3/5 6 | 7 | from __future__ import absolute_import 8 | from __future__ import division 9 | from __future__ import print_function -------------------------------------------------------------------------------- /datum/models/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2009 IW. 2 | # All rights reserved. 3 | # 4 | # Author: liuguiyang 5 | # Date: 2018/3/8 6 | 7 | from __future__ import absolute_import 8 | from __future__ import division 9 | from __future__ import print_function -------------------------------------------------------------------------------- /datum/utils/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2009 IW. 2 | # All rights reserved. 3 | # 4 | # Author: liuguiyang 5 | # Date: 2018/3/5 6 | 7 | from __future__ import absolute_import 8 | from __future__ import division 9 | from __future__ import print_function -------------------------------------------------------------------------------- /eagle/brain/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2009 IW. 2 | # All rights reserved. 3 | # 4 | # Author: liuguiyang 5 | # Date: 2018/2/26 6 | 7 | from __future__ import absolute_import 8 | from __future__ import division 9 | from __future__ import print_function -------------------------------------------------------------------------------- /examples/ssd/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2009 IW. 2 | # All rights reserved. 3 | # 4 | # Author: liuguiyang 5 | # Date: 2018/3/6 6 | 7 | from __future__ import absolute_import 8 | from __future__ import division 9 | from __future__ import print_function -------------------------------------------------------------------------------- /examples/unet/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2009 IW. 2 | # All rights reserved. 3 | # 4 | # Author: liuguiyang 5 | # Date: 2018/3/6 6 | 7 | from __future__ import absolute_import 8 | from __future__ import division 9 | from __future__ import print_function -------------------------------------------------------------------------------- /examples/yolo/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2009 IW. 2 | # All rights reserved. 3 | # 4 | # Author: liuguiyang 5 | # Date: 2018/3/6 6 | 7 | from __future__ import absolute_import 8 | from __future__ import division 9 | from __future__ import print_function -------------------------------------------------------------------------------- /Others/satellite/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2009 IW. 2 | # All rights reserved. 
3 | # 4 | # Author: liuguiyang 5 | # Date: 2018/3/15 6 | 7 | from __future__ import absolute_import 8 | from __future__ import division 9 | from __future__ import print_function -------------------------------------------------------------------------------- /checks/brain/ssd/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2009 IW. 2 | # All rights reserved. 3 | # 4 | # Author: liuguiyang 5 | # Date: 2018/3/9 6 | 7 | from __future__ import absolute_import 8 | from __future__ import division 9 | from __future__ import print_function -------------------------------------------------------------------------------- /checks/brain/yolo/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2009 IW. 2 | # All rights reserved. 3 | # 4 | # Author: liuguiyang 5 | # Date: 2018/3/14 6 | 7 | from __future__ import absolute_import 8 | from __future__ import division 9 | from __future__ import print_function -------------------------------------------------------------------------------- /checks/datasets/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2009 IW. 2 | # All rights reserved. 3 | # 4 | # Author: liuguiyang 5 | # Date: 2018/3/11 6 | 7 | from __future__ import absolute_import 8 | from __future__ import division 9 | from __future__ import print_function -------------------------------------------------------------------------------- /checks/observe/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2009 IW. 2 | # All rights reserved. 3 | # 4 | # Author: liuguiyang 5 | # Date: 2018/2/28 6 | 7 | from __future__ import absolute_import 8 | from __future__ import division 9 | from __future__ import print_function -------------------------------------------------------------------------------- /datum/models/ssd/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2009 IW. 2 | # All rights reserved. 3 | # 4 | # Author: liuguiyang 5 | # Date: 2018/3/8 6 | 7 | from __future__ import absolute_import 8 | from __future__ import division 9 | from __future__ import print_function -------------------------------------------------------------------------------- /datum/models/yolo/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2009 IW. 2 | # All rights reserved. 3 | # 4 | # Author: liuguiyang 5 | # Date: 2018/3/8 6 | 7 | from __future__ import absolute_import 8 | from __future__ import division 9 | from __future__ import print_function -------------------------------------------------------------------------------- /eagle/brain/solver/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2009 IW. 2 | # All rights reserved. 3 | # 4 | # Author: liuguiyang 5 | # Date: 2018/3/4 6 | 7 | from __future__ import absolute_import 8 | from __future__ import division 9 | from __future__ import print_function -------------------------------------------------------------------------------- /eagle/brain/ssd/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2009 IW. 2 | # All rights reserved. 
3 | # 4 | # Author: liuguiyang 5 | # Date: 2018/3/1 6 | 7 | from __future__ import absolute_import 8 | from __future__ import division 9 | from __future__ import print_function -------------------------------------------------------------------------------- /eagle/brain/yolo/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2009 IW. 2 | # All rights reserved. 3 | # 4 | # Author: liuguiyang 5 | # Date: 2018/3/1 6 | 7 | from __future__ import absolute_import 8 | from __future__ import division 9 | from __future__ import print_function -------------------------------------------------------------------------------- /eagle/observe/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2009 IW. 2 | # All rights reserved. 3 | # 4 | # Author: liuguiyang 5 | # Date: 2018/2/26 6 | 7 | from __future__ import absolute_import 8 | from __future__ import division 9 | from __future__ import print_function -------------------------------------------------------------------------------- /eagle/brain/rotation/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2009 IW. 2 | # All rights reserved. 3 | # 4 | # Author: liuguiyang 5 | # Date: 2018/3/12 6 | 7 | from __future__ import absolute_import 8 | from __future__ import division 9 | from __future__ import print_function -------------------------------------------------------------------------------- /eagle/brain/ssd/models/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2009 IW. 2 | # All rights reserved. 3 | # 4 | # Author: liuguiyang 5 | # Date: 2018/3/1 6 | 7 | from __future__ import absolute_import 8 | from __future__ import division 9 | from __future__ import print_function -------------------------------------------------------------------------------- /eagle/observe/base/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2009 IW. 2 | # All rights reserved. 3 | # 4 | # Author: liuguiyang 5 | # Date: 2018/2/26 6 | 7 | from __future__ import absolute_import 8 | from __future__ import division 9 | from __future__ import print_function -------------------------------------------------------------------------------- /Others/lsd12/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2009 IW. 2 | # All rights reserved. 3 | # 4 | # Author: liuguiyang 5 | # Date: 2018/1/3 6 | 7 | from __future__ import absolute_import 8 | from __future__ import division 9 | from __future__ import print_function 10 | -------------------------------------------------------------------------------- /eagle/brain/rotation/yolo/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2009 IW. 2 | # All rights reserved. 3 | # 4 | # Author: liuguiyang 5 | # Date: 2018/3/12 6 | 7 | from __future__ import absolute_import 8 | from __future__ import division 9 | from __future__ import print_function -------------------------------------------------------------------------------- /eagle/observe/augmentors/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2009 IW. 2 | # All rights reserved. 
3 | # 4 | # Author: liuguiyang 5 | # Date: 2018/2/26 6 | 7 | from __future__ import absolute_import 8 | from __future__ import division 9 | from __future__ import print_function -------------------------------------------------------------------------------- /Others/README.md: -------------------------------------------------------------------------------- 1 | ## Unifying Dataset Formats 2 | Mainly covers organizing the data of the various datasets into one unified format. 3 | 4 | ### Dataset List 5 | - [x] VOC dataset 6 | - [x] VEDIA dataset 7 | - [ ] KITTI dataset 8 | - [ ] NWPU-VHR 9 | - [ ] DOTA (A Large-scale Dataset for Object Detection in Aerial Images) 10 | - [ ] RSOD-Dataset 11 | - [ ] INRIA aerial image dataset 12 | 13 | 14 | ### Dataset Descriptions 15 | -------------------------------------------------------------------------------- /conf/yolo_train.cfg: -------------------------------------------------------------------------------- 1 | [Common] 2 | image_size: 448 3 | image_width: 448 4 | image_height: 448 5 | batch_size: 32 6 | num_classes: 20 7 | max_objects_per_image: 20 8 | 9 | [DataSet] 10 | path: /Volumes/projects/DataSets/VOC/pascal_voc_2007.txt 11 | thread_num: 8 12 | 13 | [Net] 14 | weight_decay: 0.0005 15 | cell_size: 7 16 | boxes_per_cell: 2 17 | object_scale: 1 18 | noobject_scale: 0.5 19 | class_scale: 1 20 | coord_scale: 5 21 | 22 | [Solver] 23 | lr: 0.0005 24 | moment: 0.9 25 | max_iterators: 1000000 26 | pretrain_model_path: /Users/liuguiyang/github.com/DL.EyeSight/results/yolo/pretrain/yolo_tiny.ckpt 27 | train_dir: /Users/liuguiyang/github.com/DL.EyeSight/results/yolo/train_model/ -------------------------------------------------------------------------------- /conf/yolo_train_server.cfg: -------------------------------------------------------------------------------- 1 | [Common] 2 | image_size: 448 3 | image_width: 448 4 | image_height: 448 5 | batch_size: 64 6 | num_classes: 20 7 | max_objects_per_image: 20 8 | 9 | [DataSet] 10 | path: /home/ai-i-liuguiyang/repos_ssd/VOC_DATA/pascal_voc_2007.txt 11 | thread_num: 10 12 | 13 | [Net] 14 | weight_decay: 0.0005 15 | cell_size: 7 16 | boxes_per_cell: 2 17 | object_scale: 1 18 | noobject_scale: 0.5 19 | class_scale: 1 20 | coord_scale: 5 21 | 22 | [Solver] 23 | lr: 0.0005 24 | moment: 0.9 25 | max_iterators: 1000000 26 | pretrain_model_path: /home/ai-i-liuguiyang/proj/DL.EyeSight/results/yolo/pretrain/yolo_tiny.ckpt 27 | train_dir: /home/ai-i-liuguiyang/proj/DL.EyeSight/results/yolo/train_model/ -------------------------------------------------------------------------------- /datum/meta/dataset.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2009 IW. 2 | # All rights reserved. 3 | # 4 | # Author: liuguiyang 5 | # Date: 2018/3/5 6 | 7 | from __future__ import absolute_import 8 | from __future__ import division 9 | from __future__ import print_function 10 | 11 | 12 | class DataSet(object): 13 | def __init__(self, common_params, dataset_params): 14 | if not isinstance(common_params, dict): 15 | raise TypeError("common_params must be dict") 16 | if not isinstance(dataset_params, dict): 17 | raise TypeError("dataset_params must be dict") 18 | 19 | def batch(self): 20 | raise NotImplementedError
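# Editor's sketch (not part of the original file): a concrete dataset is
# expected to subclass DataSet, keep the two parameter dicts, and implement
# batch(); the helper names below are hypothetical:
#
# class ToyDataSet(DataSet):
#     def __init__(self, common_params, dataset_params):
#         super(ToyDataSet, self).__init__(common_params, dataset_params)
#         self.batch_size = int(common_params["batch_size"])
#         self.samples = load_samples(dataset_params["path"])  # hypothetical helper
#
#     def batch(self):
#         # one training batch per call, e.g. (images, labels, objects_num)
#         return next_batch(self.samples, self.batch_size)  # hypothetical helper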
-------------------------------------------------------------------------------- /eagle/brain/solver/solver.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2009 IW. 2 | # All rights reserved. 3 | # 4 | # Author: liuguiyang 5 | # Date: 2018/3/4 6 | 7 | from __future__ import absolute_import 8 | from __future__ import division 9 | from __future__ import print_function 10 | 11 | 12 | class Solver(object): 13 | def __init__(self, dataset, net, common_params, solver_params): 14 | if not isinstance(common_params, dict): 15 | raise TypeError("common_params must be dict") 16 | if not isinstance(solver_params, dict): 17 | raise TypeError("solver_params must be dict") 18 | 19 | def solve(self): 20 | raise NotImplementedError -------------------------------------------------------------------------------- /conf/yolo_unet_train_server.cfg: -------------------------------------------------------------------------------- 1 | [Common] 2 | image_size: 512 3 | image_width: 512 4 | image_height: 512 5 | batch_size: 128 6 | num_classes: 1 7 | max_objects_per_image: 30 8 | 9 | [DataSet] 10 | path: /home/ai-i-liuguiyang/datasets/CSUVideo/512x512/train.txt 11 | thread_num: 8 12 | 13 | [Net] 14 | weight_decay: 0.0005 15 | # cell_size: 9, 15 16 | cell_size: 9 17 | boxes_per_cell: 2 18 | object_scale: 1 19 | noobject_scale: 0.5 20 | class_scale: 1 21 | coord_scale: 5 22 | 23 | [Solver] 24 | lr: 0.0005 25 | moment: 0.9 26 | max_iterators: 100000 27 | pretrain_model_path: /home/ai-i-liuguiyang/github.com/DL.EyeSight/results/unet/pretrain/model.ckpt 28 | train_dir: /home/ai-i-liuguiyang/github.com/DL.EyeSight/results/unet/train_model -------------------------------------------------------------------------------- /conf/yolo_unet_train.cfg: -------------------------------------------------------------------------------- 1 | [Common] 2 | # When is_predict == True, batch_size == 1 and only the forward pass is run 3 | # When is_predict == False, the network is trained 4 | is_predict: True 5 | image_size: 512 6 | image_width: 512 7 | image_height: 512 8 | batch_size: 32 9 | num_classes: 1 10 | max_objects_per_image: 30 11 | 12 | [DataSet] 13 | path: /Volumes/projects/DataSets/CSUVideo/512x512/train.txt 14 | thread_num: 8 15 | 16 | [Net] 17 | weight_decay: 0.0005 18 | # cell_size: 9, 15 19 | cell_size: 9 20 | boxes_per_cell: 2 21 | object_scale: 1 22 | noobject_scale: 0.5 23 | class_scale: 1 24 | coord_scale: 5 25 | 26 | [Solver] 27 | lr: 0.0005 28 | moment: 0.9 29 | max_iterators: 100000 30 | pretrain_model_path: /Users/liuguiyang/github.com/DL.EyeSight/results/unet/pretrain/model.ckpt 31 | train_dir: /Users/liuguiyang/github.com/DL.EyeSight/results/unet/train_model/ -------------------------------------------------------------------------------- /Others/lsd12/label_config.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2009 IW. 2 | # All rights reserved. 3 | # 4 | # Author: liuguiyang 5 | # Date: 2018/1/3 6 | 7 | from __future__ import absolute_import 8 | from __future__ import division 9 | from __future__ import print_function 10 | 11 | sign_idx_dict = { 12 | "airplane": 1, 13 | "ship": 2, 14 | "storagetank": 3, 15 | "baseballdiamond": 4, 16 | "tenniscourt": 5, 17 | "basketballcourt": 6, 18 | "groundtrackfield": 7, 19 | "harbor": 8, 20 | "bridge": 9, 21 | "vehicle": 10, 22 | "car": 10, 23 | "vehiclecar": 10, 24 | "campingcar": 11, 25 | "van": 11, 26 | "pickup": 12, 27 | "truck": 12, 28 | "tractor": 12 29 | } 30 | 31 | idx_sign_dict = { 32 | 1: "airplane", 33 | 2: "ship", 34 | 3: "storagetank", 35 | 4: "baseballdiamond", 36 | 5: "tenniscourt", 37 | 6: "basketballcourt", 38 | 7: "groundtrackfield", 39 | 8: "harbor", 40 | 9: "bridge", 41 | 10: "vehicle", 42 | 11: "van", 43 | 12: "truck" 44 | }
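# Editor's note: several raw labels deliberately share one id, and
# idx_sign_dict maps each id back to its canonical name, e.g.:
#   sign_idx_dict["car"]                 -> 10
#   idx_sign_dict[sign_idx_dict["car"]]  -> "vehicle"
#   sign_idx_dict["tractor"]             -> 12
#   idx_sign_dict[12]                    -> "truck"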
-------------------------------------------------------------------------------- /checks/datasets/check_ssd_dataset.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2009 IW. 2 | # All rights reserved. 3 | # 4 | # Author: liuguiyang 5 | # Date: 2018/3/11 6 | 7 | from __future__ import absolute_import 8 | from __future__ import division 9 | from __future__ import print_function 10 | 11 | from optparse import OptionParser 12 | 13 | from datum.utils.process_config import process_config 14 | from datum.models.ssd.ssd_dataset import SSDDataSet 15 | 16 | 17 | parser = OptionParser() 18 | parser.add_option("-c", "--conf", 19 | dest="configure", 20 | help="configure filename") 21 | (options, args) = parser.parse_args() 22 | if options.configure: 23 | conf_file = str(options.configure) 24 | else: 25 | print('please specify --conf configure filename') 26 | exit(0) 27 | 28 | common_params, dataset_params, net_params, solver_params, box_encoder_params = \ 29 | process_config(conf_file) 30 | 31 | 32 | data_generator = SSDDataSet(common_params, dataset_params, box_encoder_params) 33 | data_generator.batch() -------------------------------------------------------------------------------- /checks/brain/yolo/check_yolo_model.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2009 IW. 2 | # All rights reserved. 3 | # 4 | # Author: liuguiyang 5 | # Date: 2018/3/14 6 | 7 | from __future__ import absolute_import 8 | from __future__ import division 9 | from __future__ import print_function 10 | 11 | import tensorflow as tf 12 | from optparse import OptionParser 13 | 14 | from datum.utils.process_config import process_config 15 | from eagle.brain.yolo.yolo_u_net import YOLOUNet 16 | 17 | 18 | parser = OptionParser() 19 | parser.add_option("-c", "--conf", 20 | dest="configure", 21 | help="configure filename") 22 | (options, args) = parser.parse_args() 23 | if options.configure: 24 | conf_file = str(options.configure) 25 | else: 26 | print('please specify --conf configure filename') 27 | exit(0) 28 | 29 | common_params, dataset_params, net_params, solver_params = \ 30 | process_config(conf_file) 31 | 32 | net = YOLOUNet(common_params, net_params) 33 | images = tf.placeholder(dtype=tf.float32, shape=(32, 512, 512, 3)) 34 | model_spec = net.inference(images) 35 | print(model_spec)
-------------------------------------------------------------------------------- /checks/brain/ssd/check_ssd_model.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2009 IW. 2 | # All rights reserved. 3 | # 4 | # Author: liuguiyang 5 | # Date: 2018/3/9 6 | 7 | from __future__ import absolute_import 8 | from __future__ import division 9 | from __future__ import print_function 10 | 11 | import tensorflow as tf 12 | from optparse import OptionParser 13 | 14 | from datum.utils.process_config import process_config 15 | from eagle.brain.ssd.models.vgg import SSDVGG 16 | 17 | 18 | parser = OptionParser() 19 | parser.add_option("-c", "--conf", 20 | dest="configure", 21 | help="configure filename") 22 | (options, args) = parser.parse_args() 23 | if options.configure: 24 | conf_file = str(options.configure) 25 | else: 26 | print('please specify --conf configure filename') 27 | exit(0) 28 | 29 | common_params, dataset_params, net_params, solver_params, box_encoder_params = \ 30 | process_config(conf_file) 31 | 32 | net = SSDVGG(common_params, net_params, box_encoder_params) 33 | images = tf.placeholder(dtype=tf.float32, shape=(32, 300, 300, 3)) 34 | model_spec = net.inference(images) 35 | print(model_spec) -------------------------------------------------------------------------------- /examples/yolo/train.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2009 IW. 2 | # All rights reserved. 3 | # 4 | # Author: liuguiyang 5 | # Date: 2018/3/6 6 | 7 | from __future__ import absolute_import 8 | from __future__ import division 9 | from __future__ import print_function 10 | 11 | from optparse import OptionParser 12 | 13 | from datum.utils.process_config import process_config 14 | from datum.models.yolo.yolo_dataset import YoloDataSet 15 | from eagle.brain.solver.yolo_solver import YoloSolver 16 | from eagle.brain.yolo.yolo_tiny_net import YoloTinyNet 17 | 18 | parser = OptionParser() 19 | parser.add_option("-c", "--conf", dest="configure", 20 | help="configure filename") 21 | (options, args) = parser.parse_args() 22 | if options.configure: 23 | conf_file = str(options.configure) 24 | else: 25 | print('please specify --conf configure filename') 26 | exit(0) 27 | 28 | common_params, dataset_params, net_params, solver_params = process_config(conf_file) 29 | dataset = YoloDataSet(common_params, dataset_params) 30 | net = YoloTinyNet(common_params, net_params) 31 | solver = YoloSolver(dataset, net, common_params, solver_params) 32 | solver.solve() -------------------------------------------------------------------------------- /eagle/README.md: -------------------------------------------------------------------------------- 1 | ## Main Modules of the Project 2 | ```shell 3 | eagle 4 | ├── brain 5 | │   ├── rotation 6 | │   │   └── yolo 7 | │   ├── solver 8 | │   │   ├── solver.py 9 | │   │   ├── ssd_solver.py 10 | │   │   └── yolo_solver.py 11 | │   ├── ssd 12 | │   │   ├── anchor_boxes.py 13 | │   │   ├── box_encode_decode_utils.py 14 | │   │   ├── loss.py 15 | │   │   ├── models 16 | │   │   │   ├── components.py 17 | │   │   │   ├── net.py 18 | │   │   │   ├── squeezenet_300.py 19 | │   │   │   ├── squeezenet_512.py 20 | │   │   │   └── vgg.py 21 | │   │   └── normalization.py 22 | │   └── yolo 23 | │   ├── net.py 24 | │   ├── yolo_net.py 25 | │   └── yolo_tiny_net.py 26 | ├── observe 27 | │   ├── augmentors 28 | │   │   ├── arithmetic.py 29 | │   │   ├── blur.py 30 | │   │   ├── color.py 31 | │   │   └── flip.py 32 | │   └── base 33 | ├── parameter.py 34 | └── utils.py 35 | 36 | eagle 37 | ├── README.md 38 | ├── brain core module of the detection algorithms 39 | │   ├── solver training/solving framework for the models 40 | │   ├── ssd files related to the SSD detection model 41 | │   │   └── models 42 | │   └── yolo files related to the YOLO detection model 43 | ├── observe data pre-processing module 44 | │   ├── augmentors image-processing (augmentation) code 45 | │   └── base base classes of the processing framework 46 | ├── parameter.py control of random parameter sampling 47 | ├── trainer processing logic that is actually invoked 48 | └── utils.py utility code shared across the project 49 | 50 | ```
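A minimal usage sketch of the observe/augmentors API, patterned on the scripts under checks/observe (the blur constructor argument follows check_average_blur.py; treat any other parameter shapes as assumptions):

```python
from skimage import data

import eagle.utils as eu
from eagle.observe.augmentors.blur import AverageBlur

# load a demo image and bring it to a fixed size
image = data.astronaut()
image = eu.imresize_single_image(image, (64, 64))

# blur with a 3x3 averaging kernel and apply it to a single image
aug = AverageBlur(k=3)
image_aug = aug.augment_image(image)
print(image_aug.shape)
```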
-------------------------------------------------------------------------------- /examples/unet/train.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2009 IW. 2 | # All rights reserved. 3 | # 4 | # Author: liuguiyang 5 | # Date: 2018/3/6 6 | 7 | from __future__ import absolute_import 8 | from __future__ import division 9 | from __future__ import print_function 10 | 11 | from optparse import OptionParser 12 | 13 | from datum.utils.process_config import process_config 14 | # from datum.models.yolo.yolo_dataset import YoloDataSet 15 | from datum.models.yolo.yolo_batch_dataset import YoloDataSet 16 | from eagle.brain.solver.yolo_u_solver import YoloUSolver 17 | from eagle.brain.yolo.yolo_u_net import YoloUNet 18 | 19 | parser = OptionParser() 20 | parser.add_option("-c", "--conf", dest="configure", 21 | help="configure filename") 22 | (options, args) = parser.parse_args() 23 | if options.configure: 24 | conf_file = str(options.configure) 25 | else: 26 | print('please specify --conf configure filename') 27 | exit(0) 28 | 29 | common_params, dataset_params, net_params, solver_params = process_config(conf_file) 30 | print("After Processing Config File!") 31 | dataset = YoloDataSet(common_params, dataset_params) 32 | print("Prepared DataSet!") 33 | net = YoloUNet(common_params, net_params) 34 | print("Building the Deep Learning Model!") 35 | solver = YoloUSolver(dataset, net, common_params, solver_params) 36 | print("Now Start Learning Best Parameters!") 37 | solver.solve() -------------------------------------------------------------------------------- /eagle/brain/rotation/yolo/net.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2009 IW. 2 | # All rights reserved. 3 | # 4 | # Author: liuguiyang 5 | # Date: 2018/3/1 6 | 7 | from __future__ import absolute_import 8 | from __future__ import division 9 | from __future__ import print_function 10 | 11 | 12 | class Net(object): 13 | def __init__(self, common_params, net_params): 14 | if not isinstance(common_params, dict): 15 | raise TypeError("common_params must be dict") 16 | if not isinstance(net_params, dict): 17 | raise TypeError("net_params must be dict") 18 | 19 | def inference(self, images): 20 | """Build the yolo model 21 | Args: 22 | images: 4-D tensor [batch_size, image_height, image_width, channels] 23 | Returns: 24 | predicts: 4-D tensor [batch_size, cell_size, cell_size, num_classes + 5 * boxes_per_cell] 25 | """ 26 | raise NotImplementedError 27 | 28 | def loss(self, predicts, labels, objects_num): 29 | """Add Loss to all the trainable variables 30 | Args: 31 | predicts: 4-D tensor [batch_size, cell_size, cell_size, 5 * boxes_per_cell] 32 | ===> (num_classes, boxes_per_cell, 4 * boxes_per_cell) 33 | labels : 3-D tensor of [batch_size, max_objects, 5] 34 | objects_num: 1-D tensor [batch_size] 35 | """ 36 | raise NotImplementedError
-------------------------------------------------------------------------------- /eagle/brain/ssd/normalization.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2009 IW. 2 | # All rights reserved. 3 | # 4 | # Author: liuguiyang 5 | # Date: 2017/12/18 6 | 7 | from __future__ import absolute_import 8 | from __future__ import division 9 | from __future__ import print_function 10 | 11 | import numpy as np 12 | 13 | import keras.backend as K 14 | from keras.engine.topology import Layer 15 | from keras.engine.topology import InputSpec 16 | 17 | 18 | class L2Normalization(Layer): 19 | def __init__(self, gamma_init=20, **kwargs): 20 | if K.image_dim_ordering() == 'tf': 21 | self.axis = 3 22 | else: 23 | self.axis = 1 24 | self.gamma_init = gamma_init 25 | super(L2Normalization, self).__init__(**kwargs) 26 | 27 | def build(self, input_shape): 28 | self.input_spec = [InputSpec(shape=input_shape)] 29 | gamma = self.gamma_init * np.ones((input_shape[self.axis],)) 30 | self.gamma = K.variable(gamma, name="{}_gamma".format(self.name)) 31 | self.trainable_weights = [self.gamma] 32 | super(L2Normalization, self).build(input_shape) 33 | 34 | def call(self, x, mask=None): 35 | output = K.l2_normalize(x, self.axis) 36 | output *= self.gamma 37 | return output 38 | 39 | def get_config(self): 40 | config = { 41 | 'gamma_init': self.gamma_init 42 | } 43 | base_config = super(L2Normalization, self).get_config() 44 | return dict(list(base_config.items()) + list(config.items())) 45 | -------------------------------------------------------------------------------- /conf/ssd_train.cfg: -------------------------------------------------------------------------------- 1 | [Common] 2 | model_name: VGG 3 | image_size: 300 4 | image_width: 300 5 | image_height: 300 6 | image_channel: 3 7 | num_classes: 1 8 | batch_size: 10 9 | is_predict: False 10 | 11 | [DataSet] 12 | # Each record in the dataset is stored as [image_path, xmin, ymin, xmax, ymax, class_id] 13 | path: /Volumes/projects/DataSets/CSUVideo/300x300/train_samples.txt 14 | # Whether to add a background class; background defaults to class 0 and is added automatically, all other labels are shifted up by one 15 | is_need_bg: True 16 | # The class list must match the classes used in the path file 17 | classes: ["airplane"] 18 | # Field layout of the records in the path file 19 | box_output_format: ["xmin", "xmax", "ymin", "ymax", "class_id"] 20 | # Number of processes used for data pre-processing 21 | thread_num: 8 22 | # When resizing the original image causes an aspect-ratio mismatch: 23 | # within the given range the image is resized directly, otherwise it is cropped first and then resized 24 | upper_resize_rate: 0.2 25 | lower_resize_rate: 0.2 26 | 27 | [BoxEncoder] 28 | # the spatial dimensions of the model's predictor layers to create the anchor boxes. 29 | predictor_sizes: [[37, 37], [18, 18], [9, 9], [5, 5], [3, 3], [1, 1]] 30 | scales: [0.1, 0.2, 0.37, 0.54, 0.71, 0.88, 1.05] 31 | aspect_ratios_per_layer: [[0.5, 1.0, 2.0], [0.333333, 0.5, 1.0, 2.0, 3.0], [0.333333, 0.5, 1.0, 2.0, 3.0], [0.333333, 0.5, 1.0, 2.0, 3.0], [0.5, 1.0, 2.0], [0.5, 1.0, 2.0]] 32 | two_boxes_for_ar1: True 33 | variances: [0.1, 0.1, 0.2, 0.2] 34 | coords: centroids 35 | normalize_coords: True 36 | pos_iou_threshold: 0.5 37 | neg_iou_threshold: 0.2 38 | 39 | [Net] 40 | neg_pos_ratio=3 41 | n_neg_min=0 42 | loss_alpha=1.0 43 | 44 | [Solver] 45 | lr: 0.0001 46 | beta_1=0.9 47 | beta_2=0.999 48 | epsilon=1e-08 49 | decay=5e-04 50 | max_iterators: 10000 51 | #pretrain_model_path: /Volumes/projects/github.com/Object.Tracking.Video/trainer/weights/ssd300_weights_epoch-00_loss-2.3397_val_loss-3.6407.h5 52 | pretrain_model_path: None 53 | train_dir: /Users/liuguiyang/github.com/DL.EyeSight/results/ssd/train_model/
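# Editor's note (derived from the [BoxEncoder] values above, not an original
# option): layers with 3 aspect ratios predict 4 boxes per cell (two_boxes_for_ar1
# adds a second box for ratio 1.0) and layers with 5 ratios predict 6, giving
#   37*37*4 + 18*18*6 + 9*9*6 + 5*5*6 + 3*3*4 + 1*1*4 = 8096 anchor boxes in total.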
-------------------------------------------------------------------------------- /checks/observe/check_median_blur.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2009 IW. 2 | # All rights reserved. 3 | # 4 | # Author: liuguiyang 5 | # Date: 2018/2/28 6 | 7 | from __future__ import absolute_import 8 | from __future__ import division 9 | from __future__ import print_function 10 | 11 | import cv2 12 | import numpy as np 13 | from skimage import data 14 | 15 | import eagle.utils as eu 16 | from eagle.observe.augmentors.blur import MedianBlur 17 | 18 | TIME_PER_STEP = 5000 19 | NB_AUGS_PER_IMAGE = 10 20 | 21 | def main(): 22 | image = data.astronaut() 23 | image = eu.imresize_single_image(image, (64, 64)) 24 | print("image shape:", image.shape) 25 | print("Press any key or wait %d ms to proceed to the next image." % (TIME_PER_STEP,)) 26 | 27 | k = [ 28 | 1, 29 | 3, 30 | 5, 31 | 7, 32 | (3, 3), 33 | (1, 11) 34 | ] 35 | 36 | cv2.namedWindow("aug", cv2.WINDOW_NORMAL) 37 | cv2.resizeWindow("aug", 64*NB_AUGS_PER_IMAGE, 64) 38 | #cv2.imshow("aug", image[..., ::-1]) 39 | #cv2.waitKey(TIME_PER_STEP) 40 | 41 | for ki in k: 42 | aug = MedianBlur(k=ki) 43 | img_aug = [aug.augment_image(image) for _ in range(NB_AUGS_PER_IMAGE)] 44 | img_aug = np.hstack(img_aug) 45 | print("dtype", img_aug.dtype, "averages", np.average(img_aug, axis=tuple(range(0, img_aug.ndim-1)))) 46 | #print("dtype", img_aug.dtype, "averages", img_aug.mean(axis=range(1, img_aug.ndim))) 47 | 48 | # title = "k=%s" % (str(ki),) 49 | # img_aug = ia.draw_text(img_aug, x=5, y=5, text=title) 50 | 51 | cv2.imshow("aug", img_aug[..., ::-1]) # here with rgb2bgr 52 | cv2.waitKey(TIME_PER_STEP) 53 | 54 | if __name__ == "__main__": 55 | main() -------------------------------------------------------------------------------- /checks/observe/check_gaussian_blur.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2009 IW. 2 | # All rights reserved. 3 | # 4 | # Author: liuguiyang 5 | # Date: 2018/2/28 6 | 7 | from __future__ import absolute_import 8 | from __future__ import division 9 | from __future__ import print_function 10 | 11 | import cv2 12 | import numpy as np 13 | from skimage import data 14 | 15 | import eagle.utils as eu 16 | # Editor's note: the original file imported MedianBlur here, apparently a copy-paste slip from check_median_blur.py; GaussianBlur (assumed to exist in blur.py alongside MedianBlur/AverageBlur) matches this check's intent 17 | from eagle.observe.augmentors.blur import GaussianBlur 18 | 19 | TIME_PER_STEP = 5000 20 | NB_AUGS_PER_IMAGE = 10 21 | 22 | 23 | def main(): 24 | image = data.astronaut() 25 | image = eu.imresize_single_image(image, (128, 128)) 26 | print("image shape:", image.shape) 27 | print("Press any key or wait %d ms to proceed to the next image." % (TIME_PER_STEP,)) 28 | 29 | k = [ 30 | 1, 31 | 3, 32 | 5, 33 | 7, 34 | (3, 3), 35 | (1, 11) 36 | ] 37 | 38 | cv2.namedWindow("aug", cv2.WINDOW_NORMAL) 39 | cv2.resizeWindow("aug", 128*NB_AUGS_PER_IMAGE, 128) 40 | #cv2.imshow("aug", image[..., ::-1]) 41 | #cv2.waitKey(TIME_PER_STEP) 42 | 43 | for ki in k: 44 | aug = GaussianBlur(ki)  # was MedianBlur(k=ki); see editor's note above 45 | img_aug = [aug.augment_image(image) for _ in range(NB_AUGS_PER_IMAGE)] 46 | img_aug = np.hstack(img_aug) 47 | print("dtype", img_aug.dtype, "averages", np.average(img_aug, axis=tuple(range(0, img_aug.ndim-1)))) 48 | #print("dtype", img_aug.dtype, "averages", img_aug.mean(axis=range(1, img_aug.ndim))) 49 | 50 | # title = "k=%s" % (str(ki),) 51 | # img_aug = ia.draw_text(img_aug, x=5, y=5, text=title) 52 | 53 | cv2.imshow("aug", img_aug[..., ::-1]) # here with rgb2bgr 54 | cv2.waitKey(TIME_PER_STEP) 55 | 56 | if __name__ == "__main__": 57 | main()
-------------------------------------------------------------------------------- /conf/ssd_train_server.cfg: -------------------------------------------------------------------------------- 1 | [Common] 2 | image_size: 300 3 | image_width: 300 4 | image_height: 300 5 | image_channel: 3 6 | num_classes: 20 7 | batch_size: 64 8 | 9 | [DataSet] 10 | # Each record in the dataset is stored as [image_path, xmin, ymin, xmax, ymax, class_id] 11 | path: /home/ai-i-liuguiyang/datasets/VOC/total.txt 12 | # Whether to add a background class; background defaults to class 0 and is added automatically, all other labels are shifted up by one 13 | is_need_bg: True 14 | # The class list must match the classes used in the path file 15 | classes: ["aeroplane", "bicycle", "bird", "boat", "bottle", "bus", "car", "cat", "chair", "cow", "diningtable", "dog", "horse", "motorbike", "person", "pottedplant", "sheep", "sofa", "train", "tvmonitor"] 16 | # Field layout of the records in the path file 17 | box_output_format: ["xmin", "xmax", "ymin", "ymax", "class_id"] 18 | # Number of processes used for data pre-processing 19 | thread_num: 10 20 | # When resizing the original image causes an aspect-ratio mismatch: 21 | # within the given range the image is resized directly, otherwise it is cropped first and then resized 22 | upper_resize_rate: 0.2 23 | lower_resize_rate: 0.2 24 | 25 | [BoxEncoder] 26 | # the spatial dimensions of the model's predictor layers to create the anchor boxes. 27 | predictor_sizes: [[37, 37], [18, 18], [9, 9], [5, 5], [3, 3], [1, 1]] 28 | scales: [0.1, 0.2, 0.37, 0.54, 0.71, 0.88, 1.05] 29 | aspect_ratios_per_layer: [[0.5, 1.0, 2.0], [0.333333, 0.5, 1.0, 2.0, 3.0], [0.333333, 0.5, 1.0, 2.0, 3.0], [0.333333, 0.5, 1.0, 2.0, 3.0], [0.5, 1.0, 2.0], [0.5, 1.0, 2.0]] 30 | two_boxes_for_ar1: True 31 | variances: [0.1, 0.1, 0.2, 0.2] 32 | coords: centroids 33 | normalize_coords: True 34 | pos_iou_threshold: 0.5 35 | neg_iou_threshold: 0.2 36 | 37 | [Net] 38 | neg_pos_ratio=3 39 | n_neg_min=0 40 | loss_alpha=1.0 41 | 42 | [Solver] 43 | lr: 0.0001 44 | beta_1=0.9 45 | beta_2=0.999 46 | epsilon=1e-08 47 | decay=5e-04 48 | max_iterators: 100000 49 | pretrain_model_path: /home/ai-i-liuguiyang/github.com/DL.EyeSight/results/ssd/pretrain/model.ckpt-64000 50 | train_dir: /home/ai-i-liuguiyang/github.com/DL.EyeSight/results/ssd/train_model/ -------------------------------------------------------------------------------- /examples/ssd/vgg_trainer.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2009 IW. 2 | # All rights reserved.
3 | # 4 | # Author: liuguiyang 5 | # Date: 2017/12/18 6 | 7 | from __future__ import absolute_import 8 | from __future__ import division 9 | from __future__ import print_function 10 | 11 | from optparse import OptionParser 12 | 13 | import os, sys 14 | abs_path = os.path.abspath(__file__) 15 | proj_root = "/".join(abs_path.split("/")[:-3]) 16 | sys.path.insert(0, proj_root) 17 | 18 | from importlib import reload 19 | reload(sys) 20 | 21 | from datum.utils.process_config import process_config 22 | from datum.models.ssd.ssd_dataset import SSDDataSet 23 | from eagle.brain.ssd.models.vgg import SSDVGG 24 | from eagle.brain.ssd.models.vgg_dilated import SSDVGGDilated 25 | from eagle.brain.solver.ssd_solver import SSDSolver 26 | 27 | parser = OptionParser() 28 | parser.add_option("-c", "--conf", 29 | dest="configure", 30 | help="configure filename") 31 | (options, args) = parser.parse_args() 32 | if options.configure: 33 | conf_file = str(options.configure) 34 | else: 35 | print('please specify --conf configure filename') 36 | exit(0) 37 | 38 | common_params, dataset_params, net_params, solver_params, box_encoder_params = \ 39 | process_config(conf_file) 40 | 41 | data_generator = SSDDataSet(common_params, dataset_params, box_encoder_params) 42 | model_name = common_params.get("model_name", "VGG") 43 | if model_name == "VGG": 44 | net = SSDVGG(common_params, net_params, box_encoder_params) 45 | elif model_name == "VGG-Dilated": 46 | net = SSDVGGDilated(common_params, net_params, box_encoder_params) 47 | else: 48 | raise ValueError("model_name is not fitted !", model_name) 49 | solver = SSDSolver(data_generator, net, common_params, solver_params) 50 | solver.solve() 51 | -------------------------------------------------------------------------------- /checks/observe/check_average_blur.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2009 IW. 2 | # All rights reserved. 3 | # 4 | # Author: liuguiyang 5 | # Date: 2018/2/28 6 | 7 | from __future__ import absolute_import 8 | from __future__ import division 9 | from __future__ import print_function 10 | 11 | import cv2 12 | import numpy as np 13 | from skimage import data 14 | 15 | import eagle.utils as eu 16 | from eagle.observe.augmentors.blur import AverageBlur 17 | 18 | 19 | TIME_PER_STEP = 5000 20 | NB_AUGS_PER_IMAGE = 10 21 | 22 | 23 | def main(): 24 | image = data.astronaut() 25 | image = eu.imresize_single_image(image, (64, 64)) 26 | print("image shape:", image.shape) 27 | print("Press any key or wait %d ms to proceed to the next image." 
% (TIME_PER_STEP,)) 28 | 29 | k = [ 30 | 1, 31 | 2, 32 | 4, 33 | 8, 34 | 16, 35 | (8, 8), 36 | (1, 8), 37 | ((1, 1), (8, 8)), 38 | ((1, 16), (1, 16)), 39 | ((1, 16), 1) 40 | ] 41 | 42 | cv2.namedWindow("aug", cv2.WINDOW_NORMAL) 43 | cv2.resizeWindow("aug", 64*NB_AUGS_PER_IMAGE, 64) 44 | #cv2.imshow("aug", image[..., ::-1]) 45 | #cv2.waitKey(TIME_PER_STEP) 46 | 47 | for ki in k: 48 | aug = AverageBlur(k=ki) 49 | img_aug = [aug.augment_image(image) for _ in range(NB_AUGS_PER_IMAGE)] 50 | img_aug = np.hstack(img_aug) 51 | print("dtype", img_aug.dtype, "averages", np.average(img_aug, axis=tuple(range(0, img_aug.ndim-1)))) 52 | #print("dtype", img_aug.dtype, "averages", img_aug.mean(axis=range(1, img_aug.ndim))) 53 | 54 | # title = "k=%s" % (str(ki),) 55 | # img_aug = ia.draw_text(img_aug, x=5, y=5, text=title) 56 | 57 | cv2.imshow("aug", img_aug[..., ::-1]) # here with rgb2bgr 58 | cv2.waitKey(TIME_PER_STEP) 59 | 60 | if __name__ == "__main__": 61 | main() -------------------------------------------------------------------------------- /conf/dilated_ssd_train.cfg: -------------------------------------------------------------------------------- 1 | [Common] 2 | model_name: VGG-Dilated 3 | image_size: 300 4 | image_width: 300 5 | image_height: 300 6 | image_channel: 3 7 | num_classes: 20 8 | batch_size: 10 9 | is_predict: False 10 | 11 | [DataSet] 12 | # Each record in the dataset is stored as [image_path, xmin, ymin, xmax, ymax, class_id] 13 | path: /Volumes/projects/DataSets/VOC/pascal_voc_2007.txt 14 | # Whether to add a background class; background defaults to class 0 and is added automatically, all other labels are shifted up by one 15 | is_need_bg: True 16 | # The class list must match the classes used in the path file 17 | classes: ["aeroplane", "bicycle", "bird", "boat", "bottle", "bus", "car", "cat", "chair", "cow", "diningtable", "dog", "horse", "motorbike", "person", "pottedplant", "sheep", "sofa", "train", "tvmonitor"] 18 | # Field layout of the records in the path file 19 | box_output_format: ["xmin", "xmax", "ymin", "ymax", "class_id"] 20 | # Number of processes used for data pre-processing 21 | thread_num: 8 22 | # When resizing the original image causes an aspect-ratio mismatch: 23 | # within the given range the image is resized directly, otherwise it is cropped first and then resized 24 | upper_resize_rate: 0.2 25 | lower_resize_rate: 0.2 26 | 27 | [BoxEncoder] 28 | # the spatial dimensions of the model's predictor layers to create the anchor boxes. 29 | predictor_sizes: [[37, 37], [18, 18], [9, 9], [5, 5], [3, 3], [1, 1]] 30 | scales: [0.1, 0.2, 0.37, 0.54, 0.71, 0.88, 1.05] 31 | aspect_ratios_per_layer: [[0.5, 1.0, 2.0], [0.333333, 0.5, 1.0, 2.0, 3.0], [0.333333, 0.5, 1.0, 2.0, 3.0], [0.333333, 0.5, 1.0, 2.0, 3.0], [0.5, 1.0, 2.0], [0.5, 1.0, 2.0]] 32 | two_boxes_for_ar1: True 33 | variances: [0.1, 0.1, 0.2, 0.2] 34 | coords: centroids 35 | normalize_coords: True 36 | pos_iou_threshold: 0.5 37 | neg_iou_threshold: 0.2 38 | 39 | [Net] 40 | neg_pos_ratio=3 41 | n_neg_min=0 42 | loss_alpha=1.0 43 | 44 | [Solver] 45 | lr: 0.0001 46 | beta_1=0.9 47 | beta_2=0.999 48 | epsilon=1e-08 49 | decay=5e-04 50 | max_iterators: 1000000 51 | #pretrain_model_path: /Volumes/projects/github.com/Object.Tracking.Video/trainer/weights/ssd300_weights_epoch-00_loss-2.3397_val_loss-3.6407.h5 52 | pretrain_model_path: None 53 | train_dir: /Users/liuguiyang/github.com/DL.EyeSight/results/yolo/train_model/
-------------------------------------------------------------------------------- /conf/ssd_train_512.cfg: -------------------------------------------------------------------------------- 1 | [Common] 2 | model_name: VGG-Dilated 3 | image_size: 512 4 | image_width: 512 5 | image_height: 512 6 | image_channel: 3 7 | num_classes: 12 8 | batch_size: 1 9 | is_predict: False 10 | 11 | [DataSet] 12 | # Each record in the dataset is stored as [image_path, xmin, ymin, xmax, ymax, class_id] 13 | # Target labels in the dataset start at 0, in the same order as the indices in classes 14 | path: /Volumes/projects/DataSets/LSD12/train_data_list.txt 15 | # Whether to add a background class; background defaults to class 0 and is added automatically, all other labels are shifted up by one 16 | is_need_bg: True 17 | # The class list must match the classes used in the path file 18 | classes: ["airplane", "ship", "storagetank", "baseballdiamond", "tenniscourt", "basketballcourt", "groundtrackfield", "harbor", "bridge", "vehicle", "van", "truck"] 19 | # Field layout of the records in the path file 20 | box_output_format: ["xmin", "xmax", "ymin", "ymax", "class_id"] 21 | # Number of processes used for data pre-processing 22 | thread_num: 8 23 | # When resizing the original image causes an aspect-ratio mismatch: 24 | # within the given range the image is resized directly, otherwise it is cropped first and then resized 25 | upper_resize_rate: 0.2 26 | lower_resize_rate: 0.2 27 | 28 | [BoxEncoder] 29 | # the spatial dimensions of the model's predictor layers to create the anchor boxes. 30 | predictor_sizes: [[64, 64], [32, 32], [16, 16], [7, 7], [3, 3], [1, 1]] 31 | scales: [0.1, 0.2, 0.37, 0.54, 0.71, 0.88, 1.05] 32 | aspect_ratios_per_layer: [[0.5, 1.0, 2.0], [0.333333, 0.5, 1.0, 2.0, 3.0], [0.333333, 0.5, 1.0, 2.0, 3.0], [0.333333, 0.5, 1.0, 2.0, 3.0], [0.5, 1.0, 2.0], [0.5, 1.0, 2.0]] 33 | two_boxes_for_ar1: True 34 | variances: [0.1, 0.1, 0.2, 0.2] 35 | coords: centroids 36 | normalize_coords: True 37 | pos_iou_threshold: 0.5 38 | neg_iou_threshold: 0.2 39 | 40 | [Net] 41 | neg_pos_ratio=3 42 | n_neg_min=0 43 | loss_alpha=1.0 44 | 45 | [Solver] 46 | lr: 0.0001 47 | beta_1=0.9 48 | beta_2=0.999 49 | epsilon=1e-08 50 | decay=5e-04 51 | max_iterators: 1000000 52 | #pretrain_model_path: /Volumes/projects/github.com/Object.Tracking.Video/trainer/weights/ssd300_weights_epoch-00_loss-2.3397_val_loss-3.6407.h5 53 | pretrain_model_path: None 54 | train_dir: /Users/liuguiyang/github.com/DL.EyeSight/results/dilated/train_model/ -------------------------------------------------------------------------------- /eagle/brain/ssd/models/components.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2009 IW. 2 | # All rights reserved.
3 | # 4 | # Author: liuguiyang 5 | # Date: 2018/3/7 6 | 7 | from __future__ import absolute_import 8 | from __future__ import division 9 | from __future__ import print_function 10 | 11 | from keras.layers import Activation, Conv2D, Concatenate 12 | from keras.layers import BatchNormalization 13 | 14 | def _fire(x, filters, name="fire"): 15 | sq_filters, ex1_filters, ex2_filters = filters 16 | squeeze = Conv2D(sq_filters, (1, 1), activation="relu", padding="same", kernel_initializer="he_normal", name=name+"/squeeze1x1")(x) 17 | expand1 = Conv2D(ex1_filters, (1, 1), activation="relu", padding="same", kernel_initializer="he_normal", name=name+"/expand1x1")(squeeze) 18 | expand2 = Conv2D(ex2_filters, (3, 3), activation="relu", padding="same", kernel_initializer="he_normal", name=name+"/expand3x3")(squeeze) 19 | x = Concatenate(axis=-1, name=name+"/concate")([expand1, expand2]) 20 | return x 21 | 22 | def _fire_with_bn(x, filters, name="fire"): 23 | sq_filters, ex1_filters, ex2_filters = filters 24 | squeeze = Conv2D(sq_filters, (1, 1), activation="relu", padding="same", kernel_initializer="he_normal", name=name+"/squeeze1x1")(x) 25 | expand1 = Activation(activation="relu", name=name+"/relu_expand1x1")(BatchNormalization(name=name+"/expand1x1/bn")(Conv2D(ex1_filters, (1, 1), strides=(1, 1), padding="same", kernel_initializer="he_normal", name=name+"/expand1x1")(squeeze))) 26 | expand2 = Activation(activation="relu", name=name+"/relu_expand3x3")(BatchNormalization(name=name+"/expand3x3/bn")(Conv2D(ex2_filters, (3, 3), strides=(1, 1), padding="same", kernel_initializer="he_normal", name=name+"/expand3x3")(squeeze))) 27 | x = Concatenate(axis=-1, name=name+"/concate")([expand1, expand2]) 28 | return x 29 | 30 | def _conv2D_with_bn(x, n_filters, k_size, k_stride, name, pad="same"): 31 | x = Conv2D(n_filters, k_size, strides=(k_stride, k_stride), padding=pad, kernel_initializer="he_normal", name=name+"/conv")(x) 32 | x = BatchNormalization(name=name+"/bn")(x) 33 | x = Activation(activation="relu", name=name+"/relu")(x) 34 | return x 35 | -------------------------------------------------------------------------------- /examples/unet/predict.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2009 IW. 2 | # All rights reserved. 
3 | # 4 | # Author: liuguiyang 5 | # Date: 2018/3/6 6 | 7 | from __future__ import absolute_import 8 | from __future__ import division 9 | from __future__ import print_function 10 | 11 | import cv2 12 | import numpy as np 13 | from optparse import OptionParser 14 | 15 | from datum.utils.process_config import process_config 16 | # from datum.models.yolo.yolo_dataset import YoloDataSet 17 | from datum.models.yolo.yolo_batch_dataset import YoloDataSet 18 | from eagle.brain.solver.yolo_u_solver import YoloUSolver 19 | from eagle.brain.yolo.yolo_u_net import YoloUNet 20 | 21 | parser = OptionParser() 22 | parser.add_option("-c", "--conf", dest="configure", 23 | help="configure filename") 24 | (options, args) = parser.parse_args() 25 | if options.configure: 26 | conf_file = str(options.configure) 27 | else: 28 | print('please specify --conf configure filename') 29 | exit(0) 30 | 31 | common_params, dataset_params, net_params, solver_params = process_config(conf_file) 32 | print("After Processing Config File!") 33 | # dataset = YoloDataSet(common_params, dataset_params) 34 | # print("Prepared DataSet!") 35 | net = YoloUNet(common_params, net_params) 36 | print("Building the Deep Learning Model!") 37 | solver = YoloUSolver(None, net, common_params, solver_params) 38 | print("Now Start Predicting with the Trained Parameters!") 39 | image_path = "/Volumes/projects/DataSets/CSUVideo/512x512/large_tunisia_total/JPEGImages/000011_1428_408_1940_920_35.jpg" 40 | 41 | img_width, img_height = 512, 512 42 | single_image = cv2.imread(image_path) 43 | resized_img = cv2.resize(single_image, (img_height, img_width)) 44 | np_img = cv2.cvtColor(resized_img, cv2.COLOR_BGR2RGB) 45 | np_img = np_img.astype(np.float32) 46 | np_img = np_img / 255.0 * 2 - 1 47 | np_img = np.reshape(np_img, (1, img_height, img_width, 3)) 48 | 49 | (xmin, ymin, xmax, ymax, class_num) = solver.model_predict(np_img) 50 | 51 | cv2.rectangle(resized_img, (int(xmin), int(ymin)), 52 | (int(xmax), int(ymax)), (0, 0, 255)) 53 | # cv2.imwrite('cat_out.jpg', resized_img) 54 | cv2.imshow('cat_out.jpg', resized_img) 55 | cv2.waitKey() -------------------------------------------------------------------------------- /Others/lsd12/check_dataset.py: -------------------------------------------------------------------------------- 1 | import cv2 2 | 3 | from datum.utils.tools import extract_target_from_xml 4 | from Others.lsd12.label_config import sign_idx_dict, idx_sign_dict 5 | 6 | 7 | dataset_dir = "/Volumes/projects/DataSets/LSD12/" 8 | 9 | 10 | def get_true_id_label(label_name): 11 | """ 12 | :return: label_id, label_name 13 | """ 14 | return sign_idx_dict[label_name], idx_sign_dict[sign_idx_dict[label_name]] 15 | 16 | 17 | def disp_image(): 18 | with open(dataset_dir + "total.txt", "r") as reader: 19 | for line in reader.readlines(): 20 | line = line.strip() 21 | image_name = line + ".jpg" 22 | anno_name = line + ".xml" 23 | image = cv2.imread(dataset_dir + "JPEGImages/" + image_name) 24 | anno_list = extract_target_from_xml(dataset_dir + "Annotations/" + anno_name) 25 | for item in anno_list: 26 | label_id, label_name = get_true_id_label(item[-1]) 27 | item[-1] = label_name 28 | xmin, ymin, xmax, ymax = item[:4] 29 | cv2.rectangle(image, (xmin, ymin), (xmax, ymax), (255, 0, 0), 2) 30 | cv2.imshow("src", image) 31 | cv2.waitKey() 32 | 33 | 34 | def convert_standard(): 35 | output_path = dataset_dir + "train_data_list.txt" 36 | write_handler = open(output_path, "w") 37 | 38 | with open(dataset_dir + "train.txt", "r") as reader: 39 | for line in
reader.readlines(): 40 | line = line.strip() 41 | image_path = dataset_dir + "JPEGImages/" + line + ".jpg" 42 | anno_path = dataset_dir + "Annotations/" + line + ".xml" 43 | anno_list = extract_target_from_xml(anno_path) 44 | anno_str_list = [] 45 | for item in anno_list: 46 | label_id, label_name = get_true_id_label(item[-1]) 47 | item[-1] = label_id - 1 48 | item = [str(cell) for cell in item] 49 | anno_str_list.append(" ".join(item)) 50 | anno_info = " ".join(anno_str_list) 51 | write_handler.write(image_path + " " + anno_info + "\n") 52 | write_handler.close() 53 | print("saved the converted data to ", output_path) 54 | 55 | 56 | if __name__ == '__main__': 57 | convert_standard() 58 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Object Detection in Images and Videos 2 | The main goal of this project is to integrate the object-detection models commonly used in deep learning and to apply them to detection in both images and videos! 3 | 4 | ## Development Environment 5 | ```shell 6 | OS: Ubuntu 16.04 7 | Python: Python 3.6.0 8 | Tensorflow: 1.4.1 version 9 | Opencv: 3.2.0 version for python 10 | ``` 11 | 12 | ## Implemented Models 13 | * SSD+VGG 14 | * SSD+Res 15 | * SSD+Inception 16 | * SSD+SqueezeNet 17 | * SSD+Deconvolution 18 | * YOLO 19 | 20 | ## Usage 21 | 1. Prepare the dataset first 22 | ```shell 23 | cat /Volumes/projects/DataSets/VOC2007/voc_train.txt 24 | image_path01 xmin ymin xmax ymax class_id xmin ymin xmax ymax class_id 25 | image_path02 xmin ymin xmax ymax class_id xmin ymin xmax ymax class_id 26 | image_path03 xmin ymin xmax ymax class_id xmin ymin xmax ymax class_id 27 | 28 | PS: class_id starts at 0 and follows the same label order as in the cfg file 29 | ``` 30 | 2. Edit the configuration file 31 | The configuration file lives in the project root: **conf/ssd_train.cfg** 32 | It contains a number of options that may need adjusting. 33 | 34 | 3. Run the program 35 | Change into the examples/ssd directory 36 | ``` 37 | python vgg_trainer.py -c ../../conf/ssd_train.cfg 38 | ```
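A minimal sketch (not part of the repository) of how one line of this training list can be parsed, assuming the whitespace-separated layout shown above:

```python
def parse_train_line(line):
    """Split one list line into (image_path, [[xmin, ymin, xmax, ymax, class_id], ...])."""
    fields = line.strip().split()
    image_path, rest = fields[0], fields[1:]
    # every 5 consecutive fields describe one ground-truth box
    boxes = [list(map(int, rest[i:i + 5])) for i in range(0, len(rest), 5)]
    return image_path, boxes
```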
62 |
63 | 64 |
65 |
67 |
68 | 69 |
70 |
72 |
73 | 74 |
75 |
78 | 79 | 80 | ## 联系我 81 | * New Issues 82 | * Send me E-mail: liuguiyangnwpu@163.com 83 | -------------------------------------------------------------------------------- /Others/satellite/process.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2009 IW. 2 | # All rights reserved. 3 | # 4 | # Author: liuguiyang 5 | # Date: 2018/3/15 6 | 7 | from __future__ import absolute_import 8 | from __future__ import division 9 | from __future__ import print_function 10 | 11 | import os 12 | import codecs 13 | import xml.etree.ElementTree as ET 14 | 15 | 16 | data_dir = "/Volumes/projects/DataSets/CSUVideo/512x512" 17 | image_sets = ["large_000013363_total", "large_000014631_total", 18 | "large_minneapolis_1_total", "large_tunisia_total"] 19 | 20 | def parse_xml(xml_file): 21 | """ 22 | Args: 23 | xml_file: the input xml file path 24 | 25 | Returns: 26 | image_path: string 27 | labels: list of [xmin, ymin, xmax, ymax, class] 28 | """ 29 | tree = ET.parse(xml_file) 30 | root = tree.getroot() 31 | labels = [] 32 | 33 | for item in root: 34 | if item.tag == 'object': 35 | obj_num = 1 36 | bndbox = item.find("bndbox") 37 | xmin = int(float(bndbox.find("xmin").text)) 38 | ymin = int(float(bndbox.find("ymin").text)) 39 | xmax = int(float(bndbox.find("xmax").text)) 40 | ymax = int(float(bndbox.find("ymax").text)) 41 | labels.append([xmin, ymin, xmax, ymax, obj_num]) 42 | 43 | return labels 44 | 45 | 46 | def convert_list2str(labels): 47 | return ",".join([",".join(list(map(str, item))) for item in labels]) 48 | 49 | 50 | for dataset in image_sets: 51 | anno_prefix = "/".join([data_dir, dataset, "Annotations"]) 52 | image_prefix = "/".join([data_dir, dataset, "JPEGImages"]) 53 | with codecs.open(data_dir + "/" + dataset + ".txt", "w", "utf8") as writer: 54 | for anno_name in os.listdir(anno_prefix): 55 | if anno_name.startswith("."): 56 | continue 57 | anno_path = "/".join([anno_prefix, anno_name]) 58 | image_name = anno_name.replace("xml", "jpg") 59 | image_path = "/".join([image_prefix, image_name]) 60 | if not os.path.isfile(image_path): 61 | print("{} not found !".format(image_path)) 62 | labels = parse_xml(anno_path) 63 | anno_info = convert_list2str(labels) 64 | writer.write("{},{}\n".format(image_path, anno_info)) 65 | -------------------------------------------------------------------------------- /Others/vedia/show.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2009 IW. 2 | # All rights reserved. 
#
# Author: liuguiyang
# Date: 2018/3/12

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

# Visualize the annotations (which carry rotation angles) on the images

import os
import codecs

import cv2
import numpy as np

data_prefix = "/Volumes/projects/DataSets/VEDIA/"
images_dir = ["512/Vehicules512/", "1024/Vehicules1024/"]
annotations_filepath = ["512/Annotations512/annotation512.txt",
                        "1024/Annotations1024/annotation1024.txt"]


def show_image():
    for img_dir, anno_file in zip(images_dir, annotations_filepath):
        abs_img_dir = data_prefix + img_dir
        abs_anno_path = data_prefix + anno_file
        if not os.path.isfile(abs_anno_path):
            raise ValueError("{} file not found !".format(abs_anno_path))
        images_dict = {}
        with codecs.open(abs_anno_path, "r", "utf8") as reader:
            for line in reader:
                line = line.strip().split(' ')
                name_prefix = line[0] + "_co.png"
                image_path = abs_img_dir + name_prefix
                if image_path in images_dict.keys():
                    images_dict[image_path].append(line)
                else:
                    images_dict[image_path] = [line]
        for img_path in images_dict.keys():
            if not os.path.isfile(img_path):
                raise IOError("{} image path not found !".format(img_path))
            image = cv2.imread(img_path)

            for line in images_dict[img_path]:
                center_x, center_y = float(line[1]), float(line[2])
                rotate_theta = float(line[3])
                # line[4:8] are the corner x-coordinates, line[8:12] the
                # corner y-coordinates; transpose yields four (x, y) points
                points = np.array(list(map(float, line[4:12])),
                                  np.int32).reshape((2, -1)).T
                fully_contain = int(line[-2])
                occluded = int(line[-1])

                points = points.reshape((-1, 1, 2))
                cv2.polylines(image, [points], True, color=(255, 0, 0))

            cv2.imshow("src", image)
            cv2.waitKey()


if __name__ == '__main__':
    show_image()
-------------------------------------------------------------------------------- /checks/observe/check_color.py: --------------------------------------------------------------------------------
# Copyright (c) 2009 IW.
# All rights reserved.
#
# Author: liuguiyang
# Date: 2018/2/28

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import cv2
import numpy as np
from skimage import data

import eagle.utils as eu
from eagle.observe.augmentors.flip import Fliplr
from eagle.observe.augmentors.arithmetic import Add
from eagle.observe.augmentors.color import WithChannels, WithColorspace

TIME_PER_STEP = 10000


def main_WithChannels():
    image = data.astronaut()
    print("image shape:", image.shape)
    print("Press any key or wait %d ms to proceed to the next image."
% (TIME_PER_STEP,)) 27 | 28 | children_all = [ 29 | ("hflip", Fliplr(1)), 30 | ("add", Add(50)) 31 | ] 32 | 33 | channels_all = [ 34 | None, 35 | 0, 36 | [], 37 | [0], 38 | [0, 1], 39 | [1, 2], 40 | [0, 1, 2] 41 | ] 42 | 43 | cv2.namedWindow("aug", cv2.WINDOW_NORMAL) 44 | cv2.imshow("aug", image[..., ::-1]) 45 | cv2.waitKey(TIME_PER_STEP) 46 | 47 | for children_title, children in children_all: 48 | for channels in channels_all: 49 | aug = WithChannels(channels=channels, children=children) 50 | img_aug = aug.augment_image(image) 51 | print("dtype", img_aug.dtype, "averages", np.average(img_aug, axis=tuple(range(0, img_aug.ndim-1)))) 52 | #print("dtype", img_aug.dtype, "averages", img_aug.mean(axis=range(1, img_aug.ndim))) 53 | 54 | # title = "children=%s | channels=%s" % (children_title, channels) 55 | # img_aug = ia.draw_text(img_aug, x=5, y=5, text=title) 56 | 57 | cv2.imshow("aug", img_aug[..., ::-1]) # here with rgb2bgr 58 | cv2.waitKey(TIME_PER_STEP) 59 | 60 | 61 | def main_WithColorspace(): 62 | image = data.astronaut() 63 | print("image shape:", image.shape) 64 | 65 | aug = WithColorspace( 66 | from_colorspace="RGB", 67 | to_colorspace="HSV", 68 | children=WithChannels(0, Add(50)) 69 | ) 70 | 71 | aug_no_colorspace = WithChannels(0, Add(50)) 72 | 73 | img_show = np.hstack([ 74 | image, 75 | aug.augment_image(image), 76 | aug_no_colorspace.augment_image(image) 77 | ]) 78 | 79 | cv2.namedWindow("aug", cv2.WINDOW_NORMAL) 80 | cv2.imshow("aug", img_show[..., ::-1]) 81 | cv2.waitKey(TIME_PER_STEP) 82 | 83 | if __name__ == "__main__": 84 | # main_WithChannels() 85 | main_WithColorspace() 86 | -------------------------------------------------------------------------------- /Others/satellite/bbox_cluster.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2009 IW. 2 | # All rights reserved. 
#
# Author: liuguiyang
# Date: 2018/3/15

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

# Cluster the target bounding boxes with a clustering method
import os
import numpy as np

import matplotlib.pyplot as plt
from matplotlib.font_manager import FontProperties

from Others.satellite.process import parse_xml

# print(plt.rcParams.keys())
# font = FontProperties(fname='/Library/Fonts/ufonts.com_fangsong.ttf')
font = FontProperties(fname='/Users/liuguiyang/Library/Fonts/仿宋_GB2312.ttf')

data_dir = "/Volumes/projects/DataSets/CSUVideo/source"
# namesets = ["large_000013363_total", "large_000014631_total",
#             "large_minneapolis_1_total", "large_tunisia_total"]
namesets = ["large_000013363_total"]


datum = []
for name in namesets:
    anno_prefix = "/".join([data_dir, name, "Annotations"])
    for anno_name in os.listdir(anno_prefix):
        if anno_name.startswith("."):
            continue
        anno_path = "/".join([anno_prefix, anno_name])
        # [xmin, ymin, xmax, ymax, class_id]
        labels = parse_xml(anno_path)
        if len(labels) == 0:
            continue
        datum.extend(labels)
datum = np.array(datum, np.int32)

datum_width = datum[:, 2] - datum[:, 0]
datum_height = datum[:, 3] - datum[:, 1]
datum_ratio = datum_width / datum_height

print(datum_width.shape)
print(datum_height.shape)
print(datum_ratio.shape)

# frequency histogram of the target widths
d = {}
for i in datum_width:
    d.setdefault(i, 0)
    d[i] += 1
x_w = d.keys()
y_w = d.values()

# frequency histogram of the target heights
d = {}
for i in datum_height:
    d.setdefault(i, 0)
    d[i] += 1
x_h = d.keys()
y_h = d.values()

select_1 = plt.scatter(x_w, y_w, marker="o", label=u'目标宽的分布')  # "distribution of target widths"
select_2 = plt.scatter(x_h, y_h, marker="*", label=u'目标高的分布')  # "distribution of target heights"
plt.legend(handles=[select_1, select_2], prop=font)

plt.title(u"目标尺寸分布图", fontproperties=font)  # "target size distribution"
plt.xlabel(u"尺寸/像素", fontproperties=font)      # "size / pixels"
plt.ylabel(u"数量/个", fontproperties=font)        # "count"
plt.savefig("h_w_distribution.png", dpi=300)
# plt.show()
# datum_width = datum_width.reshape((datum_width.shape[0], 1))
# datum_height = datum_height.reshape((datum_height.shape[0], 1))
# d = np.concatenate([datum_width, datum_height], axis=1)
# plt.scatter(d[:, 0], d[:, 1])
# plt.show()
# kmeans = KMeans(n_clusters=3, random_state=0).fit(datum_width)
# print(kmeans.labels_)
# print(kmeans.cluster_centers_)
-------------------------------------------------------------------------------- /datum/utils/process_config.py: --------------------------------------------------------------------------------
# Copyright (c) 2009 IW.
# All rights reserved.
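#
# process_config parses an INI-style training configuration into parameter
# dicts. A minimal sketch of the expected layout follows — the section names
# come from the parsing code below, the option names are those read by the
# dataset/solver classes elsewhere in this repository, and the values are
# purely illustrative:
#
#   [Common]
#   image_size = 448
#   batch_size = 16
#   max_objects_per_image = 20
#
#   [DataSet]
#   path = /path/to/train_list.txt
#   thread_num = 4
#
#   [Net]
#   ; options depend on the chosen network
#
#   [Solver]
#   lr = 0.0001
#   moment = 0.9
#   train_dir = models/train
#   max_iterators = 100000
#   pretrain_model_path = None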
#
# Author: liuguiyang
# Date: 2018/3/5

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

from configparser import ConfigParser


def process_config(conf_file):
    """Process a configure file to generate CommonParams, DataSetParams,
    NetParams and SolverParams.

    Args:
        conf_file: configure file path
    Returns:
        CommonParams, DataSetParams, NetParams, SolverParams
        (plus BoxEncoderParams when a [BoxEncoder] section is present)
    """
    common_params = {}
    dataset_params = {}
    net_params = {}
    solver_params = {}
    box_encoder_params = {}

    # configure_parser
    config = ConfigParser()
    config.read(conf_file, encoding="utf8")

    # sections and options
    for section in config.sections():
        # construct common_params
        if section == 'Common':
            for option in config.options(section):
                common_params[option] = config.get(section, option)
        # construct dataset_params
        if section == 'DataSet':
            for option in config.options(section):
                dataset_params[option] = config.get(section, option)
        # construct net_params
        if section == 'Net':
            for option in config.options(section):
                net_params[option] = config.get(section, option)
        # construct solver_params
        if section == 'Solver':
            for option in config.options(section):
                solver_params[option] = config.get(section, option)

        # construct box_encoder_params
        if section == 'BoxEncoder':
            for option in config.options(section):
                box_encoder_params[option] = config.get(section, option)

    # Determine whether the current task is prediction or training
    if "is_predict" in common_params.keys():
        if common_params["is_predict"] == "True":
            common_params["is_predict"] = True
            common_params["batch_size"] = 1
        else:
            common_params["is_predict"] = False

    if len(box_encoder_params) == 0:
        return common_params, dataset_params, net_params, solver_params

    return common_params, dataset_params, net_params, solver_params, box_encoder_params


if __name__ == '__main__':
    common_params, dataset_params, net_params, solver_params = process_config(
        "../../conf/yolo_unet_train.cfg")
    print(common_params)
    print(dataset_params)
    # import json
    # print(json.loads(dataset_params["classes"]))
    # print(net_params["aspect_ratios"])
    # print(json.loads(net_params["aspect_ratios"]))
-------------------------------------------------------------------------------- /eagle/observe/augmentors/flip.py: --------------------------------------------------------------------------------
# Copyright (c) 2009 IW.
# All rights reserved.
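#
# Fliplr / Flipud mirror images (and their keypoints) horizontally or
# vertically, each with probability p. A hedged usage sketch — the
# augment_image call mirrors how the check scripts in this repository drive
# augmentors, and `img` stands for any HxWxC uint8 image array:
#
#   aug = Fliplr(0.5)                    # flip roughly half of the inputs
#   img_flipped = aug.augment_image(img)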
3 | # 4 | # Author: liuguiyang 5 | # Date: 2018/2/28 6 | 7 | from __future__ import absolute_import 8 | from __future__ import division 9 | from __future__ import print_function 10 | 11 | import numpy as np 12 | 13 | import eagle.utils as eu 14 | from eagle.observe.base.meta import Augmentor 15 | from eagle.parameter import StochasticParameter, Binomial 16 | 17 | 18 | class Fliplr(Augmentor): 19 | def __init__(self, p=0, name=None, deterministic=False, random_state=None): 20 | super(Fliplr, self).__init__( 21 | name=name, deterministic=deterministic, random_state=random_state) 22 | 23 | if eu.is_single_number(p): 24 | self.p = Binomial(p) 25 | elif isinstance(p, StochasticParameter): 26 | self.p = p 27 | else: 28 | raise Exception("Expected p type StochasticParameter") 29 | 30 | def _augment_images(self, images, random_state, parents, hooks): 31 | nb_images = len(images) 32 | samples = self.p.draw_samples((nb_images,), random_state=random_state) 33 | for i in range(nb_images): 34 | if samples[i] == 1: 35 | images[i] = np.fliplr(images[i]) 36 | return images 37 | 38 | def _augment_keypoints(self, 39 | keypoints_on_images, random_state, parents, hooks): 40 | nb_images = len(keypoints_on_images) 41 | samples = self.p.draw_samples((nb_images, ), random_state=random_state) 42 | for i, kps_oi in enumerate(keypoints_on_images): 43 | if samples[i] == 1: 44 | width = kps_oi.shape[1] 45 | for kp in kps_oi.keypoints: 46 | kp.x = (width - 1) - kp.x 47 | return keypoints_on_images 48 | 49 | def get_parameters(self): 50 | return [self.p] 51 | 52 | 53 | class Flipud(Augmentor): 54 | def __init__(self, p=0, name=None, deterministic=False, random_state=None): 55 | super(Flipud, self).__init__( 56 | name=name, deterministic=deterministic, random_state=random_state) 57 | if eu.is_single_number(p): 58 | self.p = Binomial(p) 59 | elif isinstance(p, StochasticParameter): 60 | self.p = p 61 | else: 62 | raise Exception("Expected p type StochasticParameter") 63 | 64 | def _augment_images(self, images, random_state, parents, hooks): 65 | nb_images = len(images) 66 | samples = self.p.draw_samples((nb_images,), random_state=random_state) 67 | for i in range(nb_images): 68 | if samples[i] == 1: 69 | images[i] = np.flipud(images[i]) 70 | return images 71 | 72 | def _augment_keypoints(self, 73 | keypoints_on_images, random_state, parents, hooks): 74 | nb_images = len(keypoints_on_images) 75 | samples = self.p.draw_samples((nb_images,), random_state=random_state) 76 | for i, kps_oi in enumerate(keypoints_on_images): 77 | if samples[i] == 1: 78 | height = kps_oi.shape[0] 79 | for kp in kps_oi.keypoints: 80 | kp.y = (height - 1) - kp.y 81 | return keypoints_on_images 82 | 83 | def get_parameters(self): 84 | return [self.p] 85 | -------------------------------------------------------------------------------- /Others/voc/process_pascal_voc.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2009 IW. 2 | # All rights reserved. 
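#
# Converts Pascal VOC XML annotations into one flat text line per image, in
# the "image_path xmin ymin xmax ymax class_id ..." layout produced by
# convert_to_string below. An illustrative (made-up) output line:
#
#   /Volumes/projects/DataSets/VOC/VOCdevkit/VOC2007/JPEGImages/000005.jpg 263 211 324 339 8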
#
# Author: liuguiyang
# Date: 2018/3/5

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import os
import xml.etree.ElementTree as ET


classes_name = [
    "aeroplane", "bicycle", "bird", "boat", "bottle",
    "bus", "car", "cat", "chair", "cow", "diningtable",
    "dog", "horse", "motorbike", "person", "pottedplant",
    "sheep", "sofa", "train", "tvmonitor"
]

classes_num = {
    'aeroplane': 0, 'bicycle': 1, 'bird': 2, 'boat': 3, 'bottle': 4,
    'bus': 5, 'car': 6, 'cat': 7, 'chair': 8, 'cow': 9, 'diningtable': 10,
    'dog': 11, 'horse': 12, 'motorbike': 13, 'person': 14, 'pottedplant': 15,
    'sheep': 16, 'sofa': 17, 'train': 18, 'tvmonitor': 19
}

DATA_ROOT = "/Volumes/projects/DataSets/VOC"
DATA_PATH = os.path.join(DATA_ROOT, "VOCdevkit/")
OUTPUT_PATH = os.path.join(DATA_ROOT, "pascal_voc_{}.txt")


def parse_xml(xml_file, year=2007):
    """
    Args:
        xml_file: the input xml file path

    Returns:
        image_path: string
        labels: list of [xmin, ymin, xmax, ymax, class]
    """
    tree = ET.parse(xml_file)
    root = tree.getroot()
    image_path = ''
    labels = []

    for item in root:
        if item.tag == 'filename':
            if year == 2007:
                image_path = os.path.join(
                    DATA_PATH, 'VOC2007/JPEGImages', item.text)
            if year == 2012:
                image_path = os.path.join(
                    DATA_PATH, 'VOC2012/JPEGImages', item.text)
        elif item.tag == 'object':
            obj_name = item[0].text
            obj_num = classes_num[obj_name]
            bndbox = item.find("bndbox")
            xmin = int(float(bndbox.find("xmin").text))
            ymin = int(float(bndbox.find("ymin").text))
            xmax = int(float(bndbox.find("xmax").text))
            ymax = int(float(bndbox.find("ymax").text))
            labels.append([xmin, ymin, xmax, ymax, obj_num])

    return image_path, labels


def convert_to_string(image_path, labels):
    out_string = ''
    out_string += image_path
    for label in labels:
        for i in label:
            out_string += ' ' + str(i)
    out_string += '\n'

    return out_string


def run_main(year=2007):
    print("Start formatting voc {} data !".format(year))
    out_file = open(OUTPUT_PATH.format(year), "w")
    if year == 2007:
        xml_dir = os.path.join(DATA_PATH, "VOC2007/Annotations/")
    if year == 2012:
        xml_dir = os.path.join(DATA_PATH, "VOC2012/Annotations/")

    xml_list = os.listdir(xml_dir)

    xml_list = [xml_dir + tmp for tmp in xml_list]
    for xml in xml_list:
        if not os.path.isfile(xml):
            print("{} is not an xml file path, skipping.".format(xml))
            continue  # skip directory entries that are not files
        image_path, labels = parse_xml(xml, year=year)
        record = convert_to_string(image_path, labels)
        out_file.write(record)
    out_file.close()

if __name__ == '__main__':
    run_main(year=2007)
    run_main(year=2012)
-------------------------------------------------------------------------------- /checks/observe/check_parameters.py: --------------------------------------------------------------------------------
# Copyright (c) 2009 IW.
# All rights reserved.
#
# Author: liuguiyang
# Date: 2018/2/28

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function


import numpy as np
import matplotlib.pyplot as plt

from eagle.parameter import (
    Binomial, Choice, DiscreteUniform, Normal, Uniform, Deterministic, Clip,
    Discretize, Multiply, Add, Divide, Power, Absolute
)


def main():
    params = [
        ("Binomial(0.1)", Binomial(0.1)),
        ("Choice", Choice([0, 1, 2])),
        ("Choice with p", Choice([0, 1, 2], p=[0.1, 0.2, 0.7])),
        ("DiscreteUniform(0, 10)", DiscreteUniform(0, 10)),
        ("Normal(0, 1)", Normal(0, 1)),
        ("Normal(1, 1)", Normal(1, 1)),
        ("Normal(0, 2)", Normal(0, 2)),
        ("Normal(Choice([-1, 1]), 2)", Normal(Choice([-1, 1]), 2)),
        ("Discretize(Normal(0, 1.0))", Discretize(Normal(0, 1.0))),
        ("Uniform(0, 10)", Uniform(0, 10)),
        ("Deterministic(1)", Deterministic(1)),
        ("Clip(Normal(0, 1), 0, None)", Clip(Normal(0, 1), minval=0, maxval=None)),
        ("Multiply(Uniform(0, 10), 2)", Multiply(Uniform(0, 10), 2)),
        ("Add(Uniform(0, 10), 5)", Add(Uniform(0, 10), 5)),
        ("Absolute(Normal(0, 1))", Absolute(Normal(0, 1)))
    ]

    params_arithmetic = [
        ("Normal(0, 1.0)", Normal(0.0, 1.0)),
        ("Normal(0, 1.0) + 5", Normal(0.0, 1.0) + 5),
        ("5 + Normal(0, 1.0)", 5 + Normal(0.0, 1.0)),
        ("5 + Normal(0, 1.0)", Add(5, Normal(0.0, 1.0), elementwise=True)),
        ("Normal(0, 1.0) * 10", Normal(0.0, 1.0) * 10),
        ("10 * Normal(0, 1.0)", 10 * Normal(0.0, 1.0)),
        ("10 * Normal(0, 1.0)", Multiply(10, Normal(0.0, 1.0), elementwise=True)),
        ("Normal(0, 1.0) / 10", Normal(0.0, 1.0) / 10),
        ("10 / Normal(0, 1.0)", 10 / Normal(0.0, 1.0)),
        ("10 / Normal(0, 1.0)", Divide(10, Normal(0.0, 1.0), elementwise=True)),
        ("Normal(0, 1.0) ** 2", Normal(0.0, 1.0) ** 2),
        ("2 ** Normal(0, 1.0)", 2 ** Normal(0.0, 1.0)),
        ("2 ** Normal(0, 1.0)", Power(2, Normal(0.0, 1.0), elementwise=True))
    ]

    params_noise = [
        # ("SimplexNoise", SimplexNoise()),
        # ("Sigmoid(SimplexNoise)", Sigmoid(SimplexNoise())),
        # ("SimplexNoise(linear)", SimplexNoise(upscale_method="linear")),
        # ("SimplexNoise(nearest)", SimplexNoise(upscale_method="nearest")),
        # ("FrequencyNoise((-4, 4))", FrequencyNoise(exponent=(-4, 4))),
        # ("FrequencyNoise(-2)", FrequencyNoise(exponent=-2)),
        # ("FrequencyNoise(2)", FrequencyNoise(exponent=2))
    ]

    images_params = [param.draw_distribution_graph() for (title, param) in params]
    images_arithmetic = [param.draw_distribution_graph() for (title, param) in params_arithmetic]

    show_multi_array(images_params)
    show_multi_array(images_arithmetic)


def show_multi_array(image_arrays):
    n = len(image_arrays)
    h, w, c = image_arrays[0].shape
    print("arrays num: {}, single image shape: {}".format(n, image_arrays[0].shape))

    if n == 1:
        plt.imshow(image_arrays[0])
        plt.show()
        return

    # grid_n x grid_n is the smallest square grid that holds all n images
    if int(np.sqrt(n)) ** 2 < n:
        grid_n = int(np.sqrt(n)) + 1
    else:
        grid_n = int(np.sqrt(n))

    large_image = np.zeros((h*grid_n, w*grid_n, c), dtype=image_arrays[0].dtype)
    for i, img in enumerate(image_arrays):
        x1, y1 = (i % grid_n)*w, (i // grid_n)*h
        x2, y2 = (i % grid_n + 1)*w, (i // grid_n + 1)*h
        large_image[y1:y2, x1:x2] = img
    plt.imshow(large_image)
    plt.show()


if __name__ == "__main__":
    main()
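# A quick interactive sanity check of the parameter API exercised above —
# a hedged sketch: the draw_samples/draw_sample signatures are assumed from
# their use in eagle/observe/augmentors/flip.py and arithmetic.py, and the
# outputs are only illustrative:
#
#   >>> from eagle.parameter import Binomial, Normal
#   >>> Binomial(0.5).draw_samples((8,))   # eight 0/1 coin flips
#   >>> Normal(0, 1).draw_sample()         # a single float sample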
-------------------------------------------------------------------------------- /checks/observe/check_background.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2009 IW. 2 | # All rights reserved. 3 | # 4 | # Author: liuguiyang 5 | # Date: 2018/3/1 6 | 7 | from __future__ import absolute_import 8 | from __future__ import division 9 | from __future__ import print_function 10 | 11 | import numpy as np 12 | from skimage import data 13 | import matplotlib.pyplot as plt 14 | 15 | import eagle.utils as eu 16 | from eagle.observe.base.meta import Sequential 17 | from eagle.observe.augmentors.flip import Fliplr, Flipud 18 | from eagle.observe.base.basetype import KeyPoint, KeyPointsOnImage 19 | from eagle.observe.base.basebatch import Batch, BatchLoader, BackgroundAugmentor 20 | 21 | 22 | def main(): 23 | augseq = Sequential([ 24 | Fliplr(0.5), 25 | Flipud(0.5) 26 | ]) 27 | 28 | print("------------------") 29 | print("augseq.augment_batches(batches, background=True)") 30 | print("------------------") 31 | batches = list(load_images()) 32 | batches_aug = augseq.augment_batches(batches, background=True) 33 | images_aug = [] 34 | keypoints_aug = [] 35 | for batch_aug in batches_aug: 36 | images_aug.append(batch_aug.images_aug) 37 | keypoints_aug.append(batch_aug.keypoints_aug) 38 | grid = draw_grid(images_aug, keypoints_aug) 39 | print(grid.shape) 40 | plt.imshow(grid) 41 | plt.show() 42 | 43 | print("------------------") 44 | print("augseq.augment_batches(batches, background=True) -> only images") 45 | print("------------------") 46 | batches = list(load_images()) 47 | batches = [batch.images for batch in batches] 48 | batches_aug = augseq.augment_batches(batches, background=True) 49 | images_aug = [] 50 | keypoints_aug = None 51 | for batch_aug in batches_aug: 52 | images_aug.append(batch_aug) 53 | plt.imshow(draw_grid(images_aug, keypoints_aug)) 54 | plt.show() 55 | 56 | print("------------------") 57 | print("BackgroundAugmenter") 58 | print("------------------") 59 | batch_loader = BatchLoader(load_images) 60 | bg_augmenter = BackgroundAugmentor(batch_loader, augseq) 61 | images_aug = [] 62 | keypoints_aug = [] 63 | while True: 64 | print("Next batch...") 65 | batch = bg_augmenter.get_batch() 66 | if batch is None: 67 | print("Finished.") 68 | break 69 | images_aug.append(batch.images_aug) 70 | keypoints_aug.append(batch.keypoints_aug) 71 | plt.imshow(draw_grid(images_aug, keypoints_aug)) 72 | plt.show() 73 | 74 | 75 | def load_images(): 76 | batch_size = 4 77 | astronaut = data.astronaut() 78 | astronaut = eu.imresize_single_image(astronaut, (64, 64)) 79 | kps = KeyPointsOnImage([KeyPoint(x=15, y=25)], shape=astronaut.shape) 80 | counter = 0 81 | for i in range(10): 82 | batch_images = [] 83 | batch_kps = [] 84 | for b in range(batch_size): 85 | batch_images.append(astronaut) 86 | batch_kps.append(kps) 87 | counter += 1 88 | batch = Batch( 89 | images=np.array(batch_images, dtype=np.uint8), 90 | keypoints=batch_kps 91 | ) 92 | yield batch 93 | 94 | 95 | def draw_grid(images_aug, keypoints_aug): 96 | if keypoints_aug is None: 97 | keypoints_aug = [] 98 | for bidx in range(len(images_aug)): 99 | keypoints_aug.append([None for _ in images_aug[bidx]]) 100 | 101 | images_kps_batches = [] 102 | for bidx in range(len(images_aug)): 103 | images_kps_batch = [] 104 | for image, kps in zip(images_aug[bidx], keypoints_aug[bidx]): 105 | if kps is None: 106 | image_kps = image 107 | else: 108 | image_kps = kps.draw_on_image(image, size=5, color=[255, 
0, 0])
            images_kps_batch.append(image_kps)
        images_kps_batches.extend(images_kps_batch)

    grid = eu.draw_grid(images_kps_batches, cols=len(images_aug[0]))
    return grid

if __name__ == "__main__":
    main()
-------------------------------------------------------------------------------- /Others/satellite/clip_video.py: --------------------------------------------------------------------------------
# encoding: utf-8
"""
@contact: liuguiyang15@mails.ucas.edu.cn
@file: clip_video.py
@time: 2018/5/2 16:57
"""

# Crop the Jilin-1 video data to reduce the image size, removing the unused
# regions first

import os
import cv2
from sklearn.utils import shuffle

from datum.utils.tools import extract_target_from_xml


video_names = [
    "large_000013363_total", "large_000014631_total",
    "large_minneapolis_1_total", "large_tunisia_total"]
# video_names = ["large_tunisia_total"]
root_dir_path = "/Volumes/projects/DataSets/CSUVideo/"
src_dir_path = root_dir_path + "吉林一号视频逐帧/"    # "Jilin-1 video, frame by frame"
clip_save_dir_path = root_dir_path + "标注结果图/"    # "annotated result images"
clip_spec_infos = {
    # "large_000013363_total": {
    #     'xmin': 750, 'ymin': 0,
    #     'xmax': 3750, 'ymax': 2700
    # },
    "large_000013363_total": {
        'xmin': 0, 'ymin': 0,
        'xmax': 4096, 'ymax': 3072
    },
    # "large_000014631_total": {
    #     'xmin': 0, 'ymin': 500,
    #     'xmax': 3400, 'ymax': 3050
    # },
    "large_000014631_total": {
        'xmin': 0, 'ymin': 0,
        'xmax': 4096, 'ymax': 3072
    },
    "large_minneapolis_1_total": {
        'xmin': 0, 'ymin': 0,
        'xmax': 4096, 'ymax': 2160
    },
    "large_tunisia_total": {
        'xmin': 0, 'ymin': 0,
        'xmax': 4096, 'ymax': 2160
    }
}


def clipping_video(is_show=False, is_save_anno=True, is_save_image=False, is_save_anno_image=False):
    for video_name in video_names:
        xmin, ymin = clip_spec_infos[video_name]["xmin"], clip_spec_infos[video_name]["ymin"]
        xmax, ymax = clip_spec_infos[video_name]["xmax"], clip_spec_infos[video_name]["ymax"]

        video_image_dir_path = src_dir_path + video_name + "/JPEGImages/"
        anno_image_dir_path = src_dir_path + video_name + "/Annotations/"
        if is_show:
            cv2.namedWindow("src", cv2.WINDOW_NORMAL)
        N = len(os.listdir(video_image_dir_path))
        for image_id in range(1, N+1):
            image_path = video_image_dir_path + "%06d.jpg" % image_id
            anno_path = anno_image_dir_path + "%06d.xml" % image_id
            if not os.path.exists(anno_path):
                print(anno_path)
                continue
            anno_lists = extract_target_from_xml(anno_path)
            print(anno_path, len(anno_lists))

            image = cv2.imread(image_path)
            image = image[ymin:ymax, xmin:xmax]
            if is_save_image:
                cv2.imwrite(
                    clip_save_dir_path + video_name + "/JPEGImages/%06d.jpg" % image_id,
                    image,
                    [int(cv2.IMWRITE_JPEG_QUALITY), 100])

            for anno in anno_lists:
                a_xmin, a_ymin, a_xmax, a_ymax = anno[:4]
                x1 = a_xmin - xmin
                y1 = a_ymin - ymin
                x2 = a_xmax - xmin
                y2 = a_ymax - ymin
                cv2.rectangle(image, (x1, y1), (x2, y2), (255, 0, 0), 2)

            if is_save_anno_image:
                cv2.imwrite(
                    clip_save_dir_path + video_name + "/JPEGImages/%06d.jpg" % image_id,
                    image,
                    [int(cv2.IMWRITE_JPEG_QUALITY), 100])

            if is_show:
                cv2.imshow("src", image)
                ch = cv2.waitKey(0)
                if ch == ord('q'):
                    return

            # Save each target's position inside the cropped image
            if is_save_anno:
                save_new_anno_file = clip_save_dir_path + video_name + "/Annotations/%06d.txt" % image_id
                with open(save_new_anno_file, "w") as writer:
                    writer.write("x1,y1,x2,y2,label\n")
                    for item in anno_lists:
                        writer.write("{},{},{},{},{}\n".format(*item))


# Random sampling: keep a fraction of the images (0.1 * N below, i.e. 10%)
# for model training
def shuffle_samples():
    for video_name in video_names:
        image_dir_path = clip_save_dir_path + video_name + "/JPEGImages/"
        anno_dir_path = clip_save_dir_path + video_name + "/Annotations/"

        images_list = os.listdir(image_dir_path)
        N = len(images_list)
        selected_list = shuffle(images_list)[0:int(0.1 * N)]
        for item in images_list:
            if item not in selected_list:
                anno_name = item.split(".")[0] + ".txt"
                os.remove(image_dir_path + item)
                os.remove(anno_dir_path + anno_name)


def crop_image_by_window():
    pass


if __name__ == '__main__':
    clipping_video(is_show=False, is_save_anno=True, is_save_image=False, is_save_anno_image=True)
    # shuffle_samples()
    pass
-------------------------------------------------------------------------------- /Others/satellite/prepare_trainsamples.py: --------------------------------------------------------------------------------
# encoding: utf-8
"""
@contact: liuguiyang15@mails.ucas.edu.cn
@file: prepare_trainsamples.py
@time: 2018/5/17 13:05
"""

# Organize the Jilin-1 satellite data into the required format
import os
import random
import cv2


train_video = ["large_000014631_total", "large_minneapolis_1_total", "large_tunisia_total"]
anno_dir_prefix = "/Volumes/projects/DataSets/CSUVideo/video_with_annotation/"
image_dir_prefix = "/Volumes/projects/DataSets/CSUVideo/src_video_frame/"
save_dir_prefix = "/Volumes/projects/DataSets/CSUVideo/300x300/"

SUB_IMG_WID, SUB_IMG_HEI, SUB_OVERLAP = 300, 300, 80


def twoboxes_overlap(box1, box2):
    # intersection area of two [x1, y1, x2, y2] boxes (0 when disjoint)
    x1 = max(box1[0], box2[0])
    y1 = max(box1[1], box2[1])
    x2 = min(box1[2], box2[2])
    y2 = min(box1[3], box2[3])
    if x2 <= x1 or y2 <= y1:
        return 0
    return (x2 - x1) * (y2 - y1)


def crop_image(image_path, anno_path, video_name, image_name):
    target_annos = []
    with open(anno_path, "r") as reader:
        cnt = 0
        for line in reader:
            cnt += 1
            if cnt == 1:
                continue  # skip the "x1,y1,x2,y2,label" header line
            line = list(map(int, line.strip().split(",")[:-1]))
            target_annos.append(line)

    def select_subimage_anno(w, h):
        select_box = []
        for box in target_annos:
            x1, y1, x2, y2 = box
            x11, y11 = x1 - w, y1 - h
            x22, y22 = x2 - w, y2 - h
            gx1, gy1 = w, h
            gx2, gy2 = w + SUB_IMG_WID, h + SUB_IMG_HEI
            overlap_area = twoboxes_overlap(box, [gx1, gy1, gx2, gy2])
            if overlap_area <= 0:
                continue
            new_box = [max(0, x11), max(0, y11), min(x22, SUB_IMG_WID), min(y22, SUB_IMG_HEI)]
            if overlap_area / ((x22 - x11) * (y22 - y11)) >= 0.7:
                select_box.append(new_box)
        return select_box

    image_data = cv2.imread(image_path)
    H, W = image_data.shape[:2]
    cnt = 0
    for h in range(0, H, SUB_IMG_HEI-SUB_OVERLAP):
        for w in range(0, W, SUB_IMG_WID-SUB_OVERLAP):
            if h + SUB_IMG_HEI >= H:
                h = H - SUB_IMG_HEI
            if w + SUB_IMG_WID >= W:
                w = W - SUB_IMG_WID
            cnt += 1
            sub_image = image_data[h:h+SUB_IMG_HEI, w:w+SUB_IMG_WID]
            select_annos = select_subimage_anno(w, h)
            if len(select_annos) == 0:
                continue
            # print(len(select_annos),
select_annos) 74 | # for box in select_annos: 75 | # x1, y1, x2, y2 = box 76 | # cv2.rectangle(sub_image, (x1, y1), (x2, y2), (0, 0, 255), 2) 77 | # cv2.imshow("src", sub_image) 78 | # cv2.waitKey() 79 | image_name = image_name.split(".")[0] 80 | if not os.path.isdir(save_dir_prefix + video_name + "/JPEGImages/"): 81 | os.makedirs(save_dir_prefix + video_name + "/JPEGImages/") 82 | if not os.path.isdir(save_dir_prefix + video_name + "/Annotations/"): 83 | os.makedirs(save_dir_prefix + video_name + "/Annotations/") 84 | save_image_path = save_dir_prefix + video_name + "/JPEGImages/{}_{}_{}.jpg".format(image_name, w, h) 85 | save_anno_path = save_dir_prefix + video_name + "/Annotations/{}_{}_{}.txt".format(image_name, w, h) 86 | cv2.imwrite(save_image_path, sub_image, [int(cv2.IMWRITE_JPEG_QUALITY), 100]) 87 | with open(save_anno_path, "w") as writer: 88 | for box in select_annos: 89 | writer.write(",".join(map(str, box)) + "\n") 90 | 91 | # for video_name in train_video: 92 | # image_dir_path = image_dir_prefix + video_name + "/JPEGImages/" 93 | # anno_dir_path = anno_dir_prefix + video_name + "/Annotations/" 94 | # anno_list = os.listdir(anno_dir_path) 95 | # random.shuffle(anno_list) 96 | # anno_list = random.sample(anno_list, int(len(anno_list) * 0.06)) 97 | # for anno_name in anno_list: 98 | # anno_path = anno_dir_path + anno_name 99 | # image_path = image_dir_path + anno_name.replace("txt", "jpg") 100 | # print(anno_path) 101 | # print(image_path) 102 | # crop_image(image_path, anno_path, video_name, anno_name) 103 | 104 | 105 | train_sample_path = save_dir_prefix + "train_samples.txt" 106 | writer = open(train_sample_path, "w") 107 | for video_name in train_video: 108 | image_dir_path = save_dir_prefix + video_name + "/JPEGImages/" 109 | anno_dir_path = save_dir_prefix + video_name + "/Annotations/" 110 | anno_list = os.listdir(anno_dir_path) 111 | for anno_name in anno_list: 112 | anno_path = anno_dir_path + anno_name 113 | image_path = image_dir_path + anno_name.replace("txt", "jpg") 114 | anno_detail = "" 115 | with open(anno_path, "r") as reader: 116 | anno_info = [] 117 | for line in reader: 118 | line = line.strip().split(",") + ["0"] 119 | anno_info.append(" ".join(line)) 120 | anno_detail = " ".join(anno_info) 121 | writer.write("{} {}\n".format(image_path, anno_detail)) 122 | -------------------------------------------------------------------------------- /datum/models/yolo/yolo_dataset.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2009 IW. 2 | # All rights reserved. 
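#
# Single-sample queue variant of the YOLO text dataset: one producer thread
# fills a record queue and several consumer threads decode the records into
# an image/label queue. See yolo_batch_dataset.py for the batch-level
# variant that the training entry points in this repository import.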
3 | # 4 | # Author: liuguiyang 5 | # Date: 2018/3/5 6 | 7 | from __future__ import absolute_import 8 | from __future__ import division 9 | from __future__ import print_function 10 | 11 | import random 12 | import cv2 13 | import numpy as np 14 | from queue import Queue 15 | from threading import Thread 16 | 17 | from datum.meta.dataset import DataSet 18 | 19 | 20 | class YoloDataSet(DataSet): 21 | """TextDataSet 22 | process text input file dataset 23 | text file format: 24 | image_path xmin1 ymin1 xmax1 ymax1 class1 xmin2 ymin2 xmax2 ymax2 class2 25 | """ 26 | 27 | def __init__(self, common_params, dataset_params): 28 | super(YoloDataSet, self).__init__(common_params, dataset_params) 29 | 30 | # process params 31 | self.data_path = str(dataset_params['path']) 32 | self.width = int(common_params['image_size']) 33 | self.height = int(common_params['image_size']) 34 | self.batch_size = int(common_params['batch_size']) 35 | self.thread_num = int(dataset_params['thread_num']) 36 | self.max_objects = int(common_params['max_objects_per_image']) 37 | 38 | # record and image_label queue 39 | self.record_queue = Queue(maxsize=10000) 40 | self.image_label_queue = Queue(maxsize=5000) 41 | 42 | self.record_list = [] 43 | 44 | # filling the record_list 45 | input_file = open(self.data_path, 'r') 46 | 47 | for line in input_file: 48 | line = line.strip() 49 | if ',' in line: 50 | ss = line.split(',') 51 | else: 52 | ss = line.split(' ') 53 | ss[1:] = [float(num) for num in ss[1:]] 54 | self.record_list.append(ss) 55 | 56 | self.record_point = 0 57 | self.record_number = len(self.record_list) 58 | 59 | self.num_batch_per_epoch = int(self.record_number / self.batch_size) 60 | 61 | t_record_producer = Thread(target=self.record_producer) 62 | t_record_producer.daemon = True 63 | t_record_producer.start() 64 | 65 | for i in range(self.thread_num): 66 | t = Thread(target=self.record_customer) 67 | t.daemon = True 68 | t.start() 69 | 70 | def record_producer(self): 71 | while True: 72 | if self.record_point % self.record_number == 0: 73 | random.shuffle(self.record_list) 74 | self.record_point = 0 75 | self.record_queue.put(self.record_list[self.record_point]) 76 | self.record_point += 1 77 | 78 | def record_customer(self): 79 | while True: 80 | item = self.record_queue.get() 81 | out = self.record_process(item) 82 | self.image_label_queue.put(out) 83 | 84 | def record_process(self, record): 85 | """record process 86 | Args: record 87 | Returns: 88 | image: 3-D ndarray 89 | labels: 2-D list [self.max_objects, 5] (xcenter, ycenter, w, h, class_num) 90 | object_num: total object number int 91 | """ 92 | image = cv2.imread(record[0]) 93 | image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB) 94 | h = image.shape[0] 95 | w = image.shape[1] 96 | 97 | width_rate = self.width * 1.0 / w 98 | height_rate = self.height * 1.0 / h 99 | 100 | image = cv2.resize(image, (self.height, self.width)) 101 | 102 | labels = [[0, 0, 0, 0, 0]] * self.max_objects 103 | i = 1 104 | object_num = 0 105 | while i < len(record): 106 | xmin = record[i] 107 | ymin = record[i + 1] 108 | xmax = record[i + 2] 109 | ymax = record[i + 3] 110 | class_num = record[i + 4] 111 | 112 | xcenter = (xmin + xmax) * 1.0 / 2 * width_rate 113 | ycenter = (ymin + ymax) * 1.0 / 2 * height_rate 114 | 115 | box_w = (xmax - xmin) * width_rate 116 | box_h = (ymax - ymin) * height_rate 117 | 118 | labels[object_num] = [xcenter, ycenter, box_w, box_h, class_num] 119 | object_num += 1 120 | i += 5 121 | if object_num >= self.max_objects: 122 | break 123 | return [image, 
labels, object_num] 124 | 125 | def batch(self): 126 | """get batch 127 | Returns: 128 | images: 4-D ndarray [batch_size, height, width, 3] 129 | labels: 3-D ndarray [batch_size, max_objects, 5] 130 | objects_num: 1-D ndarray [batch_size] 131 | """ 132 | images = [] 133 | labels = [] 134 | objects_num = [] 135 | for i in range(self.batch_size): 136 | image, label, object_num = self.image_label_queue.get() 137 | images.append(image) 138 | labels.append(label) 139 | objects_num.append(object_num) 140 | images = np.asarray(images, dtype=np.float32) 141 | images = images / 255 * 2 - 1 142 | labels = np.asarray(labels, dtype=np.float32) 143 | objects_num = np.asarray(objects_num, dtype=np.int32) 144 | return images, labels, objects_num 145 | -------------------------------------------------------------------------------- /eagle/brain/solver/yolo_solver.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2009 IW. 2 | # All rights reserved. 3 | # 4 | # Author: liuguiyang 5 | # Date: 2018/3/4 6 | 7 | from __future__ import absolute_import 8 | from __future__ import division 9 | from __future__ import print_function 10 | 11 | import sys 12 | import time 13 | from datetime import datetime 14 | 15 | import numpy as np 16 | import tensorflow as tf 17 | 18 | from eagle.brain.solver.solver import Solver 19 | 20 | 21 | class YoloSolver(Solver): 22 | def __init__(self, dataset, net, common_params, solver_params): 23 | super(YoloSolver, self).__init__(dataset, net, common_params, solver_params) 24 | 25 | # process params 26 | self.width = int(common_params['image_size']) 27 | self.height = int(common_params['image_size']) 28 | self.batch_size = int(common_params['batch_size']) 29 | self.max_objects = int(common_params['max_objects_per_image']) 30 | 31 | self.moment = float(solver_params['moment']) 32 | self.learning_rate = float(solver_params['lr']) 33 | self.train_dir = str(solver_params['train_dir']) 34 | self.max_iterators = int(solver_params['max_iterators']) 35 | self.pretrain_path = str(solver_params['pretrain_model_path']) 36 | 37 | self.dataset = dataset 38 | self.net = net 39 | 40 | # construct graph 41 | self.construct_graph() 42 | 43 | def _train(self): 44 | """Train model 45 | 46 | Create an optimizer and apply to all trainable variables. 
47 | 48 | Args: 49 | total_loss: Total loss from net.loss() 50 | global_step: Integer Variable counting the number of training steps 51 | processed 52 | Returns: 53 | train_op: op for training 54 | """ 55 | 56 | opt = tf.train.MomentumOptimizer(self.learning_rate, self.moment) 57 | grads = opt.compute_gradients(self.total_loss) 58 | 59 | apply_gradient_op = opt.apply_gradients(grads, 60 | global_step=self.global_step) 61 | 62 | return apply_gradient_op 63 | 64 | def construct_graph(self): 65 | # construct graph 66 | self.global_step = tf.Variable(0, trainable=False) 67 | self.images = tf.placeholder(tf.float32, ( 68 | self.batch_size, self.height, self.width, 3)) 69 | self.labels = tf.placeholder(tf.float32, 70 | (self.batch_size, self.max_objects, 5)) 71 | self.objects_num = tf.placeholder(tf.int32, (self.batch_size)) 72 | 73 | self.predicts = self.net.inference(self.images) 74 | self.total_loss, self.nilboy = self.net.loss(self.predicts, self.labels, 75 | self.objects_num) 76 | 77 | tf.summary.scalar('loss', self.total_loss) 78 | self.train_op = self._train() 79 | 80 | def solve(self): 81 | saver_pretrain = tf.train.Saver(self.net.pretrained_collection) 82 | saver_train = tf.train.Saver(self.net.trainable_collection, max_to_keep=3) 83 | 84 | init = tf.global_variables_initializer() 85 | 86 | summary_op = tf.summary.merge_all() 87 | 88 | sess = tf.Session() 89 | 90 | sess.run(init) 91 | saver_pretrain.restore(sess, self.pretrain_path) 92 | 93 | summary_writer = tf.summary.FileWriter(self.train_dir, sess.graph) 94 | 95 | for step in range(self.max_iterators): 96 | start_time = time.time() 97 | np_images, np_labels, np_objects_num = self.dataset.batch() 98 | 99 | _, loss_value, nilboy = sess.run( 100 | [self.train_op, self.total_loss, self.nilboy], 101 | feed_dict={self.images: np_images, self.labels: np_labels, 102 | self.objects_num: np_objects_num}) 103 | # loss_value, nilboy = sess.run([self.total_loss, self.nilboy], feed_dict={self.images: np_images, self.labels: np_labels, self.objects_num: np_objects_num}) 104 | 105 | 106 | duration = time.time() - start_time 107 | 108 | assert not np.isnan(loss_value), 'Model diverged with loss = NaN' 109 | 110 | if step % 10 == 0: 111 | num_examples_per_step = self.dataset.batch_size 112 | examples_per_sec = num_examples_per_step / duration 113 | sec_per_batch = float(duration) 114 | 115 | format_str = ('%s: step %d, loss = %.2f ' 116 | '(%.1f examples/sec; %.3f sec/batch)') 117 | print(format_str % (datetime.now(), step, loss_value, 118 | examples_per_sec, sec_per_batch)) 119 | sys.stdout.flush() 120 | if step % 1000 == 0: 121 | summary_str = sess.run(summary_op, 122 | feed_dict={self.images: np_images, 123 | self.labels: np_labels, 124 | self.objects_num: np_objects_num}) 125 | summary_writer.add_summary(summary_str, step) 126 | if step % 5000 == 0: 127 | saver_train.save(sess, 128 | self.train_dir + '/model.ckpt', 129 | global_step=step) 130 | sess.close() 131 | -------------------------------------------------------------------------------- /Others/vedia/convert2voc.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2009 IW. 2 | # All rights reserved. 
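#
# Converts VEDIA annotations (target center, rotation angle and four corner
# points per line) into VOC-style XML files, one <object> per target;
# occluded targets are skipped and the images are copied alongside the XML.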
#
# Author: liuguiyang
# Date: 2018/4/3

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import os
import codecs
from shutil import copyfile
import numpy as np

from xml.dom.minidom import parseString
from lxml.etree import Element, SubElement, tostring
import pprint
import cv2

# label_id_map = {
#     ""
# }

def format_voc_string(filename, anno_infos):
    # anno_cell in anno_infos
    ## anno_cell is dict{"label": "car", "p1":[x1, y1], "p2":[x2, y2]}

    node_root = Element('annotation')

    node_filename = SubElement(node_root, 'filename')
    node_filename.text = filename

    image = cv2.imread(filename)
    height, width = image.shape[:2]  # image.shape is (height, width, channels)

    node_size = SubElement(node_root, 'size')
    node_width = SubElement(node_size, 'width')
    node_width.text = str(width)

    node_height = SubElement(node_size, 'height')
    node_height.text = str(height)

    node_depth = SubElement(node_size, 'depth')
    node_depth.text = '3'

    for anno_cell in anno_infos:
        node_object = SubElement(node_root, 'object')
        node_name = SubElement(node_object, 'name')
        node_name.text = anno_cell["label"]

        node_difficult = SubElement(node_object, 'difficult')
        node_difficult.text = '0'

        node_bndbox = SubElement(node_object, 'bndbox')
        node_x1 = SubElement(node_bndbox, 'x1')
        node_x1.text = str(anno_cell["p1"][0])
        node_y1 = SubElement(node_bndbox, 'y1')
        node_y1.text = str(anno_cell["p1"][1])

        node_x2 = SubElement(node_bndbox, 'x2')
        node_x2.text = str(anno_cell["p2"][0])
        node_y2 = SubElement(node_bndbox, 'y2')
        node_y2.text = str(anno_cell["p2"][1])

        node_x3 = SubElement(node_bndbox, 'x3')
        node_x3.text = str(anno_cell["p3"][0])
        node_y3 = SubElement(node_bndbox, 'y3')
        node_y3.text = str(anno_cell["p3"][1])

        node_x4 = SubElement(node_bndbox, 'x4')
        node_x4.text = str(anno_cell["p4"][0])
        node_y4 = SubElement(node_bndbox, 'y4')
        node_y4.text = str(anno_cell["p4"][1])

    xml = tostring(node_root, pretty_print=True)
    # dom = parseString(xml)
    return xml

data_prefix = "/Volumes/projects/DataSets/VEDIA/"
images_dir = ["512/Vehicules512/", "1024/Vehicules1024/"]
annotations_filepath = ["512/Annotations512/annotation512.txt"]

def convert():
    save_dir_prefix = "/Volumes/projects/DataSets/VEDIA/VOCFORMAT/"

    label_set = set()
    for img_dir, anno_file in zip(images_dir, annotations_filepath):
        abs_img_dir = data_prefix + img_dir
        abs_anno_path = data_prefix + anno_file
        if not os.path.isfile(abs_anno_path):
            raise ValueError("{} file not found !".format(abs_anno_path))
        images_dict = {}
        with codecs.open(abs_anno_path, "r", "utf8") as reader:
            for line in reader:
                line = line.strip().split(' ')
                name_prefix = line[0] + "_co.png"
                image_path = abs_img_dir + name_prefix
                images_dict.setdefault(image_path, [])
                images_dict[image_path].append(line)

        for img_path in images_dict.keys():
            if not os.path.isfile(img_path):
                raise IOError("{} image path not found !".format(img_path))

            anno_infos = list()
            for line in images_dict[img_path]:
                anno_cell = dict()
                center_x, center_y = float(line[1]), float(line[2])
                rotate_theta = float(line[3])
points = np.array(list(map(float, line[4:12])), 112 | np.int32).reshape((2, -1)).T 113 | fully_contain = int(line[-2]) 114 | occluded = int(line[-1]) 115 | if occluded: 116 | continue 117 | # print(points.shape) 118 | label = line[-3] 119 | label_set.add(label) 120 | anno_cell["label"] = label 121 | anno_cell["p1"] = points[0, :] 122 | anno_cell["p2"] = points[1, :] 123 | anno_cell["p3"] = points[2, :] 124 | anno_cell["p4"] = points[3, :] 125 | anno_infos.append(anno_cell) 126 | if len(anno_infos) == 0: 127 | print(img_path) 128 | continue 129 | 130 | # copy file to dest 131 | image_name = img_path.split("/")[-1] 132 | copyfile(img_path, save_dir_prefix + "JPEGImages/" + image_name) 133 | voc_xml = format_voc_string(img_path, anno_infos) 134 | anno_name = img_path.split("/")[-1].replace("png", "xml") 135 | anno_file_path = save_dir_prefix + "Annotations/" + anno_name 136 | with open(anno_file_path, "wb") as writer: 137 | writer.write(voc_xml) 138 | # print(voc_xml) 139 | # return 140 | print(label_set) 141 | 142 | if __name__ == '__main__': 143 | convert() -------------------------------------------------------------------------------- /eagle/brain/solver/ssd_solver.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2009 IW. 2 | # All rights reserved. 3 | # 4 | # Author: liuguiyang 5 | # Date: 2018/3/4 6 | 7 | from __future__ import absolute_import 8 | from __future__ import division 9 | from __future__ import print_function 10 | 11 | import sys 12 | import time 13 | from datetime import datetime 14 | 15 | import numpy as np 16 | import tensorflow as tf 17 | 18 | from eagle.brain.solver.solver import Solver 19 | 20 | 21 | class SSDSolver(Solver): 22 | def __init__(self, dataset, net, common_params, solver_params): 23 | super(SSDSolver, self).__init__( 24 | dataset, net, common_params, solver_params) 25 | 26 | # process params 27 | self.width = int(common_params['image_size']) 28 | self.height = int(common_params['image_size']) 29 | self.batch_size = int(common_params['batch_size']) 30 | 31 | self.decay = float(solver_params['decay']) 32 | self.beta_1 = float(solver_params['beta_1']) 33 | self.beta_2 = float(solver_params['beta_2']) 34 | self.epsilon = float(solver_params['epsilon']) 35 | self.learning_rate = float(solver_params['lr']) 36 | self.train_dir = str(solver_params['train_dir']) 37 | self.max_iterators = int(solver_params['max_iterators']) 38 | self.pretrain_path = str(solver_params['pretrain_model_path']) 39 | 40 | self.dataset = dataset 41 | self.net = net 42 | 43 | # construct graph 44 | self.build_model() 45 | 46 | def _train(self): 47 | opt = tf.train.AdamOptimizer( 48 | learning_rate=self.learning_rate, 49 | beta1=self.beta_1, 50 | beta2=self.beta_2, 51 | epsilon=self.epsilon) 52 | grads = opt.compute_gradients(self.total_loss) 53 | apply_gradient_op = opt.apply_gradients(grads, 54 | global_step=self.global_step) 55 | return apply_gradient_op 56 | 57 | def build_model(self): 58 | self.global_step = tf.Variable(0, trainable=False) 59 | self.images = tf.placeholder( 60 | tf.float32, 61 | shape=(self.batch_size, self.height, self.width, 3)) 62 | model_spec = self.net.inference(self.images) 63 | self.predicts = model_spec["predictions"] 64 | predict_shape = model_spec["predictions"].get_shape().as_list() 65 | boxes_num = predict_shape[1] 66 | encode_length = predict_shape[2] 67 | 68 | ''' 69 | Input Image (300, 300, 3): 70 | [32, 37, 37, 4, 8] ---> (cx, cy, w, h, variances) 71 | [32, 18, 18, 6, 8] 72 | [32, 9, 9, 6, 8] 73 | 
    [32, 5, 5, 6, 8]
    [32, 3, 3, 4, 8]
    [32, 1, 1, 4, 8]
    ==> 37^2*4 + 18^2*6 + 9^2*6 + 5^2*6 + 3^2*4 + 1^2*4 = 8096
    '''

        self.labels = tf.placeholder(
            tf.float32,
            shape=(self.batch_size, boxes_num, encode_length))

        self.total_loss = self.net.loss(y_true=self.labels,
                                        y_pred=self.predicts)

        tf.summary.scalar('loss', self.total_loss)
        self.train_op = self._train()

    def solve(self):
        saver = tf.train.Saver(max_to_keep=3)

        init = tf.global_variables_initializer()
        summary_op = tf.summary.merge_all()

        sess = tf.Session()
        sess.run(init)
        if self.pretrain_path != "None":
            saver.restore(sess, self.pretrain_path)

        summary_writer = tf.summary.FileWriter(self.train_dir, sess.graph)

        for step in range(self.max_iterators):
            start_time = time.time()
            np_images, np_labels = self.dataset.batch()

            _, loss_value = sess.run(
                [self.train_op, self.total_loss],
                feed_dict={
                    self.images: np_images,
                    self.labels: np_labels
                })

            duration = time.time() - start_time

            assert not np.isnan(loss_value), 'Model diverged with loss = NaN'

            if step % 10 == 0:
                num_examples_per_step = self.dataset.batch_size
                examples_per_sec = num_examples_per_step / duration
                sec_per_batch = float(duration)

                format_str = ('%s: step %d, loss = %.2f '
                              '(%.1f examples/sec; %.3f sec/batch)')
                print(format_str % (datetime.now(), step, loss_value,
                                    examples_per_sec, sec_per_batch))
                sys.stdout.flush()
            if step % 1000 == 0:
                summary_str = sess.run(summary_op,
                                       feed_dict={
                                           self.images: np_images,
                                           self.labels: np_labels
                                       })
                summary_writer.add_summary(summary_str, step)
            if step % 2000 == 0:
                saver.save(sess,
                           self.train_dir + '/model.ckpt', global_step=step)
        saver.save(sess, self.train_dir + '/model.ckpt', global_step=step)
        sess.close()
-------------------------------------------------------------------------------- /eagle/observe/augmentors/arithmetic.py: --------------------------------------------------------------------------------
# Copyright (c) 2009 IW.
# All rights reserved.
#
# Author: liuguiyang
# Date: 2018/2/28

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import cv2
import numpy as np
from scipy import ndimage

import eagle.utils as eu
from eagle.observe.base.meta import Augmentor
from eagle.parameter import StochasticParameter
from eagle.parameter import Deterministic, DiscreteUniform, Binomial


class Add(Augmentor):
    """
    Add a value to all pixels in an image.

    Parameters
    ----------
    value : int or iterable of two ints or StochasticParameter, optional(default=0)
        Value to add to all pixels.
        * If an int, then that value will be used for all images.
        * If a tuple (a, b), then a value from the discrete range [a .. b]
          will be used.
        * If a StochasticParameter, then a value will be sampled per image
          from that parameter.

    per_channel : bool or float, optional(default=False)
        Whether to use the same value for all channels (False)
        or to sample a new value for each channel (True).
39 | If this value is a float p, then for p percent of all images 40 | `per_channel` will be treated as True, otherwise as False. 41 | 42 | name : string, optional(default=None) 43 | See `Augmenter.__init__()` 44 | 45 | deterministic : bool, optional(default=False) 46 | See `Augmenter.__init__()` 47 | 48 | random_state : int or np.random.RandomState or None, optional(default=None) 49 | See `Augmenter.__init__()` 50 | 51 | Examples 52 | -------- 53 | >>> aug = iaa.Add(10) 54 | 55 | always adds a value of 10 to all pixels in the image. 56 | 57 | >>> aug = iaa.Add((-10, 10)) 58 | 59 | adds a value from the discrete range [-10 .. 10] to all pixels of 60 | the input images. The exact value is sampled per image. 61 | 62 | >>> aug = iaa.Add((-10, 10), per_channel=True) 63 | 64 | adds a value from the discrete range [-10 .. 10] to all pixels of 65 | the input images. The exact value is sampled per image AND channel, 66 | i.e. to a red-channel it might add 5 while subtracting 7 from the 67 | blue channel of the same image. 68 | 69 | >>> aug = iaa.Add((-10, 10), per_channel=0.5) 70 | 71 | same as previous example, but the `per_channel` feature is only active 72 | for 50 percent of all images. 73 | 74 | """ 75 | 76 | def __init__(self, value=0, per_channel=False, name=None, 77 | deterministic=False, random_state=None): 78 | super(Add, self).__init__(name=name, deterministic=deterministic, random_state=random_state) 79 | 80 | if eu.is_single_integer(value): 81 | eu.do_assert(-255 <= value <= 255, 82 | "Expected value to have range [-255, 255], got value %d." % (value,)) 83 | self.value = Deterministic(value) 84 | elif eu.is_iterable(value): 85 | eu.do_assert(len(value) == 2, 86 | "Expected tuple/list with 2 entries, got %d entries." % (len(value),)) 87 | self.value = DiscreteUniform(value[0], value[1]) 88 | elif isinstance(value, StochasticParameter): 89 | self.value = value 90 | else: 91 | raise Exception("Expected float or int, tuple/list with 2 entries or StochasticParameter. Got %s." % (type(value),)) 92 | 93 | if per_channel in [True, False, 0, 1, 0.0, 1.0]: 94 | self.per_channel = Deterministic(int(per_channel)) 95 | elif eu.is_single_number(per_channel): 96 | eu.do_assert(0 <= per_channel <= 1.0, 97 | "Expected bool, or number in range [0, 1.0] for per_channel, got %s." 
% (type(per_channel),)) 98 | self.per_channel = Binomial(per_channel) 99 | else: 100 | raise Exception("Expected per_channel to be boolean or number or StochasticParameter") 101 | 102 | def _augment_images(self, images, random_state, parents, hooks): 103 | input_dtypes = eu.copy_dtypes_for_restore(images) 104 | 105 | result = images 106 | nb_images = len(images) 107 | seeds = random_state.randint(0, 10**6, (nb_images,)) 108 | for i in range(nb_images): 109 | image = images[i].astype(np.int32) 110 | rs_image = eu.new_random_state(seeds[i]) 111 | per_channel = self.per_channel.draw_sample(random_state=rs_image) 112 | if per_channel == 1: 113 | nb_channels = image.shape[2] 114 | samples = self.value.draw_samples((nb_channels,), random_state=rs_image) 115 | for c, sample in enumerate(samples): 116 | # TODO make value range more flexible 117 | eu.do_assert(-255 <= sample <= 255) 118 | image[..., c] += sample 119 | else: 120 | sample = self.value.draw_sample(random_state=rs_image) 121 | # TODO make value range more flexible 122 | eu.do_assert(-255 <= sample <= 255) 123 | image += sample 124 | result[i] = image 125 | 126 | # TODO make value range more flexible 127 | eu.clip_augmented_images_(result, 0, 255) 128 | eu.restore_augmented_images_dtypes_(result, input_dtypes) 129 | 130 | return result 131 | 132 | def _augment_keypoints(self, keypoints_on_images, random_state, parents, hooks): 133 | return keypoints_on_images 134 | 135 | def get_parameters(self): 136 | return [self.value] 137 | -------------------------------------------------------------------------------- /datum/models/yolo/yolo_batch_dataset.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2009 IW. 2 | # All rights reserved. 3 | # 4 | # Author: liuguiyang 5 | # Date: 2018/3/5 6 | 7 | from __future__ import absolute_import 8 | from __future__ import division 9 | from __future__ import print_function 10 | 11 | import random 12 | import cv2 13 | import numpy as np 14 | from queue import Queue 15 | from threading import Thread, Lock 16 | 17 | from datum.meta.dataset import DataSet 18 | 19 | 20 | class YoloDataSet(DataSet): 21 | """TextDataSet 22 | process text input file dataset 23 | text file format: 24 | image_path xmin1 ymin1 xmax1 ymax1 class1 xmin2 ymin2 xmax2 ymax2 class2 25 | """ 26 | 27 | def __init__(self, common_params, dataset_params): 28 | super(YoloDataSet, self).__init__(common_params, dataset_params) 29 | 30 | # process params 31 | self.data_path = str(dataset_params['path']) 32 | self.width = int(common_params['image_size']) 33 | self.height = int(common_params['image_size']) 34 | self.batch_size = int(common_params['batch_size']) 35 | self.thread_num = int(dataset_params['thread_num']) 36 | self.max_objects = int(common_params['max_objects_per_image']) 37 | 38 | # record and image_label queue 39 | self.image_label_queue = Queue(maxsize=100) 40 | 41 | self.record_list = [] 42 | 43 | # filling the record_list 44 | input_file = open(self.data_path, 'r') 45 | 46 | for line in input_file: 47 | line = line.strip() 48 | if ',' in line: 49 | ss = line.split(',') 50 | else: 51 | ss = line.split(' ') 52 | ss[1:] = [float(num) for num in ss[1:]] 53 | self.record_list.append(ss) 54 | 55 | self.record_point = 0 56 | self.record_number = len(self.record_list) 57 | self.record_number_lock = Lock() 58 | 59 | for i in range(self.thread_num): 60 | t_record_producer = Thread(target=self.record_producer) 61 | t_record_producer.daemon = True 62 | t_record_producer.start() 63 | 64 | # 
for i in range(self.thread_num): 65 | # t = Thread(target=self.record_customer) 66 | # t.daemon = True 67 | # t.start() 68 | 69 | def record_producer(self): 70 | def update_shuffle(): 71 | if self.record_point % self.record_number == 0: 72 | random.shuffle(self.record_list) 73 | self.record_point = 0 74 | 75 | while True: 76 | outs = list() 77 | while len(outs) < self.batch_size: 78 | item = self.record_list[self.record_point] 79 | out = self.record_process(item) 80 | outs.append(out) 81 | self.record_number_lock.acquire() 82 | self.record_point += 1 83 | update_shuffle() 84 | self.record_number_lock.release() 85 | 86 | self.image_label_queue.put(outs) 87 | 88 | # def record_customer(self): 89 | # while True: 90 | # item = self.record_queue.get() 91 | # out = self.record_process(item) 92 | # self.image_label_queue.put(out) 93 | 94 | def record_process(self, record): 95 | """record process 96 | Args: record 97 | Returns: 98 | image: 3-D ndarray 99 | labels: 2-D list [self.max_objects, 5] (xcenter, ycenter, w, h, class_num) 100 | object_num: total object number int 101 | """ 102 | image = cv2.imread(record[0]) 103 | image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB) 104 | h = image.shape[0] 105 | w = image.shape[1] 106 | 107 | width_rate = self.width * 1.0 / w 108 | height_rate = self.height * 1.0 / h 109 | 110 | image = cv2.resize(image, (self.height, self.width)) 111 | 112 | labels = [[0, 0, 0, 0, 0]] * self.max_objects 113 | i = 1 114 | object_num = 0 115 | while i < len(record): 116 | xmin = record[i] 117 | ymin = record[i + 1] 118 | xmax = record[i + 2] 119 | ymax = record[i + 3] 120 | class_num = record[i + 4] 121 | 122 | xcenter = (xmin + xmax) * 1.0 / 2 * width_rate 123 | ycenter = (ymin + ymax) * 1.0 / 2 * height_rate 124 | 125 | box_w = (xmax - xmin) * width_rate 126 | box_h = (ymax - ymin) * height_rate 127 | 128 | labels[object_num] = [xcenter, ycenter, box_w, box_h, class_num] 129 | object_num += 1 130 | i += 5 131 | if object_num >= self.max_objects: 132 | break 133 | return [image, labels, object_num] 134 | 135 | def batch(self): 136 | """get batch 137 | Returns: 138 | images: 4-D ndarray [batch_size, height, width, 3] 139 | labels: 3-D ndarray [batch_size, max_objects, 5] 140 | objects_num: 1-D ndarray [batch_size] 141 | """ 142 | images = [] 143 | labels = [] 144 | objects_num = [] 145 | outs = self.image_label_queue.get() 146 | for i in range(self.batch_size): 147 | image, label, object_num = outs[i][:] 148 | images.append(image) 149 | labels.append(label) 150 | objects_num.append(object_num) 151 | 152 | # for i in range(self.batch_size): 153 | # image, label, object_num = self.image_label_queue.get() 154 | # images.append(image) 155 | # labels.append(label) 156 | # objects_num.append(object_num) 157 | images = np.asarray(images, dtype=np.float32) 158 | images = images / 255 * 2 - 1 159 | labels = np.asarray(labels, dtype=np.float32) 160 | objects_num = np.asarray(objects_num, dtype=np.int32) 161 | return images, labels, objects_num 162 | -------------------------------------------------------------------------------- /Others/lsd12/format_input.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2009 IW. 2 | # All rights reserved. 
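To make the label encoding in `record_process` of yolo_batch_dataset.py above concrete, here is the scaling arithmetic for a single box (all numbers are illustrative; `image_size` is assumed to be 448):

```
# Illustrative only: a 1000x500 source image resized to 448x448.
width_rate, height_rate = 448 / 1000, 448 / 500    # 0.448, 0.896
xmin, ymin, xmax, ymax, class_num = 100, 100, 300, 200, 2
xcenter = (xmin + xmax) / 2 * width_rate           # 89.6
ycenter = (ymin + ymax) / 2 * height_rate          # 134.4
box_w = (xmax - xmin) * width_rate                 # 89.6
box_h = (ymax - ymin) * height_rate                # 89.6
label = [xcenter, ycenter, box_w, box_h, class_num]
```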
3 | # 4 | # Author: liuguiyang 5 | # Date: 2018/1/3 6 | 7 | from __future__ import absolute_import 8 | from __future__ import division 9 | from __future__ import print_function 10 | 11 | import os 12 | import sys 13 | 14 | p1 = "/".join(os.path.abspath(__file__).split("/")[:-3]) 15 | sys.path.insert(0, p1) 16 | p1 = "/".join(os.path.abspath(__file__).split("/")[:-2]) 17 | sys.path.insert(0, p1) 18 | p1 = "/".join(os.path.abspath(__file__).split("/")[:-1]) 19 | sys.path.insert(0, p1) 20 | 21 | from importlib import reload 22 | reload(sys) 23 | 24 | import cv2 25 | 26 | from datum.utils import tools 27 | from Others.lsd12 import label_config as config 28 | 29 | 30 | nwpu_voc_dir = "/Volumes/projects/repos/RSI/NWPUVHR10/NWPUVOCFORMAT/" 31 | nwpu_voc_image_dir = nwpu_voc_dir + "JPEGImages/" 32 | nwpu_voc_anno_dir = nwpu_voc_dir + "Annotations/" 33 | 34 | vedia_voc_dir = "/Volumes/projects/repos/RSI/VEDAI/VEDIAVOCFORAMT/" 35 | vedia_voc_image_dir = vedia_voc_dir + "JPEGImages/" 36 | vedia_voc_anno_dir = vedia_voc_dir + "Annotations/" 37 | 38 | lsd_voc_dir = "/Volumes/projects/repos/RSI/LSD10/" 39 | lsd_voc_image_dir = lsd_voc_dir + "JPEGImages/" 40 | lsd_voc_anno_dir = lsd_voc_dir + "Annotations/" 41 | 42 | 43 | # 先确定每个原始数据集中的训练集和测试集 44 | def split_dataset(): 45 | nwpu_img_list = os.listdir(nwpu_voc_image_dir) 46 | vedia_img_list = os.listdir(vedia_voc_image_dir) 47 | test_nwpu_img_list = tools.rand_selected_file(nwpu_img_list) 48 | test_vedia_img_list = tools.rand_selected_file(vedia_img_list) 49 | with open(nwpu_voc_dir+"test.txt", "w") as test_nwpu_writer: 50 | for item in test_nwpu_img_list: 51 | test_nwpu_writer.write("{}\n".format(item)) 52 | with open(nwpu_voc_dir+"train.txt", "w") as train_nwpu_writer: 53 | for item in nwpu_img_list: 54 | if item not in test_nwpu_img_list: 55 | train_nwpu_writer.write("{}\n".format(item)) 56 | with open(vedia_voc_dir+"test.txt", "w") as test_vedia_writer: 57 | for item in test_vedia_img_list: 58 | test_vedia_writer.write("{}\n".format(item)) 59 | with open(vedia_voc_dir+"train.txt", "w") as train_vedia_writer: 60 | for item in vedia_img_list: 61 | if item not in test_vedia_img_list: 62 | train_vedia_writer.write("{}\n".format(item)) 63 | 64 | 65 | # 更新数据集中的label信息 66 | def flush_dataset(): 67 | for anno_name in os.listdir(lsd_voc_anno_dir): 68 | abs_anno_path = lsd_voc_anno_dir + anno_name 69 | print(abs_anno_path) 70 | anno_targets = tools.extract_target_from_xml(abs_anno_path) 71 | new_anno_targets = list() 72 | for anno_info in anno_targets: 73 | label_name = anno_info[-1] 74 | label_id = config.sign_idx_dict[label_name] 75 | label_name = config.idx_sign_dict[label_id] 76 | new_anno_info = anno_info[:-1] + [label_name] 77 | new_anno_targets.append(new_anno_info) 78 | src_image = cv2.imread( 79 | lsd_voc_image_dir+anno_name.replace("xml", "jpg")) 80 | xml_obj = tools.fetch_xml_format( 81 | src_image, anno_name.replace("xml", "jpg"), new_anno_targets) 82 | with open(lsd_voc_anno_dir+anno_name, "w") as writer: 83 | writer.write(xml_obj) 84 | 85 | # 获取标准的目标的label 86 | def get_true_label_name(label_name): 87 | label_id = config.sign_idx_dict[label_name] 88 | label_name = config.idx_sign_dict[label_id] 89 | return label_name 90 | 91 | 92 | # 将图像中非指定尺度数据进行标准化 93 | def format_corp_images(): 94 | for anno_name in os.listdir(lsd_voc_anno_dir): 95 | abs_anno_path = lsd_voc_anno_dir + anno_name 96 | abs_img_path = lsd_voc_image_dir + anno_name.replace("xml", "jpg") 97 | image_name = anno_name.replace("xml", "jpg") 98 | src_image = cv2.imread(abs_img_path) 99 | 
if src_image.shape == (512, 512, 3): 100 | continue 101 | 102 | h, w = src_image.shape[:2] 103 | if h <= 512 and w <= 512: 104 | continue 105 | 106 | print(abs_img_path) 107 | anno_targets = tools.extract_target_from_xml(abs_anno_path) 108 | new_anno_targets = list() 109 | for anno_info in anno_targets: 110 | label_name = get_true_label_name(anno_info[-1]) 111 | new_anno_info = anno_info[:-1] + [label_name] 112 | new_anno_targets.append(new_anno_info) 113 | crop_list, anno_list = tools.crop_samples(src_image, new_anno_targets) 114 | 115 | for i in range(len(crop_list)): 116 | x0, y0, x1, y1 = crop_list[i] 117 | # roi = im[y1:y2, x1:x2] opencv中类似NUMPY的裁剪 118 | sub_img = src_image[y0:y1, x0:x1] 119 | f_name = image_name[:-4] + "_%d_%d_%d_%d_%d.jpg" % (x0, y0, x1, y1, i) 120 | cv2.imwrite(lsd_voc_image_dir + f_name, sub_img, [int(cv2.IMWRITE_JPEG_QUALITY), 100]) 121 | a_name = image_name[:-4]+ "_%d_%d_%d_%d_%d.xml" % (x0, y0, x1, y1, i) 122 | xml_obj = tools.fetch_xml_format(src_image, f_name, anno_list[i], "LSD12") 123 | with open(lsd_voc_anno_dir + a_name, "w") as writer: 124 | writer.write(xml_obj) 125 | 126 | os.remove(abs_img_path) 127 | os.remove(abs_anno_path) 128 | 129 | # 根据图像文件列表,对数据集进行切分 130 | from sklearn.model_selection import train_test_split 131 | 132 | def split_train_valid_test(): 133 | save_dir = "/Volumes/projects/repos/RSI/LSD10/" 134 | file_path = save_dir + "total.txt" 135 | image_list = list() 136 | with open(file_path, "r") as h: 137 | for line in h: 138 | line = line.strip() 139 | image_list.append(line) 140 | X_train, X_test = train_test_split(image_list, test_size=0.3, random_state=42) 141 | print(len(X_train), len(X_test)) 142 | X_train, X_valid = train_test_split(X_train, test_size=0.2, random_state=42) 143 | print(len(X_train), len(X_valid)) 144 | with open(save_dir+"train.txt", "w") as h1: 145 | for line in X_train: 146 | h1.write("{}\n".format(line)) 147 | with open(save_dir+"valid.txt", "w") as h2: 148 | for line in X_valid: 149 | h2.write("{}\n".format(line)) 150 | with open(save_dir+"test.txt", "w") as h3: 151 | for line in X_test: 152 | h3.write("{}\n".format(line)) 153 | 154 | 155 | if __name__ == '__main__': 156 | split_train_valid_test() 157 | # Others.show_targets(lsd_voc_image_dir, lsd_voc_anno_dir) 158 | pass 159 | -------------------------------------------------------------------------------- /eagle/brain/ssd/models/net.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2009 IW. 2 | # All rights reserved. 3 | # 4 | # Author: liuguiyang 5 | # Date: 2018/3/8 6 | 7 | from __future__ import absolute_import 8 | from __future__ import division 9 | from __future__ import print_function 10 | 11 | import tensorflow as tf 12 | 13 | 14 | class Net(object): 15 | def __init__(self, common_params, net_params): 16 | if not isinstance(common_params, dict): 17 | raise TypeError("common_params must be dict") 18 | if not isinstance(net_params, dict): 19 | raise TypeError("net_params must be dict") 20 | 21 | # pretrained variable collection 22 | self.pretrained_collection = [] 23 | # trainable variable collection 24 | self.trainable_collection = [] 25 | 26 | def _variable_on_cpu(self, name, shape, initializer, pretrain=True, 27 | train=True): 28 | """Helper to create a Variable stored on CPU memory. 
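Stepping back to `split_train_valid_test` in format_input.py above: the two-stage split composes to fixed overall fractions, which a few lines verify (the list size is illustrative):

```
# 30% is held out for test, then 20% of the remainder for validation:
n = 1000
n_test = int(n * 0.3)               # 300
n_valid = int((n - n_test) * 0.2)   # 140
n_train = n - n_test - n_valid      # 560 -> roughly 56% / 14% / 30% overall
```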
29 | 30 | Args: 31 | name: name of the Variable 32 | shape: list of ints 33 | initializer: initializer of Variable 34 | 35 | Returns: 36 | Variable Tensor 37 | """ 38 | # with tf.device('/cpu:0'): 39 | # var = tf.get_variable(name, shape, initializer=initializer, 40 | # dtype=tf.float32) 41 | # if pretrain: 42 | # self.pretrained_collection.append(var) 43 | # if train: 44 | # self.trainable_collection.append(var) 45 | 46 | var = tf.get_variable(name, shape, initializer=initializer, 47 | dtype=tf.float32) 48 | if pretrain: 49 | self.pretrained_collection.append(var) 50 | if train: 51 | self.trainable_collection.append(var) 52 | return var 53 | 54 | def _variable_with_weight_decay(self, name, shape, stddev, wd, 55 | pretrain=True, train=True): 56 | """Helper to create an initialized Variable with weight decay. 57 | 58 | Note that the Variable is initialized with truncated normal distribution 59 | A weight decay is added only if one is specified. 60 | 61 | Args: 62 | name: name of the variable 63 | shape: list of ints 64 | stddev: standard devision of a truncated Gaussian 65 | wd: add L2Loss weight decay multiplied by this float. If None, weight 66 | decay is not added for this Variable. 67 | 68 | Returns: 69 | Variable Tensor 70 | """ 71 | var = self._variable_on_cpu(name, shape, 72 | tf.truncated_normal_initializer( 73 | stddev=stddev, dtype=tf.float32), 74 | pretrain, train) 75 | if wd is not None: 76 | weight_decay = tf.multiply(tf.nn.l2_loss(var), wd, 77 | name='weight_loss') 78 | tf.add_to_collection('losses', weight_decay) 79 | return var 80 | 81 | def conv2d(self, scope, input, kernel_size, stride=1, pretrain=True, 82 | train=True): 83 | """convolutional layer 84 | 85 | Args: 86 | input: 4-D tensor [batch_size, height, width, depth] 87 | scope: variable_scope name 88 | kernel_size: [k_height, k_width, in_channel, out_channel] 89 | stride: int32 90 | Return: 91 | output: 4-D tensor [batch_size, height/stride, width/stride, out_channels] 92 | """ 93 | with tf.variable_scope(scope) as scope: 94 | kernel = self._variable_with_weight_decay('weights', 95 | shape=kernel_size, 96 | stddev=5e-2, 97 | wd=self.weight_decay, 98 | pretrain=pretrain, 99 | train=train) 100 | conv = tf.nn.conv2d(input, kernel, [1, stride, stride, 1], 101 | padding='SAME') 102 | biases = self._variable_on_cpu('biases', kernel_size[3:], 103 | tf.constant_initializer(0.0), 104 | pretrain, train) 105 | bias = tf.nn.bias_add(conv, biases) 106 | conv1 = self.leaky_relu(bias) 107 | 108 | return conv1 109 | 110 | def max_pool(self, input, kernel_size, stride): 111 | """max_pool layer 112 | 113 | Args: 114 | input: 4-D tensor [batch_zie, height, width, depth] 115 | kernel_size: [k_height, k_width] 116 | stride: int32 117 | Return: 118 | output: 4-D tensor [batch_size, height/stride, width/stride, depth] 119 | """ 120 | return tf.nn.max_pool(input, 121 | ksize=[1, kernel_size[0], kernel_size[1], 1], 122 | strides=[1, stride, stride, 1], 123 | padding='SAME') 124 | 125 | def local(self, scope, input, in_dimension, out_dimension, leaky=True, 126 | pretrain=True, train=True): 127 | """Fully connection layer 128 | 129 | Args: 130 | scope: variable_scope name 131 | input: [batch_size, ???] 
132 | out_dimension: int32 133 | Return: 134 | output: 2-D tensor [batch_size, out_dimension] 135 | """ 136 | with tf.variable_scope(scope) as scope: 137 | reshape = tf.reshape(input, [tf.shape(input)[0], -1]) 138 | 139 | weights = self._variable_with_weight_decay('weights', 140 | shape=[in_dimension, 141 | out_dimension], 142 | stddev=0.04, 143 | wd=self.weight_decay, 144 | pretrain=pretrain, 145 | train=train) 146 | biases = self._variable_on_cpu('biases', [out_dimension], 147 | tf.constant_initializer(0.0), 148 | pretrain, train) 149 | local = tf.matmul(reshape, weights) + biases 150 | 151 | if leaky: 152 | local = self.leaky_relu(local) 153 | else: 154 | local = tf.identity(local, name=scope.name) 155 | 156 | return local 157 | 158 | def leaky_relu(self, x, alpha=0.1, dtype=tf.float32): 159 | """leaky relu 160 | if x > 0: 161 | return x 162 | else: 163 | return alpha * x 164 | Args: 165 | x : Tensor 166 | alpha: float 167 | Return: 168 | y : Tensor 169 | """ 170 | x = tf.cast(x, dtype=dtype) 171 | bool_mask = (x > 0) 172 | mask = tf.cast(bool_mask, dtype=dtype) 173 | return 1.0 * mask * x + alpha * (1 - mask) * x 174 | 175 | def inference(self, images): 176 | """Build the yolo model 177 | 178 | Args: 179 | images: 4-D tensor [batch_size, image_height, image_width, channels] 180 | Returns: 181 | predicts: 4-D tensor [batch_size, cell_size, cell_size, num_classes + 5 * boxes_per_cell] 182 | """ 183 | raise NotImplementedError 184 | 185 | def loss(self, y_true, y_pred): 186 | raise NotImplementedError 187 | -------------------------------------------------------------------------------- /eagle/brain/solver/yolo_u_solver.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2009 IW. 2 | # All rights reserved. 3 | # 4 | # Author: liuguiyang 5 | # Date: 2018/3/4 6 | 7 | from __future__ import absolute_import 8 | from __future__ import division 9 | from __future__ import print_function 10 | 11 | import sys 12 | import time 13 | from datetime import datetime 14 | 15 | import numpy as np 16 | import tensorflow as tf 17 | 18 | from eagle.brain.solver.solver import Solver 19 | 20 | 21 | class YoloUSolver(Solver): 22 | def __init__(self, dataset, net, common_params, solver_params): 23 | super(YoloUSolver, self).__init__(dataset, net, common_params, solver_params) 24 | 25 | # process params 26 | self.width = int(common_params['image_size']) 27 | self.height = int(common_params['image_size']) 28 | self.batch_size = int(common_params['batch_size']) 29 | self.max_objects = int(common_params['max_objects_per_image']) 30 | 31 | self.moment = float(solver_params['moment']) 32 | self.learning_rate = float(solver_params['lr']) 33 | self.train_dir = str(solver_params['train_dir']) 34 | self.max_iterators = int(solver_params['max_iterators']) 35 | self.pretrain_path = str(solver_params['pretrain_model_path']) 36 | 37 | self.dataset = dataset 38 | self.net = net 39 | 40 | # construct graph 41 | self.construct_graph() 42 | 43 | def _train(self): 44 | """Train model 45 | 46 | Create an optimizer and apply to all trainable variables. 
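The mask-based `leaky_relu` above is equivalent to `max(x, alpha * x)` for positive `alpha`; a standalone NumPy check:

```
import numpy as np

x = np.array([-2.0, 0.0, 3.0], dtype=np.float32)
mask = (x > 0).astype(np.float32)                # same mask trick as leaky_relu
y = 1.0 * mask * x + 0.1 * (1 - mask) * x        # alpha = 0.1
assert np.allclose(y, np.maximum(x, 0.1 * x))    # [-0.2, 0.0, 3.0]
```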
47 | 48 | Args: 49 | total_loss: Total loss from net.loss() 50 | global_step: Integer Variable counting the number of training steps 51 | processed 52 | Returns: 53 | train_op: op for training 54 | """ 55 | 56 | opt = tf.train.MomentumOptimizer(self.learning_rate, self.moment) 57 | grads = opt.compute_gradients(self.total_loss) 58 | 59 | apply_gradient_op = opt.apply_gradients(grads, 60 | global_step=self.global_step) 61 | 62 | return apply_gradient_op 63 | 64 | def construct_graph(self): 65 | # construct graph 66 | self.global_step = tf.Variable(0, trainable=False) 67 | self.images = tf.placeholder(tf.float32, ( 68 | self.batch_size, self.height, self.width, 3)) 69 | self.labels = tf.placeholder(tf.float32, 70 | (self.batch_size, self.max_objects, 5)) 71 | self.objects_num = tf.placeholder(tf.int32, (self.batch_size)) 72 | 73 | self.predicts = self.net.inference(self.images) 74 | 75 | self.net.set_cell_size(grid_size=9) 76 | total_loss_g9, nilboy_g9 = self.net.loss( 77 | self.predicts["predicts_g9"], self.labels, self.objects_num) 78 | self.net.set_cell_size(grid_size=15) 79 | total_loss_g15, nilboy_g15 = self.net.loss( 80 | self.predicts["predicts_g15"], self.labels, self.objects_num) 81 | 82 | # self.nilboy_g9 = nilboy_g9 83 | # self.nilboy_g15 = nilboy_g15 84 | self.total_loss = 0.5 * (total_loss_g9 + total_loss_g15) 85 | tf.summary.scalar('loss', self.total_loss) 86 | self.train_op = self._train() 87 | 88 | def solve(self): 89 | saver_pretrain = tf.train.Saver(max_to_keep=3) 90 | saver_train = tf.train.Saver(max_to_keep=3) 91 | 92 | init = tf.global_variables_initializer() 93 | 94 | summary_op = tf.summary.merge_all() 95 | 96 | sess = tf.Session() 97 | 98 | sess.run(init) 99 | if self.pretrain_path != "None": 100 | saver_pretrain.restore(sess, self.pretrain_path) 101 | 102 | summary_writer = tf.summary.FileWriter(self.train_dir, sess.graph) 103 | 104 | for step in range(self.max_iterators): 105 | start_time = time.time() 106 | np_images, np_labels, np_objects_num = self.dataset.batch() 107 | 108 | _, loss_value = sess.run( 109 | [self.train_op, self.total_loss], 110 | feed_dict={ 111 | self.images: np_images, 112 | self.labels: np_labels, 113 | self.objects_num: np_objects_num 114 | }) 115 | 116 | duration = time.time() - start_time 117 | 118 | assert not np.isnan(loss_value), 'Model diverged with loss = NaN' 119 | 120 | if step % 1 == 0: 121 | num_examples_per_step = self.dataset.batch_size 122 | examples_per_sec = num_examples_per_step / duration 123 | sec_per_batch = float(duration) 124 | 125 | format_str = ('%s: step %d, loss = %.2f ' 126 | '(%.1f examples/sec; %.3f sec/batch)') 127 | print(format_str % (datetime.now(), step, loss_value, 128 | examples_per_sec, sec_per_batch)) 129 | sys.stdout.flush() 130 | if step % 1000 == 0: 131 | summary_str = sess.run( 132 | summary_op, 133 | feed_dict={ 134 | self.images: np_images, 135 | self.labels: np_labels, 136 | self.objects_num: np_objects_num 137 | }) 138 | summary_writer.add_summary(summary_str, step) 139 | if step % 5000 == 0: 140 | saver_train.save(sess, 141 | self.train_dir + '/model.ckpt') 142 | sess.close() 143 | 144 | def process_predicts(self, predicts, cell_size): 145 | p_classes = predicts[0, :, :, 0:1] 146 | C = predicts[0, :, :, 1:3] 147 | coordinate = predicts[0, :, :, 3:] 148 | 149 | p_classes = np.reshape(p_classes, (cell_size, cell_size, 1, 1)) 150 | C = np.reshape(C, (cell_size, cell_size, 2, 1)) 151 | 152 | P = C * p_classes 153 | 154 | # print P[5,1, 0, :] 155 | 156 | index = np.argmax(P) 157 | index = 
np.unravel_index(index, P.shape) 158 | class_num = index[3] 159 | 160 | coordinate = np.reshape(coordinate, (cell_size, cell_size, 2, 4)) 161 | max_coordinate = coordinate[index[0], index[1], index[2], :] 162 | 163 | xcenter = max_coordinate[0] 164 | ycenter = max_coordinate[1] 165 | w = max_coordinate[2] 166 | h = max_coordinate[3] 167 | 168 | xcenter = (index[1] + xcenter) * (self.width / cell_size) 169 | ycenter = (index[0] + ycenter) * (self.height / cell_size) 170 | 171 | w = w * self.width 172 | h = h * self.height 173 | 174 | xmin = xcenter - w / 2.0 175 | ymin = ycenter - h / 2.0 176 | 177 | xmax = xmin + w 178 | ymax = ymin + h 179 | 180 | xmin = max(0, xmin) 181 | xmax = max(0, xmax) 182 | return xmin, ymin, xmax, ymax, class_num 183 | 184 | def model_predict(self, single_image): 185 | saver_pretrain = tf.train.Saver(max_to_keep=3) 186 | 187 | init = tf.global_variables_initializer() 188 | sess = tf.Session() 189 | sess.run(init) 190 | 191 | if self.pretrain_path != "None": 192 | saver_pretrain.restore(sess, self.pretrain_path) 193 | 194 | start_time = time.time() 195 | 196 | predics_info = sess.run( 197 | self.predicts, 198 | feed_dict={ 199 | self.images: single_image 200 | }) 201 | 202 | duration = time.time() - start_time 203 | 204 | xmin, ymin, xmax, ymax, class_num = self.process_predicts( 205 | predics_info["predicts_g9"], cell_size=9) 206 | # xmin, ymin, xmax, ymax, class_num = self.process_predicts( 207 | # predics_info["predicts_g15"]) 208 | sess.close() 209 | 210 | return (xmin, ymin, xmax, ymax, class_num) 211 | -------------------------------------------------------------------------------- /eagle/brain/yolo/net.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2009 IW. 2 | # All rights reserved. 3 | # 4 | # Author: liuguiyang 5 | # Date: 2018/3/1 6 | 7 | from __future__ import absolute_import 8 | from __future__ import division 9 | from __future__ import print_function 10 | 11 | import tensorflow as tf 12 | 13 | 14 | class Net(object): 15 | """Base Net class 16 | """ 17 | 18 | def __init__(self, common_params, net_params): 19 | if not isinstance(common_params, dict): 20 | raise TypeError("common_params must be dict") 21 | if not isinstance(net_params, dict): 22 | raise TypeError("net_params must be dict") 23 | 24 | # pretrained variable collection 25 | self.pretrained_collection = [] 26 | # trainable variable collection 27 | self.trainable_collection = [] 28 | 29 | def _variable_on_cpu(self, name, shape, initializer, pretrain=True, 30 | train=True): 31 | """Helper to create a Variable stored on CPU memory. 32 | 33 | Args: 34 | name: name of the Variable 35 | shape: list of ints 36 | initializer: initializer of Variable 37 | 38 | Returns: 39 | Variable Tensor 40 | """ 41 | # with tf.device('/cpu:0'): 42 | # var = tf.get_variable(name, shape, initializer=initializer, 43 | # dtype=tf.float32) 44 | # if pretrain: 45 | # self.pretrained_collection.append(var) 46 | # if train: 47 | # self.trainable_collection.append(var) 48 | 49 | var = tf.get_variable(name, shape, initializer=initializer, 50 | dtype=tf.float32) 51 | if pretrain: 52 | self.pretrained_collection.append(var) 53 | if train: 54 | self.trainable_collection.append(var) 55 | return var 56 | 57 | def _variable_with_weight_decay(self, name, shape, stddev, wd, 58 | pretrain=True, train=True): 59 | """Helper to create an initialized Variable with weight decay. 
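For the decoding done in `process_predicts` of yolo_u_solver.py above, the cell-relative center is shifted by the cell index and scaled by the cell size; with illustrative numbers (448x448 input, `cell_size=9`):

```
# Illustrative decode: argmax landed in cell (row=2, col=4) of the 9x9 grid.
width = height = 448
cell = width / 9                          # ~49.78 px per cell
row, col = 2, 4                           # index[0], index[1]
x_rel, y_rel, w_rel, h_rel = 0.5, 0.5, 0.2, 0.3
xcenter = (col + x_rel) * cell            # 224.0
ycenter = (row + y_rel) * cell            # ~124.4
w, h = w_rel * width, h_rel * height      # 89.6, 134.4
xmin, ymin = xcenter - w / 2, ycenter - h / 2
```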
60 | 61 | Note that the Variable is initialized with truncated normal distribution 62 | A weight decay is added only if one is specified. 63 | 64 | Args: 65 | name: name of the variable 66 | shape: list of ints 67 | stddev: standard devision of a truncated Gaussian 68 | wd: add L2Loss weight decay multiplied by this float. If None, weight 69 | decay is not added for this Variable. 70 | 71 | Returns: 72 | Variable Tensor 73 | """ 74 | var = self._variable_on_cpu(name, shape, 75 | tf.truncated_normal_initializer( 76 | stddev=stddev, dtype=tf.float32), 77 | pretrain, train) 78 | if wd is not None: 79 | weight_decay = tf.multiply(tf.nn.l2_loss(var), wd, 80 | name='weight_loss') 81 | tf.add_to_collection('losses', weight_decay) 82 | return var 83 | 84 | def conv2d(self, scope, input, kernel_size, stride=1, pretrain=True, 85 | train=True): 86 | """convolutional layer 87 | 88 | Args: 89 | input: 4-D tensor [batch_size, height, width, depth] 90 | scope: variable_scope name 91 | kernel_size: [k_height, k_width, in_channel, out_channel] 92 | stride: int32 93 | Return: 94 | output: 4-D tensor [batch_size, height/stride, width/stride, out_channels] 95 | """ 96 | with tf.variable_scope(scope) as scope: 97 | kernel = self._variable_with_weight_decay('weights', 98 | shape=kernel_size, 99 | stddev=5e-2, 100 | wd=self.weight_decay, 101 | pretrain=pretrain, 102 | train=train) 103 | conv = tf.nn.conv2d(input, kernel, [1, stride, stride, 1], 104 | padding='SAME') 105 | biases = self._variable_on_cpu('biases', kernel_size[3:], 106 | tf.constant_initializer(0.0), 107 | pretrain, train) 108 | bias = tf.nn.bias_add(conv, biases) 109 | conv1 = self.leaky_relu(bias) 110 | 111 | return conv1 112 | 113 | def max_pool(self, input, kernel_size, stride): 114 | """max_pool layer 115 | 116 | Args: 117 | input: 4-D tensor [batch_zie, height, width, depth] 118 | kernel_size: [k_height, k_width] 119 | stride: int32 120 | Return: 121 | output: 4-D tensor [batch_size, height/stride, width/stride, depth] 122 | """ 123 | return tf.nn.max_pool(input, 124 | ksize=[1, kernel_size[0], kernel_size[1], 1], 125 | strides=[1, stride, stride, 1], 126 | padding='SAME') 127 | 128 | def local(self, scope, input, in_dimension, out_dimension, leaky=True, 129 | pretrain=True, train=True): 130 | """Fully connection layer 131 | 132 | Args: 133 | scope: variable_scope name 134 | input: [batch_size, ???] 
135 | out_dimension: int32 136 | Return: 137 | output: 2-D tensor [batch_size, out_dimension] 138 | """ 139 | with tf.variable_scope(scope) as scope: 140 | reshape = tf.reshape(input, [tf.shape(input)[0], -1]) 141 | 142 | weights = self._variable_with_weight_decay('weights', 143 | shape=[in_dimension, 144 | out_dimension], 145 | stddev=0.04, 146 | wd=self.weight_decay, 147 | pretrain=pretrain, 148 | train=train) 149 | biases = self._variable_on_cpu('biases', [out_dimension], 150 | tf.constant_initializer(0.0), 151 | pretrain, train) 152 | local = tf.matmul(reshape, weights) + biases 153 | 154 | if leaky: 155 | local = self.leaky_relu(local) 156 | else: 157 | local = tf.identity(local, name=scope.name) 158 | 159 | return local 160 | 161 | def leaky_relu(self, x, alpha=0.1, dtype=tf.float32): 162 | """leaky relu 163 | if x > 0: 164 | return x 165 | else: 166 | return alpha * x 167 | Args: 168 | x : Tensor 169 | alpha: float 170 | Return: 171 | y : Tensor 172 | """ 173 | x = tf.cast(x, dtype=dtype) 174 | bool_mask = (x > 0) 175 | mask = tf.cast(bool_mask, dtype=dtype) 176 | return 1.0 * mask * x + alpha * (1 - mask) * x 177 | 178 | def inference(self, images): 179 | """Build the yolo model 180 | 181 | Args: 182 | images: 4-D tensor [batch_size, image_height, image_width, channels] 183 | Returns: 184 | predicts: 4-D tensor [batch_size, cell_size, cell_size, num_classes + 5 * boxes_per_cell] 185 | """ 186 | raise NotImplementedError 187 | 188 | def loss(self, predicts, labels, objects_num): 189 | """Add Loss to all the trainable variables 190 | 191 | Args: 192 | predicts: 4-D tensor [batch_size, cell_size, cell_size, 5 * boxes_per_cell] 193 | ===> (num_classes, boxes_per_cell, 4 * boxes_per_cell) 194 | labels : 3-D tensor of [batch_size, max_objects, 5] 195 | objects_num: 1-D tensor [batch_size] 196 | """ 197 | raise NotImplementedError 198 | -------------------------------------------------------------------------------- /datum/utils/tools.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2009 IW. 2 | # All rights reserved. 
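Both `Net` base classes leave `inference` and `loss` abstract, and their `conv2d`/`local` helpers read `self.weight_decay` even though `__init__` never sets it, so a subclass has to define it. A minimal hypothetical subclass (the class name and the `weight_decay` config key are illustrative, not part of the repo):

```
import tensorflow as tf

class TinyNet(Net):  # hypothetical subclass, for illustration only
    def __init__(self, common_params, net_params):
        super(TinyNet, self).__init__(common_params, net_params)
        # conv2d()/local() reference self.weight_decay, so it must be set here
        self.weight_decay = float(net_params['weight_decay'])

    def inference(self, images):
        conv1 = self.conv2d('conv1', images, kernel_size=[3, 3, 3, 16])
        return self.max_pool(conv1, kernel_size=[2, 2], stride=2)
```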
3 | # 4 | # Author: liuguiyang 5 | # Date: 2018/1/3 6 | 7 | from __future__ import absolute_import 8 | from __future__ import division 9 | from __future__ import print_function 10 | 11 | import os 12 | import re 13 | import cv2 14 | 15 | import numpy as np 16 | import xml.dom.minidom 17 | import random 18 | from lxml.etree import Element, SubElement, tostring 19 | 20 | # Extract the annotation data that corresponds to an image (note: the is_label_text=True branch relies on a `config` label map, e.g. a label_config module, which is not imported in this module) 21 | def fetch_anno_targets_info(abs_anno_path, is_label_text=False): 22 | if not os.path.exists(abs_anno_path): 23 | raise IOError("No such annotation file!") 24 | with open(abs_anno_path, "r") as anno_reader: 25 | total_annos = list() 26 | for line in anno_reader: 27 | line = line.strip() 28 | sub_anno = re.split("\(|\,|\)", line) 29 | a = [int(item) for item in sub_anno if len(item)] 30 | if len(a) == 5: 31 | if is_label_text: 32 | total_annos.append(a[:4]+[config.idx_sign_dict[a[-1]]]) 33 | else: 34 | total_annos.append(a) 35 | return total_annos 36 | 37 | def fetch_xml_format(src_img_data, f_name, anno_list, dataset): 38 | img_height, img_width, img_channle = src_img_data.shape 39 | 40 | node_root = Element('annotation') 41 | node_folder = SubElement(node_root, 'folder') 42 | node_folder.text = dataset 43 | node_filename = SubElement(node_root, 'filename') 44 | node_filename.text = f_name 45 | 46 | node_size = SubElement(node_root, 'size') 47 | node_width = SubElement(node_size, 'width') 48 | node_width.text = str(img_width) 49 | node_height = SubElement(node_size, 'height') 50 | node_height.text = str(img_height) 51 | node_depth = SubElement(node_size, 'depth') 52 | node_depth.text = str(img_channle) 53 | 54 | for anno_target in anno_list: 55 | node_object = SubElement(node_root, 'object') 56 | node_name = SubElement(node_object, 'name') 57 | node_name.text = anno_target[-1] 58 | node_difficult = SubElement(node_object, 'difficult') 59 | node_difficult.text = '0' 60 | node_bndbox = SubElement(node_object, 'bndbox') 61 | node_xmin = SubElement(node_bndbox, 'xmin') 62 | node_xmin.text = str(1 if anno_target[0]<0 else anno_target[0]) 63 | node_ymin = SubElement(node_bndbox, 'ymin') 64 | node_ymin.text = str(1 if anno_target[1]<0 else anno_target[1]) 65 | node_xmax = SubElement(node_bndbox, 'xmax') 66 | node_xmax.text = str(img_width-1 if anno_target[2]>=img_width else anno_target[2]) 67 | node_ymax = SubElement(node_bndbox, 'ymax') 68 | node_ymax.text = str(img_height-1 if anno_target[3]>=img_height else anno_target[3]) 69 | xml_obj = tostring(node_root, pretty_print=True) 70 | xml_obj = xml_obj.decode("utf8") 71 | return xml_obj 72 | 73 | # Given an annotation file, locate the position of every target it describes 74 | def extract_target_from_xml(filename): 75 | if not os.path.exists(filename): 76 | raise IOError(filename + " does not exist!") 77 | # Open the XML document with the minidom parser 78 | DOMTree = xml.dom.minidom.parse(filename) 79 | collection = DOMTree.documentElement 80 | # Fetch all the targets in the collection 81 | targets = collection.getElementsByTagName("object") 82 | res = [] 83 | for target in targets: 84 | target_name = target.getElementsByTagName('name')[0].childNodes[0].data 85 | bndbox = target.getElementsByTagName("bndbox")[0] 86 | xmin = bndbox.getElementsByTagName("xmin")[0].childNodes[0].data 87 | ymin = bndbox.getElementsByTagName("ymin")[0].childNodes[0].data 88 | xmax = bndbox.getElementsByTagName("xmax")[0].childNodes[0].data 89 | ymax = bndbox.getElementsByTagName("ymax")[0].childNodes[0].data 90 | res.append([int(xmin), int(ymin), int(xmax), int(ymax), target_name]) 91 | return res 92 | 93 | # Display the multiple targets annotated in the raw data 94 | def show_targets(img_dir, anno_dir): 95 | for img_name in 
os.listdir(img_dir): 96 | if img_name.startswith("._"): 97 | continue 98 | abs_img_path = img_dir+img_name 99 | abs_anno_path = anno_dir+img_name.replace("jpg", "xml") 100 | target_annos = extract_target_from_xml(abs_anno_path) 101 | image = cv2.imread(abs_img_path) 102 | for target_info in target_annos: 103 | xmin, ymin, xmax, ymax = target_info[:4] 104 | cv2.rectangle(image, (xmin, ymin), (xmax, ymax), (255, 0, 0), 2) 105 | cv2.imshow("src", image) 106 | cv2.waitKey() 107 | 108 | # 采用蓄水池采样算法对序列进行采样 109 | def rand_selected_file(file_list, K_ratio=2/7): 110 | K = int(len(file_list) * K_ratio) 111 | res = list() 112 | for i in range(0, len(file_list)): 113 | if i < K: 114 | res.append(file_list[i]) 115 | else: 116 | M = random.randint(0, i) 117 | if M < K: 118 | res[M] = file_list[i] 119 | return res 120 | 121 | def calc_rgb_mean(): 122 | r_list, g_list, b_list = list(), list(), list() 123 | with open("/Volumes/projects/repos/RSI/LSD10/total.txt", "r") as reader: 124 | for line in reader.readlines(): 125 | line = line.strip() 126 | src_img = cv2.imread(line) 127 | b, g, r = cv2.split(src_img) 128 | b_list.append(np.mean(b)) 129 | g_list.append(np.mean(g)) 130 | r_list.append(np.mean(r)) 131 | print(np.mean(r_list)) 132 | print(np.mean(g_list)) 133 | print(np.mean(b_list)) 134 | """ 135 | 104.480289006 136 | 107.307103097 137 | 95.8043901467 138 | """ 139 | 140 | # 从样本中裁剪出制定的大小的候选样本,这其中必须要包含相应的目标 141 | def crop_samples(src_image, anno_targets, SSD_IMG_W=512, SSD_IMG_H=512): 142 | 143 | def _crop_valid(area, anno_targets): 144 | anno_res = [] 145 | for info in anno_targets: 146 | if ((info[0] >= area[0] and info[1] >= area[1]) and 147 | (info[2] <= area[2] and info[3] <= area[3])): 148 | anno_res.append( 149 | [info[0] - area[0], info[1] - area[1], 150 | info[2] - area[0], info[3] - area[1], 151 | info[-1]]) 152 | if (info[0] >= area[0] and info[1] >= area[1] and 153 | info[0] < area[2] and info[1] < area[3] and 154 | (not (info[2] <= area[2] and info[3] <= area[3]))): 155 | base = (info[2] - info[0]) * (info[3] - info[1]) 156 | x_max_min = min(info[2], area[2]) 157 | y_max_min = min(info[3], area[3]) 158 | new_square = (x_max_min - info[0]) * (y_max_min - info[1]) 159 | if new_square / base >= 0.8: 160 | anno_res.append( 161 | [info[0] - area[0], info[1] - area[1], 162 | x_max_min - area[0], y_max_min - area[1], 163 | info[-1]]) 164 | return anno_res 165 | 166 | def _random_crop_for_target(): 167 | img_height, img_width = src_image.shape[:2] 168 | crop_list, anno_list = [], [] 169 | for idx in range(0, len(anno_targets)): 170 | c_x = (anno_targets[idx][0] + anno_targets[idx][2]) // 2 171 | c_y = (anno_targets[idx][1] + anno_targets[idx][3]) // 2 172 | 173 | u_x = random.randint(max(0, c_x - SSD_IMG_W // 2), anno_targets[idx][0]) 174 | u_y = random.randint(max(0, c_y - SSD_IMG_H // 2), anno_targets[idx][1]) 175 | 176 | area = [u_x, u_y, u_x + SSD_IMG_W, u_y + SSD_IMG_H] 177 | # 检测当前的候选框中是否包含了目标,并算出目标在给定图像的位置 178 | trans_targets = _crop_valid(area, anno_targets) 179 | if trans_targets: 180 | crop_list.append(area) 181 | anno_list.append(trans_targets) 182 | return crop_list, anno_list 183 | 184 | def _align_crop_for_target(): 185 | h, w = src_image.shape[:2] 186 | crop_list, anno_list = [], [] 187 | for lx in range(0, max(1, w-SSD_IMG_W+1), SSD_IMG_W//5): 188 | for ly in range(0, max(1, h-SSD_IMG_H+1), SSD_IMG_H//5): 189 | u_x, u_y = lx, ly 190 | # if lx + SSD_IMG_W > w: 191 | # u_x = w - SSD_IMG_W 192 | # if ly + SSD_IMG_H > h: 193 | # u_y = h - SSD_IMG_H 194 | area = [u_x, u_y, u_x + 
SSD_IMG_W, u_y + SSD_IMG_H] 195 | trans_targets = list() 196 | trans_targets = _crop_valid(area, anno_targets) 197 | if trans_targets: 198 | crop_list.append(area) 199 | anno_list.append(trans_targets) 200 | return crop_list, anno_list 201 | 202 | crop_list, anno_list = _align_crop_for_target() 203 | return crop_list, anno_list 204 | 205 | 206 | if __name__ == '__main__': 207 | a = fetch_anno_targets_info( 208 | "/Volumes/projects/repos/RSI/NWPUVHR10/sub_annotation/001.txt") 209 | print(a) -------------------------------------------------------------------------------- /eagle/observe/augmentors/blur.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2009 IW. 2 | # All rights reserved. 3 | # 4 | # Author: liuguiyang 5 | # Date: 2018/2/28 6 | 7 | from __future__ import absolute_import 8 | from __future__ import division 9 | from __future__ import print_function 10 | 11 | 12 | import cv2 13 | import numpy as np 14 | from scipy import ndimage 15 | 16 | import eagle.utils as eu 17 | from eagle.observe.base.meta import Augmentor 18 | from eagle.parameter import StochasticParameter 19 | from eagle.parameter import Deterministic, DiscreteUniform, Uniform 20 | 21 | 22 | class GaussianBlur(Augmentor): 23 | """ 24 | Augmenter to blur images using gaussian kernels. 25 | 26 | Examples 27 | -------- 28 | >>> aug = iaa.GaussianBlur(sigma=1.5) 29 | 30 | blurs all images using a gaussian kernel with standard deviation 1.5. 31 | 32 | >>> aug = iaa.GaussianBlur(sigma=(0.0, 3.0)) 33 | 34 | blurs images using a gaussian kernel with a random standard deviation 35 | from the range 0.0 <= x <= 3.0. The value is sampled per image. 36 | """ 37 | 38 | def __init__(self, sigma=0, name=None, deterministic=False, random_state=None): 39 | super(GaussianBlur, self).__init__( 40 | name=name, deterministic=deterministic, random_state=random_state) 41 | 42 | if eu.is_single_number(sigma): 43 | self.sigma = Deterministic(sigma) 44 | elif eu.is_iterable(sigma): 45 | eu.do_assert(len(sigma) == 2, 46 | "Expected tuple/list with 2 entries, got %d entries." % (len(sigma),)) 47 | self.sigma = Uniform(sigma[0], sigma[1]) 48 | elif isinstance(sigma, StochasticParameter): 49 | self.sigma = sigma 50 | else: 51 | raise Exception("Expected float, int, tuple/list with 2 entries or StochasticParameter. Got %s." % (type(sigma),)) 52 | 53 | self.eps = 0.001 # epsilon value to estimate whether sigma is above 0 54 | 55 | def _augment_images(self, images, random_state, parents, hooks): 56 | result = images 57 | nb_images = len(images) 58 | samples = self.sigma.draw_samples((nb_images,), random_state=random_state) 59 | for i in range(nb_images): 60 | nb_channels = images[i].shape[2] 61 | sig = samples[i] 62 | if sig > 0 + self.eps: 63 | # note that while gaussian_filter can be applied to all channels 64 | # at the same time, that should not be done here, because then 65 | # the blurring would also happen across channels (e.g. red 66 | # values might be mixed with blue values in RGB) 67 | for channel in range(nb_channels): 68 | result[i][:, :, channel] = ndimage.gaussian_filter(result[i][:, :, channel], sig) 69 | return result 70 | 71 | def _augment_keypoints(self, keypoints_on_images, random_state, parents, hooks): 72 | return keypoints_on_images 73 | 74 | def get_parameters(self): 75 | return [self.sigma] 76 | 77 | 78 | class AverageBlur(Augmentor): 79 | """ 80 | Blur an image by computing simple means over neighbourhoods. 
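A usage sketch for the `GaussianBlur` defined above; note the docstring examples use the `iaa.` prefix from imgaug, but the class lives in this module, and the `augment_images` entry point is assumed to be provided by the `Augmentor` base class:

```
import numpy as np
from eagle.observe.augmentors.blur import GaussianBlur

aug = GaussianBlur(sigma=(0.0, 3.0))   # sigma drawn per image from U(0, 3)
images = np.random.randint(0, 256, size=(4, 64, 64, 3), dtype=np.uint8)
blurred = aug.augment_images(images)   # assumed base-class entry point
```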
81 | 82 | Examples 83 | -------- 84 | >>> aug = iaa.AverageBlur(k=5) 85 | 86 | Blurs all images using a kernel size of 5x5. 87 | 88 | >>> aug = iaa.AverageBlur(k=(2, 5)) 89 | 90 | Blurs images using a varying kernel size per image, which is sampled 91 | from the interval [2..5]. 92 | 93 | >>> aug = iaa.AverageBlur(k=((5, 7), (1, 3))) 94 | 95 | Blurs images using a varying kernel size per image, whose height 96 | is sampled from the interval [5..7] and whose width is sampled 97 | from [1..3]. 98 | """ 99 | 100 | def __init__(self, k=1, name=None, deterministic=False, random_state=None): 101 | super(AverageBlur, self).__init__( 102 | name=name, deterministic=deterministic, random_state=random_state) 103 | 104 | self.mode = "single" 105 | if eu.is_single_number(k): 106 | self.k = Deterministic(int(k)) 107 | elif eu.is_iterable(k): 108 | eu.do_assert(len(k) == 2) 109 | if all([eu.is_single_number(ki) for ki in k]): 110 | self.k = DiscreteUniform(int(k[0]), int(k[1])) 111 | elif all([isinstance(ki, StochasticParameter) for ki in k]): 112 | self.mode = "two" 113 | self.k = (k[0], k[1]) 114 | else: 115 | k_tuple = [None, None] 116 | if eu.is_single_number(k[0]): 117 | k_tuple[0] = Deterministic(int(k[0])) 118 | elif eu.is_iterable(k[0]) and all( 119 | [eu.is_single_number(ki) for ki in k[0]]): 120 | k_tuple[0] = DiscreteUniform(int(k[0][0]), int(k[0][1])) 121 | else: 122 | raise Exception("k[0] expected to be int or tuple of two ints, got %s" % (type(k[0]),)) 123 | 124 | if eu.is_single_number(k[1]): 125 | k_tuple[1] = Deterministic(int(k[1])) 126 | elif eu.is_iterable(k[1]) and all( 127 | [eu.is_single_number(ki) for ki in k[1]]): 128 | k_tuple[1] = DiscreteUniform(int(k[1][0]), int(k[1][1])) 129 | else: 130 | raise Exception("k[1] expected to be int or tuple of two ints, got %s" % (type(k[1]),)) 131 | 132 | self.mode = "two" 133 | self.k = k_tuple 134 | elif isinstance(k, StochasticParameter): 135 | self.k = k 136 | else: 137 | raise Exception("Expected int, tuple/list with 2 entries or StochasticParameter. Got %s." % (type(k),)) 138 | 139 | def _augment_images(self, images, random_state, parents, hooks): 140 | result = images 141 | nb_images = len(images) 142 | if self.mode == "single": 143 | samples = self.k.draw_samples((nb_images,), random_state=random_state) 144 | samples = (samples, samples) 145 | else: 146 | samples = ( 147 | self.k[0].draw_samples((nb_images,), random_state=random_state), 148 | self.k[1].draw_samples((nb_images,), random_state=random_state), 149 | ) 150 | for i in range(nb_images): 151 | kh, kw = samples[0][i], samples[1][i] 152 | #print(images.shape, result.shape, result[i].shape) 153 | kernel_impossible = (kh == 0 or kw == 0) 154 | kernel_does_nothing = (kh == 1 and kw == 1) 155 | if not kernel_impossible and not kernel_does_nothing: 156 | image_aug = cv2.blur(result[i], (kh, kw)) 157 | # cv2.blur() removes channel axis for single-channel images 158 | if image_aug.ndim == 2: 159 | image_aug = image_aug[..., np.newaxis] 160 | result[i] = image_aug 161 | return result 162 | 163 | def _augment_keypoints(self, keypoints_on_images, random_state, parents, hooks): 164 | return keypoints_on_images 165 | 166 | def get_parameters(self): 167 | return [self.k] 168 | 169 | 170 | class MedianBlur(Augmentor): 171 | """ 172 | Blur an image by computing median values over neighbourhoods. 173 | 174 | Median blurring can be used to remove small dirt from images. 175 | At larger kernel sizes, its effects have some similarity with Superpixels. 
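For the asymmetric form of `AverageBlur`, kernel height and width are sampled independently per image. One caveat worth flagging: OpenCV's `cv2.blur` interprets `ksize` as `(width, height)`, so the `(kh, kw)` tuple passed in `_augment_images` appears to swap the sampled axes (harmless in the symmetric modes, where both come from the same distribution). Usage sketch, with the same assumed `augment_images` entry point:

```
import numpy as np
from eagle.observe.augmentors.blur import AverageBlur

aug = AverageBlur(k=((5, 7), (1, 3)))   # height ~ U{5..7}, width ~ U{1..3}
images = np.random.randint(0, 256, size=(2, 64, 64, 3), dtype=np.uint8)
out = aug.augment_images(images)        # assumed base-class entry point
```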
176 | 177 | Examples 178 | -------- 179 | >>> aug = iaa.MedianBlur(k=5) 180 | 181 | blurs all images using a kernel size of 5x5. 182 | 183 | >>> aug = iaa.MedianBlur(k=(3, 7)) 184 | 185 | blurs images using a varying kernel size per image, which is 186 | an odd value sampled from the interval [3..7], i.e. 3 or 5 or 7. 187 | """ 188 | 189 | def __init__(self, k=1, name=None, deterministic=False, random_state=None): 190 | super(MedianBlur, self).__init__( 191 | name=name, deterministic=deterministic, random_state=random_state) 192 | 193 | if eu.is_single_number(k): 194 | eu.do_assert(k % 2 != 0, 195 | "Expected k to be odd, got %d. Add or subtract 1." % (int(k),)) 196 | self.k = Deterministic(int(k)) 197 | elif eu.is_iterable(k): 198 | eu.do_assert(len(k) == 2) 199 | eu.do_assert(all([eu.is_single_number(ki) for ki in k])) 200 | eu.do_assert(k[0] % 2 != 0, 201 | "Expected k[0] to be odd, got %d. Add or subtract 1." % (int(k[0]),)) 202 | eu.do_assert(k[1] % 2 != 0, 203 | "Expected k[1] to be odd, got %d. Add or subtract 1." % (int(k[1]),)) 204 | self.k = DiscreteUniform(int(k[0]), int(k[1])) 205 | elif isinstance(k, StochasticParameter): 206 | self.k = k 207 | else: 208 | raise Exception("Expected int, tuple/list with 2 entries or StochasticParameter. Got %s." % (type(k),)) 209 | 210 | def _augment_images(self, images, random_state, parents, hooks): 211 | result = images 212 | nb_images = len(images) 213 | samples = self.k.draw_samples((nb_images,), random_state=random_state) 214 | for i in range(nb_images): 215 | ki = samples[i] 216 | if ki > 1: 217 | ki = ki + 1 if ki % 2 == 0 else ki 218 | image_aug = cv2.medianBlur(result[i], ki) 219 | # cv2.medianBlur() removes channel axis for single-channel 220 | # images 221 | if image_aug.ndim == 2: 222 | image_aug = image_aug[..., np.newaxis] 223 | result[i] = image_aug 224 | return result 225 | 226 | def _augment_keypoints(self, keypoints_on_images, random_state, parents, hooks): 227 | return keypoints_on_images 228 | 229 | def get_parameters(self): 230 | return [self.k] 231 | -------------------------------------------------------------------------------- /datum/models/ssd/ssd_dataset.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2009 IW. 2 | # All rights reserved. 
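Because `DiscreteUniform(3, 7)` can also draw the even values 4 and 6, `_augment_images` in `MedianBlur` above bumps even draws up by one, so `MedianBlur(k=(3, 7))` effectively uses kernels of 3, 5 or 7, with 5 and 7 slightly more likely than 3:

```
# Effective kernel sizes for MedianBlur(k=(3, 7)):
draws = [3, 4, 5, 6, 7]                                  # raw DiscreteUniform draws
effective = [k + 1 if k % 2 == 0 else k for k in draws]  # even values bumped to odd
assert effective == [3, 5, 5, 7, 7]
```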
3 | # 4 | # Author: liuguiyang 5 | # Date: 2018/3/7 6 | 7 | from __future__ import absolute_import 8 | from __future__ import division 9 | from __future__ import print_function 10 | 11 | import json 12 | import random 13 | from queue import Queue 14 | from threading import Thread 15 | 16 | import cv2 17 | import numpy as np 18 | 19 | from datum.meta.dataset import DataSet 20 | from datum.models.ssd.box_encoder import BoxEncoder 21 | 22 | 23 | class SSDDataSet(DataSet): 24 | """TextDataSet 25 | process text input file dataset 26 | text file format: 27 | image_path xmin1 ymin1 xmax1 ymax1 class1 xmin2 ymin2 xmax2 ymax2 class2 28 | """ 29 | 30 | def __init__(self, common_params, dataset_params, box_encoder_params): 31 | super(SSDDataSet, self).__init__(common_params, dataset_params) 32 | 33 | # process params 34 | self.width = int(common_params['image_size']) 35 | self.height = int(common_params['image_size']) 36 | self.channel = int(common_params["image_channel"]) 37 | self.batch_size = int(common_params['batch_size']) 38 | self.num_classes = int(common_params["num_classes"]) 39 | 40 | self.data_path = str(dataset_params['path']) 41 | self.thread_num = int(dataset_params['thread_num']) 42 | self.classes = json.loads(dataset_params["classes"]) 43 | self.box_output_format = json.loads(dataset_params["box_output_format"]) 44 | self.is_need_bg = True if dataset_params["is_need_bg"] == "True" else False 45 | 46 | self.upper_resize_rate = float(dataset_params["upper_resize_rate"]) 47 | self.lower_resize_rate = float(dataset_params["lower_resize_rate"]) 48 | 49 | self.box_encoder = BoxEncoder(common_params, box_encoder_params) 50 | 51 | # record and image_label queue 52 | self.record_queue = Queue(maxsize=10000) 53 | self.image_label_queue = Queue(maxsize=2000) 54 | 55 | self.record_list = [] 56 | 57 | # filling the record_list 58 | input_file = open(self.data_path, 'r') 59 | 60 | for line in input_file: 61 | line = line.strip() 62 | ss = line.split(' ') 63 | ss[1:] = [float(num) for num in ss[1:]] 64 | # 文件中存储的类别都是从0开始的,如果需要在处理前添加background这个类别 65 | # 需要将background这个设置为0,其他的类别编号自动+1 66 | if self.is_need_bg: 67 | self.classes.insert(0, "background") 68 | step_len = len(self.box_output_format) 69 | start_class_idx = self.box_output_format.index("class_id") + 1 70 | for i in range(start_class_idx, len(ss), step_len): 71 | ss[i] += 1 72 | self.record_list.append(ss) 73 | 74 | self.record_point = 0 75 | self.record_number = len(self.record_list) 76 | 77 | self.num_batch_per_epoch = int(self.record_number / self.batch_size) 78 | 79 | t_record_producer = Thread(target=self.record_producer) 80 | t_record_producer.daemon = True 81 | t_record_producer.start() 82 | 83 | for i in range(self.thread_num): 84 | t = Thread(target=self.record_customer) 85 | t.daemon = True 86 | t.start() 87 | 88 | def record_producer(self): 89 | while True: 90 | if self.record_point % self.record_number == 0: 91 | random.shuffle(self.record_list) 92 | self.record_point = 0 93 | self.record_queue.put(self.record_list[self.record_point]) 94 | self.record_point += 1 95 | 96 | def record_customer(self): 97 | while True: 98 | item = self.record_queue.get() 99 | out = self.record_process(item) 100 | if out is not None: 101 | # 在归整完数据之后,要对object_label中使用BoxEncoder的调用 102 | image, gt_labels = out[:] 103 | # gt_labels from 104 | # [xmin, ymin, xmax, ymax] --> [xmin, xmax, ymin, ymax] 105 | # print(gt_labels) 106 | for cell in gt_labels: 107 | cell[1], cell[2] = cell[2], cell[1] 108 | # print(gt_labels) 109 | y_true_encoded = 
self.box_encoder.encode_y_sample(gt_labels) 110 | self.image_label_queue.put([image, y_true_encoded]) 111 | 112 | def record_process(self, record): 113 | """对于每个样本的数据具体该如何处理 114 | Args: record --> [image_path, xmin, ymin, xmax, ymax, class_id] 115 | Returns: 116 | image: 3-D ndarray 117 | labels: 2-D list [[xmin, ymin, xmax, ymax, class_id]] 118 | """ 119 | image = cv2.imread(record[0]) 120 | image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB) 121 | h = image.shape[0] 122 | w = image.shape[1] 123 | 124 | real_rate = w / h 125 | target_rate = self.width / self.height 126 | 127 | if (target_rate - self.lower_resize_rate 128 | <= real_rate <= target_rate + self.upper_resize_rate): 129 | width_rate = self.width * 1.0 / w 130 | height_rate = self.height * 1.0 / h 131 | 132 | image = cv2.resize(image, (self.height, self.width)) 133 | labels = [] 134 | i = 1 135 | while i < len(record): 136 | xmin = record[i] 137 | ymin = record[i + 1] 138 | xmax = record[i + 2] 139 | ymax = record[i + 3] 140 | class_id = record[i + 4] 141 | labels.append([xmin * width_rate, ymin * height_rate, 142 | xmax * width_rate, ymax * height_rate, 143 | class_id]) 144 | i += 5 145 | return [image, labels] 146 | elif real_rate > target_rate + self.upper_resize_rate: 147 | # 当前的图像不满足直接resize的比例,需要按照最短边进行一定比例进行裁减 148 | h0 = h 149 | w0 = np.ceil(h0 * (target_rate + self.upper_resize_rate)).astype(np.int32) 150 | # we should crop from (0, 0) 151 | image = image[:, 0:w0] 152 | image = cv2.resize(image, (self.height, self.width)) 153 | width_rate = self.width * 1.0 / w0 154 | height_rate = self.height * 1.0 / h0 155 | 156 | # 处理原始目标区域在裁减之后的图像中的实际位置 157 | labels = [] 158 | i = 1 159 | while i < len(record): 160 | xmin = record[i] 161 | ymin = record[i + 1] 162 | xmax = record[i + 2] 163 | ymax = record[i + 3] 164 | class_id = record[i + 4] 165 | if xmin < w0 - 1 and xmax <= w0 - 1: 166 | labels.append([xmin * width_rate, ymin * height_rate, 167 | xmax * width_rate, ymax * height_rate, 168 | class_id]) 169 | elif xmin < w0 - 1 and xmax > w0 - 1: 170 | if (w0 - 1 - xmin) / (xmax - xmin) >= 0.6: 171 | labels.append([xmin * width_rate, ymin * height_rate, 172 | w0-1, ymax * height_rate, 173 | class_id]) 174 | else: 175 | pass 176 | else: 177 | pass 178 | i += 5 179 | # 若没有目标符合变换要求,就将这个数据丢弃 180 | if len(labels) != 0: 181 | return [image, labels] 182 | else: 183 | return None 184 | elif real_rate < target_rate - self.lower_resize_rate: 185 | w0 = w 186 | h0 = np.ceil(w0 / (target_rate - self.lower_resize_rate)).astype(np.int32) 187 | # we should crop from (0, 0) 188 | image = image[0:h0, :] 189 | image = cv2.resize(image, (self.height, self.width)) 190 | width_rate = self.width * 1.0 / w0 191 | height_rate = self.height * 1.0 / h0 192 | 193 | # 处理原始目标区域在裁减之后的图像中的实际位置 194 | labels = [] 195 | i = 1 196 | while i < len(record): 197 | xmin = record[i] 198 | ymin = record[i + 1] 199 | xmax = record[i + 2] 200 | ymax = record[i + 3] 201 | class_id = record[i + 4] 202 | if ymin < h0 - 1 and ymax <= h0 - 1: 203 | labels.append([xmin * width_rate, ymin * height_rate, 204 | xmax * width_rate, ymax * height_rate, 205 | class_id]) 206 | elif ymin < h0 - 1 < ymax: 207 | if (h0 - 1 - ymin) / (ymax - ymin) >= 0.6: 208 | labels.append([xmin * width_rate, ymin * height_rate, 209 | xmax * width_rate, h0 - 1, 210 | class_id]) 211 | else: 212 | pass 213 | else: 214 | pass 215 | i += 5 216 | # 若没有目标符合变换要求,就将这个数据丢弃 217 | if len(labels) != 0: 218 | return [image, labels] 219 | else: 220 | return None 221 | else: 222 | pass 223 | 224 | def batch(self): 225 | 
"""get batch 226 | Returns: 227 | images: 4-D ndarray [batch_size, height, width, 3] 228 | labels: (batch_size, #boxes, #classes + 4 + 4 + 4) 229 | """ 230 | images = [] 231 | labels = [] 232 | for i in range(self.batch_size): 233 | image, label = self.image_label_queue.get() 234 | images.append(image) 235 | labels.append(label) 236 | images = np.asarray(images, dtype=np.float32) 237 | images = images / 255 * 2 - 1 238 | labels = np.concatenate(labels, axis=0) 239 | # labels = np.asarray(labels, dtype=np.float32) 240 | return images, labels 241 | -------------------------------------------------------------------------------- /eagle/brain/ssd/anchor_boxes.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2009 IW. 2 | # All rights reserved. 3 | # 4 | # Author: liuguiyang 5 | # Date: 2017/12/18 6 | 7 | from __future__ import absolute_import 8 | from __future__ import division 9 | from __future__ import print_function 10 | 11 | import numpy as np 12 | import tensorflow as tf 13 | 14 | # import keras.backend as K 15 | from keras.engine.topology import Layer 16 | from keras.engine.topology import InputSpec 17 | 18 | from eagle.brain.ssd.box_encode_decode_utils import convert_coordinates 19 | 20 | 21 | class AnchorBoxes(Layer): 22 | ''' 23 | Input shape: 24 | 4D tensor of shape 25 | `(batch, channels, height, width)` if dim_ordering == 'th' 26 | `(batch, height, width, channels)` if dim_ordering == 'tf' 27 | Output shape: 28 | 5D tensor of shape `(batch, height, width, n_boxes, 8)`. 29 | The last axis contains the four anchor box coordinates and the four variance values for each box. 30 | ''' 31 | def __init__(self, 32 | img_height, img_width, 33 | this_scale, next_scale, 34 | aspect_ratios=[0.5, 1.0, 2.0], 35 | two_boxes_for_ar1=True, 36 | variances=[1.0, 1.0, 1.0, 1.0], 37 | coords='centroids', normalize_coords=False, **kwargs): 38 | ''' 39 | this_scale (float): A float in [0, 1], the scaling factor for the size of the generated anchor boxes 40 | as a fraction of the shorter side of the input image. 41 | next_scale (float): A float in [0, 1], the next larger scaling factor. Only relevant if 42 | `self.two_boxes_for_ar1 == True`. 43 | aspect_ratios (list, optional): The list of aspect ratios for which default boxes are to be 44 | generated for this layer. Defaults to [0.5, 1.0, 2.0]. 45 | two_boxes_for_ar1 (bool, optional): Only relevant if `aspect_ratios` contains 1. 46 | If `True`, two default boxes will be generated for aspect ratio 1. The first will be generated 47 | using the scaling factor for the respective layer, the second one will be generated using 48 | geometric mean of said scaling factor and next bigger scaling factor. Defaults to `True`. 49 | variances (list, optional): A list of 4 floats >0 with scaling factors (actually it's not factors but divisors 50 | to be precise) for the encoded predicted box coordinates. A variance value of 1.0 would apply 51 | no scaling at all to the predictions, while values in (0,1) upscale the encoded predictions and values greater 52 | than 1.0 downscale the encoded predictions. If you want to reproduce the configuration of the original SSD, 53 | set this to `[0.1, 0.1, 0.2, 0.2]`, provided the coordinate Others is 'centroids'. Defaults to `[1.0, 1.0, 1.0, 1.0]`. 54 | coords (str, optional): The box coordinate Others to be used. Can be either 'centroids' for the Others 55 | `(cx, cy, w, h)` (box center coordinates, width, and height) or 'minmax' for the Others 56 | `(xmin, xmax, ymin, ymax)`. 
57 |         normalize_coords (bool, optional): Set to `True` if the model uses relative instead of absolute coordinates,
58 |             i.e. if the model predicts box coordinates within [0, 1] instead of absolute coordinates. Defaults to `False`.
59 |         '''
60 |         if (this_scale < 0) or (this_scale > 1) or (next_scale < 0) or (next_scale > 1):
61 |             raise ValueError("this_scale and next_scale must both be in [0, 1]")
62 | 
63 |         self.img_height = img_height
64 |         self.img_width = img_width
65 |         self.this_scale = this_scale
66 |         self.next_scale = next_scale
67 |         self.aspect_ratios = aspect_ratios
68 |         self.two_boxes_for_ar1 = two_boxes_for_ar1
69 |         self.variances = variances
70 |         self.coords = coords
71 |         self.normalize_coords = normalize_coords
72 | 
73 |         # Compute the number of boxes per cell
74 |         if (1 in aspect_ratios) and two_boxes_for_ar1:
75 |             self.n_boxes = len(aspect_ratios) + 1
76 |         else:
77 |             self.n_boxes = len(aspect_ratios)
78 | 
79 |         super(AnchorBoxes, self).__init__(**kwargs)
80 | 
81 |     def build(self, input_shape):
82 |         self.input_spec = [InputSpec(shape=input_shape)]
83 |         super(AnchorBoxes, self).build(input_shape)
84 | 
85 |     def call(self, x, mask=None):
86 |         # Compute box width and height for each aspect ratio
87 |         # The shorter side of the image will be used to compute `w` and `h` using `scale` and `aspect_ratios`.
88 |         self.aspect_ratios = np.sort(self.aspect_ratios)
89 |         size = min(self.img_height, self.img_width)
90 |         # Compute the box widths and heights for all aspect ratios
91 |         wh_list = []
92 |         for ar in self.aspect_ratios:
93 |             if (ar == 1) and self.two_boxes_for_ar1:
94 |                 # Compute the regular default box for aspect ratio 1 and...
95 |                 w = self.this_scale * size * np.sqrt(ar)
96 |                 h = self.this_scale * size / np.sqrt(ar)
97 |                 wh_list.append((w, h))
98 |                 # ...also compute one slightly larger version using the geometric mean of this scale value and the next
99 |                 w = np.sqrt(self.this_scale * self.next_scale) * size * np.sqrt(ar)
100 |                 h = np.sqrt(self.this_scale * self.next_scale) * size / np.sqrt(ar)
101 |                 wh_list.append((w, h))
102 |             else:
103 |                 w = self.this_scale * size * np.sqrt(ar)
104 |                 h = self.this_scale * size / np.sqrt(ar)
105 |                 wh_list.append((w, h))
106 |         wh_list = np.array(wh_list)
107 | 
108 |         # We need the shape of the input tensor
109 |         batch_size, feature_map_height, feature_map_width, feature_map_channels = x.get_shape().as_list()
110 | 
111 |         # Compute the grid of box center points. They are identical for all aspect ratios.
112 |         cell_width = self.img_width / feature_map_width
113 |         cell_height = self.img_height / feature_map_height
114 |         cx = np.linspace(cell_width/2, self.img_width-cell_width/2, feature_map_width)
115 |         cy = np.linspace(cell_height/2, self.img_height-cell_height/2, feature_map_height)
116 |         cx_grid, cy_grid = np.meshgrid(cx, cy)
117 |         # This is necessary for np.tile() to do what we want further down
118 |         cx_grid = np.expand_dims(cx_grid, -1)
119 |         # This is necessary for np.tile() to do what we want further down
120 |         cy_grid = np.expand_dims(cy_grid, -1)
121 | 
122 |         # Create a 4D tensor template of shape `(feature_map_height, feature_map_width, n_boxes, 4)`
123 |         # where the last dimension will contain `(cx, cy, w, h)`
124 |         boxes_tensor = np.zeros((feature_map_height, feature_map_width, self.n_boxes, 4))
125 | 
126 |         boxes_tensor[:, :, :, 0] = np.tile(cx_grid, (1, 1, self.n_boxes))  # Set cx
127 |         boxes_tensor[:, :, :, 1] = np.tile(cy_grid, (1, 1, self.n_boxes))  # Set cy
128 |         boxes_tensor[:, :, :, 2] = wh_list[:, 0]  # Set w
129 |         boxes_tensor[:, :, :, 3] = wh_list[:, 1]  # Set h
130 | 
131 |         # Convert `(cx, cy, w, h)` to `(xmin, xmax, ymin, ymax)`
132 |         boxes_tensor = convert_coordinates(
133 |             boxes_tensor, start_index=0, conversion='centroids2minmax')
134 | 
135 |         # If `normalize_coords` is enabled, normalize the coordinates to be within [0, 1]
136 |         if self.normalize_coords:
137 |             boxes_tensor[:, :, :, :2] /= self.img_width
138 |             boxes_tensor[:, :, :, 2:] /= self.img_height
139 | 
140 |         if self.coords == 'centroids':
141 |             # TODO: Implement box limiting directly for `(cx, cy, w, h)` so that we don't have to unnecessarily convert back and forth
142 |             # Convert `(xmin, xmax, ymin, ymax)` back to `(cx, cy, w, h)`
143 |             boxes_tensor = convert_coordinates(
144 |                 boxes_tensor, start_index=0, conversion='minmax2centroids')
145 | 
146 |         # 4: Create a tensor to contain the variances and append it to `boxes_tensor`. This tensor has the same shape
147 |         # as `boxes_tensor` and simply contains the same 4 variance values for every position in the last axis.
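        # (Illustrative aside: with the original SSD setting `variances = [0.1, 0.1, 0.2, 0.2]`,
        # every anchor position would get exactly those four values appended after its four
        # coordinates; with the all-ones default above, the encoded offsets are left unscaled.)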
148 |         # Has shape `(feature_map_height, feature_map_width, n_boxes, 4)`
149 |         variances_tensor = np.zeros_like(boxes_tensor)
150 |         # Long live broadcasting
151 |         variances_tensor += self.variances
152 |         # Now `boxes_tensor` becomes a tensor of shape `(feature_map_height, feature_map_width, n_boxes, 8)`
153 |         boxes_tensor = np.concatenate((boxes_tensor, variances_tensor), axis=-1)
154 | 
155 |         # Now prepend one dimension to `boxes_tensor` to account for the batch size and tile it along the batch dimension.
156 |         # The result will be a 5D tensor of shape `(batch_size, feature_map_height, feature_map_width, n_boxes, 8)`
157 |         boxes_tensor = np.expand_dims(boxes_tensor, axis=0)
158 |         boxes_tensor = tf.tile(
159 |             tf.constant(boxes_tensor, dtype='float32'),
160 |             tf.stack([tf.shape(x)[0], 1, 1, 1, 1]))  # use the dynamic batch size; the static batch dim may be None
161 | 
162 |         return boxes_tensor
163 | 
164 |     def compute_output_shape(self, input_shape):
165 |         batch_size, feature_map_height, feature_map_width, feature_map_channels = input_shape
166 |         return (batch_size, feature_map_height, feature_map_width, self.n_boxes, 8)
167 | 
168 |     def get_config(self):
169 |         config = {
170 |             'img_height': self.img_height,
171 |             'img_width': self.img_width,
172 |             'this_scale': self.this_scale,
173 |             'next_scale': self.next_scale,
174 |             'aspect_ratios': list(self.aspect_ratios),
175 |             'two_boxes_for_ar1': self.two_boxes_for_ar1,
176 |             'variances': list(self.variances),
177 |             'coords': self.coords,
178 |             'normalize_coords': self.normalize_coords
179 |         }
180 |         base_config = super(AnchorBoxes, self).get_config()
181 |         return dict(list(base_config.items()) + list(config.items()))
182 | 
--------------------------------------------------------------------------------
/eagle/utils.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) 2009 IW.
2 | # All rights reserved.
3 | #
4 | # Author: liuguiyang
5 | # Date: 2018/2/26
6 | 
7 | from __future__ import absolute_import
8 | from __future__ import division
9 | from __future__ import print_function
10 | 
11 | import sys
12 | import math
13 | import numbers
14 | import numpy as np
15 | 
16 | import cv2
17 | from scipy import misc
18 | 
19 | 
20 | CURRENT_RANDOM_STATE = np.random.RandomState(42)
21 | 
22 | 
23 | def seed(seedval):
24 |     CURRENT_RANDOM_STATE.seed(seedval)
25 | 
26 | 
27 | def current_random_state():
28 |     return CURRENT_RANDOM_STATE
29 | 
30 | 
31 | def new_random_state(seed=None, fully_random=False):
32 |     if seed is None:
33 |         if not fully_random:
34 |             seed = CURRENT_RANDOM_STATE.randint(0, 10 ** 6, 1)[0]
35 |     return np.random.RandomState(seed)  # seed stays None when fully_random, so NumPy seeds from OS entropy
36 | 
37 | 
38 | def dummy_random_state():
39 |     return np.random.RandomState(1)
40 | 
41 | 
42 | def copy_random_state(random_state, force_copy=False):
43 |     if random_state == np.random and not force_copy:
44 |         return random_state
45 |     else:
46 |         rs_copy = dummy_random_state()
47 |         orig_state = random_state.get_state()
48 |         rs_copy.set_state(orig_state)
49 |         return rs_copy
50 | 
51 | 
52 | def forward_random_state(random_state):
53 |     random_state.uniform()
54 | 
55 | 
56 | def do_assert(condition, message="Assertion Failed"):
57 |     if not condition:
58 |         raise AssertionError(str(message))
59 | 
60 | 
61 | def is_np_array(val):
62 |     return isinstance(val, np.ndarray)
63 | 
64 | 
65 | def is_iterable(val):
66 |     return isinstance(val, (tuple, list))
67 | 
68 | 
69 | def is_callable(val):
70 |     if sys.version_info[0] == 3 and sys.version_info[1] <= 2:
71 |         return hasattr(val, '__call__')
72 |     else:
73 |         return callable(val)
74 | 
75 | 
76 | def is_string(val):
77 |     return isinstance(val, str)
78 | 
79 | 
80 | def is_single_integer(val):
81 |     return isinstance(val, numbers.Integral)
82 | 
83 | 
84 | def is_single_float(val):
85 |     return isinstance(val, numbers.Real) and not is_single_integer(val)
86 | 
87 | 
88 | def is_single_number(val):
89 |     return isinstance(val, numbers.Real) or isinstance(val, numbers.Integral)
90 | 
91 | 
92 | def is_integer_array(val):
93 |     return is_np_array(val) and issubclass(val.dtype.type, numbers.Integral)
94 | 
95 | 
96 | def copy_dtypes_for_restore(images):
97 |     return images.dtype if is_np_array(images) else [image.dtype for image in images]
98 | 
99 | def restore_augmented_images_dtypes_(images, orig_dtypes):
100 |     if is_np_array(images):
101 |         return images.astype(orig_dtypes)  # astype copies; a dtype change cannot happen fully in place
102 |     else:
103 |         images[:] = [image.astype(dtype) for image, dtype in zip(images, orig_dtypes)]
104 |         return images  # return the result so the copying wrapper below can hand it back
105 | 
106 | def restore_augmented_images_dtypes(images, orig_dtypes):
107 |     if is_np_array(images):
108 |         images = np.copy(images)
109 |     else:
110 |         images = [np.copy(image) for image in images]
111 |     return restore_augmented_images_dtypes_(images, orig_dtypes)
112 | 
113 | def clip_augmented_images_(images, minval, maxval):
114 |     if is_np_array(images):
115 |         np.clip(images, minval, maxval, out=images)
116 |     else:
117 |         images[:] = [np.clip(image, minval, maxval) for image in images]
118 |     return images  # return the result so the copying wrapper below can hand it back
119 | 
120 | def clip_augmented_images(images, minval, maxval):
121 |     if is_np_array(images):
122 |         images = np.copy(images)
123 |     else:
124 |         images = [np.copy(image) for image in images]
125 |     return clip_augmented_images_(images, minval, maxval)
126 | 
127 | # --------------------------------------------------------------------------------
128 | # Basic image utility functions
129 | # --------------------------------------------------------------------------------
130 | 
131 | def imresize_many_images(images, sizes=None, interpolation=None):
132 |     """
133 |     Resize many images to a specified size.
134 | 
135 |     Parameters
136 |     ----------
137 |     images : (N,H,W,C) ndarray
138 |         Array of the images to resize.
139 |         Expected to usually be of dtype uint8.
140 | 
141 |     sizes : iterable of two ints
142 |         The new size in (height, width)
143 |         format.
144 | 
145 |     interpolation : None or string or int, optional(default=None)
146 |         The interpolation to use during resize.
147 |         If int, then expected to be one of:
148 |             * cv2.INTER_NEAREST (nearest neighbour interpolation)
149 |             * cv2.INTER_LINEAR (linear interpolation)
150 |             * cv2.INTER_AREA (area interpolation)
151 |             * cv2.INTER_CUBIC (cubic interpolation)
152 |         If string, then expected to be one of:
153 |             * "nearest" (identical to cv2.INTER_NEAREST)
154 |             * "linear" (identical to cv2.INTER_LINEAR)
155 |             * "area" (identical to cv2.INTER_AREA)
156 |             * "cubic" (identical to cv2.INTER_CUBIC)
157 |         If None, the interpolation will be chosen automatically. For size
158 |         increases, area interpolation will be picked and for size decreases,
159 |         linear interpolation will be picked.
160 | 
161 |     Returns
162 |     -------
163 |     result : (N,H',W',C) ndarray
164 |         Array of the resized images.
165 | 
166 |     """
167 |     s = images.shape
168 |     do_assert(len(s) == 4, s)
169 |     nb_images = s[0]
170 |     im_height, im_width = s[1], s[2]
171 |     nb_channels = s[3]
172 |     height, width = sizes[0], sizes[1]
173 | 
174 |     if height == im_height and width == im_width:
175 |         return np.copy(images)
176 | 
177 |     ip = interpolation
178 |     do_assert(ip is None or ip in ["nearest", "linear", "area", "cubic", cv2.INTER_NEAREST, cv2.INTER_LINEAR, cv2.INTER_AREA, cv2.INTER_CUBIC])
179 |     if ip is None:
180 |         if height > im_height or width > im_width:
181 |             ip = cv2.INTER_AREA
182 |         else:
183 |             ip = cv2.INTER_LINEAR
184 |     elif ip in ["nearest", cv2.INTER_NEAREST]:
185 |         ip = cv2.INTER_NEAREST
186 |     elif ip in ["linear", cv2.INTER_LINEAR]:
187 |         ip = cv2.INTER_LINEAR
188 |     elif ip in ["area", cv2.INTER_AREA]:
189 |         ip = cv2.INTER_AREA
190 |     elif ip in ["cubic", cv2.INTER_CUBIC]:
191 |         ip = cv2.INTER_CUBIC
192 |     else:
193 |         raise Exception("Invalid interpolation order")
194 | 
195 |     result = np.zeros((nb_images, height, width, nb_channels), dtype=np.uint8)
196 |     for img_idx in range(nb_images):
197 |         # TODO fallback to scipy here if image isn't uint8
198 |         result_img = cv2.resize(images[img_idx], (width, height), interpolation=ip)
199 |         if len(result_img.shape) == 2:
200 |             result_img = result_img[:, :, np.newaxis]
201 |         result[img_idx] = result_img
202 |     return result
203 | 
204 | 
205 | def imresize_single_image(image, sizes, interpolation=None):
206 |     """
207 |     Resizes a single image.
208 | 
209 |     Parameters
210 |     ----------
211 |     image : (H,W,C) ndarray or (H,W) ndarray
212 |         Array of the image to resize.
213 |         Expected to usually be of dtype uint8.
214 | 
215 |     sizes : iterable of two ints
216 |         See `imresize_many_images()`.
217 | 
218 |     interpolation : None or string or int, optional(default=None)
219 |         See `imresize_many_images()`.
220 | 
221 |     Returns
222 |     -------
223 |     out : (H',W',C) ndarray or (H',W') ndarray
224 |         The resized image.
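        Example (illustrative): `imresize_single_image(img, (64, 128))` on an
        (H, W, C) uint8 input returns an array of shape (64, 128, C).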
225 | 
226 |     """
227 |     grayscale = False
228 |     if image.ndim == 2:
229 |         grayscale = True
230 |         image = image[:, :, np.newaxis]
231 |     do_assert(len(image.shape) == 3, image.shape)
232 |     rs = imresize_many_images(image[np.newaxis, :, :, :], sizes, interpolation=interpolation)
233 |     if grayscale:
234 |         return np.squeeze(rs[0, :, :, 0])
235 |     else:
236 |         return rs[0, ...]
237 | 
238 | 
239 | def draw_grid(images, rows=None, cols=None):
240 |     """
241 |     Converts multiple input images into a single image showing them in a grid.
242 | 
243 |     Parameters
244 |     ----------
245 |     images : (N,H,W,3) ndarray or iterable of (H,W,3) array
246 |         The input images to convert to a grid.
247 |         Expected to be RGB and have dtype uint8.
248 | 
249 |     rows : None or int, optional(default=None)
250 |         The number of rows to show in the grid.
251 |         If None, it will be automatically derived.
252 | 
253 |     cols : None or int, optional(default=None)
254 |         The number of cols to show in the grid.
255 |         If None, it will be automatically derived.
256 | 
257 |     Returns
258 |     -------
259 |     grid : (H',W',3) ndarray
260 |         Image of the generated grid.
261 | 
262 |     """
263 |     if is_np_array(images):
264 |         do_assert(images.ndim == 4)
265 |     else:
266 |         do_assert(is_iterable(images) and is_np_array(images[0]) and images[0].ndim == 3)
267 | 
268 |     nb_images = len(images)
269 |     do_assert(nb_images > 0)
270 |     cell_height = max([image.shape[0] for image in images])
271 |     cell_width = max([image.shape[1] for image in images])
272 |     channels = set([image.shape[2] for image in images])
273 |     do_assert(len(channels) == 1, "All images are expected to have the same number of channels, but got channel set %s with length %d instead." % (str(channels), len(channels)))
274 |     nb_channels = list(channels)[0]
275 |     if rows is None and cols is None:
276 |         rows = cols = int(math.ceil(math.sqrt(nb_images)))
277 |     elif rows is not None:
278 |         cols = int(math.ceil(nb_images / rows))
279 |     elif cols is not None:
280 |         rows = int(math.ceil(nb_images / cols))
281 |     do_assert(rows * cols >= nb_images)
282 | 
283 |     width = cell_width * cols
284 |     height = cell_height * rows
285 |     grid = np.zeros((height, width, nb_channels), dtype=np.uint8)
286 |     cell_idx = 0
287 |     for row_idx in range(rows):
288 |         for col_idx in range(cols):
289 |             if cell_idx < nb_images:
290 |                 image = images[cell_idx]
291 |                 cell_y1 = cell_height * row_idx
292 |                 cell_y2 = cell_y1 + image.shape[0]
293 |                 cell_x1 = cell_width * col_idx
294 |                 cell_x2 = cell_x1 + image.shape[1]
295 |                 grid[cell_y1:cell_y2, cell_x1:cell_x2, :] = image
296 |                 cell_idx += 1
297 | 
298 |     return grid
299 | 
300 | def show_grid(images, rows=None, cols=None):
301 |     """
302 |     Converts the input images to a grid image and shows it in a new window.
303 | 
304 |     This function wraps around scipy.misc.imshow(), which requires the
305 |     `see` command to work. On Windows systems, this tends to not be
306 |     the case. (Note: scipy.misc.imshow has been removed from recent SciPy releases, so this helper needs an older SciPy.)
307 | 
308 |     Parameters
309 |     ----------
310 |     images : (N,H,W,3) ndarray or iterable of (H,W,3) array
311 |         See `draw_grid()`.
312 | 
313 |     rows : None or int, optional(default=None)
314 |         See `draw_grid()`.
315 | 
316 |     cols : None or int, optional(default=None)
317 |         See `draw_grid()`.
318 | 
319 |     """
320 |     grid = draw_grid(images, rows=rows, cols=cols)
321 |     misc.imshow(grid)
--------------------------------------------------------------------------------
/eagle/brain/ssd/loss.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) 2009 IW.
2 | # All rights reserved.
3 | #
4 | # Author: liuguiyang
5 | # Date: 2017/12/18
6 | 
7 | from __future__ import absolute_import
8 | from __future__ import division
9 | from __future__ import print_function
10 | 
11 | import tensorflow as tf
12 | 
13 | 
14 | class Loss:
15 | 
16 |     def __init__(self, neg_pos_ratio=3, n_neg_min=0, alpha=1.0):
17 |         """
18 |         Arguments:
19 |             neg_pos_ratio (int, optional): The maximum ratio of negative (i.e. background)
20 |                 to positive ground truth boxes to include in the loss computation.
21 |                 There are no actual background ground truth boxes of course, but `y_true`
22 |                 contains anchor boxes labeled with the background class. Since
23 |                 the number of background boxes in `y_true` will usually exceed
24 |                 the number of positive boxes by far, it is necessary to balance
25 |                 their influence on the loss. Defaults to 3 following the paper.
26 |             n_neg_min (int, optional): The minimum number of negative ground truth boxes to
27 |                 enter the loss computation *per batch*. This argument can be used to make
28 |                 sure that the model learns from a minimum number of negatives in batches
29 |                 in which there are very few, or even none at all, positive ground truth
30 |                 boxes. It defaults to 0 and if used, it should be set to a value that
31 |                 stands in reasonable proportion to the batch size used for training.
32 |             alpha (float, optional): A factor to weight the localization loss in the
33 |                 computation of the total loss. Defaults to 1.0 following the paper.
34 |         """
35 |         self.neg_pos_ratio = neg_pos_ratio
36 |         self.n_neg_min = n_neg_min
37 |         self.alpha = alpha
38 | 
39 |     def smooth_L1_loss(self, y_true, y_pred):
40 |         '''
41 |         Compute smooth L1 loss, see references.
42 |         Arguments:
43 |             y_true (nD tensor): A TensorFlow tensor of any shape containing the ground truth data.
44 |                 In this context, the expected tensor has shape `(batch_size, #boxes, 4)` and
45 |                 contains the ground truth bounding box coordinates, where the last dimension
46 |                 contains `(xmin, xmax, ymin, ymax)`.
47 |             y_pred (nD tensor): A TensorFlow tensor of identical structure to `y_true` containing
48 |                 the predicted data, in this context the predicted bounding box coordinates.
49 |         Returns:
50 |             The smooth L1 loss, an (n-1)-D TensorFlow tensor. In this context, a 2D tensor
51 |             of shape (batch, n_boxes_total).
52 |         References:
53 |             https://arxiv.org/abs/1504.08083
54 |         '''
55 |         absolute_loss = tf.abs(y_true - y_pred)
56 |         square_loss = 0.5 * (y_true - y_pred)**2
57 |         l1_loss = tf.where(tf.less(absolute_loss, 1.0), square_loss, absolute_loss - 0.5)
58 |         return tf.reduce_sum(l1_loss, axis=-1)
59 | 
60 |     def log_loss(self, y_true, y_pred):
61 |         '''
62 |         Compute the softmax log loss.
63 |         Arguments:
64 |             y_true (nD tensor): A TensorFlow tensor of any shape containing the ground truth data.
65 |                 In this context, the expected tensor has shape (batch_size, #boxes, #classes)
66 |                 and contains the ground truth bounding box categories.
67 |             y_pred (nD tensor): A TensorFlow tensor of identical structure to `y_true` containing
68 |                 the predicted data, in this context the predicted bounding box categories.
69 |         Returns:
70 |             The softmax log loss, an (n-1)-D TensorFlow tensor. In this context, a 2D tensor
71 |             of shape (batch, n_boxes_total).
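            Example (illustrative): for a one-hot target `[0, 1, 0]` and a predicted
            distribution `[0.2, 0.7, 0.1]`, the loss for that box is `-log(0.7) ≈ 0.357`.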
72 |         '''
73 |         # Make sure that `y_pred` doesn't contain any zeros (which would break the log function)
74 |         y_pred = tf.maximum(y_pred, 1e-15)
75 |         # Compute the log loss
76 |         log_loss = -tf.reduce_sum(y_true * tf.log(y_pred), axis=-1)
77 |         return log_loss
78 | 
79 |     def compute_loss(self, y_true, y_pred):
80 |         '''
81 |         Compute the loss of the SSD model prediction against the ground truth.
82 |         Arguments:
83 |             y_true (array): A Numpy array of shape `(batch_size, #boxes, #classes + 12)`,
84 |                 where `#boxes` is the total number of boxes that the model predicts
85 |                 per image. Be careful to make sure that the index of each given
86 |                 box in `y_true` is the same as the index for the corresponding
87 |                 box in `y_pred`. The last axis must have length `#classes + 12` and contain
88 |                 `[classes one-hot encoded, 4 ground truth box coordinate offsets, 8 arbitrary entries]`
89 |                 in this order, including the background class. The last eight entries of the
90 |                 last axis are not used by this function and therefore their contents are
91 |                 irrelevant; they only exist so that `y_true` has the same shape as `y_pred`,
92 |                 where the last four entries of the last axis contain the anchor box
93 |                 coordinates, which are needed during inference. Important: Boxes that
94 |                 you want the cost function to ignore need to have a one-hot
95 |                 class vector of all zeros.
96 |             y_pred (Keras tensor): The model prediction. The shape is identical
97 |                 to that of `y_true`, i.e. `(batch_size, #boxes, #classes + 12)`.
98 |                 The last axis must contain entries in the format
99 |                 `[classes one-hot encoded, 4 predicted box coordinate offsets, 8 arbitrary entries]`.
100 |         Returns:
101 |             A scalar, the total multitask loss for classification and localization.
102 |         '''
103 |         neg_pos_ratio = tf.constant(self.neg_pos_ratio)  # local tensors, so repeated calls don't re-wrap the attributes
104 |         n_neg_min = tf.constant(self.n_neg_min)
105 |         alpha = tf.constant(self.alpha)
106 | 
107 |         # Output dtype: tf.int32
108 |         batch_size = tf.shape(y_pred)[0]
109 |         # Output dtype: tf.int32, note that `n_boxes` in this context denotes the total number of boxes per image,
110 |         # not the number of boxes per cell
111 |         n_boxes = tf.shape(y_pred)[1]
112 | 
113 |         # 1: Compute the losses for class and box predictions for every box
114 | 
115 |         # Output shape: (batch_size, n_boxes)
116 |         classification_loss = tf.to_float(
117 |             self.log_loss(
118 |                 y_true[:, :, :-12],
119 |                 y_pred[:, :, :-12])
120 |         )
121 |         # Output shape: (batch_size, n_boxes)
122 |         localization_loss = tf.to_float(
123 |             self.smooth_L1_loss(
124 |                 y_true[:, :, -12:-8],
125 |                 y_pred[:, :, -12:-8])
126 |         )
127 | 
128 |         # 2: Compute the classification losses for the positive and negative targets
129 | 
130 |         # Create masks for the positive and negative ground truth classes
131 |         # Tensor of shape (batch_size, n_boxes)
132 |         negatives = y_true[:, :, 0]
133 |         # Tensor of shape (batch_size, n_boxes)
134 |         positives = tf.to_float(tf.reduce_max(y_true[:, :, 1:-12], axis=-1))
135 | 
136 |         # Count the number of positive boxes (classes 1 to n) in y_true across the whole batch
137 |         n_positive = tf.reduce_sum(positives)
138 | 
139 |         # Now mask all negative boxes and sum up the losses for the positive boxes PER batch item
140 |         # (Keras loss functions must output one scalar loss value PER batch item, rather than just
141 |         # one scalar for the entire batch, that's why we're not summing across all axes)
142 |         pos_class_loss = tf.reduce_sum(classification_loss * positives, axis=-1)  # Tensor of shape (batch_size,)
143 | 
144 |         # Compute the classification loss for the negative default boxes (if there are any)
145 | 
146 |         # First, compute the classification loss for all negative boxes
147 |         neg_class_loss_all = classification_loss * negatives  # Tensor of shape (batch_size, n_boxes)
148 |         # The number of non-zero loss entries in `neg_class_loss_all`
149 |         n_neg_losses = tf.count_nonzero(neg_class_loss_all, dtype=tf.int32)
150 |         # What's the point of `n_neg_losses`?
151 |         # For the next step, which will be to compute which negative boxes enter the classification loss,
152 |         # we don't just want to know how many negative ground truth boxes there are,
153 |         # but for how many of those there actually is a positive (i.e. non-zero) loss.
154 |         # This is necessary because `tf.nn.top_k()` in the function below will pick the top k boxes with
155 |         # the highest losses no matter what, even if it receives a vector where all losses are zero.
156 |         # In the unlikely event that all negative classification losses ARE actually zero though,
157 |         # this behavior might lead to `tf.nn.top_k()` returning the indices of positive boxes,
158 |         # leading to an incorrect negative classification loss computation, and hence an incorrect overall loss computation.
159 |         # We therefore need to make sure that `n_negative_keep`, which assumes the role of the `k` argument in `tf.nn.top_k()`,
160 |         # is at most the number of negative boxes for which there is a positive classification loss.
161 | 
162 |         # Compute the number of negative examples we want to account for in the loss
163 |         # We'll keep at most `self.neg_pos_ratio` times the number of positives in `y_true`, but at least `self.n_neg_min` (unless `n_neg_losses` is smaller)
164 |         n_negative_keep = tf.minimum(tf.maximum(neg_pos_ratio * tf.to_int32(n_positive), n_neg_min), n_neg_losses)
165 | 
166 |         # In the unlikely case when either (1) there are no negative ground truth boxes at all
167 |         # or (2) the classification loss for all negative boxes is zero, return zero as the `neg_class_loss`
168 |         def f1():
169 |             return tf.zeros([batch_size])
170 |         # Otherwise compute the negative loss
171 |         def f2():
172 |             # Now we'll identify the top-k (where k == `n_negative_keep`) boxes with the highest confidence loss that
173 |             # belong to the background class in the ground truth data. Note that this doesn't necessarily mean that the model
174 |             # predicted the wrong class for those boxes, it just means that the loss for those boxes is the highest.
175 | 
176 |             # To do this, we reshape `neg_class_loss_all` to 1D...
177 |             neg_class_loss_all_1D = tf.reshape(neg_class_loss_all, [-1])  # Tensor of shape (batch_size * n_boxes,)
178 |             # ...and then we get the indices for the `n_negative_keep` boxes with the highest loss out of those...
179 |             values, indices = tf.nn.top_k(neg_class_loss_all_1D, n_negative_keep, False)  # We don't need sorting
180 |             # ...and with these indices we'll create a mask...
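            # (Illustrative: with 6 boxes in total and indices [4, 1], the
            # scatter_nd call below yields the mask [0, 1, 0, 0, 1, 0].)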
181 |             # Tensor of shape (batch_size * n_boxes,)
182 |             negatives_keep = tf.scatter_nd(
183 |                 indices=tf.expand_dims(indices, axis=1),
184 |                 updates=tf.ones_like(indices, dtype=tf.int32),
185 |                 shape=tf.shape(neg_class_loss_all_1D))
186 |             # Tensor of shape (batch_size, n_boxes)
187 |             negatives_keep = tf.to_float(tf.reshape(negatives_keep, [batch_size, n_boxes]))
188 |             # ...and use it to keep only those boxes and mask all other classification losses
189 |             # Tensor of shape (batch_size,)
190 |             neg_class_loss = tf.reduce_sum(classification_loss * negatives_keep, axis=-1)
191 |             return neg_class_loss
192 | 
193 |         neg_class_loss = tf.cond(tf.equal(n_neg_losses, tf.constant(0)), f1, f2)
194 | 
195 |         # Tensor of shape (batch_size,)
196 |         class_loss = pos_class_loss + neg_class_loss
197 | 
198 |         # 3: Compute the localization loss for the positive targets
199 |         # We don't penalize localization loss for negative predicted boxes (obviously: there are no ground truth boxes they would correspond to)
200 | 
201 |         # Tensor of shape (batch_size,)
202 |         loc_loss = tf.reduce_sum(localization_loss * positives, axis=-1)
203 | 
204 |         # 4: Compute the total loss
205 | 
206 |         total_loss = (class_loss + alpha * loc_loss) / tf.maximum(1.0, n_positive)  # In case `n_positive == 0`
207 |         # Keras has the annoying habit of dividing the loss by the batch size, which is unhelpful in our case
208 |         # because the relevant criterion to average our loss over is the number of positive boxes in the batch
209 |         # (by which we're dividing in the line above), not the batch size. So in order to revert Keras' averaging
210 |         # over the batch size, we'll have to multiply by it.
211 |         total_loss *= tf.to_float(batch_size)
212 | 
213 |         return tf.reduce_mean(total_loss, axis=-1)
214 | 
--------------------------------------------------------------------------------
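For orientation, a minimal, hedged usage sketch of the `Loss` class above under the TF1-style API this file targets. The shapes and values are illustrative placeholders, not taken from any config in this repository:

```python
import numpy as np
import tensorflow as tf

from eagle.brain.ssd.loss import Loss

# One image, three boxes, two classes (background + one object class).
# Per box, the last 12 entries are [4 gt offsets + 8 anchor entries];
# compute_loss() ignores the trailing 8.
n_classes, n_boxes = 2, 3
y_true = np.zeros((1, n_boxes, n_classes + 12), dtype=np.float32)
y_true[0, 0, 1] = 1.0   # box 0 is a positive of class 1
y_true[0, 1:, 0] = 1.0  # the remaining boxes are background
y_pred = np.full_like(y_true, 0.5)  # uninformative scores and offsets

loss_op = Loss(neg_pos_ratio=3).compute_loss(
    tf.constant(y_true), tf.constant(y_pred))
with tf.Session() as sess:
    print(sess.run(loss_op))  # a single scalar multitask loss value
```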