├── README.md
├── cython_util
│   ├── bbox_overlap.pyx
│   ├── bbox_transform_inv.pyx
│   ├── bool_anchors_inside_image.pyx
│   ├── nms.pyx
│   ├── remove_extraboxes.pyx
│   ├── setup.py
│   └── setup.sh
├── pretrain
│   ├── base_vgg16.py
│   ├── vgg16.py
│   └── vgg16_vehicle.py
├── rcnn
│   ├── base_rpn.py
│   ├── proposal_layer.py
│   ├── rcnn.py
│   └── rcnn_vehicle.py
├── rpn
│   ├── __init__.py
│   ├── rpn.py
│   └── rpn_vehicle.py
└── util
    ├── __init__.py
    ├── bbox_transform.py
    ├── data_util.py
    ├── generate_anchors.py
    ├── input_kitti.py
    ├── model.py
    ├── network_util.py
    ├── parse_xml.py
    └── train.py

/README.md:
--------------------------------------------------------------------------------
 1 | # Fast_RCNN_tensorflow
 2 | Implementation of Faster R-CNN in TensorFlow (in development)
 3 | 
 4 | ・Complete
 5 | Load images from the KITTI object detection dataset
 6 | Preprocessing for network input
 7 | RPN (Region Proposal Network)
 8 | Proposal layer (converts RPN outputs to ROIs)
 9 | 
10 | ・ToDo
11 | Trainer for RCNN
12 | 
13 | ```
14 | # Prepare the KITTI dataset
15 | http://www.cvlibs.net/datasets/kitti/eval_object.php
16 | 
17 | # Compile the Cython files
18 | cd cython_util
19 | ./setup.sh
20 | 
21 | # Train the RPN
22 | cd rpn
23 | python rpn.py
24 | ```
25 | 
26 | # ROI Pooling
27 | The ROI pooling layer comes from this repository:
28 | https://github.com/deepsense-io/roi-pooling
--------------------------------------------------------------------------------
/cython_util/bbox_overlap.pyx:
--------------------------------------------------------------------------------
  1 | cimport cython
  2 | import numpy as np
  3 | cimport numpy as np
  4 | 
  5 | from libc.math cimport log
  6 | 
  7 | DTYPE = np.float
  8 | ctypedef np.float_t DTYPE_t
  9 | 
 10 | DTYPE_int = np.int
 11 | ctypedef np.int_t DTYPE_int_t
 12 | 
 13 | def bbox_overlaps(
 14 |         np.ndarray[DTYPE_t, ndim=4] anchors,
 15 |         np.ndarray[DTYPE_int_t, ndim=3] is_inside,
 16 |         object gt_boxes):
 17 |     """
 18 |     Parameters
 19 |     ----------
 20 |     anchors: (Batch_Size, K, A, 4) ndarray of float
 21 |     is_inside: (Batch_Size, K, A) ndarray of int
 22 |     gt_boxes: (Batch, G, 4) ndarray of float
 23 |     Returns
 24 |     -------
 25 |     """
 26 |     cdef unsigned int Batch_Size = anchors.shape[0]
 27 |     cdef unsigned int K = anchors.shape[1]
 28 |     cdef unsigned int A = anchors.shape[2]
 29 |     cdef unsigned int G
 30 |     cdef np.ndarray[DTYPE_t, ndim=4] overlaps
 31 |     cdef np.ndarray[DTYPE_int_t, ndim=3] true_index = np.zeros((Batch_Size, K, A), dtype=DTYPE_int)
 32 |     cdef np.ndarray[DTYPE_int_t, ndim=3] false_index = np.zeros((Batch_Size, K, A), dtype=DTYPE_int)
 33 |     cdef DTYPE_t iw, ih, box_area
 34 |     cdef DTYPE_t ua
 35 |     cdef DTYPE_t max_overlap
 36 |     cdef DTYPE_t ex_width, ex_height, ex_center_x, ex_center_y, gt_width, gt_height, gt_center_x, gt_center_y
 37 |     cdef unsigned int k, a, b, g, max_k, max_a, max_g
 38 | 
 39 |     max_g = 0
 40 |     for b in range(Batch_Size):
 41 |         if max_g < gt_boxes[b].shape[0]:
 42 |             max_g = gt_boxes[b].shape[0]
 43 | 
 44 |     overlaps = np.zeros((Batch_Size, K, A, max_g))
 45 | 
 46 |     for b in range(Batch_Size):
 47 |         G = gt_boxes[b].shape[0]
 48 |         for g in range(G):
 49 |             box_area = (
 50 |                 (gt_boxes[b][g, 2] - gt_boxes[b][g, 0] + 1) *
 51 |                 (gt_boxes[b][g, 3] - gt_boxes[b][g, 1] + 1)
 52 |             )
 53 |             max_overlap = 0
 54 |             max_k = 0
 55 |             max_a = 0
 56 |             for k in range(K):
 57 |                 for a in range(A):
 58 |                     if is_inside[b, k, a] == 1:
 59 |                         iw = (
 60 |                             min(anchors[b, k, a, 2], gt_boxes[b][g, 2]) -
 61 |                             max(anchors[b, k, a, 0], gt_boxes[b][g, 0]) + 1
 62 |                         )
 63 |                         if iw > 0:
 64 |                             ih = (
 65 |                                 min(anchors[b, k, a, 3], gt_boxes[b][g, 3]) -
 66 |                                 max(anchors[b, k, a, 1], gt_boxes[b][g, 1]) + 1
 67 |                             )
 68 |                             if ih > 0:
 69 |                                 ua = float(
 70 |                                     (anchors[b, k, a, 2] - anchors[b, k, a, 0] + 1) *
 71 |                                     (anchors[b, k, a, 3] - anchors[b, k, a, 1] + 1) +
 72 |                                     box_area - iw * ih
 73 |                                 )
 74 |                                 overlaps[b, k, a, g] = iw * ih / ua
 75 |                                 if max_overlap < ((iw * ih / ua)):
 76 |                                     max_overlap = iw * ih / ua
 77 |                                     max_k = k
 78 |                                     max_a = a
 79 |             true_index[b, max_k, max_a] = 1
 80 | 
 81 | 
 82 |         for k in range(K):
 83 |             for a in range(A):
 84 |                 if is_inside[b, k, a] == 1:
 85 |                     max_overlap = 0
 86 |                     max_g = 0
 87 |                     for g in range(G):
 88 |                         if overlaps[b, k, a, g] > 0:
 89 |                             if max_overlap < (overlaps[b, k, a, g]):
 90 |                                 max_overlap = overlaps[b, k, a, g]
 91 |                                 max_g = g
 92 |                     if max_overlap > 0.7:
 93 |                         true_index[b, k, a] = 1
 94 |                     else:
 95 |                         if max_overlap <= 0.3:
 96 |                             false_index[b, k, a] = 1
 97 | 
 98 |                     if true_index[b, k, a] == 1:
 99 |                         ex_width = anchors[b, k, a, 2] - anchors[b, k, a, 0] + 1
100 |                         ex_height = anchors[b, k, a, 3] - anchors[b, k, a, 1] + 1
101 |                         ex_center_x = anchors[b, k, a, 0] + ex_width / 2.0
102 |                         ex_center_y = anchors[b, k, a, 1] + ex_height / 2.0
103 |                         gt_width = gt_boxes[b][max_g, 2] - gt_boxes[b][max_g, 0] + 1
104 |                         gt_height = gt_boxes[b][max_g, 3] - gt_boxes[b][max_g, 1] + 1
105 |                         gt_center_x = gt_boxes[b][max_g, 0] + gt_width / 2.0
106 |                         gt_center_y = gt_boxes[b][max_g, 1] + gt_height / 2.0
107 | 
108 |                         anchors[b, k, a, 0] = (gt_center_x - ex_center_x) / (ex_width)
109 |                         anchors[b, k, a, 1] = (gt_center_y - ex_center_y) / (ex_height)
110 |                         anchors[b, k, a, 2] = log(gt_width / (ex_width))
111 |                         anchors[b, k, a, 3] = log(gt_height / (ex_height))
112 |     return anchors, true_index, false_index
113 | 
--------------------------------------------------------------------------------
/cython_util/bbox_transform_inv.pyx:
--------------------------------------------------------------------------------
 1 | cimport cython
 2 | import numpy as np
 3 | cimport numpy as np
 4 | 
 5 | from libc.math cimport exp
 6 | 
 7 | 
 8 | DTYPE = np.float
 9 | ctypedef np.float_t DTYPE_t
10 | 
11 | def bbox_transform_inv_clip(
12 |         np.ndarray[DTYPE_t, ndim=3] anchors,
13 |         np.ndarray[DTYPE_t, ndim=3] rpn_bbox,
14 |         unsigned int image_width,
15 |         unsigned int image_height):
16 |     """
17 |     Parameters
18 |     ----------
19 |     anchors: (Batch_Size, K*A, 4) ndarray of float
20 |     rpn_bbox: (Batch_Size, K*A, 4) ndarray of float
21 |     Returns: anchors, decoded and clipped to the image (modified in place)
22 |     """
23 |     cdef unsigned int B = anchors.shape[0]
24 |     cdef unsigned int KA = anchors.shape[1]
25 |     cdef DTYPE_t ex_width, ex_height, ex_center_x, ex_center_y, gt_width, gt_height, gt_center_x, gt_center_y
26 |     cdef unsigned int ka, b
27 | 
28 |     for b in range(B):
29 |         for ka in range(KA):
30 |             ex_width = anchors[b, ka, 2] - anchors[b, ka, 0] + 1
31 |             ex_height = anchors[b, ka, 3] - anchors[b, ka, 1] + 1
32 |             ex_center_x = anchors[b, ka, 0] + ex_width / 2.0
33 |             ex_center_y = anchors[b, ka, 1] + ex_height / 2.0
34 | 
35 |             pred_center_x = rpn_bbox[b, ka, 0] * ex_width + ex_center_x
36 |             pred_center_y = rpn_bbox[b, ka, 1] * ex_height + ex_center_y
37 |             pred_width = exp(rpn_bbox[b, ka, 2]) * ex_width
38 |             pred_height = exp(rpn_bbox[b, ka, 3]) * ex_height
39 | 
40 |             anchors[b, ka, 0] = max(pred_center_x - pred_width / 2.0, 0)
41 |             anchors[b, ka, 1] = max(pred_center_y - pred_height / 2.0, 0)
42 |             anchors[b, ka, 2] = min(pred_center_x + pred_width / 2.0, image_width-1)
43 |             anchors[b, ka, 3] = min(pred_center_y + pred_height / 2.0, image_height-1)
44 | 
45 | 
46 |     return anchors
47 | 
--------------------------------------------------------------------------------
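Note: the two kernels above form the encode/decode pair for anchor regression. `bbox_overlaps` writes the targets (dx, dy, log dw, log dh) into the positive anchors, and `bbox_transform_inv_clip` applies predicted deltas to anchors and clips the result to the image. A vectorized NumPy sketch of the same pair is handy for spot-checking the Cython loops on small inputs; the helper names below are illustrative only (they are not part of this repository) and assume (x1, y1, x2, y2) boxes with the same "+1" width/height convention as the kernels above.

```
import numpy as np

def encode_boxes(anchors, gt):
    # Mirrors the target encoding at the end of bbox_overlaps:
    # (dx, dy, log(dw), log(dh)) of each ground-truth box relative to its anchor.
    aw = anchors[:, 2] - anchors[:, 0] + 1
    ah = anchors[:, 3] - anchors[:, 1] + 1
    ax = anchors[:, 0] + aw / 2.0
    ay = anchors[:, 1] + ah / 2.0
    gw = gt[:, 2] - gt[:, 0] + 1
    gh = gt[:, 3] - gt[:, 1] + 1
    gx = gt[:, 0] + gw / 2.0
    gy = gt[:, 1] + gh / 2.0
    return np.stack([(gx - ax) / aw, (gy - ay) / ah,
                     np.log(gw / aw), np.log(gh / ah)], axis=1)

def decode_boxes(anchors, deltas, image_width, image_height):
    # Mirrors bbox_transform_inv_clip for a single image: apply the deltas,
    # convert back to corner form, and clip to the image bounds.
    aw = anchors[:, 2] - anchors[:, 0] + 1
    ah = anchors[:, 3] - anchors[:, 1] + 1
    ax = anchors[:, 0] + aw / 2.0
    ay = anchors[:, 1] + ah / 2.0
    cx = deltas[:, 0] * aw + ax
    cy = deltas[:, 1] * ah + ay
    w = np.exp(deltas[:, 2]) * aw
    h = np.exp(deltas[:, 3]) * ah
    boxes = np.stack([cx - w / 2.0, cy - h / 2.0,
                      cx + w / 2.0, cy + h / 2.0], axis=1)
    boxes[:, 0::2] = np.clip(boxes[:, 0::2], 0, image_width - 1)
    boxes[:, 1::2] = np.clip(boxes[:, 1::2], 0, image_height - 1)
    return boxes
```

For example, `decode_boxes(a, encode_boxes(a, g), W, H)` should reproduce `g` up to the "+1" corner convention and clipping, which gives a quick regression test against the Cython versions.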
/cython_util/bool_anchors_inside_image.pyx: -------------------------------------------------------------------------------- 1 | cimport cython 2 | import numpy as np 3 | cimport numpy as np 4 | 5 | DTYPE = np.float 6 | ctypedef np.float_t DTYPE_t 7 | 8 | DTYPE_int = np.int 9 | ctypedef np.int_t DTYPE_int_t 10 | 11 | def batch_inside_image( 12 | np.ndarray[DTYPE_t, ndim=4] boxes, 13 | unsigned int width, 14 | unsigned int height): 15 | """ 16 | Parameters 17 | ---------- 18 | boxes: (B, K, A, 4) ndarray of float 19 | width: width of input images 20 | height: height of input images 21 | Returns 22 | ------- 23 | is_inside: (B, N, K) ndarray of overlap between boxes and query_boxes 24 | """ 25 | cdef unsigned int B = boxes.shape[0] 26 | cdef unsigned int K = boxes.shape[1] 27 | cdef unsigned int A = boxes.shape[2] 28 | cdef np.ndarray[DTYPE_int_t, ndim=3] is_inside = np.zeros((B, K, A), dtype=DTYPE_int) 29 | cdef unsigned int k, a, b 30 | for b in range(B): 31 | for k in range(K): 32 | for a in range(A): 33 | if boxes[b, k, a, 0] >= 0: 34 | if boxes[b, k, a, 1] >= 0: 35 | if boxes[b, k, a, 2] < width: 36 | if boxes[b, k, a, 3] < height: 37 | is_inside[b, k, a] = 1 38 | return is_inside 39 | 40 | def inside_image( 41 | np.ndarray[DTYPE_t, ndim=3] boxes, 42 | unsigned int width, 43 | unsigned int height): 44 | """ 45 | Parameters 46 | ---------- 47 | boxes: (K, A, 4) ndarray of float 48 | width: width of input images 49 | height: height of input images 50 | Returns 51 | ------- 52 | is_inside: (N, K) ndarray of overlap between boxes and query_boxes 53 | """ 54 | cdef unsigned int K = boxes.shape[0] 55 | cdef unsigned int A = boxes.shape[1] 56 | cdef np.ndarray[DTYPE_int_t, ndim=2] is_inside = np.zeros((K, A), dtype=DTYPE_int) 57 | cdef unsigned int k, a 58 | for k in range(K): 59 | for a in range(A): 60 | if boxes[k, a, 0] >= 0: 61 | if boxes[k, a, 1] >= 0: 62 | if boxes[k, a, 2] < width: 63 | if boxes[k, a, 3] < height: 64 | is_inside[k, a] = 1 65 | return is_inside 66 | -------------------------------------------------------------------------------- /cython_util/nms.pyx: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick 6 | # -------------------------------------------------------- 7 | cimport cython 8 | import numpy as np 9 | cimport numpy as np 10 | 11 | cdef inline np.float32_t max(np.float32_t a, np.float32_t b): 12 | return a if a >= b else b 13 | 14 | cdef inline np.float32_t min(np.float32_t a, np.float32_t b): 15 | return a if a <= b else b 16 | 17 | cdef np.ndarray[np.int_t, ndim=1] cpu_nms(np.ndarray[np.float32_t, ndim=2] dets, np.float thresh): 18 | cdef np.ndarray[np.float32_t, ndim=1] x1 = dets[:, 0] 19 | cdef np.ndarray[np.float32_t, ndim=1] y1 = dets[:, 1] 20 | cdef np.ndarray[np.float32_t, ndim=1] x2 = dets[:, 2] 21 | cdef np.ndarray[np.float32_t, ndim=1] y2 = dets[:, 3] 22 | cdef np.ndarray[np.float32_t, ndim=1] scores = dets[:, 4] 23 | 24 | cdef np.ndarray[np.float32_t, ndim=1] areas = (x2 - x1 + 1) * (y2 - y1 + 1) 25 | cdef np.ndarray[np.int_t, ndim=1] order = scores.argsort()[::-1] 26 | 27 | cdef int ndets = dets.shape[0] 28 | cdef np.ndarray[np.int_t, ndim=1] suppressed = \ 29 | np.zeros((ndets), dtype=np.int) 30 | 31 | # nominal indices 32 | cdef int _i, _j 33 | # sorted indices 34 | cdef int i, j 35 | # temp variables for box i's 
(the box currently under consideration) 36 | cdef np.float32_t ix1, iy1, ix2, iy2, iarea 37 | # variables for computing overlap with box j (lower scoring box) 38 | cdef np.float32_t xx1, yy1, xx2, yy2 39 | cdef np.float32_t w, h 40 | cdef np.float32_t inter, ovr 41 | 42 | keep = [] 43 | for _i in range(ndets): 44 | i = order[_i] 45 | if suppressed[i] == 1: 46 | continue 47 | keep.append(i) 48 | ix1 = x1[i] 49 | iy1 = y1[i] 50 | ix2 = x2[i] 51 | iy2 = y2[i] 52 | iarea = areas[i] 53 | for _j in range(_i + 1, ndets): 54 | j = order[_j] 55 | if suppressed[j] == 1: 56 | continue 57 | xx1 = max(ix1, x1[j]) 58 | yy1 = max(iy1, y1[j]) 59 | xx2 = min(ix2, x2[j]) 60 | yy2 = min(iy2, y2[j]) 61 | w = max(0.0, xx2 - xx1 + 1) 62 | h = max(0.0, yy2 - yy1 + 1) 63 | inter = w * h 64 | ovr = inter / (iarea + areas[j] - inter) 65 | if ovr >= thresh: 66 | suppressed[j] = 1 67 | 68 | return keep 69 | -------------------------------------------------------------------------------- /cython_util/remove_extraboxes.pyx: -------------------------------------------------------------------------------- 1 | cimport cython 2 | import numpy as np 3 | cimport numpy as np 4 | 5 | DTYPE_int = np.int 6 | ctypedef np.int_t DTYPE_int_t 7 | 8 | def remove_extraboxes( 9 | np.ndarray[DTYPE_int_t, ndim=1] array1, 10 | np.ndarray[DTYPE_int_t, ndim=1] array2, 11 | np.ndarray[DTYPE_int_t, ndim=1] select, 12 | np.ndarray[DTYPE_int_t, ndim=1] batch): 13 | """ 14 | Parameters 15 | ---------- 16 | array1: (A) ndarray of int 17 | array2: (A) ndarray of int 18 | select: (B) ndarray of int 19 | Returns 20 | ------- 21 | extract_array1 : (64) ndarray of index of remove boxes 22 | extract_array2 : (64) ndarray of index of remove boxes 23 | """ 24 | cdef unsigned int remove_size = select.shape[0] 25 | cdef np.ndarray[DTYPE_int_t, ndim=1] extract_array1 = np.zeros((remove_size), dtype=DTYPE_int) 26 | cdef np.ndarray[DTYPE_int_t, ndim=1] extract_array2 = np.zeros((remove_size), dtype=DTYPE_int) 27 | cdef unsigned int rs 28 | 29 | for rs in range(remove_size): 30 | extract_array1[rs] = array1[select[rs]] 31 | extract_array2[rs] = array2[select[rs]] 32 | return batch, extract_array1, extract_array2 33 | -------------------------------------------------------------------------------- /cython_util/setup.py: -------------------------------------------------------------------------------- 1 | #python setup.py build_ext --inplace 2 | # -------------------------------------------------------- 3 | # Fast R-CNN 4 | # Copyright (c) 2015 Microsoft 5 | # Licensed under The MIT License [see LICENSE for details] 6 | # Written by Ross Girshick 7 | # -------------------------------------------------------- 8 | 9 | import os 10 | from os.path import join as pjoin 11 | import numpy as np 12 | from distutils.core import setup 13 | from distutils.extension import Extension 14 | from Cython.Distutils import build_ext 15 | 16 | def find_in_path(name, path): 17 | "Find a file in a search path" 18 | for dir in path.split(os.pathsep): 19 | binpath = pjoin(dir, name) 20 | if os.path.exists(binpath): 21 | return os.path.abspath(binpath) 22 | return None 23 | 24 | def locate_cuda(): 25 | """Locate the CUDA environment on the system 26 | Returns a dict with keys 'home', 'nvcc', 'include', and 'lib64' 27 | and values giving the absolute path to each directory. 28 | Starts by looking for the CUDAHOME env variable. If not found, everything 29 | is based on finding 'nvcc' in the PATH. 
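    Returns None if the CUDA environment cannot be located.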
30 | """ 31 | 32 | # first check if the CUDAHOME env variable is in use 33 | if 'CUDAHOME' in os.environ: 34 | home = os.environ['CUDAHOME'] 35 | nvcc = pjoin(home, 'bin', 'nvcc') 36 | else: 37 | # otherwise, search the PATH for NVCC 38 | default_path = pjoin(os.sep, 'usr', 'local', 'cuda', 'bin') 39 | nvcc = find_in_path('nvcc', os.environ['PATH'] + os.pathsep + default_path) 40 | if nvcc is None: 41 | return None; 42 | home = os.path.dirname(os.path.dirname(nvcc)) 43 | 44 | cudaconfig = {'home':home, 'nvcc':nvcc, 45 | 'include': pjoin(home, 'include'), 46 | 'lib64': pjoin(home, 'lib64')} 47 | for k, v in cudaconfig.iteritems(): 48 | if not os.path.exists(v): 49 | return None; 50 | 51 | return cudaconfig 52 | 53 | CUDA = locate_cuda() 54 | 55 | # Obtain the numpy include directory. This logic works across numpy versions. 56 | try: 57 | numpy_include = np.get_include() 58 | except AttributeError: 59 | numpy_include = np.get_numpy_include() 60 | 61 | def customize_compiler_for_nvcc(self): 62 | """inject deep into distutils to customize how the dispatch 63 | to gcc/nvcc works. 64 | If you subclass UnixCCompiler, it's not trivial to get your subclass 65 | injected in, and still have the right customizations (i.e. 66 | distutils.sysconfig.customize_compiler) run on it. So instead of going 67 | the OO route, I have this. Note, it's kindof like a wierd functional 68 | subclassing going on.""" 69 | 70 | # tell the compiler it can processes .cu 71 | self.src_extensions.append('.cu') 72 | 73 | # save references to the default compiler_so and _comple methods 74 | default_compiler_so = self.compiler_so 75 | super = self._compile 76 | 77 | # now redefine the _compile method. This gets executed for each 78 | # object but distutils doesn't have the ability to change compilers 79 | # based on source extension: we add it. 
80 | def _compile(obj, src, ext, cc_args, extra_postargs, pp_opts): 81 | print extra_postargs 82 | if os.path.splitext(src)[1] == '.cu': 83 | # use the cuda for .cu files 84 | self.set_executable('compiler_so', CUDA['nvcc']) 85 | # use only a subset of the extra_postargs, which are 1-1 translated 86 | # from the extra_compile_args in the Extension class 87 | postargs = extra_postargs['nvcc'] 88 | else: 89 | postargs = extra_postargs['gcc'] 90 | 91 | super(obj, src, ext, cc_args, postargs, pp_opts) 92 | # reset the default compiler_so, which we might have changed for cuda 93 | self.compiler_so = default_compiler_so 94 | 95 | # inject our redefined _compile method into the class 96 | self._compile = _compile 97 | 98 | 99 | # run the customize_compiler 100 | class custom_build_ext(build_ext): 101 | def build_extensions(self): 102 | customize_compiler_for_nvcc(self.compiler) 103 | build_ext.build_extensions(self) 104 | 105 | ext_modules = [ 106 | Extension( 107 | "nms", 108 | ["nms.pyx"], 109 | extra_compile_args={'gcc': ["-Wno-cpp", "-Wno-unused-function"]}, 110 | include_dirs = [numpy_include] 111 | ), 112 | Extension( 113 | "bbox_transform_inv", 114 | ["bbox_transform_inv.pyx"], 115 | extra_compile_args={'gcc': ["-Wno-cpp", "-Wno-unused-function"]}, 116 | include_dirs = [numpy_include] 117 | ), 118 | Extension( 119 | "bbox_overlap", 120 | ["bbox_overlap.pyx"], 121 | extra_compile_args={'gcc': ["-Wno-cpp", "-Wno-unused-function"]}, 122 | include_dirs = [numpy_include] 123 | ), 124 | Extension( 125 | "bool_anchors_inside_image", 126 | ["bool_anchors_inside_image.pyx"], 127 | extra_compile_args={'gcc': ["-Wno-cpp", "-Wno-unused-function"]}, 128 | include_dirs = [numpy_include] 129 | ), 130 | Extension( 131 | "remove_extraboxes", 132 | ["remove_extraboxes.pyx"], 133 | extra_compile_args={'gcc': ["-Wno-cpp", "-Wno-unused-function"]}, 134 | include_dirs = [numpy_include] 135 | ), 136 | ] 137 | 138 | # if CUDA: 139 | # ext_modules.append( 140 | # Extension('nms.gpu_nms', 141 | # ['nms/nms_kernel.cu', 'nms/gpu_nms.pyx'], 142 | # library_dirs=[CUDA['lib64']], 143 | # libraries=['cudart'], 144 | # language='c++', 145 | # runtime_library_dirs=[CUDA['lib64']], 146 | # # this syntax is specific to this build system 147 | # # we're only going to use certain compiler args with nvcc and not with gcc 148 | # # the implementation of this trick is in customize_compiler() below 149 | # extra_compile_args={'gcc': ["-Wno-unused-function"], 150 | # 'nvcc': ['-arch=sm_35', 151 | # '--ptxas-options=-v', 152 | # '-c', 153 | # '--compiler-options', 154 | # "'-fPIC'"]}, 155 | # include_dirs = [numpy_include, CUDA['include']] 156 | # ) 157 | # ) 158 | 159 | setup( 160 | ext_modules=ext_modules, 161 | cmdclass={'build_ext': custom_build_ext}, 162 | ) 163 | -------------------------------------------------------------------------------- /cython_util/setup.sh: -------------------------------------------------------------------------------- 1 | python setup.py build_ext --inplace 2 | 3 | -------------------------------------------------------------------------------- /pretrain/base_vgg16.py: -------------------------------------------------------------------------------- 1 | import inspect 2 | import os 3 | 4 | import numpy as np 5 | import tensorflow as tf 6 | import time 7 | 8 | VGG_MEAN = [103.939, 116.779, 123.68] 9 | 10 | class Vgg16: 11 | def __init__(self, vgg16_npy_path=None): 12 | if vgg16_npy_path is None: 13 | path = inspect.getfile(Vgg16) 14 | path = os.path.abspath(os.path.join(path, os.pardir)) 15 | path = 
os.path.join(path, "vgg16.npy") 16 | vgg16_npy_path = path 17 | print(path) 18 | 19 | self.data_dict = np.load(vgg16_npy_path, encoding='latin1').item() 20 | print("npy file loaded") 21 | 22 | def build_model(self, bgr): 23 | """ 24 | load variable from npy to build the VGG 25 | :param rgb: rgb image [batch, height, width, 3] values scaled [0, 1] 26 | """ 27 | start_time = time.time() 28 | print("build model started") 29 | # rgb_scaled = rgb * 1.0 30 | 31 | # Convert RGB to BGR 32 | # red, green, blue = tf.split(axis=3, num_or_size_splits=3, value=rgb_scaled) 33 | # bgr = tf.concat(axis=3, values=[ 34 | # blue, 35 | # green, 36 | # red, 37 | # ]) 38 | 39 | self.conv1_1 = self.conv_layer(bgr, "conv1_1") 40 | self.conv1_2 = self.conv_layer(self.conv1_1, "conv1_2") 41 | self.pool1 = self.max_pool(self.conv1_2, 'pool1') 42 | 43 | self.conv2_1 = self.conv_layer(self.pool1, "conv2_1") 44 | self.conv2_2 = self.conv_layer(self.conv2_1, "conv2_2") 45 | self.pool2 = self.max_pool(self.conv2_2, 'pool2') 46 | 47 | self.conv3_1 = self.conv_layer(self.pool2, "conv3_1") 48 | self.conv3_2 = self.conv_layer(self.conv3_1, "conv3_2") 49 | self.conv3_3 = self.conv_layer(self.conv3_2, "conv3_3") 50 | self.pool3 = self.max_pool(self.conv3_3, 'pool3') 51 | 52 | self.conv4_1 = self.conv_layer(self.pool3, "conv4_1") 53 | self.conv4_2 = self.conv_layer(self.conv4_1, "conv4_2") 54 | self.conv4_3 = self.conv_layer(self.conv4_2, "conv4_3") 55 | self.pool4 = self.max_pool(self.conv4_3, 'pool4') 56 | 57 | self.conv5_1 = self.conv_layer(self.pool4, "conv5_1") 58 | self.conv5_2 = self.conv_layer(self.conv5_1, "conv5_2") 59 | self.conv5_3 = self.conv_layer(self.conv5_2, "conv5_3") 60 | 61 | self.data_dict = None 62 | print(("build model finished: %ds" % (time.time() - start_time))) 63 | 64 | def avg_pool(self, bottom, name): 65 | return tf.nn.avg_pool(bottom, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME', name=name) 66 | 67 | def max_pool(self, bottom, name): 68 | return tf.nn.max_pool(bottom, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME', name=name) 69 | 70 | def conv_layer(self, bottom, name): 71 | with tf.variable_scope(name): 72 | filt = self.get_conv_filter(name) 73 | 74 | conv = tf.nn.conv2d(bottom, filt, [1, 1, 1, 1], padding='SAME') 75 | 76 | conv_biases = self.get_bias(name) 77 | bias = tf.nn.bias_add(conv, conv_biases) 78 | 79 | relu = tf.nn.relu(bias) 80 | return relu 81 | 82 | def fc_layer(self, bottom, name): 83 | with tf.variable_scope(name): 84 | shape = bottom.get_shape().as_list() 85 | dim = 1 86 | for d in shape[1:]: 87 | dim *= d 88 | x = tf.reshape(bottom, [-1, dim]) 89 | 90 | weights = self.get_fc_weight(name) 91 | biases = self.get_bias(name) 92 | 93 | # Fully connected layer. Note that the '+' operation automatically 94 | # broadcasts the biases. 
95 | fc = tf.nn.bias_add(tf.matmul(x, weights), biases) 96 | 97 | return fc 98 | 99 | def get_conv_filter(self, name): 100 | return tf.Variable(self.data_dict[name][0], name="filter") 101 | 102 | def get_bias(self, name): 103 | return tf.Variable(self.data_dict[name][1], name="biases") 104 | 105 | def get_fc_weight(self, name): 106 | return tf.Variable(self.data_dict[name][0], name="weights") 107 | -------------------------------------------------------------------------------- /pretrain/vgg16.py: -------------------------------------------------------------------------------- 1 | import inspect 2 | import os 3 | 4 | import numpy as np 5 | import tensorflow as tf 6 | import time 7 | 8 | VGG_MEAN = [103.939, 116.779, 123.68] 9 | 10 | 11 | class Vgg16: 12 | def __init__(self, vgg16_npy_path=None): 13 | if vgg16_npy_path is None: 14 | path = inspect.getfile(Vgg16) 15 | path = os.path.abspath(os.path.join(path, os.pardir)) 16 | path = os.path.join(path, "vgg16.npy") 17 | vgg16_npy_path = path 18 | print(path) 19 | 20 | self.data_dict = np.load(vgg16_npy_path, encoding='latin1').item() 21 | print("npy file loaded") 22 | 23 | def build_model(self, rgb): 24 | """ 25 | load variable from npy to build the VGG 26 | :param rgb: rgb image [batch, height, width, 3] values scaled [0, 1] 27 | """ 28 | 29 | start_time = time.time() 30 | print("build model started") 31 | rgb_scaled = rgb * 255.0 32 | 33 | # Convert RGB to BGR 34 | red, green, blue = tf.split(axis=3, num_or_size_splits=3, value=rgb_scaled) 35 | assert red.get_shape().as_list()[1:] == [224, 224, 1] 36 | assert green.get_shape().as_list()[1:] == [224, 224, 1] 37 | assert blue.get_shape().as_list()[1:] == [224, 224, 1] 38 | bgr = tf.concat(axis=3, values=[ 39 | blue - VGG_MEAN[0], 40 | green - VGG_MEAN[1], 41 | red - VGG_MEAN[2], 42 | ]) 43 | assert bgr.get_shape().as_list()[1:] == [224, 224, 3] 44 | 45 | self.conv1_1 = self.conv_layer(bgr, "conv1_1") 46 | self.conv1_2 = self.conv_layer(self.conv1_1, "conv1_2") 47 | self.pool1 = self.max_pool(self.conv1_2, 'pool1') 48 | 49 | self.conv2_1 = self.conv_layer(self.pool1, "conv2_1") 50 | self.conv2_2 = self.conv_layer(self.conv2_1, "conv2_2") 51 | self.pool2 = self.max_pool(self.conv2_2, 'pool2') 52 | 53 | self.conv3_1 = self.conv_layer(self.pool2, "conv3_1") 54 | self.conv3_2 = self.conv_layer(self.conv3_1, "conv3_2") 55 | self.conv3_3 = self.conv_layer(self.conv3_2, "conv3_3") 56 | self.pool3 = self.max_pool(self.conv3_3, 'pool3') 57 | 58 | self.conv4_1 = self.conv_layer(self.pool3, "conv4_1") 59 | self.conv4_2 = self.conv_layer(self.conv4_1, "conv4_2") 60 | self.conv4_3 = self.conv_layer(self.conv4_2, "conv4_3") 61 | self.pool4 = self.max_pool(self.conv4_3, 'pool4') 62 | 63 | self.conv5_1 = self.conv_layer(self.pool4, "conv5_1") 64 | self.conv5_2 = self.conv_layer(self.conv5_1, "conv5_2") 65 | self.conv5_3 = self.conv_layer(self.conv5_2, "conv5_3") 66 | self.pool5 = self.max_pool(self.conv5_3, 'pool5') 67 | 68 | self.fc6 = self.fc_layer(self.pool5, "fc6") 69 | assert self.fc6.get_shape().as_list()[1:] == [4096] 70 | self.relu6 = tf.nn.relu(self.fc6) 71 | 72 | self.fc7 = self.fc_layer(self.relu6, "fc7") 73 | self.relu7 = tf.nn.relu(self.fc7) 74 | 75 | self.fc8 = self.fc_layer(self.relu7, "fc8") 76 | 77 | self.prob = tf.nn.softmax(self.fc8, name="prob") 78 | 79 | self.data_dict = None 80 | print(("build model finished: %ds" % (time.time() - start_time))) 81 | 82 | def avg_pool(self, bottom, name): 83 | return tf.nn.avg_pool(bottom, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME', 
name=name) 84 | 85 | def max_pool(self, bottom, name): 86 | return tf.nn.max_pool(bottom, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME', name=name) 87 | 88 | def conv_layer(self, bottom, name): 89 | with tf.variable_scope(name): 90 | filt = self.get_conv_filter(name) 91 | 92 | conv = tf.nn.conv2d(bottom, filt, [1, 1, 1, 1], padding='SAME') 93 | 94 | conv_biases = self.get_bias(name) 95 | bias = tf.nn.bias_add(conv, conv_biases) 96 | 97 | relu = tf.nn.relu(bias) 98 | return relu 99 | 100 | def fc_layer(self, bottom, name): 101 | with tf.variable_scope(name): 102 | shape = bottom.get_shape().as_list() 103 | dim = 1 104 | for d in shape[1:]: 105 | dim *= d 106 | x = tf.reshape(bottom, [-1, dim]) 107 | 108 | weights = self.get_fc_weight(name) 109 | biases = self.get_bias(name) 110 | 111 | # Fully connected layer. Note that the '+' operation automatically 112 | # broadcasts the biases. 113 | fc = tf.nn.bias_add(tf.matmul(x, weights), biases) 114 | 115 | return fc 116 | 117 | def get_conv_filter(self, name): 118 | return tf.constant(self.data_dict[name][0], name="filter") 119 | 120 | def get_bias(self, name): 121 | return tf.constant(self.data_dict[name][1], name="biases") 122 | 123 | def get_fc_weight(self, name): 124 | return tf.constant(self.data_dict[name][0], name="weights") 125 | -------------------------------------------------------------------------------- /pretrain/vgg16_vehicle.py: -------------------------------------------------------------------------------- 1 | import inspect 2 | import os 3 | 4 | import numpy as np 5 | import tensorflow as tf 6 | import time 7 | 8 | VGG_MEAN = [103.939, 116.779, 123.68] 9 | 10 | class Vgg16: 11 | def __init__(self, vgg16_npy_path=None): 12 | if vgg16_npy_path is None: 13 | path = inspect.getfile(Vgg16) 14 | path = os.path.abspath(os.path.join(path, os.pardir)) 15 | path = os.path.join(path, "vgg16.npy") 16 | vgg16_npy_path = path 17 | print(path) 18 | 19 | self.data_dict = np.load(vgg16_npy_path, encoding='latin1').item() 20 | print("npy file loaded") 21 | 22 | def build_model(self, bgr): 23 | """ 24 | load variable from npy to build the VGG 25 | :param rgb: rgb image [batch, height, width, 3] values scaled [0, 1] 26 | """ 27 | start_time = time.time() 28 | print("build model started") 29 | # rgb_scaled = rgb * 1.0 30 | 31 | # Convert RGB to BGR 32 | # red, green, blue = tf.split(axis=3, num_or_size_splits=3, value=rgb_scaled) 33 | # bgr = tf.concat(axis=3, values=[ 34 | # blue, 35 | # green, 36 | # red, 37 | # ]) 38 | 39 | self.conv1_1 = self.conv_layer(bgr, "conv1_1", training=False) 40 | self.conv1_2 = self.conv_layer(self.conv1_1, "conv1_2", training=False) 41 | self.pool1 = self.max_pool(self.conv1_2, 'pool1') 42 | 43 | self.conv2_1 = self.conv_layer(self.pool1, "conv2_1", training=False) 44 | self.conv2_2 = self.conv_layer(self.conv2_1, "conv2_2", training=False) 45 | self.pool2 = self.max_pool(self.conv2_2, 'pool2') 46 | 47 | self.conv3_1 = self.conv_layer(self.pool2, "conv3_1") 48 | self.conv3_2 = self.conv_layer(self.conv3_1, "conv3_2") 49 | self.conv3_3 = self.conv_layer(self.conv3_2, "conv3_3") 50 | self.pool3 = self.max_pool(self.conv3_3, 'pool3') 51 | 52 | self.conv4_1 = self.conv_layer(self.pool3, "conv4_1") 53 | self.conv4_2 = self.conv_layer(self.conv4_1, "conv4_2") 54 | self.conv4_3 = self.conv_layer(self.conv4_2, "conv4_3") 55 | 56 | self.data_dict = None 57 | print(("build model finished: %ds" % (time.time() - start_time))) 58 | 59 | def avg_pool(self, bottom, name): 60 | return tf.nn.avg_pool(bottom, ksize=[1, 2, 2, 
1], strides=[1, 2, 2, 1], padding='SAME', name=name) 61 | 62 | def max_pool(self, bottom, name): 63 | return tf.nn.max_pool(bottom, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME', name=name) 64 | 65 | def conv_layer(self, bottom, name, training=True): 66 | with tf.variable_scope(name): 67 | filt = self.get_conv_filter(name, training=training) 68 | 69 | conv = tf.nn.conv2d(bottom, filt, [1, 1, 1, 1], padding='SAME') 70 | 71 | conv_biases = self.get_bias(name, training=training) 72 | bias = tf.nn.bias_add(conv, conv_biases) 73 | 74 | relu = tf.nn.relu(bias) 75 | return relu 76 | 77 | def fc_layer(self, bottom, name): 78 | with tf.variable_scope(name): 79 | shape = bottom.get_shape().as_list() 80 | dim = 1 81 | for d in shape[1:]: 82 | dim *= d 83 | x = tf.reshape(bottom, [-1, dim]) 84 | 85 | weights = self.get_fc_weight(name) 86 | biases = self.get_bias(name) 87 | 88 | # Fully connected layer. Note that the '+' operation automatically 89 | # broadcasts the biases. 90 | fc = tf.nn.bias_add(tf.matmul(x, weights), biases) 91 | 92 | return fc 93 | 94 | def get_conv_filter(self, name, training=True): 95 | return tf.Variable(self.data_dict[name][0], name="filter", trainable=training) 96 | 97 | def get_bias(self, name, training=True): 98 | return tf.Variable(self.data_dict[name][1], name="biases", trainable=training) 99 | 100 | def get_fc_weight(self, name): 101 | return tf.Variable(self.data_dict[name][0], name="weights") 102 | -------------------------------------------------------------------------------- /rcnn/base_rpn.py: -------------------------------------------------------------------------------- 1 | import inspect 2 | import os 3 | 4 | import numpy as np 5 | import tensorflow as tf 6 | import time 7 | 8 | VGG_MEAN = [103.939, 116.779, 123.68] 9 | 10 | class RPN: 11 | def __init__(self, vgg16_npy_path=None): 12 | if vgg16_npy_path is None: 13 | path = inspect.getfile(Vgg16) 14 | path = os.path.abspath(os.path.join(path, os.pardir)) 15 | path = os.path.join(path, "vgg16.npy") 16 | vgg16_npy_path = path 17 | print(path) 18 | 19 | self.data_dict = np.load(vgg16_npy_path, encoding='latin1').item() 20 | print("npy file loaded") 21 | 22 | def build_model(self, bgr): 23 | """ 24 | load variable from npy to build the VGG 25 | :param rgb: rgb image [batch, height, width, 3] values scaled [0, 1] 26 | """ 27 | start_time = time.time() 28 | print("build model started") 29 | 30 | self.conv1_1 = self.conv_layer(bgr, "conv1_1") 31 | self.conv1_2 = self.conv_layer(self.conv1_1, "conv1_2") 32 | self.pool1 = self.max_pool(self.conv1_2, 'pool1') 33 | 34 | self.conv2_1 = self.conv_layer(self.pool1, "conv2_1") 35 | self.conv2_2 = self.conv_layer(self.conv2_1, "conv2_2") 36 | self.pool2 = self.max_pool(self.conv2_2, 'pool2') 37 | 38 | self.conv3_1 = self.conv_layer(self.pool2, "conv3_1") 39 | self.conv3_2 = self.conv_layer(self.conv3_1, "conv3_2") 40 | self.conv3_3 = self.conv_layer(self.conv3_2, "conv3_3") 41 | self.pool3 = self.max_pool(self.conv3_3, 'pool3') 42 | 43 | self.conv4_1 = self.conv_layer(self.pool3, "conv4_1") 44 | self.conv4_2 = self.conv_layer(self.conv4_1, "conv4_2") 45 | self.conv4_3 = self.conv_layer(self.conv4_2, "conv4_3") 46 | 47 | self.data_dict = None 48 | print(("build model finished: %ds" % (time.time() - start_time))) 49 | 50 | def avg_pool(self, bottom, name): 51 | return tf.nn.avg_pool(bottom, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME', name=name) 52 | 53 | def max_pool(self, bottom, name): 54 | return tf.nn.max_pool(bottom, ksize=[1, 2, 2, 1], strides=[1, 
2, 2, 1], padding='SAME', name=name) 55 | 56 | def conv_layer(self, bottom, name): 57 | with tf.variable_scope(name): 58 | filt = self.get_conv_filter(name) 59 | 60 | conv = tf.nn.conv2d(bottom, filt, [1, 1, 1, 1], padding='SAME') 61 | 62 | conv_biases = self.get_bias(name) 63 | bias = tf.nn.bias_add(conv, conv_biases) 64 | 65 | relu = tf.nn.relu(bias) 66 | return relu 67 | 68 | def fc_layer(self, bottom, name): 69 | with tf.variable_scope(name): 70 | shape = bottom.get_shape().as_list() 71 | dim = 1 72 | for d in shape[1:]: 73 | dim *= d 74 | x = tf.reshape(bottom, [-1, dim]) 75 | 76 | weights = self.get_fc_weight(name) 77 | biases = self.get_bias(name) 78 | 79 | # Fully connected layer. Note that the '+' operation automatically 80 | # broadcasts the biases. 81 | fc = tf.nn.bias_add(tf.matmul(x, weights), biases) 82 | 83 | return fc 84 | 85 | def get_conv_filter(self, name): 86 | return tf.Variable(self.data_dict[name][0], name="filter") 87 | 88 | def get_bias(self, name): 89 | return tf.Variable(self.data_dict[name][1], name="biases") 90 | 91 | def get_fc_weight(self, name): 92 | return tf.Variable(self.data_dict[name][0], name="weights") 93 | -------------------------------------------------------------------------------- /rcnn/proposal_layer.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | def rpn_to_roi(): 4 | pass 5 | -------------------------------------------------------------------------------- /rcnn/rcnn.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | import sys 4 | sys.path.append("../") 5 | sys.path.append("../util") 6 | sys.path.append("../cython_util") 7 | sys.path.append("../pretrain") 8 | import glob 9 | import cv2 10 | import numpy as np 11 | # from vgg16 import vgg16 12 | from input_kitti import * 13 | from data_util import * 14 | from parse_xml import parseXML 15 | from vgg16_vehicle import Vgg16 as Vgg 16 | import tensorflow as tf 17 | from network_util import * 18 | from bbox_overlap import bbox_overlaps 19 | from remove_extraboxes import remove_extraboxes 20 | from bool_anchors_inside_image import batch_inside_image 21 | from generate_anchors import generate_anchors 22 | 23 | """ 24 | ・collect dataset of cars 25 | ・Preprocessing BBOX and Label for training 26 | ・try roi_pooling layer 27 | ・Extract ROI using mitmul tools 28 | ・NMS 29 | """ 30 | 31 | """Flow of Fast RCNN 32 | ############################################################################### 33 | In this state, Create Input Images and ROI Labels 34 | 35 | 1. input batch images and GroundTruth BBox from datasets *folder name, batch size 36 | Image shape is [batch size, width, height, channel], tf.float32, vgg normalized, bgr 37 | Bounding Box shape is [batch size, center_x, center_y, width, height] 38 | 39 | 2. get candicate bounding box from images. 40 | 41 | # Implemented 42 | 3. resize input images to input size *size of resize if needed. 43 | if this operation was done, you should adjust bounding box according to it. 44 | Both of Candicate and GroundTruth Bounding Boxes. 45 | In thesis, Image size is in [600, 1000] 46 | In this Implemention, input image has dynamic shape between [600, 1000] 47 | 48 | 4. convert candicate bounding box to ROI label. 49 | 50 | 5. calculate IOU between ROI label and GroundTruth label. 51 | IOU is Intersection Over Union. 52 | 53 | 6. Select Bounding Box from IOU. 
54 | IOU > 0.5 is correct label, IOU = [0.1 0.5) is a false label(background). 55 | Correct Label is 25%, BackGround Label is 75%. 56 | Number of Label is 128, Batch Size is 2, so each image has 64 ROIs 57 | 58 | ############################################################################### 59 | In this stage, Calculate Loss 60 | 61 | 7. Input data to ROI Pooling Layer is Conv5_3 Feature Map and ROIs 62 | Input shape is Feature map (batch, width, height, 512), ROIs (Num of ROIs, 5) 63 | ROIs, ex:) [0, left, height, right, bottom]. First Element is the index of batch 64 | 65 | 8. Through ROI Pooling Layer, Output Shape is [Num of ROIs, 7, 7, 512] 66 | 67 | 9. Reshape it to [Num of ROIs, -1], and then connect to Fully Connected Layer. 68 | 69 | 10.Output Layer has two section, one is class prediction, the other is its bounding box prediction. 70 | class prediction shape is [Num of ROIs, Num of Class + 1] 71 | bounding box prediction shape is [Num of ROIs, 4 * (Num of Class + 1)] 72 | 73 | 11.Loss Function 74 | Regularize bounding box value [center_x, center_y, w, h] into 75 | [(GroundTruth x - pred_x) / pred_w, (GroundTruth y - pred_y) / pred_h, log(GroundTruth w / pred_w), log(GroundTruth h / pred_h)] 76 | Class prediction is by softmax with loss. 77 | Bounding Box prediction is by smooth_L1 loss 78 | ############################################################################### 79 | In this stage, Describe Datasets. 80 | 1. PASCAL VOC2007 81 | 2. KITTI Datasets 82 | 3. Udacity Datasets 83 | """ 84 | 85 | def create_optimizer(all_loss, lr=0.001, var_list=None): 86 | opt = tf.train.AdamOptimizer(lr) 87 | if var_list is None: 88 | return opt.minimize(all_loss) 89 | optimizer = opt.minimize(all_loss, var_list=var_list) 90 | return optimizer 91 | 92 | class RPN_ExtendedLayer(object): 93 | def __init__(self): 94 | pass 95 | 96 | def build_model(self, input_layer, use_batchnorm=False, is_training=True, activation=tf.nn.relu, anchors=1): 97 | self.rpn_conv = convBNLayer(input_layer, use_batchnorm, is_training, 512, 512, 3, 1, name="conv_rpn", activation=activation) 98 | # shape is [Batch, 2(bg/fg) * 9(anchors=3scale*3aspect ratio)] 99 | self.rpn_cls = convBNLayer(self.rpn_conv, use_batchnorm, is_training, 512, anchors*2, 1, 1, name="rpn_cls", activation=activation) 100 | rpn_shape = self.rpn_cls.get_shape().as_list() 101 | rpn_shape = tf.shape(self.rpn_cls) 102 | self.rpn_cls = tf.reshape(self.rpn_cls, [rpn_shape[0], rpn_shape[1], rpn_shape[2], anchors, 2]) 103 | self.rpn_cls = tf.nn.softmax(self.rpn_cls, dim=-1)[:, :, :, :, 0] 104 | self.rpn_cls = tf.reshape(self.rpn_cls, [rpn_shape[0], rpn_shape[1]*rpn_shape[2]*anchors]) # for loss 105 | # shape is [Batch, 4(x, y, w, h) * 9(anchors=3scale*3aspect ratio)] 106 | self.rpn_bbox = convBNLayer(self.rpn_conv, use_batchnorm, is_training, 512, anchors*4, 1, 1, name="rpn_bbox", activation=activation) 107 | self.rpn_bbox = tf.reshape(self.rpn_bbox, [rpn_shape[0], rpn_shape[1]*rpn_shape[2]*anchors, 4]) 108 | 109 | class VGG(object): 110 | def __init__(self): 111 | pass 112 | 113 | def build_model(self, input_layer, activation=tf.nn.relu, anchors=1): 114 | self.conv1_1 = convLayer(images, 3, 64, 3, 1, activation=activation, name="conv1_1") 115 | self.conv1_2 = convLayer(self.conv1_1, 64, 64, 3, 1, activation=activation, name="conv1_2") 116 | self.pool1 = maxpool2d(self.conv1_2, kernel=2, stride=2, name="pool1") 117 | 118 | self.conv2_1 = convLayer(self.pool1, 64, 128, 3, 1, activation=activation, name="conv2_1") 119 | self.conv2_2 = convLayer(self.conv2_1, 
128, 128, 3, 1, activation=activation, name="conv2_2") 120 | self.pool2 = maxpool2d(self.conv2_2, kernel=2, stride=2, name="pool2") 121 | 122 | self.conv3_1 = convLayer(self.pool2, 128, 256, 3, 1, activation=activation, name="conv3_1") 123 | self.conv3_2 = convLayer(self.conv3_1, 256, 256, 3, 1, activation=activation, name="conv3_2") 124 | self.conv3_3 = convLayer(self.conv3_2, 256, 256, 3, 1, activation=activation, name="conv3_3") 125 | self.pool3 = maxpool2d(self.conv3_3, kernel=2, stride=2, name="pool3") 126 | 127 | self.conv4_1 = convLayer(self.pool2, 256, 512, 3, 1, activation=activation, name="conv4_1") 128 | self.conv4_2 = convLayer(self.conv4_1, 512, 512, 3, 1, activation=activation, name="conv4_2") 129 | self.conv4_3 = convLayer(self.conv4_2, 512, 512, 3, 1, activation=activation, name="conv4_3") 130 | self.pool4 = maxpool2d(self.conv4_3, kernel=2, stride=2, name="pool4") 131 | 132 | self.conv5_1 = convLayer(self.pool2, 512, 512, 3, 1, activation=activation, name="conv5_1") 133 | self.conv5_2 = convLayer(self.conv5_1, 512, 512, 3, 1, activation=activation, name="conv5_2") 134 | self.conv5_3 = convLayer(self.conv5_2, 512, 512, 3, 1, activation=activation, name="conv5_3") 135 | 136 | def propose_for_rois(rpn_cls, rpn_bbox, gt_labels, feat_stride, scales, ratios, feature_shape, image_size, num_of_rois=128): 137 | """ 138 | **rpn_modelから、実際の大きさまでスケールさせる** 139 | 1. 小さなbounding boxを排除(feature_stride * roi size?) 140 | 2. scoreから6000個を抽出 141 | 3. NMSをかけて、300個以下まで候補を絞る 142 | ここまでが物体候補領域の抽出 143 | ーーーーーーーーーーーーーーーーーーーーーーーーーーーーーーーーーーーーーーーーーーー 144 | 4. gt_boxesと候補領域でoverlapsを計算する 145 | overlapsが0.5以上ならGroundTruth, [0.1, 0.5)ならFalseであるとする  * ここまでBatchでよい 146 | ここの計算でReshapeされたROI, 正解Class Label, 正解Regression Label, そのindex番号の計算が行われる 147 | 5. rpn_modelをclass label[?]とregression label[?, 4]にReshapeし、indexで値を取ってくる 148 | 149 | input 150 | 1. Pred class Label 151 | 2. Pred regression Label 152 | 3. GroundTruth class Label 153 | 4. GroundTruth regression Label 154 | 155 | output 156 | 1. 候補領域の計算されたROI(batch number, x, y, w, h), 数は[?] 157 | 2. 候補領域の正解Class Label(batch number, 2) car or not 158 | 3. 候補領域の正解Regression Label(batch number, 4) x, y, w, h 159 |   これも事前に正規化しておく必要があります 160 | 161 | ここではBack Propは計算されない 162 | indexのみ計算される indexのOutputのShapeは、[?] 
163 | ROIs[index]で、これが次の層に伝搬される 164 | """ 165 | image_size = images.shape[1:3] 166 | width = feature_shape[0] 167 | height = feature_shape[1] 168 | batch_size = gt_labels.shape[0] 169 | A = scales.shape[0] * len(ratios) 170 | K = width * height 171 | 172 | center_x = np.arange(0, height) * feat_stride 173 | center_y = np.arange(0, width) * feat_stride 174 | center_x, center_y = np.meshgrid(center_x, center_y) 175 | centers = np.zeros((batch_size, width*height, 4)) 176 | centers[:] = np.vstack((center_x.ravel(), center_y.ravel(), 177 | center_x.ravel(), center_y.ravel())).transpose() 178 | anchors = np.zeros((batch_size, A, 4)) 179 | anchors = generate_anchors(scales=scales, ratios=ratios) # Shape is [A, 4] 180 | anchors = centers.reshape(batch_size, K, 1, 4) + anchors # [Batch, K, A, 4] 181 | # gt_labels: Shape is [Batch, G, 4] 182 | # rpn_bbox: Shape is [Batch, K*A, 4] 183 | # rpn_cls: Shape is [Batch, K*A] 184 | # rois: Shape is [Num of ROIs, 5] 5 is [batch index, left, top, right, bottom] 185 | # gt_cls: Shape is [Num of ROIs, 2] 0 is GroundTruth, 1 is otherwise 186 | # gt_boxes: Shape is [Num of ROIs, 4] Value is Normalized by proposal target lay 187 | 188 | # Convert anchors into proposals via bbox transformations 189 | # clip predicted boxes to image 190 | # proposals: Shape is [Batch, K*A, 4] 191 | # scores: Shape is [Batch, K*A] 192 | # anchors: Shape is [Batch, K*A, 4] 193 | anchors = bbox_transform_inv_clip(anchors, rpn_bbox, image_size[1], image_height[0]) 194 | for bs in range(batch_size): 195 | keep = _filter_boxes(anchors[bs], min_size) 196 | proposals = anchors[bs, keep] 197 | scores = rpn_cls[bs, keep] 198 | order = scores.ravel().argsort()[-6000:] 199 | proposals = proposals[order] 200 | scores = scores[order] 201 | keep = nms(np.hstack((proposals, scores)), 0.7) 202 | if post_nms_topN > 0: 203 | keep = keep[:300] 204 | proposals = proposals[keep, :] 205 | scores = scores[keep] 206 | 207 | # Sample ROIs 208 | #ここから128枚(64枚: fg 16, bg 48) 209 | computed_gt_boxes, true_index, false_index = bbox_overlaps( 210 | proposals, 211 | scores, 212 | gt_labels) 213 | # for i in range(batch_size): 214 | true_where = np.where(true_index == 1) 215 | num_true = len(true_where[0]) 216 | 217 | if num_true > 16: 218 | select = np.random.choice(num_true, num_true - 16, replace=False) 219 | num_true = 16 220 | batch = np.ones((select.shape[0]), dtype=np.int) * bs 221 | true_where = remove_extraboxes(true_where[0], select, batch) 222 | true_index[true_where] = 0 223 | 224 | false_where = np.where(false_index[i] == 1) 225 | num_false = len(false_where[0]) 226 | select = np.random.choice(num_false, num_false - (64-num_true), replace=False) 227 | batch = np.ones((select.shape[0]), dtype=np.int) * bs 228 | false_where = remove_extraboxes(false_where[0], select, batch) 229 | false_index[false_where] = 0 230 | batch_inds.append(keep.shape[0]) 231 | 232 | 233 | true_index = None 234 | false_index = None 235 | final_index = None 236 | # TODO Concatenate true_index and false_index 237 | proposals = proposals[final_index] 238 | gt_cls = true_index 239 | gt_cls[bs, true_index, 0] = 1 240 | gt_cls[bs, false_index, 1] = 1 241 | gt_boxes[bs, true_index] = computed_gt_boxes[true_index] 242 | rois[bs] = (proposals[true_index] / 4).astype(np.int32) 243 | return rois, gt_cls, gt_boxes 244 | 245 | 246 | def _filter_boxes(boxes, min_size): 247 | """Remove all boxes with any side smaller than min_size.""" 248 | ws = boxes[:, 2] - boxes[:, 0] + 1 249 | hs = boxes[:, 3] - boxes[:, 1] + 1 250 | keep = np.where((ws >= 
min_size) & (hs >= min_size))[0] 251 | return keep 252 | 253 | def proposal_target_layer(self, feature_map, rpn_model, gt_labels, feat_stride, scales, ratios, feature_shape, images, num_of_rois=num_of_rois, feat_stride=16, name=""): 254 | """ 255 | gt_labels: Shape is [Batch, Num of GroundTruth Num, 4] 256 | rois: Shape is [Num of ROIs, 5] 5 is [batch index, left, top, right, bottom] 257 | gt_cls: Shape is [Num of ROIs, 2] 0 is GroundTruth, 1 is otherwise 258 | gt_boxes: Shape is [Num of ROIs, 4] Value is Normalized by proposal target layer 259 | 260 | Gradient will not deliver to RPN Layer 261 | """ 262 | 263 | with tf.variable_scope(name): 264 | rois, gt_cls, gt_boxes = tf.py_func(propose_for_rois, \ 265 | [rpn_model.rpn_cls, rpn_model.rpn_bbox, gt_labels, feat_stride, scales, ratios, feature_shape, images],[tf.int8,tf.float32,tf.float32]) 266 | 267 | rois = tf.convert_to_tensor(rois, name="rois") 268 | gt_cls = tf.convert_to_tensor(gt_cls, name="gt_cls") 269 | gt_boxes = tf.convert_to_tensor(gt_boxes, name="gt_boxes") 270 | return rois, gt_cls, gt_boxes 271 | 272 | class FAST_RCNN(object): 273 | def __init__(self, roi_size): 274 | self.roi_size = roi_size 275 | 276 | def build_model(self, feature_map, rois, rpn_model, activation=tf.nn.relu): 277 | # input_layer shape is [Batch, K, A, ] 278 | self.roi_layer = roi_pooling(feature_map, rois, self.roi_size[0], self.roi_size[1]) 279 | # input_shape [num_of_rois, channel, roi size, roi size] 280 | self.pool_5 = tf.reshape(roi_layer, [-1, self.roi_size[0]*self.roi_size[1]*512]) 281 | self.fc6 = vgg_fully(self.pool_5, [self.roi_size[0]*self.roi_size[1]*512, 4096], name="fc6", is_training=is_training) 282 | self.fc7 = vgg_fully(self.fc6, [4096, 4096], name="fc7") 283 | self.fc8 = vgg_fully(self.fc7, [4096, 6], name="fc8") 284 | # output shape [num_of_rois, 2] 285 | self.obj_class = tf.nn.softmax(self.fc8[:, :2], dim=-1) 286 | # output shape [num_of_rois, 8] 287 | self.bbox_regression = self.fc8[:, 2:] 288 | 289 | def rpn(sess, vggpath=None, image_shape=(300, 300), \ 290 | is_training=None, use_batchnorm=False, activation=tf.nn.relu, anchors=9): 291 | images = tf.placeholder(tf.float32, [None, None, None, 3]) 292 | phase_train = tf.placeholder(tf.bool, name="phase_traing") if is_training else None 293 | 294 | vgg = VGG() 295 | vgg.build_model(images) 296 | with tf.variable_scope("rpn_model"): 297 | rpn = RPN_ExtendedLayer() 298 | rpn.build_model(vgg.conv5_3, use_batchnorm=use_batchnorm, is_training=is_training, activation=activation, anchors=anchors) 299 | 300 | if is_training: 301 | rcnn_vars = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, scope="rpn_model") 302 | sess.run(tf.variables_initializer(rcnn_vars)) 303 | return vgg.conv5_3, rpn, images, phase_train 304 | 305 | def fast_rcnn(sess, feature_map, rpn_model, gt_labels, roi_size=(7, 7), \ 306 | is_training=None, use_batchnorm=False, activation=tf.nn.relu, num_of_rois=128): 307 | """Model Definition of Fast RCNN 308 | In thesis, Roi Size is (7, 7), channel is 512 309 | """ 310 | with tf.variable_scope("fast_rcnn"): 311 | # gt_labels: Shape is [Batch, Num of GroundTruth Num, 4] 312 | # rois: Shape is [Num of ROIs, 5] 5 is [batch index, left, top, right, bottom] 313 | # gt_cls: Shape is [Num of ROIs, 2] 0 is GroundTruth, 1 is otherwise 314 | # gt_boxes: Shape is [Num of ROIs, 4] Value is Normalized by proposal target layer 315 | rois, gt_cls, gt_boxes = proposal_target_layer(feature_map, rpn_model, gt_labels, num_of_rois=num_of_rois) 316 | rcnn = FAST_RCNN(roi_size) 317 | 
rcnn.build_model(feature_map, rois) 318 | 319 | if is_training: 320 | rcnn_vars = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, scope="fast_rcnn") 321 | sess.run(tf.variables_initializer(rcnn_vars)) 322 | 323 | return rcnn, rcnn_vars 324 | 325 | def train_rcnn(batch_size, image_dir, label_dir, epoch=101, lr=0.01, feature_shape=(64, 19), \ 326 | is_training=True, use_batchnorm=False, activation=tf.nn.relu, \ 327 | scales=np.array([5, 8, 12, 16, 32]), ratios=[0.3, 0.5, 0.8, 1], feature_stride=16): 328 | import time 329 | training_epochs = epoch 330 | 331 | with tf.Session() as sess: 332 | vgg_featuremap, rpn_model, images, phase_train = rpn(sess, vggpath=vggpath, is_training=False, roi_size=(7, 7), \ 333 | use_batchnorm=use_batchnorm, activation=activation, anchors=scales.shape[0]*len(ratios)) 334 | saver = tf.train.Saver() 335 | new_saver = tf.train.import_meta_graph("../rpn/rpn_model40.ckpt.meta") 336 | last_model = "../rpn/rpn_model40.ckpt" 337 | saver.restore(sess, last_model) 338 | 339 | rcnn_model, rcnn_vars = fast_rcnn(sess, vgg_featuremap, rpn_model, roi_size=roi_size, activation=activation) 340 | 341 | total_loss, cls_loss, bbox_loss, true_obj_loss, false_obj_loss, g_bboxes, true_index, false_index = rpn_loss(rcnn_model.rcnn_cls, rcnn_model.rcnn_bbox) 342 | # Only Training RCNN Layer 343 | optimizer = create_optimizer(total_loss, lr=lr, var_list=rcnn_vars) 344 | 345 | init = tf.global_variables_initializer() 346 | sess.run(init) 347 | 348 | image_pathlist, label_pathlist = get_pathlist(image_dir, label_dir) 349 | for epoch in range(training_epochs): 350 | for batch_images, batch_labels in generator__Image_and_label(image_pathlist, label_pathlist, batch_size=batch_size): 351 | start = time.time() 352 | candicate_anchors, batch_true_index, batch_false_index = create_Labels_For_Loss(batch_labels, feat_stride=feature_stride, \ 353 | feature_shape=(batch_images.shape[1]//feature_stride +1, batch_images.shape[2]//feature_stride), \ 354 | scales=scales, ratios=ratios, image_size=batch_images.shape[1:3]) 355 | print "batch time", time.time() - start 356 | print batch_true_index[batch_true_index==1].shape 357 | print batch_false_index[batch_false_index==1].shape 358 | 359 | sess.run(optimizer, feed_dict={images:batch_images, g_bboxes: candicate_anchors, true_index:batch_true_index, false_index:batch_false_index}) 360 | tl, cl, bl, tol, fol = sess.run([total_loss, cls_loss, bbox_loss, true_obj_loss, false_obj_loss], feed_dict={images:batch_images, g_bboxes: candicate_anchors, true_index:batch_true_index, false_index:batch_false_index}) 361 | print("Epoch:", '%04d' % (epoch+1), "total loss=", "{:.9f}".format(tl)) 362 | print("Epoch:", '%04d' % (epoch+1), "closs loss=", "{:.9f}".format(cl)) 363 | print("Epoch:", '%04d' % (epoch+1), "bbox loss=", "{:.9f}".format(bl)) 364 | print("Epoch:", '%04d' % (epoch+1), "true loss=", "{:.9f}".format(tol)) 365 | print("Epoch:", '%04d' % (epoch+1), "false loss=", "{:.9f}".format(fol)) 366 | print("Optimization Finished") 367 | 368 | def smooth_L1(x): 369 | l2 = 0.5 * (x**2.0) 370 | l1 = tf.abs(x) - 0.5 371 | 372 | condition = tf.less(tf.abs(x), 1.0) 373 | loss = tf.where(condition, l2, l1) 374 | return loss 375 | 376 | def rpn_loss(rpn_cls, rpn_bbox): 377 | """Calculate Class Loss and Bounding Regression Loss. 378 | 379 | # Args: 380 | obj_class: Prediction of object class. Shape is [ROIs*Batch_Size, 2] 381 | bbox_regression: Prediction of bounding box. 
Shape is [ROIs*Batch_Size, 4] 382 | """ 383 | rpn_shape = rpn_cls.get_shape().as_list() 384 | g_bbox = tf.placeholder(tf.float32, [rpn_shape[0], rpn_shape[1], rpn_shape[2], 4]) 385 | true_index = tf.placeholder(tf.float32, [rpn_shape[0], rpn_shape[1], rpn_shape[2]]) 386 | false_index = tf.placeholder(tf.float32, [rpn_shape[0], rpn_shape[1], rpn_shape[2]]) 387 | elosion = 0.00001 388 | true_obj_loss = -tf.reduce_sum(tf.multiply(tf.log(rpn_cls[:, :, :, 0]+elosion), true_index)) 389 | false_obj_loss = -tf.reduce_sum(tf.multiply(tf.log(rpn_cls[:, :, :, 1]+elosion), false_index)) 390 | obj_loss = tf.add(true_obj_loss, false_obj_loss) 391 | cls_loss = tf.div(obj_loss, 16) # L(cls) / N(cls) N=batch size 392 | 393 | bbox_loss = smooth_L1(tf.subtract(rpn_bbox, g_bbox)) 394 | bbox_loss = tf.reduce_sum(tf.multiply(tf.reduce_sum(bbox_loss, 3), true_index)) 395 | bbox_loss = tf.multiply(tf.div(bbox_loss, 1197), 100) # rpn_shape[1]*rpn_shape[2] 396 | # bbox_loss = bbox_loss / rpn_shape[1] 397 | 398 | total_loss = tf.add(cls_loss, bbox_loss) 399 | return total_loss, cls_loss, bbox_loss, true_obj_loss, false_obj_loss, g_bbox, true_index, false_index 400 | 401 | def create_Labels_For_Loss(gt_boxes, feat_stride=16, feature_shape=(64, 19), \ 402 | scales=np.array([8, 16, 32]), ratios=[0.5, 0.8, 1], \ 403 | image_size=(300, 1000)): 404 | """This Function is processed before network input 405 | Number of Candicate Anchors is Feature Map width * heights 406 | Number of Predicted Anchors is Batch Num * Feature Map Width * Heights * 9 407 | """ 408 | width = feature_shape[0] 409 | height = feature_shape[1] 410 | batch_size = gt_boxes.shape[0] 411 | # shifts is the all candicate anchors(prediction of bounding boxes) 412 | center_x = np.arange(0, height) * feat_stride 413 | center_y = np.arange(0, width) * feat_stride 414 | center_x, center_y = np.meshgrid(center_x, center_y) 415 | # Shape is [Batch, Width*Height, 4] 416 | centers = np.zeros((batch_size, width*height, 4)) 417 | centers[:] = np.vstack((center_x.ravel(), center_y.ravel(), 418 | center_x.ravel(), center_y.ravel())).transpose() 419 | A = scales.shape[0] * len(ratios) 420 | K = width * height # width * height 421 | anchors = np.zeros((batch_size, A, 4)) 422 | anchors = generate_anchors(scales=scales, ratios=ratios) # Shape is [A, 4] 423 | 424 | candicate_anchors = centers.reshape(batch_size, K, 1, 4) + anchors # [Batch, K, A, 4] 425 | 426 | # shape is [B, K, A] 427 | is_inside = batch_inside_image(candicate_anchors, image_size[1], image_size[0]) 428 | 429 | # candicate_anchors: Shape is [Batch, K, A, 4] 430 | # gt_boxes: Shape is [Batch, G, 4] 431 | # true_index: Shape is [Batch, K, A] 432 | # false_index: Shape is [Batch, K, A] 433 | candicate_anchors, true_index, false_index = bbox_overlaps( 434 | np.ascontiguousarray(candicate_anchors, dtype=np.float), 435 | is_inside, 436 | gt_boxes) 437 | 438 | for i in range(batch_size): 439 | true_where = np.where(true_index[i] == 1) 440 | num_true = len(true_where[0]) 441 | 442 | if num_true > 64: 443 | select = np.random.choice(num_true, num_true - 64, replace=False) 444 | num_true = 64 445 | batch = np.ones((select.shape[0]), dtype=np.int) * i 446 | true_where = remove_extraboxes(true_where[0], true_where[1], select, batch) 447 | true_index[true_where] = 0 448 | 449 | false_where = np.where(false_index[i] == 1) 450 | num_false = len(false_where[0]) 451 | select = np.random.choice(num_false, num_false - (128-num_true), replace=False) 452 | batch = np.ones((select.shape[0]), dtype=np.int) * i 453 | false_where = 
remove_extraboxes(false_where[0], false_where[1], select, batch) 454 | false_index[false_where] = 0 455 | 456 | return candicate_anchors, true_index, false_index 457 | 458 | if __name__ == '__main__': 459 | import matplotlib.pyplot as plt 460 | -------------------------------------------------------------------------------- /rcnn/rcnn_vehicle.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | import sys 4 | import glob 5 | import cv2 6 | import dlib 7 | import numpy as np 8 | # from vgg16 import vgg16 9 | from input_kitti import * 10 | from util import * 11 | from parse_xml import parseXML 12 | from base_vgg16 import Vgg16 13 | import tensorflow as tf 14 | # from utility.image.data_augmentation.flip import Flip 15 | sys.path.append("/Users/tsujiyuuki/env_python/code/my_code/Data_Augmentation") 16 | 17 | """ 18 | ・collect dataset of cars 19 | ・Preprocessing BBOX and Label for training 20 | ・try roi_pooling layer 21 | ・Extract ROI using mitmul tools 22 | ・NMS 23 | """ 24 | 25 | """Flow of Fast RCNN 26 | ############################################################################### 27 | In this state, Create Input Images and ROI Labels 28 | 29 | 1. input batch images and GroundTruth BBox from datasets *folder name, batch size 30 | Image shape is [batch size, width, height, channel], tf.float32, vgg normalized, bgr 31 | Bounding Box shape is [batch size, center_x, center_y, width, height] 32 | 33 | 2. get candicate bounding box from images. 34 | 35 | # Implemented 36 | 3. resize input images to input size *size of resize if needed. 37 | if this operation was done, you should adjust bounding box according to it. 38 | Both of Candicate and GroundTruth Bounding Boxes. 39 | In thesis, Image size is in [600, 1000] 40 | In this Implemention, input image has dynamic shape between [600, 1000] 41 | 42 | 4. convert candicate bounding box to ROI label. 43 | 44 | 5. calculate IOU between ROI label and GroundTruth label. 45 | IOU is Intersection Over Union. 46 | 47 | 6. Select Bounding Box from IOU. 48 | IOU > 0.5 is correct label, IOU = [0.1 0.5) is a false label(background). 49 | Correct Label is 25%, BackGround Label is 75%. 50 | Number of Label is 128, Batch Size is 2, so each image has 64 ROIs 51 | 52 | ############################################################################### 53 | In this stage, Calculate Loss 54 | 55 | 7. Input data to ROI Pooling Layer is Conv5_3 Feature Map and ROIs 56 | Input shape is Feature map (batch, width, height, 512), ROIs (Num of ROIs, 5) 57 | ROIs, ex:) [0, left, height, right, bottom]. First Element is the index of batch 58 | 59 | 8. Through ROI Pooling Layer, Output Shape is [Num of ROIs, 7, 7, 512] 60 | 61 | 9. Reshape it to [Num of ROIs, -1], and then connect to Fully Connected Layer. 62 | 63 | 10.Output Layer has two section, one is class prediction, the other is its bounding box prediction. 64 | class prediction shape is [Num of ROIs, Num of Class + 1] 65 | bounding box prediction shape is [Num of ROIs, 4 * (Num of Class + 1)] 66 | 67 | 11.Loss Function 68 | Regularize bounding box value [center_x, center_y, w, h] into 69 | [(GroundTruth x - pred_x) / pred_w, (GroundTruth y - pred_y) / pred_h, log(GroundTruth w / pred_w), log(GroundTruth h / pred_h)] 70 | Class prediction is by softmax with loss. 71 | Bounding Box prediction is by smooth_L1 loss 72 | ############################################################################### 73 | In this stage, Describe Datasets. 74 | 1. 
PASCAL VOC2007 75 | 2. KITTI Datasets 76 | 3. Udacity Datasets 77 | """ 78 | 79 | def create_optimizer(all_loss, lr=0.001, var_list=None): 80 | opt = tf.train.AdamOptimizer(lr) 81 | if var_list is None: 82 | return opt.minimize(all_loss) 83 | optimizer = opt.minimize(all_loss, var_list=var_list) 84 | return optimizer 85 | 86 | class RPN_ExtendedLayer(object): 87 | def __init__(self): 88 | pass 89 | 90 | def build_model(self, input_layer, use_batchnorm=False, is_training=True, activation=tf.nn.relu, anchors=1): 91 | self.rpn_conv = convBNLayer(input_layer, use_batchnorm, is_training, 512, 512, 3, 1, name="conv_rpn", activation=activation) 92 | # shape is [Batch, 2(bg/fg) * 9(anchors=3scale*3aspect ratio)] 93 | self.rpn_cls = convBNLayer(self.rpn_conv, use_batchnorm, is_training, 512, anchors*2, 1, 1, name="rpn_cls", activation=activation) 94 | rpn_shape = self.rpn_cls.get_shape().as_list() 95 | rpn_shape = tf.shape(self.rpn_cls) 96 | self.rpn_cls = tf.reshape(self.rpn_cls, [rpn_shape[0], rpn_shape[1], rpn_shape[2], anchors, 2]) 97 | self.rpn_cls = tf.nn.softmax(self.rpn_cls, dim=-1)[:, :, :, :, 0] 98 | self.rpn_cls = tf.reshape(self.rpn_cls, [rpn_shape[0], rpn_shape[1]*rpn_shape[2]*anchors]) 99 | # shape is [Batch, 4(x, y, w, h) * 9(anchors=3scale*3aspect ratio)] 100 | self.rpn_bbox = convBNLayer(self.rpn_conv, use_batchnorm, is_training, 512, anchors*4, 1, 1, name="rpn_bbox", activation=activation) 101 | self.rpn_bbox = tf.reshape(self.rpn_bbox, [rpn_shape[0], rpn_shape[1]*rpn_shape[2]*anchors, 4]) 102 | 103 | class RPN(object): 104 | def __init__(self): 105 | pass 106 | 107 | def build_model(self, input_layer, activation=tf.nn.relu, anchors=1): 108 | self.conv1_1 = convLayer(images, 3, 64, 3, 1, activation=activation, name="conv1_1") 109 | self.conv1_2 = convLayer(self.conv1_1, 64, 64, 3, 1, activation=activation, name="conv1_2") 110 | self.pool1 = maxpool2d(self.conv1_2, kernel=2, stride=2, name="pool1") 111 | 112 | self.conv2_1 = convLayer(self.pool1, 64, 128, 3, 1, activation=activation, name="conv2_1") 113 | self.conv2_2 = convLayer(self.conv2_1, 128, 128, 3, 1, activation=activation, name="conv2_2") 114 | self.pool2 = maxpool2d(self.conv2_2, kernel=2, stride=2, name="pool2") 115 | 116 | self.conv3_1 = convLayer(self.pool2, 128, 256, 3, 1, activation=activation, name="conv3_1") 117 | self.conv3_2 = convLayer(self.conv3_1, 256, 256, 3, 1, activation=activation, name="conv3_2") 118 | self.conv3_3 = convLayer(self.conv3_2, 256, 256, 3, 1, activation=activation, name="conv3_3") 119 | self.pool3 = maxpool2d(self.conv3_3, kernel=2, stride=2, name="pool3") 120 | 121 | self.conv4_1 = convLayer(self.pool2, 256, 512, 3, 1, activation=activation, name="conv4_1") 122 | self.conv4_2 = convLayer(self.conv4_1, 512, 512, 3, 1, activation=activation, name="conv4_2") 123 | self.conv4_3 = convLayer(self.conv4_2, 512, 512, 3, 1, activation=activation, name="conv4_3") 124 | 125 | class FAST_RCNN(object): 126 | def __init__(self): 127 | pass 128 | 129 | def build_model(self, input_layer, use_batchnorm=False, is_training=True, activation=tf.nn.relu, anchors=1): 130 | self.conv1_1 = convBNLayer(images, False, is_training, activatioin=tf.nn.relu, name="conv1_1") 131 | self.conv1_2 = convBNLayer() 132 | 133 | def rpn(sess, vggpath=None, image_shape=(300, 300), \ 134 | is_training=None, use_batchnorm=False, activation=tf.nn.relu, anchors=9): 135 | images = tf.placeholder(tf.float32, [None, None, None, 3]) 136 | phase_train = tf.placeholder(tf.bool, name="phase_traing") if is_training else None 137 | 138 | vgg = 
Vgg(vgg16_npy_path=vggpath) 139 | vgg.build_model(images) 140 | rpn = RPN_ExtendedLayer() 141 | rpn.build_model(vgg.conv5_3, use_batchnorm=use_batchnorm, is_training=is_training, activation=activation, anchors=anchors) 142 | return vgg.conv5_3, rpn, images, phase_train 143 | 144 | def fast_rcnn(sess, model, rois, roi_size=(7, 7), image_shape=(300, 300), \ 145 | is_training=None, use_batchnorm=False, activation=tf.nn.relu, num_of_rois=128): 146 | """Model Definition of Fast RCNN 147 | In thesis, Roi Size is (7, 7), channel is 512 148 | """ 149 | with tf.variable_scope("fast_rcnn"): 150 | # roi shape [Num of ROIs, X, Y, W, H] 151 | roi_layer = roi_pooling(model, rois, roi_size[0], roi_size[1]) 152 | # input_shape [num_of_rois, channel, roi size, roi size] 153 | pool_5 = tf.reshape(roi_layer, [num_of_rois, roi_size[0]*roi_size[1]*512]) 154 | fc6 = fully_connected(pool_5, [roi_size[0]*roi_size[1]*512, 4096], name="fc6", is_training=is_training) 155 | fc7 = fully_connected(fc6, [4096, 4096], name="fc7", is_training=is_training) 156 | # output shape [num_of_rois, 2] 157 | obj_class = tf.nn.softmax(fully_connected(fc7, [4096, 2], name="fc_class", activation=None, use_batchnorm=None), dim=-1) 158 | # output shape [num_of_rois, 8] 159 | bbox_regression = fully_connected(fc7, [4096, 8], name="fc_bbox", activation=None, use_batchnorm=None) 160 | 161 | def train_rpn(batch_size, image_dir, label_dir, epoch=101, lr=0.01, feature_shape=(64, 19), \ 162 | vggpath="../pretrain/vgg16.npy", use_batchnorm=False, activation=tf.nn.relu, \ 163 | scales=np.array([5, 8, 12, 16, 32]), ratios=[0.3, 0.5, 0.8, 1], feature_stride=16): 164 | import time 165 | training_epochs = epoch 166 | 167 | with tf.Session() as sess: 168 | vgg_featuremap, rpn_model, images, phase_train = rpn(sess, vggpath=vggpath, is_training=False, \ 169 | use_batchnorm=use_batchnorm, activation=activation, anchors=scales.shape[0]*len(ratios)) 170 | saver = tf.train.Saver() 171 | new_saver = tf.train.import_meta_graph("../rpn/rpn_model40.ckpt.meta") 172 | last_model = "../rpn/rpn_model40.ckpt" 173 | saver.restore(sess, last_model) 174 | 175 | with tf.variable_scope("fast-rcnn"): 176 | rcnn_model = fast_rcnn(vgg_featuremap, rpn_model, activation=activation) 177 | 178 | if is_training: 179 | rcnn_vars = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, scope="fast-rcnn") 180 | sess.run(tf.variables_initializer(rcnn_vars)) 181 | 182 | total_loss, cls_loss, bbox_loss, true_obj_loss, false_obj_loss, g_bboxes, true_index, false_index = rpn_loss(rcnn_model.rcnn_cls, rcnn_model.rcnn_bbox) 183 | # Only Training RCNN Layer 184 | optimizer = create_optimizer(total_loss, lr=lr, var_list=rcnn_vars) 185 | 186 | init = tf.global_variables_initializer() 187 | sess.run(init) 188 | 189 | image_pathlist, label_pathlist = get_pathlist(image_dir, label_dir) 190 | for epoch in range(training_epochs): 191 | for batch_images, batch_labels in generator__Image_and_label(image_pathlist, label_pathlist, batch_size=batch_size): 192 | start = time.time() 193 | candicate_anchors, batch_true_index, batch_false_index = create_Labels_For_Loss(batch_labels, feat_stride=feature_stride, \ 194 | feature_shape=(batch_images.shape[1]//feature_stride +1, batch_images.shape[2]//feature_stride), \ 195 | scales=scales, ratios=ratios, image_size=batch_images.shape[1:3]) 196 | print "batch time", time.time() - start 197 | print batch_true_index[batch_true_index==1].shape 198 | print batch_false_index[batch_false_index==1].shape 199 | 200 | sess.run(optimizer, feed_dict={images:batch_images, 
g_bboxes: candicate_anchors, true_index:batch_true_index, false_index:batch_false_index}) 201 | tl, cl, bl, tol, fol = sess.run([total_loss, cls_loss, bbox_loss, true_obj_loss, false_obj_loss], feed_dict={images:batch_images, g_bboxes: candicate_anchors, true_index:batch_true_index, false_index:batch_false_index}) 202 | print("Epoch:", '%04d' % (epoch+1), "total loss=", "{:.9f}".format(tl)) 203 | print("Epoch:", '%04d' % (epoch+1), "closs loss=", "{:.9f}".format(cl)) 204 | print("Epoch:", '%04d' % (epoch+1), "bbox loss=", "{:.9f}".format(bl)) 205 | print("Epoch:", '%04d' % (epoch+1), "true loss=", "{:.9f}".format(tol)) 206 | print("Epoch:", '%04d' % (epoch+1), "false loss=", "{:.9f}".format(fol)) 207 | print("Optimization Finished") 208 | 209 | def smooth_L1(x): 210 | l2 = 0.5 * (x**2.0) 211 | l1 = tf.abs(x) - 0.5 212 | 213 | condition = tf.less(tf.abs(x), 1.0) 214 | loss = tf.where(condition, l2, l1) 215 | return loss 216 | 217 | def rpn_loss(rpn_cls, rpn_bbox): 218 | """Calculate Class Loss and Bounding Regression Loss. 219 | 220 | # Args: 221 | obj_class: Prediction of object class. Shape is [ROIs*Batch_Size, 2] 222 | bbox_regression: Prediction of bounding box. Shape is [ROIs*Batch_Size, 4] 223 | """ 224 | rpn_shape = rpn_cls.get_shape().as_list() 225 | g_bbox = tf.placeholder(tf.float32, [rpn_shape[0], rpn_shape[1], rpn_shape[2], 4]) 226 | true_index = tf.placeholder(tf.float32, [rpn_shape[0], rpn_shape[1], rpn_shape[2]]) 227 | false_index = tf.placeholder(tf.float32, [rpn_shape[0], rpn_shape[1], rpn_shape[2]]) 228 | elosion = 0.00001 229 | true_obj_loss = -tf.reduce_sum(tf.multiply(tf.log(rpn_cls[:, :, :, 0]+elosion), true_index)) 230 | false_obj_loss = -tf.reduce_sum(tf.multiply(tf.log(rpn_cls[:, :, :, 1]+elosion), false_index)) 231 | obj_loss = tf.add(true_obj_loss, false_obj_loss) 232 | cls_loss = tf.div(obj_loss, 16) # L(cls) / N(cls) N=batch size 233 | 234 | bbox_loss = smooth_L1(tf.subtract(rpn_bbox, g_bbox)) 235 | bbox_loss = tf.reduce_sum(tf.multiply(tf.reduce_sum(bbox_loss, 3), true_index)) 236 | bbox_loss = tf.multiply(tf.div(bbox_loss, 1197), 100) # rpn_shape[1]*rpn_shape[2] 237 | # bbox_loss = bbox_loss / rpn_shape[1] 238 | 239 | total_loss = tf.add(cls_loss, bbox_loss) 240 | return total_loss, cls_loss, bbox_loss, true_obj_loss, false_obj_loss, g_bbox, true_index, false_index 241 | 242 | def create_Labels_For_Loss(gt_boxes, feat_stride=16, feature_shape=(64, 19), \ 243 | scales=np.array([8, 16, 32]), ratios=[0.5, 0.8, 1], \ 244 | image_size=(300, 1000)): 245 | """This Function is processed before network input 246 | Number of Candicate Anchors is Feature Map width * heights 247 | Number of Predicted Anchors is Batch Num * Feature Map Width * Heights * 9 248 | """ 249 | width = feature_shape[0] 250 | height = feature_shape[1] 251 | batch_size = gt_boxes.shape[0] 252 | # shifts is the all candicate anchors(prediction of bounding boxes) 253 | center_x = np.arange(0, height) * feat_stride 254 | center_y = np.arange(0, width) * feat_stride 255 | center_x, center_y = np.meshgrid(center_x, center_y) 256 | # Shape is [Batch, Width*Height, 4] 257 | centers = np.zeros((batch_size, width*height, 4)) 258 | centers[:] = np.vstack((center_x.ravel(), center_y.ravel(), 259 | center_x.ravel(), center_y.ravel())).transpose() 260 | A = scales.shape[0] * len(ratios) 261 | K = width * height # width * height 262 | anchors = np.zeros((batch_size, A, 4)) 263 | anchors = generate_anchors(scales=scales, ratios=ratios) # Shape is [A, 4] 264 | 265 | candicate_anchors = centers.reshape(batch_size, 
K, 1, 4) + anchors # [Batch, K, A, 4] 266 | 267 | # shape is [B, K, A] 268 | is_inside = batch_inside_image(candicate_anchors, image_size[1], image_size[0]) 269 | 270 | # candicate_anchors: Shape is [Batch, K, A, 4] 271 | # gt_boxes: Shape is [Batch, G, 4] 272 | # true_index: Shape is [Batch, K, A] 273 | # false_index: Shape is [Batch, K, A] 274 | candicate_anchors, true_index, false_index = bbox_overlaps( 275 | np.ascontiguousarray(candicate_anchors, dtype=np.float), 276 | is_inside, 277 | gt_boxes) 278 | 279 | for i in range(batch_size): 280 | true_where = np.where(true_index[i] == 1) 281 | num_true = len(true_where[0]) 282 | 283 | if num_true > 64: 284 | select = np.random.choice(num_true, num_true - 64, replace=False) 285 | num_true = 64 286 | batch = np.ones((select.shape[0]), dtype=np.int) * i 287 | true_where = remove_extraboxes(true_where[0], true_where[1], select, batch) 288 | true_index[true_where] = 0 289 | 290 | false_where = np.where(false_index[i] == 1) 291 | num_false = len(false_where[0]) 292 | select = np.random.choice(num_false, num_false - (128-num_true), replace=False) 293 | batch = np.ones((select.shape[0]), dtype=np.int) * i 294 | false_where = remove_extraboxes(false_where[0], false_where[1], select, batch) 295 | false_index[false_where] = 0 296 | 297 | return candicate_anchors, true_index, false_index 298 | 299 | if __name__ == '__main__': 300 | import sys 301 | import matplotlib.pyplot as plt 302 | from PIL import Image as im 303 | sys.path.append('/home/katou01/code/grid/DataAugmentation') 304 | # from resize import resize 305 | 306 | image_dir = "/home/katou01/download/training/image_2/*.png" 307 | label_dir = "/home/katou01/download/training/label_2/*.txt" 308 | get_Image_Roi_All(image_dir, label_dir, 80) 309 | # 310 | # image = im.open("./test_images/test1.jpg") 311 | # image = np.array(image, dtype=np.float32) 312 | # new_image = image[np.newaxis, :] 313 | # batch_image = np.vstack((new_image, new_image)) 314 | # batch_image = resize(batch_image, size=(300, 300)) 315 | # 316 | # with tf.Session() as sess: 317 | # model = ssd_model(sess, batch_image, activation=None, atrous=False, rate=1, implement_atrous=False) 318 | # print(vars(model)) 319 | # # tf.summary.scalar('model', model) 320 | -------------------------------------------------------------------------------- /rpn/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yukitsuji/Faster_RCNN_tensorflow/765c729eaf03cb401ad308a289ec7d8c2bfca474/rpn/__init__.py -------------------------------------------------------------------------------- /rpn/rpn.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | import sys 4 | sys.path.append("../") 5 | sys.path.append("../util") 6 | sys.path.append("../cython_util") 7 | sys.path.append("../pretrain") 8 | import glob 9 | import cv2 10 | import numpy as np 11 | # from vgg16 import vgg16 12 | from input_kitti import * 13 | from data_util import * 14 | from parse_xml import parseXML 15 | from base_vgg16 import Vgg16 as Vgg 16 | import tensorflow as tf 17 | from network_util import * 18 | from bbox_overlap import bbox_overlaps 19 | from remove_extraboxes import remove_extraboxes 20 | from bool_anchors_inside_image import batch_inside_image 21 | from generate_anchors import generate_anchors 22 | # from utility.image.data_augmentation.flip import Flip 23 | # sys.path.append("/Users/tsujiyuuki/env_python/code/my_code/Data_Augmentation") 
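# Illustrative sketch (hypothetical helper, for exposition only): the
# create_Labels_For_Loss() defined further down in this file broadcasts the
# base anchors from generate_anchors() over every feature-map cell to build
# the [Batch, K, A, 4] candidate boxes. The helper below isolates that
# shifting step for a single image, using only `np` and `generate_anchors`,
# both of which are imported above; its name and default arguments are
# assumptions made for this example, not part of the training code.
def _example_shifted_anchors(feature_shape=(4, 3), feat_stride=16,
                             scales=np.array([8, 16, 32]),
                             ratios=[0.5, 0.8, 1]):
    """Return candidate anchors of shape [K, A, 4] for a single image,
    where K = feature_shape[0] * feature_shape[1] and A = len(scales) * len(ratios).
    """
    width, height = feature_shape
    # One (x1, y1, x2, y2) shift per feature-map cell, exactly as in
    # create_Labels_For_Loss() below.
    shift_x = np.arange(0, height) * feat_stride
    shift_y = np.arange(0, width) * feat_stride
    shift_x, shift_y = np.meshgrid(shift_x, shift_y)
    shifts = np.vstack((shift_x.ravel(), shift_y.ravel(),
                        shift_x.ravel(), shift_y.ravel())).transpose()  # [K, 4]
    base_anchors = generate_anchors(scales=scales, ratios=ratios)       # [A, 4]
    # Broadcasting [K, 1, 4] + [A, 4] -> [K, A, 4]; create_Labels_For_Loss()
    # does the same with an extra leading batch axis.
    return shifts[:, np.newaxis, :] + base_anchors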
24 | 25 | 26 | """Flow of Fast RCNN 27 | ############################################################################### 28 | In this state, Create Input Images and ROI Labels 29 | 30 | 1. input batch images and GroundTruth BBox from datasets *folder name, batch size 31 | Image shape is [batch size, width, height, channel], tf.float32, vgg normalized, bgr 32 | Bounding Box shape is [batch size, center_x, center_y, width, height] 33 | 34 | 2. get candicate bounding box from images. 35 | 36 | # Implemented 37 | 3. resize input images to input size *size of resize if needed. 38 | if this operation was done, you should adjust bounding box according to it. 39 | Both of Candicate and GroundTruth Bounding Boxes. 40 | In thesis, Image size is in [600, 1000] 41 | In this Implemention, input image has dynamic shape between [600, 1000] 42 | 43 | 4. convert candicate bounding box to ROI label. 44 | 45 | 5. calculate IOU between ROI label and GroundTruth label. 46 | IOU is Intersection Over Union. 47 | 48 | 6. Select Bounding Box from IOU. 49 | IOU > 0.5 is correct label, IOU = [0.1 0.5) is a false label(background). 50 | Correct Label is 25%, BackGround Label is 75%. 51 | Number of Label is 128, Batch Size is 2, so each image has 64 ROIs 52 | 53 | ############################################################################### 54 | In this stage, Calculate Loss 55 | 56 | 7. Input data to ROI Pooling Layer is Conv5_3 Feature Map and ROIs 57 | Input shape is Feature map (batch, width, height, 512), ROIs (Num of ROIs, 5) 58 | ROIs, ex:) [0, left, height, right, bottom]. First Element is the index of batch 59 | 60 | 8. Through ROI Pooling Layer, Output Shape is [Num of ROIs, 7, 7, 512] 61 | 62 | 9. Reshape it to [Num of ROIs, -1], and then connect to Fully Connected Layer. 63 | 64 | 10.Output Layer has two section, one is class prediction, the other is its bounding box prediction. 65 | class prediction shape is [Num of ROIs, Num of Class + 1] 66 | bounding box prediction shape is [Num of ROIs, 4 * (Num of Class + 1)] 67 | 68 | 11.Loss Function 69 | Regularize bounding box value [center_x, center_y, w, h] into 70 | [(GroundTruth x - pred_x) / pred_w, (GroundTruth y - pred_y) / pred_h, log(GroundTruth w / pred_w), log(GroundTruth h / pred_h)] 71 | Class prediction is by softmax with loss. 72 | Bounding Box prediction is by smooth_L1 loss 73 | ############################################################################### 74 | In this stage, Describe Datasets. 75 | 1. PASCAL VOC2007 76 | 2. KITTI Datasets 77 | 3. 
Udacity Datasets 78 | """ 79 | 80 | # TODO: datasetsを丸ごとメモリに展開できるか。Generatorを用いるか。 81 | 82 | 83 | def create_optimizer(all_loss, lr=0.001): 84 | opt = tf.train.AdamOptimizer(lr) 85 | optimizer = opt.minimize(all_loss) 86 | return optimizer 87 | 88 | class RPN_ExtendedLayer(object): 89 | def __init__(self): 90 | pass 91 | 92 | def build_model(self, input_layer, use_batchnorm=False, is_training=True, atrous=False, \ 93 | rate=1, activation=tf.nn.relu, implement_atrous=False, lr_mult=1, anchors=1): 94 | self.rpn_conv = convBNLayer(input_layer, use_batchnorm, is_training, 512, 512, 3, 1, name="conv_rpn", activation=activation) 95 | # shape is [Batch, 2(bg/fg) * 9(anchors=3scale*3aspect ratio)] 96 | self.rpn_cls = convBNLayer(self.rpn_conv, use_batchnorm, is_training, 512, anchors*2, 1, 1, name="rpn_cls", activation=activation) 97 | rpn_shape = self.rpn_cls.get_shape().as_list() 98 | rpn_shape = tf.shape(self.rpn_cls) 99 | self.rpn_cls = tf.reshape(self.rpn_cls, [rpn_shape[0], rpn_shape[1], rpn_shape[2], anchors, 2]) 100 | self.rpn_cls = tf.nn.softmax(self.rpn_cls, dim=-1) 101 | self.rpn_cls = tf.reshape(self.rpn_cls, [rpn_shape[0], rpn_shape[1]*rpn_shape[2], anchors, 2]) 102 | # shape is [Batch, 4(x, y, w, h) * 9(anchors=3scale*3aspect ratio)] 103 | self.rpn_bbox = convBNLayer(self.rpn_conv, use_batchnorm, is_training, 512, anchors*4, 1, 1, name="rpn_bbox", activation=activation) 104 | self.rpn_bbox = tf.reshape(self.rpn_bbox, [rpn_shape[0], rpn_shape[1]*rpn_shape[2], anchors, 4]) 105 | 106 | def rpn(sess, vggpath=None, image_shape=(300, 300), \ 107 | is_training=None, use_batchnorm=False, activation=tf.nn.relu, anchors=9): 108 | images = tf.placeholder(tf.float32, [None, None, None, 3]) 109 | phase_train = tf.placeholder(tf.bool, name="phase_traing") if is_training else None 110 | 111 | vgg = Vgg(vgg16_npy_path=vggpath) 112 | vgg.build_model(images) 113 | 114 | with tf.variable_scope("rpn_model") as scope: 115 | rpn_model = RPN_ExtendedLayer() 116 | rpn_model.build_model(vgg.conv5_3, use_batchnorm=use_batchnorm, \ 117 | is_training=phase_train, activation=activation, anchors=anchors) 118 | 119 | if is_training: 120 | initialized_var = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, scope="rpn_model") 121 | sess.run(tf.variables_initializer(initialized_var)) 122 | return rpn_model, images, phase_train 123 | 124 | def smooth_L1(x): 125 | l2 = 0.5 * (x**2.0) 126 | l1 = tf.abs(x) - 0.5 127 | 128 | condition = tf.less(tf.abs(x), 1.0) 129 | loss = tf.where(condition, l2, l1) 130 | return loss 131 | 132 | def rpn_loss(rpn_cls, rpn_bbox): 133 | """Calculate Class Loss and Bounding Regression Loss. 134 | 135 | # Args: 136 | obj_class: Prediction of object class. Shape is [ROIs*Batch_Size, 2] 137 | bbox_regression: Prediction of bounding box. 
Shape is [ROIs*Batch_Size, 4] 138 | """ 139 | rpn_shape = rpn_cls.get_shape().as_list() 140 | g_bbox = tf.placeholder(tf.float32, [rpn_shape[0], rpn_shape[1], rpn_shape[2], 4]) 141 | true_index = tf.placeholder(tf.float32, [rpn_shape[0], rpn_shape[1], rpn_shape[2]]) 142 | false_index = tf.placeholder(tf.float32, [rpn_shape[0], rpn_shape[1], rpn_shape[2]]) 143 | elosion = 0.00001 144 | true_obj_loss = -tf.reduce_sum(tf.multiply(tf.log(rpn_cls[:, :, :, 0]+elosion), true_index)) 145 | false_obj_loss = -tf.reduce_sum(tf.multiply(tf.log(rpn_cls[:, :, :, 1]+elosion), false_index)) 146 | obj_loss = tf.add(true_obj_loss, false_obj_loss) 147 | cls_loss = tf.div(obj_loss, 16) # L(cls) / N(cls) N=batch size 148 | 149 | bbox_loss = smooth_L1(tf.subtract(rpn_bbox, g_bbox)) 150 | bbox_loss = tf.reduce_sum(tf.multiply(tf.reduce_sum(bbox_loss, 3), true_index)) 151 | bbox_loss = tf.multiply(tf.div(bbox_loss, 1197), 100) # rpn_shape[1]*rpn_shape[2] 152 | # bbox_loss = bbox_loss / rpn_shape[1] 153 | 154 | total_loss = tf.add(cls_loss, bbox_loss) 155 | return total_loss, cls_loss, bbox_loss, true_obj_loss, false_obj_loss, g_bbox, true_index, false_index 156 | 157 | 158 | def create_Labels_For_Loss(gt_boxes, feat_stride=16, feature_shape=(64, 19), \ 159 | scales=np.array([8, 16, 32]), ratios=[0.5, 0.8, 1], \ 160 | image_size=(300, 1000)): 161 | """This Function is processed before network input 162 | Number of Candicate Anchors is Feature Map width * heights 163 | Number of Predicted Anchors is Batch Num * Feature Map Width * Heights * 9 164 | """ 165 | width = feature_shape[0] 166 | height = feature_shape[1] 167 | batch_size = gt_boxes.shape[0] 168 | # shifts is the all candicate anchors(prediction of bounding boxes) 169 | center_x = np.arange(0, height) * feat_stride 170 | center_y = np.arange(0, width) * feat_stride 171 | center_x, center_y = np.meshgrid(center_x, center_y) 172 | # Shape is [Batch, Width*Height, 4] 173 | centers = np.zeros((batch_size, width*height, 4)) 174 | centers[:] = np.vstack((center_x.ravel(), center_y.ravel(), 175 | center_x.ravel(), center_y.ravel())).transpose() 176 | A = scales.shape[0] * len(ratios) 177 | K = width * height # width * height 178 | anchors = np.zeros((batch_size, A, 4)) 179 | anchors = generate_anchors(scales=scales, ratios=ratios) # Shape is [A, 4] 180 | 181 | candicate_anchors = centers.reshape(batch_size, K, 1, 4) + anchors # [Batch, K, A, 4] 182 | 183 | # shape is [B, K, A] 184 | is_inside = batch_inside_image(candicate_anchors, image_size[1], image_size[0]) 185 | 186 | # candicate_anchors: Shape is [Batch, K, A, 4] 187 | # gt_boxes: Shape is [Batch, G, 4] 188 | # true_index: Shape is [Batch, K, A] 189 | # false_index: Shape is [Batch, K, A] 190 | candicate_anchors, true_index, false_index = bbox_overlaps( 191 | np.ascontiguousarray(candicate_anchors, dtype=np.float), 192 | is_inside, 193 | gt_boxes) 194 | 195 | for i in range(batch_size): 196 | true_where = np.where(true_index[i] == 1) 197 | num_true = len(true_where[0]) 198 | 199 | if num_true > 64: 200 | select = np.random.choice(num_true, num_true - 64, replace=False) 201 | num_true = 64 202 | batch = np.ones((select.shape[0]), dtype=np.int) * i 203 | true_where = remove_extraboxes(true_where[0], true_where[1], select, batch) 204 | true_index[true_where] = 0 205 | 206 | false_where = np.where(false_index[i] == 1) 207 | num_false = len(false_where[0]) 208 | select = np.random.choice(num_false, num_false - (128-num_true), replace=False) 209 | batch = np.ones((select.shape[0]), dtype=np.int) * i 210 | 
false_where = remove_extraboxes(false_where[0], false_where[1], select, batch) 211 | false_index[false_where] = 0 212 | 213 | return candicate_anchors, true_index, false_index 214 | 215 | def train_rpn(batch_size, image_dir, label_dir, epoch=101, lr=0.01, feature_shape=(64, 19), \ 216 | vggpath="../pretrain/vgg16.npy", use_batchnorm=False, activation=tf.nn.relu, \ 217 | scales=np.array([5, 8, 12, 16, 32]), ratios=[0.3, 0.5, 0.8, 1], feature_stride=16): 218 | import time 219 | training_epochs = epoch 220 | 221 | with tf.Session() as sess: 222 | model, images, phase_train = rpn(sess, vggpath=vggpath, is_training=True, \ 223 | use_batchnorm=use_batchnorm, activation=activation, anchors=scales.shape[0]*len(ratios)) 224 | total_loss, cls_loss, bbox_loss, true_obj_loss, false_obj_loss, g_bboxes, true_index, false_index = rpn_loss(model.rpn_cls, model.rpn_bbox) 225 | optimizer = create_optimizer(total_loss, lr=lr) 226 | init = tf.global_variables_initializer() 227 | sess.run(init) 228 | 229 | image_pathlist, label_pathlist = get_pathlist(image_dir, label_dir) 230 | for epoch in range(training_epochs): 231 | for batch_images, batch_labels in generator__Image_and_label(image_pathlist, label_pathlist, batch_size=batch_size): 232 | start = time.time() 233 | candicate_anchors, batch_true_index, batch_false_index = create_Labels_For_Loss(batch_labels, feat_stride=feature_stride, feature_shape=(batch_images.shape[1]//feature_stride +1, batch_images.shape[2]//feature_stride+1), \ 234 | scales=scales, ratios=ratios, image_size=batch_images.shape[1:3]) 235 | print "batch time", time.time() - start 236 | print batch_true_index[batch_true_index==1].shape 237 | print batch_false_index[batch_false_index==1].shape 238 | 239 | sess.run(optimizer, feed_dict={images:batch_images, g_bboxes: candicate_anchors, true_index:batch_true_index, false_index:batch_false_index}) 240 | tl, cl, bl, tol, fol = sess.run([total_loss, cls_loss, bbox_loss, true_obj_loss, false_obj_loss], feed_dict={images:batch_images, g_bboxes: candicate_anchors, true_index:batch_true_index, false_index:batch_false_index}) 241 | print("Epoch:", '%04d' % (epoch+1), "total loss=", "{:.9f}".format(tl)) 242 | print("Epoch:", '%04d' % (epoch+1), "closs loss=", "{:.9f}".format(cl)) 243 | print("Epoch:", '%04d' % (epoch+1), "bbox loss=", "{:.9f}".format(bl)) 244 | print("Epoch:", '%04d' % (epoch+1), "true loss=", "{:.9f}".format(tol)) 245 | print("Epoch:", '%04d' % (epoch+1), "false loss=", "{:.9f}".format(fol)) 246 | print("Optimization Finished") 247 | 248 | if __name__ == '__main__': 249 | import sys 250 | import matplotlib.pyplot as plt 251 | from PIL import Image as im 252 | sys.path.append('/home/katou01/code/grid/DataAugmentation') 253 | # from resize import resize 254 | 255 | image_dir = "/home/katou01/download/training/image_2/*.png" 256 | label_dir = "/home/katou01/download/training/label_2/*.txt" 257 | # import time 258 | train_rpn(6, image_dir, label_dir, epoch=20, lr=0.001, \ 259 | scales=np.array([2, 4, 6, 8, 10]), ratios=[0.4, 0.6, 0.8, 1.0]) 260 | # image_pathlist, label_pathlist = get_pathlist(image_dir, label_dir) 261 | # for images, labels in generator__Image_and_label(image_pathlist, label_pathlist, batch_size=32): 262 | # start = time.time() 263 | # # images, labels = get_ALL_Image(image_pathlist, label_pathlist) 264 | # candicate_anchors, true_index, false_index = create_Labels_For_Loss(labels, feat_stride=16, feature_shape=(64, 19), \ 265 | # scales=np.array([5, 8, 12, 16, 32]), ratios=[0.3, 0.5, 0.8, 1], \ 266 | # image_size=(302, 
1000)) 267 | # print "batch time", time.time() - start 268 | # print candicate_anchors.shape, true_index.shape, false_index.shape 269 | # # images, labels = get_ALL_Image(image_pathlist, label_pathlist) 270 | # candicate_anchors, true_index, false_index = create_Labels_For_Loss(labels, feat_stride=16, feature_shape=(64, 19), \ 271 | # scales=np.array([1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 32]), ratios=[0.1, 0.2, 0.3, 0.5, 0.8, 1, 1.2], \ 272 | # image_size=(302, 1000)) 273 | -------------------------------------------------------------------------------- /rpn/rpn_vehicle.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | import sys 4 | sys.path.append("../") 5 | sys.path.append("../util") 6 | sys.path.append("../cython_util") 7 | sys.path.append("../pretrain") 8 | import glob 9 | import cv2 10 | import numpy as np 11 | # from vgg16 import vgg16 12 | from input_kitti import * 13 | from data_util import * 14 | from parse_xml import parseXML 15 | from vgg16_vehicle import Vgg16 as Vgg 16 | import tensorflow as tf 17 | from network_util import * 18 | from bbox_overlap import bbox_overlaps 19 | from remove_extraboxes import remove_extraboxes 20 | from bool_anchors_inside_image import batch_inside_image 21 | from generate_anchors import generate_anchors 22 | # from utility.image.data_augmentation.flip import Flip 23 | # sys.path.append("/Users/tsujiyuuki/env_python/code/my_code/Data_Augmentation") 24 | 25 | 26 | """Flow of Fast RCNN 27 | ############################################################################### 28 | In this state, Create Input Images and ROI Labels 29 | 30 | 1. input batch images and GroundTruth BBox from datasets *folder name, batch size 31 | Image shape is [batch size, width, height, channel], tf.float32, vgg normalized, bgr 32 | Bounding Box shape is [batch size, center_x, center_y, width, height] 33 | 34 | 2. get candicate bounding box from images. 35 | 36 | # Implemented 37 | 3. resize input images to input size *size of resize if needed. 38 | if this operation was done, you should adjust bounding box according to it. 39 | Both of Candicate and GroundTruth Bounding Boxes. 40 | In thesis, Image size is in [600, 1000] 41 | In this Implemention, input image has dynamic shape between [600, 1000] 42 | 43 | 4. convert candicate bounding box to ROI label. 44 | 45 | 5. calculate IOU between ROI label and GroundTruth label. 46 | IOU is Intersection Over Union. 47 | 48 | 6. Select Bounding Box from IOU. 49 | IOU > 0.5 is correct label, IOU = [0.1 0.5) is a false label(background). 50 | Correct Label is 25%, BackGround Label is 75%. 51 | Number of Label is 128, Batch Size is 2, so each image has 64 ROIs 52 | 53 | ############################################################################### 54 | In this stage, Calculate Loss 55 | 56 | 7. Input data to ROI Pooling Layer is Conv5_3 Feature Map and ROIs 57 | Input shape is Feature map (batch, width, height, 512), ROIs (Num of ROIs, 5) 58 | ROIs, ex:) [0, left, height, right, bottom]. First Element is the index of batch 59 | 60 | 8. Through ROI Pooling Layer, Output Shape is [Num of ROIs, 7, 7, 512] 61 | 62 | 9. Reshape it to [Num of ROIs, -1], and then connect to Fully Connected Layer. 63 | 64 | 10.Output Layer has two section, one is class prediction, the other is its bounding box prediction. 
65 | class prediction shape is [Num of ROIs, Num of Class + 1] 66 | bounding box prediction shape is [Num of ROIs, 4 * (Num of Class + 1)] 67 | 68 | 11.Loss Function 69 | Regularize bounding box value [center_x, center_y, w, h] into 70 | [(GroundTruth x - pred_x) / pred_w, (GroundTruth y - pred_y) / pred_h, log(GroundTruth w / pred_w), log(GroundTruth h / pred_h)] 71 | Class prediction is by softmax with loss. 72 | Bounding Box prediction is by smooth_L1 loss 73 | ############################################################################### 74 | In this stage, Describe Datasets. 75 | 1. PASCAL VOC2007 76 | 2. KITTI Datasets 77 | 3. Udacity Datasets 78 | """ 79 | 80 | # TODO: datasetsを丸ごとメモリに展開できるか。Generatorを用いるか。 81 | 82 | 83 | def create_optimizer(all_loss, lr=0.001): 84 | opt = tf.train.AdamOptimizer(lr) 85 | optimizer = opt.minimize(all_loss) 86 | return optimizer 87 | 88 | class RPN_ExtendedLayer(object): 89 | def __init__(self): 90 | pass 91 | 92 | def build_model(self, input_layer, use_batchnorm=False, is_training=True, atrous=False, \ 93 | rate=1, activation=tf.nn.relu, implement_atrous=False, anchors=1): 94 | self.rpn_conv = convBNLayer(input_layer, use_batchnorm, is_training, 512, 512, 3, 1, name="conv_rpn", activation=activation) 95 | # shape is [Batch, 2(bg/fg) * 9(anchors=3scale*3aspect ratio)] 96 | self.rpn_cls = convBNLayer(self.rpn_conv, False, is_training, 512, anchors*2, 1, 1, name="rpn_cls", activation=None) 97 | rpn_shape = self.rpn_cls.get_shape().as_list() 98 | rpn_shape = tf.shape(self.rpn_cls) 99 | self.rpn_cls = tf.reshape(self.rpn_cls, [rpn_shape[0], rpn_shape[1], rpn_shape[2], anchors, 2]) 100 | self.rpn_cls = tf.nn.softmax(self.rpn_cls, dim=-1) 101 | self.rpn_cls = tf.reshape(self.rpn_cls, [rpn_shape[0], rpn_shape[1]*rpn_shape[2], anchors, 2]) 102 | # shape is [Batch, 4(x, y, w, h) * 9(anchors=3scale*3aspect ratio)] 103 | self.rpn_bbox = convBNLayer(self.rpn_conv, use_batchnorm, is_training, 512, anchors*4, 1, 1, name="rpn_bbox", activation=None) 104 | self.rpn_bbox = tf.reshape(self.rpn_bbox, [rpn_shape[0], rpn_shape[1]*rpn_shape[2], anchors, 4]) 105 | 106 | def rpn(sess, vggpath=None, image_shape=(300, 300), \ 107 | is_training=None, use_batchnorm=False, activation=tf.nn.relu, anchors=9): 108 | images = tf.placeholder(tf.float32, [None, None, None, 3]) 109 | phase_train = tf.placeholder(tf.bool, name="phase_traing") if is_training else None 110 | 111 | vgg = Vgg(vgg16_npy_path=vggpath) 112 | vgg.build_model(images) 113 | 114 | with tf.variable_scope("rpn_model"): 115 | rpn_model = RPN_ExtendedLayer() 116 | rpn_model.build_model(vgg.conv4_3, use_batchnorm=use_batchnorm, \ 117 | is_training=phase_train, activation=activation, anchors=anchors) 118 | 119 | if is_training: 120 | initialized_var = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, scope="rpn_model") 121 | sess.run(tf.variables_initializer(initialized_var)) 122 | return rpn_model, images, phase_train 123 | 124 | def smooth_L1(x): 125 | l2 = 0.5 * (x**2.0) 126 | l1 = tf.abs(x) - 0.5 127 | 128 | condition = tf.less(tf.abs(x), 1.0) 129 | loss = tf.where(condition, l2, l1) 130 | return loss 131 | 132 | def rpn_loss(rpn_cls, rpn_bbox): 133 | """Calculate Class Loss and Bounding Regression Loss. 134 | 135 | # Args: 136 | obj_class: Prediction of object class. Shape is [ROIs*Batch_Size, 2] 137 | bbox_regression: Prediction of bounding box. 
Shape is [ROIs*Batch_Size, 4] 138 | """ 139 | rpn_shape = rpn_cls.get_shape().as_list() 140 | g_bbox = tf.placeholder(tf.float32, [rpn_shape[0], rpn_shape[1], rpn_shape[2], 4]) 141 | true_index = tf.placeholder(tf.float32, [rpn_shape[0], rpn_shape[1], rpn_shape[2]]) 142 | false_index = tf.placeholder(tf.float32, [rpn_shape[0], rpn_shape[1], rpn_shape[2]]) 143 | elosion = 0.00001 144 | true_obj_loss = -tf.reduce_sum(tf.multiply(tf.log(rpn_cls[:, :, :, 0]+elosion), true_index)) 145 | false_obj_loss = -tf.reduce_sum(tf.multiply(tf.log(rpn_cls[:, :, :, 1]+elosion), false_index)) 146 | obj_loss = tf.add(true_obj_loss, false_obj_loss) 147 | cls_loss = tf.div(obj_loss, 16) # L(cls) / N(cls) N=batch size 148 | 149 | bbox_loss = smooth_L1(tf.subtract(rpn_bbox, g_bbox)) 150 | bbox_loss = tf.reduce_sum(tf.multiply(tf.reduce_sum(bbox_loss, 3), true_index)) 151 | bbox_loss = tf.multiply(tf.div(bbox_loss, 1197), 100) # rpn_shape[1]*rpn_shape[2] 152 | # bbox_loss = bbox_loss / rpn_shape[1] 153 | 154 | total_loss = tf.add(cls_loss, bbox_loss) 155 | return total_loss, cls_loss, bbox_loss, true_obj_loss, false_obj_loss, g_bbox, true_index, false_index 156 | 157 | 158 | def create_Labels_For_Loss(gt_boxes, feat_stride=16, feature_shape=(64, 19), \ 159 | scales=np.array([8, 16, 32]), ratios=[0.5, 0.8, 1], \ 160 | image_size=(300, 1000)): 161 | """This Function is processed before network input 162 | Number of Candicate Anchors is Feature Map width * heights 163 | Number of Predicted Anchors is Batch Num * Feature Map Width * Heights * 9 164 | """ 165 | width = feature_shape[0] 166 | height = feature_shape[1] 167 | batch_size = gt_boxes.shape[0] 168 | # shifts is the all candicate anchors(prediction of bounding boxes) 169 | center_x = np.arange(0, height) * feat_stride 170 | center_y = np.arange(0, width) * feat_stride 171 | center_x, center_y = np.meshgrid(center_x, center_y) 172 | # Shape is [Batch, Width*Height, 4] 173 | centers = np.zeros((batch_size, width*height, 4)) 174 | centers[:] = np.vstack((center_x.ravel(), center_y.ravel(), 175 | center_x.ravel(), center_y.ravel())).transpose() 176 | A = scales.shape[0] * len(ratios) 177 | K = width * height # width * height 178 | anchors = np.zeros((batch_size, A, 4)) 179 | anchors = generate_anchors(scales=scales, ratios=ratios) # Shape is [A, 4] 180 | 181 | candicate_anchors = centers.reshape(batch_size, K, 1, 4) + anchors # [Batch, K, A, 4] 182 | 183 | # shape is [B, K, A] 184 | is_inside = batch_inside_image(candicate_anchors, image_size[1], image_size[0]) 185 | 186 | # candicate_anchors: Shape is [Batch, K, A, 4] 187 | # gt_boxes: Shape is [Batch, G, 4] 188 | # true_index: Shape is [Batch, K, A] 189 | # false_index: Shape is [Batch, K, A] 190 | candicate_anchors, true_index, false_index = bbox_overlaps( 191 | np.ascontiguousarray(candicate_anchors, dtype=np.float), 192 | is_inside, 193 | gt_boxes) 194 | 195 | for i in range(batch_size): 196 | true_where = np.where(true_index[i] == 1) 197 | num_true = len(true_where[0]) 198 | 199 | if num_true > 64: 200 | select = np.random.choice(num_true, num_true - 64, replace=False) 201 | num_true = 64 202 | batch = np.ones((select.shape[0]), dtype=np.int) * i 203 | true_where = remove_extraboxes(true_where[0], true_where[1], select, batch) 204 | true_index[true_where] = 0 205 | 206 | false_where = np.where(false_index[i] == 1) 207 | num_false = len(false_where[0]) 208 | select = np.random.choice(num_false, num_false - (128-num_true), replace=False) 209 | batch = np.ones((select.shape[0]), dtype=np.int) * i 210 | 
false_where = remove_extraboxes(false_where[0], false_where[1], select, batch) 211 | false_index[false_where] = 0 212 | 213 | return candicate_anchors, true_index, false_index 214 | 215 | def train_rpn(batch_size, image_dir, label_dir, epoch=101, lr=0.01, feature_shape=(64, 19), \ 216 | vggpath="../pretrain/vgg16.npy", use_batchnorm=False, activation=tf.nn.relu, \ 217 | scales=np.array([5, 8, 12, 16, 32]), ratios=[0.3, 0.5, 0.8, 1], feature_stride=16): 218 | import time 219 | training_epochs = epoch 220 | 221 | with tf.Session() as sess: 222 | model, images, phase_train = rpn(sess, vggpath=vggpath, is_training=True, \ 223 | use_batchnorm=use_batchnorm, activation=activation, anchors=scales.shape[0]*len(ratios)) 224 | saver = tf.train.Saver() 225 | total_loss, cls_loss, bbox_loss, true_obj_loss, false_obj_loss, g_bboxes, true_index, false_index = rpn_loss(model.rpn_cls, model.rpn_bbox) 226 | optimizer = create_optimizer(total_loss, lr=lr) 227 | init = tf.global_variables_initializer() 228 | sess.run(init) 229 | 230 | image_pathlist, label_pathlist = get_pathlist(image_dir, label_dir) 231 | for epoch in range(training_epochs): 232 | for batch_images, batch_labels in generator__Image_and_label(image_pathlist, label_pathlist, batch_size=batch_size): 233 | start = time.time() 234 | candicate_anchors, batch_true_index, batch_false_index = create_Labels_For_Loss(batch_labels, feat_stride=feature_stride, \ 235 | feature_shape=(batch_images.shape[1]//feature_stride +1, batch_images.shape[2]//feature_stride), \ 236 | scales=scales, ratios=ratios, image_size=batch_images.shape[1:3]) 237 | print "batch time", time.time() - start 238 | print batch_true_index[batch_true_index==1].shape 239 | print batch_false_index[batch_false_index==1].shape 240 | 241 | sess.run(optimizer, feed_dict={images:batch_images, g_bboxes: candicate_anchors, true_index:batch_true_index, false_index:batch_false_index}) 242 | tl, cl, bl, tol, fol = sess.run([total_loss, cls_loss, bbox_loss, true_obj_loss, false_obj_loss], feed_dict={images:batch_images, g_bboxes: candicate_anchors, true_index:batch_true_index, false_index:batch_false_index}) 243 | print("Epoch:", '%04d' % (epoch+1), "total loss=", "{:.9f}".format(tl)) 244 | print("Epoch:", '%04d' % (epoch+1), "closs loss=", "{:.9f}".format(cl)) 245 | print("Epoch:", '%04d' % (epoch+1), "bbox loss=", "{:.9f}".format(bl)) 246 | print("Epoch:", '%04d' % (epoch+1), "true loss=", "{:.9f}".format(tol)) 247 | print("Epoch:", '%04d' % (epoch+1), "false loss=", "{:.9f}".format(fol)) 248 | if (epoch != 0) and ((epoch+1) % 10 == 0): 249 | print "Save epoch " + str(epoch) 250 | saver.save(sess, "rpn_model" + str(epoch) + ".ckpt") 251 | print("Optimization Finished") 252 | 253 | if __name__ == '__main__': 254 | import sys 255 | import matplotlib.pyplot as plt 256 | from PIL import Image as im 257 | sys.path.append('/home/katou01/code/grid/DataAugmentation') 258 | # from resize import resize 259 | 260 | image_dir = "/home/katou01/download/training/image_2/*.png" 261 | label_dir = "/home/katou01/download/training/label_2/*.txt" 262 | # import time 263 | train_rpn(4, image_dir, label_dir, epoch=41, lr=0.001, use_batchnorm=True, \ 264 | scales=np.array([6, 8, 10, 12, 14, 16, 20, 32]), ratios=[0.4, 0.6, 0.8, 1.0], feature_stride=8) 265 | # image_pathlist, label_pathlist = get_pathlist(image_dir, label_dir) 266 | # for images, labels in generator__Image_and_label(image_pathlist, label_pathlist, batch_size=32): 267 | # start = time.time() 268 | # # images, labels = get_ALL_Image(image_pathlist, 
label_pathlist) 269 | # candicate_anchors, true_index, false_index = create_Labels_For_Loss(labels, feat_stride=16, feature_shape=(64, 19), \ 270 | # scales=np.array([5, 8, 12, 16, 32]), ratios=[0.3, 0.5, 0.8, 1], \ 271 | # image_size=(302, 1000)) 272 | # print "batch time", time.time() - start 273 | # print candicate_anchors.shape, true_index.shape, false_index.shape 274 | # # images, labels = get_ALL_Image(image_pathlist, label_pathlist) 275 | # candicate_anchors, true_index, false_index = create_Labels_For_Loss(labels, feat_stride=16, feature_shape=(64, 19), \ 276 | # scales=np.array([1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 32]), ratios=[0.1, 0.2, 0.3, 0.5, 0.8, 1, 1.2], \ 277 | # image_size=(302, 1000)) 278 | -------------------------------------------------------------------------------- /util/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yukitsuji/Faster_RCNN_tensorflow/765c729eaf03cb401ad308a289ec7d8c2bfca474/util/__init__.py -------------------------------------------------------------------------------- /util/bbox_transform.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick 6 | # -------------------------------------------------------- 7 | 8 | import numpy as np 9 | 10 | def create_bbox_regression_label(p_bboxes, g_bboxes): 11 | """Create Label for Bounding Box Regression Label 12 | # Args: 13 | p_bboxes: Predicted Bounging Box. Shape is [ROIs, 5]. 14 | [0, left, top, right, bottom] 15 | g_bboxes: GroundTruth Bounding Box. Shape is [ROIs, 5]. 16 | [0, left, top, right, bottom] 17 | # Returns: 18 | regression_label: Regression Label of Bounding Boxes. 
19 | Shape is [ROIs, 4] 20 | """ 21 | p_width = p_bboxes[:, 2] - p_bboxes[:, 0] + 1 22 | p_height = p_bboxes[:, 3] - p_bboxes[:, 1] + 1 23 | 24 | 25 | ex_ctr_x = ex_rois[:, 0] + 0.5 * ex_widths 26 | ex_ctr_y = ex_rois[:, 1] + 0.5 * ex_heights 27 | 28 | gt_widths = gt_rois[:, 2] - gt_rois[:, 0] + 1.0 29 | gt_heights = gt_rois[:, 3] - gt_rois[:, 1] + 1.0 30 | gt_ctr_x = gt_rois[:, 0] + 0.5 * gt_widths 31 | gt_ctr_y = gt_rois[:, 1] + 0.5 * gt_heights 32 | 33 | targets_dx = (gt_ctr_x - ex_ctr_x) / ex_widths 34 | targets_dy = (gt_ctr_y - ex_ctr_y) / ex_heights 35 | targets_dw = np.log(gt_widths / ex_widths) 36 | targets_dh = np.log(gt_heights / ex_heights) 37 | 38 | targets = np.vstack( 39 | (targets_dx, targets_dy, targets_dw, targets_dh)).transpose() 40 | return targets 41 | 42 | def bbox_transform_inv(boxes, deltas): 43 | """Convert network output to Bounding Boxes 44 | """ 45 | if boxes.shape[0] == 0: 46 | return np.zeros((0, deltas.shape[1]), dtype=deltas.dtype) 47 | 48 | boxes = boxes.astype(deltas.dtype, copy=False) 49 | widths = boxes[:, 2] - boxes[:, 0] + 1.0 50 | heights = boxes[:, 3] - boxes[:, 1] + 1.0 51 | ctr_x = boxes[:, 0] + 0.5 * widths 52 | ctr_y = boxes[:, 1] + 0.5 * heights 53 | 54 | dx = deltas[:, 0::4] 55 | dy = deltas[:, 1::4] 56 | dw = deltas[:, 2::4] 57 | dh = deltas[:, 3::4] 58 | 59 | pred_ctr_x = dx * widths[:, np.newaxis] + ctr_x[:, np.newaxis] 60 | pred_ctr_y = dy * heights[:, np.newaxis] + ctr_y[:, np.newaxis] 61 | pred_w = np.exp(dw) * widths[:, np.newaxis] 62 | pred_h = np.exp(dh) * heights[:, np.newaxis] 63 | 64 | pred_boxes = np.zeros(deltas.shape, dtype=deltas.dtype) 65 | # x1 66 | pred_boxes[:, 0::4] = pred_ctr_x - 0.5 * pred_w 67 | # y1 68 | pred_boxes[:, 1::4] = pred_ctr_y - 0.5 * pred_h 69 | # x2 70 | pred_boxes[:, 2::4] = pred_ctr_x + 0.5 * pred_w 71 | # y2 72 | pred_boxes[:, 3::4] = pred_ctr_y + 0.5 * pred_h 73 | 74 | return pred_boxes 75 | 76 | def clip_boxes(boxes, im_shape): 77 | """ 78 | Clip boxes to image boundaries. 79 | """ 80 | 81 | # x1 >= 0 82 | boxes[:, 0::4] = np.maximum(np.minimum(boxes[:, 0::4], im_shape[1] - 1), 0) 83 | # y1 >= 0 84 | boxes[:, 1::4] = np.maximum(np.minimum(boxes[:, 1::4], im_shape[0] - 1), 0) 85 | # x2 < im_shape[1] 86 | boxes[:, 2::4] = np.maximum(np.minimum(boxes[:, 2::4], im_shape[1] - 1), 0) 87 | # y2 < im_shape[0] 88 | boxes[:, 3::4] = np.maximum(np.minimum(boxes[:, 3::4], im_shape[0] - 1), 0) 89 | return boxes 90 | -------------------------------------------------------------------------------- /util/data_util.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | import sys 4 | import glob 5 | import cv2 6 | import numpy as np 7 | # from vgg16 import vgg16 8 | # from input_kitti import * 9 | from parse_xml import parseXML 10 | from bbox_transform import * 11 | from base_vgg16 import Vgg16 12 | import tensorflow as tf 13 | from bbox_overlap import bbox_overlaps 14 | 15 | def create_labels(resized_images, resize_scales, feature_scale=1./16): 16 | """create labels for classification and regression 17 | 1. get bbox from resized images 18 | 2. from bbox, create input labels for regression 19 | 3. get GroundTruth Bounding Boxes 20 | 4. calculate IOU for training 21 | 5. divide labels into training sets and trush 22 | 6. 
23 | """ 24 | return labels 25 | 26 | def create_rois(labels, feature_scale=1./16): 27 | """create rois from labels""" 28 | return rois 29 | 30 | def nms(): 31 | return bboxes 32 | 33 | def process(image_dir, label_dir, num_of_rois, batch_size, min_size): 34 | # model Definition 35 | # loss function 36 | dataset_img_list, dataset_pred_bbox_list, g_bboxes, get_Image_Roi_All(image_dir, label_dir, min_size) 37 | # batch_imgs, batch_rois, batch_g_bboxes = select_inputs_from_datasets(dataset_img_list, dataset_pred_bbox_list, g_bboxes, batch_size) 38 | for batch_imgs, batch_rois, batch_g_bboxes in select_inputs_from_datasets(dataset_img_list, dataset_pred_bbox_list, g_bboxes, batch_size): 39 | pass 40 | # training 41 | # test 42 | # validation 43 | 44 | def get_Image_Roi_All(image_dir, label_dir, min_size): 45 | """Get Images and ROIs of All Datasets. 46 | # Args: 47 | image_dir (str): path of image directory. 48 | label_dir (str): path of label's xml directory. 49 | num_of_rois(int): Number of ROIs in a image. 50 | # Returns: 51 | images (list): ndarray Images of datasets. 52 | pred_bboxes(ndarray): rescaled bbox Label [0, x, y, w, h] 53 | """ 54 | # 車が含まれている画像のみラベルと一緒に読み込む 55 | image_pathlist = 0 #load_for_detection(label_dir) 56 | g_bboxes = 0 #load_for_detection(label_dir) #TODO: [Datasets, x, y, w, h] 57 | dataset_img_list = [] # len(dataset_img_list) == Number of Datasets Images 58 | dataset_pred_bbox_list = [] # len(dataset_pred_bbox_list) == Number of (num_of_rois * num of images) 59 | # Preprocess Ground Truth ROIs. shape is [Num of ROIs * batch_size, x, y, w, h, 0, 1] 60 | g_bboxes = [] 61 | # shape is [batch_channel, x, y, w, h] 62 | image_pathlist = glob.glob(image_dir) 63 | label_pathlist = glob.glob(label_dir) 64 | image_pathlist.sort() 65 | label_pathlist.sort() 66 | 67 | for index, (image_path, label_path) in enumerate(zip(image_pathlist, label_pathlist)): 68 | if index == 10: 69 | break 70 | img = cv2.imread(image_path) 71 | label = read_label_from_txt(label_path) 72 | if label is None: 73 | continue 74 | # ここでは、IOUを計算していないので、予測のbounding boxは絞らない 75 | # なので、数多くのbounding boxが存在していることになるが、メモリが許す限り確保する 76 | p_bbox_candicate = pred_bboxes(img, min_size, index) 77 | img, im_scale = preprocess_imgs(img) 78 | p_bbox_candicate = unique_bboxes(p_bbox_candicate, im_scale, feature_scale=1./16) 79 | overlaps = bbox_overlaps(p_bbox_candicate[:, 1:], label) 80 | print label 81 | print p_bbox_candicate[0] 82 | print overlaps[overlaps > 0.5] 83 | print overlaps.shape 84 | print 85 | dataset_img_list.append(img) 86 | dataset_pred_bbox_list.append(p_bbox_candicate) 87 | g_bboxes.append(label) 88 | 89 | dataset_pred_bbox_list = np.array(dataset_pred_bbox_list) 90 | g_bboxes = np.array(g_bboxes) 91 | print dataset_img_list[1].shape, dataset_pred_bbox_list[0].shape, g_bboxes[0].shape 92 | print dataset_pred_bbox_list[1].shape 93 | print dataset_pred_bbox_list[2].shape 94 | g_bboxes = create_bbox_regression_label(dataset_pred_bbox_list, g_bboxes) 95 | return np.array(dataset_img_list), np.array(dataset_pred_bbox_list), g_bboxes 96 | 97 | 98 | def select_inputs_from_datasets(dataset_img_list, dataset_pred_bbox_list, g_bboxes, batch_size): 99 | """ 100 | # Args: 101 | dataset_img_list (ndarray): ndarray Images in datasets. 
102 | dataset_pred_bbox_list(ndarray): rescaled bbox Label [0, x, y, w, h] 103 | shape is [batch, num_of_rois, 5] 104 | g_bboxes (ndarray): GroundTruth Bounding Box with Class Label 105 | shape is [batch, 6*max_label_num] 106 | label is [x, y, w, h, car, background] 107 | batch_size (int): batch size for training 108 | # Returns: 109 | batch_imgs (ndarray): input batch images for Network. Shape is [Batch Size, shape] 110 | batch_p_bboxes(ndarray): input ROIs for Network. Shape is [Num of ROIs*Batch size] 111 | batch_g_bboxes(ndarray): input GroundTruth Bounding Box for Network. 112 | Shape is [Num of ROIs*Batch Size] 113 | """ 114 | perm = np.random.permutation(len(dataset_img_list)) 115 | batches = [perm[i * batch_size:(i + 1) * batch_size] \ 116 | for i in range(len(dataset_img_list) // batch_size)] 117 | for batch in batches: 118 | batch_imgs = dataset_img_list[batch] 119 | batch_p_bboxes = dataset_pred_bbox_list[batch] 120 | batch_g_bboxes = g_bboxes[batch] 121 | # この時点でbatch_p_bboxes, g_bboxesは、batch毎にListでまとめられていそう? #TODO 122 | # TODO: Batch毎にLabelの形にする。それをcalculate IOUに入れて、最終的な形をvstackすれば全体のLabelが得られる 123 | 124 | # Flip Conversion 125 | # batch_imgs, batch_p_bboxes, batch_g_bboxes = flip_conversion(batch_imgs, batch_p_bboxes, batch_g_bboxes) 126 | batch_imgs = convert_imgslist_to_ndarray(batch_imgs) 127 | # calculate IOU between pred_roi_candicate, ground truth bounding box 128 | # この時点でbatch_g_bboxesはLabelの形になっていると想定 129 | batch_p_bboxes, batch_g_bboxes = calculate_IOU(batch_p_bboxes, batch_g_bboxes) 130 | yield batch_imgs, batch_rois, batch_g_bboxes 131 | 132 | def convert_pred_bbox_to_roi(batch_bbox, feature_scale=1./16): 133 | pass 134 | 135 | def calculate_IOU(batch_roi, batch_g_bboxes, fg_thres=0.5, bg_thres_max=0.5, bg_thres_min=0.1): 136 | """各画像の全ての車のラベルに対して、IOUを計算する 137 | そのために、batch_roi, batch_g_bboxesをforループで回し、 138 | """ 139 | area = batch_g_bboxes[:, 3] * batch_g_bboxes[: 4] 140 | w = np.maximum(batch_roi[:, 0], batch_g_bboxes[:, 0]) - np.minimum(batch_roi[:, 1], batch_g_bboxes[:, 1]) 141 | w_id = np.where(w > 0)[0] 142 | h = np.minimum(batch_roi[w_id][:, 0], batch_g_bboxes[w_id][:, 0]) - np.minimum(batch_roi[w_id][:, 1], batch_g_bboxes[w_id][:, 1]) 143 | h_id = np.where(h > 0)[0] 144 | IOU = float(w[w_id][h_id] * h[w_id][h_id]) / area[w_id][h_id] 145 | fg_rois = np.where(IOU >= fg_thres)[0] 146 | bg_rois1 = np.where(IOU < bg_thres_max)[0] 147 | bg_rois2 = np.where(IOU[bg_rois] >= bg_thres_min)[0] 148 | fg_index = w_id[h_id][fg_rois] 149 | bg_index = w_id[h_id][bg_rois1][bg_rois2] 150 | index = np.hstack((fg_index, bg_index)) 151 | return batch_rois[index], batch_g_bboxes[index] 152 | 153 | def convert_imgslist_to_ndarray(images): 154 | """Convert a list of images into a network input. 155 | Assumes images are already prepared (means subtracted, BGR order, ...). 156 | 157 | In this stage, the shape of images are different 158 | """ 159 | max_shape = np.array([im.shape for im in images]).max(axis=0) 160 | num_images = len(images) 161 | blob = np.zeros((num_images, max_shape[0], max_shape[1], 3), 162 | dtype=np.float32) 163 | for i in xrange(num_images): 164 | im = images[i] 165 | blob[i, 0:im.shape[0], 0:im.shape[1], :] = im 166 | return blob 167 | 168 | def flip_conversion(batch_imgs, batch_rois, batch_g_bboxes, batch_size): 169 | return batch_imgs, batch_rois, batch_g_bboxes 170 | 171 | def preprocess_imgs(im, pixel_means=np.array([103.939, 116.779, 123.68]), target_size=600, max_size=1000): 172 | """Mean subtract and scale an image for use in a blob. 
173 | If you want to Data Augmentation, please edit this function 174 | """ 175 | im = im.astype(np.float32, copy=False) 176 | # if np.random.randint(2): 177 | # im = im[:, ::-1] 178 | im -= pixel_means 179 | im_shape = im.shape 180 | im_size_min = np.min(im_shape[0:2]) 181 | im_size_max = np.max(im_shape[0:2]) 182 | im_scale = float(target_size) / float(im_size_min) 183 | # Prevent the biggest axis from being more than MAX_SIZE 184 | if np.round(im_scale * im_size_max) > max_size: 185 | im_scale = float(max_size) / float(im_size_max) 186 | im = cv2.resize(im, None, None, fx=im_scale, fy=im_scale, 187 | interpolation=cv2.INTER_LINEAR) 188 | return im, im_scale 189 | 190 | def data_generator(imgs, rois, labels): 191 | """data generator for network inputs""" 192 | yield batch_x, batch_rois, batch_labels 193 | 194 | def unique_bboxes(rects, im_scale, feature_scale=1./16): 195 | """Get Bounding Box from Original Image. 196 | 197 | # Args: 198 | orig_img (ndarray): original image. 3 dimensional array. 199 | min_size (tuple): minimum size of bounding box. 200 | feature_scale(float): scale of feature map. 2 ** (num of pooling layer) 201 | 202 | """ 203 | rects *= im_scale 204 | v = np.array([1, 1e3, 1e6, 1e9, 1e12]) 205 | hashes = np.round(rects * feature_scale).dot(v) 206 | _, index, inv_index = np.unique(hashes, return_index=True, 207 | return_inverse=True) 208 | rects = rects[index, :] 209 | return rects 210 | 211 | def pred_bboxes(orig_img, min_size, index): 212 | rects = [] 213 | dlib.find_candidate_object_locations(orig_img, rects, min_size=min_size) 214 | rects = [[0, d.left(), d.top(), d.right(), d.bottom()] for d in rects] 215 | rects = np.asarray(rects, dtype=np.float64) 216 | return rects 217 | -------------------------------------------------------------------------------- /util/generate_anchors.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Faster R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick and Sean Bell 6 | # -------------------------------------------------------- 7 | 8 | import numpy as np 9 | 10 | # Verify that we compute the same anchors as Shaoqing's matlab implementation: 11 | # 12 | # >> load output/rpn_cachedir/faster_rcnn_VOC2007_ZF_stage1_rpn/anchors.mat 13 | # >> anchors 14 | # 15 | # anchors = 16 | # 17 | # -83 -39 100 56 18 | # -175 -87 192 104 19 | # -359 -183 376 200 20 | # -55 -55 72 72 21 | # -119 -119 136 136 22 | # -247 -247 264 264 23 | # -35 -79 52 96 24 | # -79 -167 96 184 25 | # -167 -343 184 360 26 | 27 | #array([[ -83., -39., 100., 56.], 28 | # [-175., -87., 192., 104.], 29 | # [-359., -183., 376., 200.], 30 | # [ -55., -55., 72., 72.], 31 | # [-119., -119., 136., 136.], 32 | # [-247., -247., 264., 264.], 33 | # [ -35., -79., 52., 96.], 34 | # [ -79., -167., 96., 184.], 35 | # [-167., -343., 184., 360.]]) 36 | 37 | def generate_anchors(base_size=16, ratios=[0.5, 1, 2], 38 | scales=2**np.arange(3, 6)): 39 | """ 40 | Generate anchor (reference) windows by enumerating aspect ratios X 41 | scales wrt a reference (0, 0, 15, 15) window. 
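    Example (added for illustration; uses the defaults base_size=16,
    ratios=[0.5, 1, 2] and scales=2**np.arange(3, 6)):

        >>> anchors = generate_anchors()
        >>> anchors.shape        # len(ratios) * len(scales) anchors
        (9, 4)

    Each row is one reference window (x1, y1, x2, y2); the expected values
    for these defaults are listed in the comment block above.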
42 | """ 43 | 44 | base_anchor = np.array([1, 1, base_size, base_size]) - 1 45 | ratio_anchors = _ratio_enum(base_anchor, ratios) 46 | anchors = np.vstack([_scale_enum(ratio_anchors[i, :], scales) 47 | for i in xrange(ratio_anchors.shape[0])]) 48 | return anchors 49 | 50 | def _whctrs(anchor): 51 | """ 52 | Return width, height, x center, and y center for an anchor (window). 53 | """ 54 | 55 | w = anchor[2] - anchor[0] + 1 56 | h = anchor[3] - anchor[1] + 1 57 | x_ctr = anchor[0] + 0.5 * (w - 1) 58 | y_ctr = anchor[1] + 0.5 * (h - 1) 59 | return w, h, x_ctr, y_ctr 60 | 61 | def _mkanchors(ws, hs, x_ctr, y_ctr): 62 | """ 63 | Given a vector of widths (ws) and heights (hs) around a center 64 | (x_ctr, y_ctr), output a set of anchors (windows). 65 | """ 66 | 67 | ws = ws[:, np.newaxis] 68 | hs = hs[:, np.newaxis] 69 | anchors = np.hstack((x_ctr - 0.5 * (ws - 1), 70 | y_ctr - 0.5 * (hs - 1), 71 | x_ctr + 0.5 * (ws - 1), 72 | y_ctr + 0.5 * (hs - 1))) 73 | return anchors 74 | 75 | def _ratio_enum(anchor, ratios): 76 | """ 77 | Enumerate a set of anchors for each aspect ratio wrt an anchor. 78 | """ 79 | 80 | w, h, x_ctr, y_ctr = _whctrs(anchor) 81 | size = w * h 82 | size_ratios = size / ratios 83 | ws = np.round(np.sqrt(size_ratios)) 84 | hs = np.round(ws * ratios) 85 | anchors = _mkanchors(ws, hs, x_ctr, y_ctr) 86 | return anchors 87 | 88 | def _scale_enum(anchor, scales): 89 | """ 90 | Enumerate a set of anchors for each scale wrt an anchor. 91 | """ 92 | 93 | w, h, x_ctr, y_ctr = _whctrs(anchor) 94 | ws = w * scales 95 | hs = h * scales 96 | anchors = _mkanchors(ws, hs, x_ctr, y_ctr) 97 | return anchors 98 | 99 | if __name__ == '__main__': 100 | import time 101 | t = time.time() 102 | a = generate_anchors() 103 | print time.time() - t 104 | print a 105 | from IPython import embed; embed() 106 | -------------------------------------------------------------------------------- /util/input_kitti.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | 4 | import sys 5 | import os 6 | import numpy as np 7 | import cv2 8 | import glob 9 | import math 10 | from parse_xml import parseXML 11 | from data_util import * 12 | import matplotlib.pyplot as plt 13 | 14 | def read_label_from_txt(label_path): 15 | """From label text file, Read bounding box 16 | Each text file corresponds to one image. 17 | 18 | # Returns: 19 | bounding_box(list): List of Bounding Boxes in one image 20 | """ 21 | bounding_box = [] 22 | with open(label_path, "r") as f: 23 | labels = f.read().split("\n") 24 | for label in labels: 25 | label = label.split(" ") 26 | if label[0] == ("Car" or "Van"): # or "Truck" 27 | bounding_box.append(label[4:8]) 28 | 29 | if bounding_box: 30 | return np.array(bounding_box, dtype=np.float64) 31 | else: 32 | return None 33 | 34 | def select_inputs_from_datasets(dataset_img_list, g_boxes, batch_size): 35 | """ 36 | # Args: 37 | dataset_img_list (ndarray): ndarray Images in datasets. 38 | g_boxes (ndarray): GroundTruth Bounding Box with Class Label 39 | shape is [batch, 6*max_label_num] 40 | label is [x, y, w, h] 41 | batch_size (int): batch size for training 42 | # Returns: 43 | batch_imgs (ndarray): input batch images for Network. Shape is [Batch Size, shape] 44 | batch_g_boxes(ndarray): input GroundTruth Bounding Box for Network. 
45 |                               Shape is [Num of ROIs*Batch Size]
46 |     """
47 |     perm = np.random.permutation(len(dataset_img_list))
48 |     batches = [perm[i * batch_size:(i + 1) * batch_size] \
49 |                for i in range(len(dataset_img_list) // batch_size)]
50 |     for batch in batches:
51 |         batch_imgs = dataset_img_list[batch]
52 |         batch_g_boxes = g_boxes[batch]
53 |         # Flip Conversion
54 |         # batch_imgs, batch_p_bboxes, batch_g_boxes = flip_conversion(batch_imgs, batch_p_bboxes, batch_g_boxes)
55 |         batch_imgs, batch_g_boxes = convert_imgslist_to_ndarray(batch_imgs, batch_g_boxes)
56 |         yield batch_imgs, batch_g_boxes
57 | 
58 | # def convert_imgslist_to_ndarray(images, batch_g_boxes):
59 | #     """Convert a list of images into a network input.
60 | #     Assumes images are already prepared (means subtracted, BGR order, ...).
61 | #
62 | #     In this stage, the shape of images are different
63 | #     """
64 | #     max_shape = np.array([im.shape for im in images]).max(axis=0)
65 | #     num_images = len(images)
66 | #     blob = np.zeros((num_images, max_shape[0], max_shape[1], 3),
67 | #                     dtype=np.float32)
68 | #     for i in xrange(num_images):
69 | #         if np.random.randint(2):
70 | #             blob[i, 0:im.shape[0], 0:im.shape[1], :] = images[i][:, ::-1]
71 | #             batch_g_boxes[i][:, 0] -= (max_shape[1]-1)
72 | #             batch_g_boxes[i][:, 1] -= (max_shape[1]-1)
73 | #             batch_g_boxes[i][:, 2] -= (max_shape[1]-1)
74 | #             batch_g_boxes[i][:, 3] -= (max_shape[1]-1)
75 | #         else:
76 | #             blob[i, 0:im.shape[0], 0:im.shape[1], :] = images[i]
77 | #     return blob, batch_g_boxes
78 | 
79 | def get_pathlist(image_dir, label_dir):
80 |     image_pathlist = 0 #load_for_detection(label_dir)
81 |     dataset_img_list = [] # len(dataset_img_list) == Number of Datasets Images
82 |     # Preprocess Ground Truth ROIs. shape is [Num of ROIs * batch_size, x, y, w, h, 0, 1]
83 |     g_bboxes = []
84 |     # shape is [batch_channel, x, y, w, h]
85 |     image_pathlist = glob.glob(image_dir)
86 |     label_pathlist = glob.glob(label_dir)
87 |     image_pathlist.sort()
88 |     label_pathlist.sort()
89 |     return np.array(image_pathlist), np.array(label_pathlist)
90 | 
91 | def generator__Image_and_label(image_pathlist, label_pathlist, batch_size=32):
92 |     """Get Images and ROIs of All Datasets.
93 |     # Args:
94 |         image_pathlist (ndarray): paths of image files.
95 |         label_pathlist (ndarray): paths of label text files.
96 |         batch_size(int): Batch Size for network input.
97 |     # Returns:
98 |         images (list): ndarray Images of datasets.
99 |         g_bboxes(ndarray): rescaled bbox Label. Shape is [Batch, ?, 4] (x, y, w, h)
100 |     """
101 |     iter_num = image_pathlist.shape[0] // batch_size
102 |     for it in range(iter_num):
103 |         dataset_img_list = [] # len(dataset_img_list) == Number of Datasets Images
104 |         g_bboxes = []
105 |         for (image_path, label_path) in zip(image_pathlist[it*batch_size:(it+1)*batch_size], label_pathlist[it*batch_size:(it+1)*batch_size]):
106 |             img = cv2.imread(image_path)
107 |             label = read_label_from_txt(label_path)
108 |             if label is None:
109 |                 continue
110 |             img, im_scale = preprocess_imgs(img)
111 |             dataset_img_list.append(img)
112 |             g_bboxes.append(label)
113 |         dataset_img_list = convert_imgslist_to_ndarray(dataset_img_list)
114 |         yield np.array(dataset_img_list), np.array(g_bboxes)
115 | 
116 | def get_ALL_Image(image_dir, label_dir):
117 |     """Get Images and ROIs of All Datasets.
118 |     # Args:
119 |         image_dir (str): path of image directory.
120 |         label_dir (str): path of label directory.
121 |         num_of_rois(int): Number of ROIs in an image.
122 |     # Returns:
123 |         images (list): ndarray Images of datasets.
124 |         g_bboxes(ndarray): rescaled bbox Label. Shape is [Batch, ?, 4] (x, y, w, h)
125 |     """
126 |     import time
127 |     start = time.time()
128 |     # Load only the images that contain cars, together with their labels
129 |     image_pathlist = 0 #load_for_detection(label_dir)
130 |     dataset_img_list = [] # len(dataset_img_list) == Number of Datasets Images
131 |     # Preprocess Ground Truth ROIs. shape is [Num of ROIs * batch_size, x, y, w, h, 0, 1]
132 |     g_bboxes = []
133 |     # shape is [batch_channel, x, y, w, h]
134 |     image_pathlist = glob.glob(image_dir)
135 |     label_pathlist = glob.glob(label_dir)
136 |     image_pathlist.sort()
137 |     label_pathlist.sort()
138 | 
139 |     for (image_path, label_path) in zip(image_pathlist, label_pathlist):
140 |         img = cv2.imread(image_path)
141 |         label = read_label_from_txt(label_path)
142 |         if label is None:
143 |             continue
144 |         img, im_scale = preprocess_imgs(img)
145 |         dataset_img_list.append(img)
146 |         g_bboxes.append(label)
147 | 
148 |     print time.time() - start
149 |     return np.array(dataset_img_list), np.array(g_bboxes)
150 | 
--------------------------------------------------------------------------------
/util/model.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | 
3 | import sys
4 | import glob
5 | import cv2
6 | import dlib
7 | import numpy as np
8 | # from vgg16 import vgg16
9 | from input_kitti import *
10 | from util import *
11 | from parse_xml import parseXML
12 | from base_vgg16 import Vgg16
13 | import tensorflow as tf
14 | from network_util import *
15 | # from utility.image.data_augmentation.flip import Flip
16 | sys.path.append("/Users/tsujiyuuki/env_python/code/my_code/Data_Augmentation")
17 | 
18 | """
19 | ・collect dataset of cars
20 | ・Preprocessing BBOX and Label for training
21 | ・try roi_pooling layer
22 | ・Extract ROI using mitmul tools
23 | ・NMS
24 | """
25 | 
26 | """Flow of Faster RCNN
27 | ###############################################################################
28 | In this stage, create input images and ROI labels
29 | 
30 | 1. input batch images and GroundTruth BBox from datasets *folder name, batch size
31 |    Image shape is [batch size, width, height, channel], tf.float32, vgg normalized, bgr
32 |    Bounding Box shape is [batch size, center_x, center_y, width, height]
33 | 
34 | 2. get candidate bounding boxes from images.
35 | 
36 |    # Implemented
37 | 3. resize input images to the input size *size of resize if needed.
38 |    If this is done, the bounding boxes must be adjusted accordingly,
39 |    for both candidate and GroundTruth bounding boxes.
40 |    In the paper, the image size is in [600, 1000].
41 |    In this implementation, the input image has a dynamic shape within [600, 1000].
42 | 
43 | 4. convert candidate bounding boxes to ROI labels.
44 | 
45 | 5. calculate IOU between ROI labels and GroundTruth labels.
46 |    IOU is Intersection Over Union.
47 | 
48 | 6. Select bounding boxes by IOU.
49 |    IOU > 0.5 is a correct label, IOU in [0.1, 0.5) is a false label (background).
50 |    Correct labels are 25%, background labels are 75%.
51 |    The number of labels is 128 and the batch size is 2, so each image has 64 ROIs.
52 | 
53 | ###############################################################################
54 | In this stage, Calculate Loss
55 | 
56 | 7. Input data to the ROI Pooling Layer is the Conv5_3 feature map and the ROIs.
57 |    Input shape is Feature map (batch, width, height, 512), ROIs (Num of ROIs, 5)
58 |    ROIs, e.g. [0, left, top, right, bottom]. The first element is the batch index.
59 | 
60 | 8.
   Through the ROI Pooling Layer, the output shape is [Num of ROIs, 7, 7, 512]
61 | 
62 | 9. Reshape it to [Num of ROIs, -1], and then connect to Fully Connected Layers.
63 | 
64 | 10. The output layer has two sections: one is the class prediction, the other is its bounding box prediction.
65 |    class prediction shape is [Num of ROIs, Num of Class + 1]
66 |    bounding box prediction shape is [Num of ROIs, 4 * (Num of Class + 1)]
67 | 
68 | 11. Loss Function
69 |    Encode the bounding box value [center_x, center_y, w, h] into
70 |    [(GroundTruth x - pred_x) / pred_w, (GroundTruth y - pred_y) / pred_h, log(GroundTruth w / pred_w), log(GroundTruth h / pred_h)]
71 |    Class prediction uses softmax with cross-entropy loss.
72 |    Bounding box prediction uses smooth L1 loss.
###############################################################################
74 | In this stage, describe the datasets.
75 | 1. PASCAL VOC2007
76 | 2. KITTI Datasets
77 | 3. Udacity Datasets
78 | """
79 | 
80 | # TODO: decide whether the whole dataset can be loaded into memory at once, or whether a generator should be used.
81 | def loss(obj_class, bbox_regression, g_obj_class, g_bbox_regression):
82 |     """Calculate Class Loss and Bounding Regression Loss.
83 | 
84 |     # Args:
85 |         obj_class: Prediction of object class. Shape is [ROIs*Batch_Size, 2]
86 |         bbox_regression: Prediction of bounding box. Shape is [ROIs*Batch_Size, 4]
87 |     """
88 |     # Minimal sketch (assumption, not a final implementation): cross-entropy on the
    |     # softmax class output and smooth L1 on the box regression (step 11 above).
    |     cls_loss = tf.reduce_mean(-tf.reduce_sum(g_obj_class * tf.log(obj_class + 1e-8), axis=1))
    |     diff = tf.abs(g_bbox_regression - bbox_regression)
    |     smooth_l1 = tf.where(diff < 1.0, 0.5 * tf.square(diff), diff - 0.5)
    |     bbox_loss = tf.reduce_mean(tf.reduce_sum(smooth_l1, axis=1))
    |     return cls_loss + bbox_loss
89 | 
90 | def fast_rcnn(sess, rois, roi_size=(7, 7), vggpath=None, image_shape=(300, 300), \
91 |               is_training=None, use_batchnorm=False, activation=tf.nn.relu, num_of_rois=128):
92 |     """Model Definition of Fast RCNN
93 |     In thesis, Roi Size is (7, 7), channel is 512
94 |     """
95 |     images = tf.placeholder(tf.float32, [None, image_shape[0], image_shape[1], 3])  # placeholder input (assumed) so the VGG graph can be built
97 | 
98 |     vgg = Vgg16(vgg16_npy_path=vggpath)
99 |     vgg.build_model(images)
100 |     feature_map = vgg.conv5_3 # (batch, kernel, kernel, channel)
101 | 
102 |     with tf.variable_scope("fast_rcnn"):
103 |         # roi shape [Num of ROIs, X, Y, W, H]
104 |         roi_layer = roi_pooling(feature_map, rois, roi_size[0], roi_size[1])
105 |         # input_shape [num_of_rois, channel, roi size, roi size]
106 |         pool_5 = tf.reshape(roi_layer, [num_of_rois, roi_size[0]*roi_size[1]*512])
107 |         fc6 = fully_connected(pool_5, [roi_size[0]*roi_size[1]*512, 4096], name="fc6", is_training=is_training)
108 |         fc7 = fully_connected(fc6, [4096, 4096], name="fc7", is_training=is_training)
109 |         # output shape [num_of_rois, 2]
110 |         obj_class = tf.nn.softmax(fully_connected(fc7, [4096, 2], name="fc_class", activation=None, use_batchnorm=None), dim=-1)
111 |         # output shape [num_of_rois, 8]
112 |         bbox_regression = fully_connected(fc7, [4096, 8], name="fc_bbox", activation=None, use_batchnorm=None)
    |     # return both heads so a trainer can build the loss (added; assumed)
    |     return obj_class, bbox_regression
113 | 
114 | 
115 | class ExtendedLayer(object):
116 |     def __init__(self):
117 |         pass
118 | 
119 |     def build_model(self, input_layer, use_batchnorm=False, is_training=True, atrous=False, \
120 |                     rate=1, activation=tf.nn.relu, implement_atrous=False, lr_mult=1):
121 |         if implement_atrous:
122 |             if atrous:
123 |                 self.pool_5 = maxpool2d(input_layer, kernel=3, stride=1, name="pool5", padding="SAME")
124 |             else:
125 |                 self.pool_5 = maxpool2d(input_layer, kernel=2, stride=2, name="pool5", padding="SAME") #TODO: padding is valid or same
126 | 
127 |             kernel_size = 3
128 |             if atrous:
129 |                 rate *= 6
130 |                 # pad = int(((kernel_size + (rate - 1) * (kernel_size - 1)) - 1) / 2)
131 |                 self.conv_6 = convBNLayer(self.pool_5, use_batchnorm, is_training, 512, 1024, kernel_size, 1, \
132 |                                           name="conv_6", activation=tf.nn.relu, atrous=True,
rate=rate) 133 | else: 134 | rate *= 3 135 | # pad = int(((kernel_size + (rate - 1) * (kernel_size - 1)) - 1) / 2) 136 | self.conv_6 = convBNLayer(self.pool_5, use_batchnorm, is_training, 512, 1024, kernel_size, 1, \ 137 | name="conv_6", activation=tf.nn.relu, atrous=True, rate=rate) 138 | else: 139 | self.pool_5 = maxpool2d(input_layer, kernel=3, stride=1, name="pool5", padding="SAME") 140 | self.conv_6 = convBNLayer(self.pool_5, use_batchnorm, is_training, 512, 1024, 3, 1, \ 141 | name="conv_6", activation=tf.nn.relu, atrous=False, rate=rate) 142 | 143 | self.conv_7 = convBNLayer(self.conv_6, use_batchnorm, is_training, 1024, 1024, 1, 1, name="conv_7", activation=activation) 144 | self.conv_8_1 = convBNLayer(self.conv_7, use_batchnorm, is_training, 1024, 256, 1, 1, name="conv_8_1", activation=activation) 145 | self.conv_8_2 = convBNLayer(self.conv_8_1, use_batchnorm, is_training, 256, 512, 3, 2, name="conv_8_2", activation=activation) 146 | self.conv_9_1 = convBNLayer(self.conv_8_2, use_batchnorm, is_training, 512, 128, 1, 1, name="conv_9_1", activation=activation) 147 | self.conv_9_2 = convBNLayer(self.conv_9_1, use_batchnorm, is_training, 128, 256, 3, 2, name="conv_9_2", activation=activation) 148 | self.conv_10_1 = convBNLayer(self.conv_9_2, use_batchnorm, is_training, 256, 128, 1, 1, name="conv_10_1", activation=activation) 149 | self.conv_10_2 = convBNLayer(self.conv_10_1, use_batchnorm, is_training, 128, 256, 3, 1, name="conv_10_2", activation=activation, padding="VALID") 150 | self.conv_11_1 = convBNLayer(self.conv_10_2, use_batchnorm, is_training, 256, 128, 1, 1, name="conv_11_1", activation=activation) 151 | self.conv_11_2 = convBNLayer(self.conv_11_1, use_batchnorm, is_training, 128, 256, 3, 1, name="conv_11_2", activation=activation, padding="VALID") 152 | 153 | def ssd_model(sess, vggpath=None, image_shape=(300, 300), \ 154 | is_training=None, use_batchnorm=False, activation=tf.nn.relu, \ 155 | num_classes=0, normalization=[], atrous=False, rate=1, implement_atrous=False): 156 | """ 157 | 1. input RGB images and labels 158 | 2. edit images like [-1, image_shape[0], image_shape[1], 3] 159 | 3. Create Annotate Layer? 160 | 4. input x into Vgg16 architecture(pretrained) 161 | 5. 
162 | """ 163 | images = tf.placeholder(tf.float32, [None, image_shape[0], image_shape[1], 3]) 164 | vgg = Vgg(vgg16_npy_path=vggpath) 165 | vgg.build_model(images) 166 | 167 | with tf.variable_scope("extended_model") as scope: 168 | phase_train = tf.placeholder(tf.bool, name="phase_traing") if is_training else None 169 | extended_model = ExtendedLayer() 170 | extended_model.build_model(vgg.conv5_3, use_batchnorm=use_batchnorm, atrous=atrous, rate=rate, \ 171 | is_training=phase_train, activation=activation, lr_mult=1, implement_atrous=implement_atrous) 172 | 173 | # with tf.variable_scope("multibox_layer"): 174 | # from_layers = [vgg.conv4_3, extended_model.conv_7, extended_model.conv_8_2, 175 | # extended_model.conv_9_2, extended_model.conv_10_2, extended_model.conv_11_2] 176 | # multibox_layer = MultiboxLayer() 177 | # multibox_layer.build_model(from_layers, num_classes=0, normalization=normalization) 178 | # 179 | initialized_var = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, scope="extended_model") 180 | sess.run(tf.variables_initializer(initialized_var)) 181 | 182 | return extended_model 183 | 184 | class MultiboxLayer(object): 185 | def __init__(self): 186 | pass 187 | 188 | # TODO: validate this is correct or not 189 | def l2_normalization(self, input_layer, scale=20): 190 | return tf.nn.l2_normalize(input_layer, dim) * scale 191 | 192 | def createMultiBoxHead(self, from_layers, num_classes=0, normalizations=[], \ 193 | use_batchnorm=False, is_training=None, activation=None, \ 194 | kernel_size=3, prior_boxes=[], kernel_sizes=[]): 195 | """ 196 | # Args: 197 | from_layers(list) : list of input layers 198 | num_classes(int) : num of label's classes that this architecture detects 199 | normalizations(list): list of scale for normalizations 200 | if value <= 0, not apply normalization to the specified layer 201 | """ 202 | assert num_classes > 0, "num of label's class must be positive number" 203 | if normalizations: 204 | assert len(from_layers) == len(normalizations), "from_layers and normalizations should have same length" 205 | 206 | num_list = len(from_layers) 207 | for index, kernel_size, layer, norm in zip(range(num_list), kernel_sizes, from_layers, normalizations): 208 | input_layer = layer 209 | with tf.variable_scope("layer" + str(index+1)): 210 | if norm > 0: 211 | scale = tf.get_variable("scale", trainable=True, initializer=tf.constant(norm))#initialize = norm 212 | input_layer = self.l2_normalization(input_layer, scale) 213 | 214 | # create location prediction layer 215 | loc_output_dim = 4 * prior_num # (center_x, center_y, width, height) 216 | location_layer = convBNLayer(input_layer, use_batchnorm, is_training, input_layer.get_shape()[0], loc_output_dim, kernel_size, 1, name="loc_layer", activation=activation) 217 | # from shape : (batch, from_kernel, from_kernel, loc_output_dim) 218 | # to : (batch, ) 219 | location_pred = tf.reshape(location_layer, [-1, ]) 220 | 221 | # create confidence prediction layer 222 | conf_output_dim = num_classes * prior_num 223 | confidence_layer = convBNLayer(input_layer, use_batchnorm, is_training, input_layer.get_shape()[0], conf_output_dim, kernel_size, 1, name="conf_layer", activation=activation) 224 | confidence_pred = tf.reshape(confidence_pred, [-1, ]) 225 | 226 | # Flatten each output 227 | 228 | # append result of each results 229 | 230 | return None 231 | 232 | if __name__ == '__main__': 233 | import sys 234 | import matplotlib.pyplot as plt 235 | from PIL import Image as im 236 | 
sys.path.append('/home/katou01/code/grid/DataAugmentation') 237 | # from resize import resize 238 | 239 | image_dir = "/home/katou01/download/training/image_2/*.png" 240 | label_dir = "/home/katou01/download/training/label_2/*.txt" 241 | get_Image_Roi_All(image_dir, label_dir, 80) 242 | # 243 | # image = im.open("./test_images/test1.jpg") 244 | # image = np.array(image, dtype=np.float32) 245 | # new_image = image[np.newaxis, :] 246 | # batch_image = np.vstack((new_image, new_image)) 247 | # batch_image = resize(batch_image, size=(300, 300)) 248 | # 249 | # with tf.Session() as sess: 250 | # model = ssd_model(sess, batch_image, activation=None, atrous=False, rate=1, implement_atrous=False) 251 | # print(vars(model)) 252 | # # tf.summary.scalar('model', model) 253 | -------------------------------------------------------------------------------- /util/network_util.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | import sys 4 | import numpy as np 5 | from base_vgg16 import Vgg16 6 | import tensorflow as tf 7 | 8 | def fully_connected(input_layer, shape, name="", is_training=True, use_batchnorm=True, activation=tf.nn.relu): 9 | with tf.variable_scope("fully" + name): 10 | kernel = tf.get_variable("weights", shape=shape, \ 11 | dtype=tf.float32, initializer=tf.truncated_normal_initializer(stddev=0.01)) 12 | fully = tf.matmul(input_layer, kernel) 13 | if activation: 14 | fully = activation(fully) 15 | if use_batchnorm: 16 | fully = batch_norm(fully, is_training) 17 | return fully 18 | 19 | def vgg_fully(input_layer, shape, name="", activation=tf.nn.relu): 20 | with tf.variable_scope(name): 21 | kernel = tf.get_variable("weights", shape=shape, \ 22 | dtype=tf.float32, initializer=tf.truncated_normal_initializer(stddev=0.01)) 23 | b = tf.get_variable("biases", shape=[shape[1]], initializer=tf.constant_initializer(0.0)) 24 | fully = tf.matmul(input_layer, kernel) 25 | fully = tf.nn.bias_add(fully, b) 26 | if activation: 27 | fully = activation(fully) 28 | return fully 29 | 30 | def batch_norm(inputs, phase_train, decay=0.9, eps=1e-5): 31 | """Batch Normalization 32 | 33 | Args: 34 | inputs: input data(Batch size) from last layer 35 | phase_train: when you test, please set phase_train "None" 36 | Returns: 37 | output for next layer 38 | """ 39 | gamma = tf.get_variable("gamma", shape=inputs.get_shape()[-1], dtype=tf.float32, initializer=tf.constant_initializer(1.0)) 40 | beta = tf.get_variable("beta", shape=inputs.get_shape()[-1], dtype=tf.float32, initializer=tf.constant_initializer(0.0)) 41 | pop_mean = tf.get_variable("pop_mean", trainable=False, shape=inputs.get_shape()[-1], dtype=tf.float32, initializer=tf.constant_initializer(0.0)) 42 | pop_var = tf.get_variable("pop_var", trainable=False, shape=inputs.get_shape()[-1], dtype=tf.float32, initializer=tf.constant_initializer(1.0)) 43 | axes = range(len(inputs.get_shape()) - 1) 44 | 45 | if phase_train != None: 46 | batch_mean, batch_var = tf.nn.moments(inputs, axes) 47 | train_mean = tf.assign(pop_mean, pop_mean * decay + batch_mean*(1 - decay)) 48 | train_var = tf.assign(pop_var, pop_var * decay + batch_var * (1 - decay)) 49 | with tf.control_dependencies([train_mean, train_var]): 50 | return tf.nn.batch_normalization(inputs, batch_mean, batch_var, beta, gamma, eps) 51 | else: 52 | return tf.nn.batch_normalization(inputs, pop_mean, pop_var, beta, gamma, eps) 53 | 54 | def convBNLayer(input_layer, use_batchnorm, is_training, input_dim, output_dim, \ 55 | kernel_size, stride, 
activation=tf.nn.relu, padding="SAME", name=""): 56 | with tf.variable_scope("convBN" + name): 57 | w = tf.get_variable("weights", \ 58 | shape=[kernel_size, kernel_size, input_dim, output_dim], initializer=tf.contrib.layers.xavier_initializer()) 59 | 60 | conv = tf.nn.conv2d(input_layer, w, strides=[1, stride, stride, 1], padding=padding) 61 | 62 | if use_batchnorm: 63 | if activation != None: 64 | conv = activation(conv, name="activation") 65 | bn = batch_norm(conv, is_training) 66 | return bn 67 | 68 | b = tf.get_variable("bias", shape=[output_dim], initializer=tf.constant_initializer(0.0)) 69 | bias = tf.nn.bias_add(conv, b) 70 | if activation is not None: 71 | return activation(bias, name="activation") 72 | return bias 73 | 74 | 75 | def get_fc_weight(self, name): 76 | return tf.Variable(self.data_dict[name][0], name="weights") 77 | 78 | def convLayer(input_layer, input_dim, output_dim, \ 79 | kernel_size, stride, activation=tf.nn.relu, padding="SAME", name=""): 80 | with tf.variable_scope(name): 81 | w = tf.get_variable("filter", \ 82 | shape=[kernel_size, kernel_size, input_dim, output_dim], initializer=tf.contrib.layers.xavier_initializer()) 83 | 84 | conv = tf.nn.conv2d(input_layer, w, strides=[1, stride, stride, 1], padding=padding) 85 | 86 | b = tf.get_variable("biases", shape=[output_dim], initializer=tf.constant_initializer(0.0)) 87 | bias = tf.nn.bias_add(conv, b) 88 | if activation is not None: 89 | return activation(bias, name="activation") 90 | return bias 91 | 92 | def maxpool2d(x, kernel=2, stride=2, name="", padding="SAME"): 93 | """define max pooling layer""" 94 | with tf.variable_scope(name): 95 | return tf.nn.max_pool( 96 | x, 97 | ksize = [1, kernel, kernel, 1], 98 | strides = [1, stride, stride, 1], 99 | padding=padding) 100 | -------------------------------------------------------------------------------- /util/parse_xml.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | """ 3 | parse XML files containing tracklet info for kitti data base (raw data section) 4 | (http://cvlibs.net/datasets/kitti/raw_data.php) 5 | 6 | No guarantees that this code is correct, usage is at your own risk! 7 | 8 | created by Christian Herdtweck, Max Planck Institute for Biological Cybernetics 9 | (christian.herdtweck@tuebingen.mpg.de) 10 | 11 | requires numpy! 
12 | 13 | example usage: 14 | import parseTrackletXML as xmlParser 15 | kittiDir = '/path/to/kitti/data' 16 | drive = '2011_09_26_drive_0001' 17 | xmlParser.example(kittiDir, drive) 18 | or simply on command line: 19 | python parseTrackletXML.py 20 | """ 21 | 22 | # Version History: 23 | # 4/7/12 Christian Herdtweck: seems to work with a few random test xml tracklet files; 24 | # converts file contents to ElementTree and then to list of Tracklet objects; 25 | # Tracklet objects have str and iter functions 26 | # 5/7/12 ch: added constants for state, occlusion, truncation and added consistency checks 27 | # 30/1/14 ch: create example function from example code 28 | 29 | from sys import argv as cmdLineArgs 30 | from xml.etree.ElementTree import ElementTree 31 | import numpy as np 32 | import itertools 33 | from warnings import warn 34 | 35 | STATE_UNSET = 0 36 | STATE_INTERP = 1 37 | STATE_LABELED = 2 38 | stateFromText = {'0':STATE_UNSET, '1':STATE_INTERP, '2':STATE_LABELED} 39 | 40 | OCC_UNSET = 255 # -1 as uint8 41 | OCC_VISIBLE = 0 42 | OCC_PARTLY = 1 43 | OCC_FULLY = 2 44 | occFromText = {'-1':OCC_UNSET, '0':OCC_VISIBLE, '1':OCC_PARTLY, '2':OCC_FULLY} 45 | 46 | TRUNC_UNSET = 255 # -1 as uint8, but in xml files the value '99' is used! 47 | TRUNC_IN_IMAGE = 0 48 | TRUNC_TRUNCATED = 1 49 | TRUNC_OUT_IMAGE = 2 50 | TRUNC_BEHIND_IMAGE = 3 51 | truncFromText = {'99':TRUNC_UNSET, '0':TRUNC_IN_IMAGE, '1':TRUNC_TRUNCATED, \ 52 | '2':TRUNC_OUT_IMAGE, '3': TRUNC_BEHIND_IMAGE} 53 | 54 | 55 | class Tracklet(object): 56 | """ representation an annotated object track 57 | 58 | Tracklets are created in function parseXML and can most conveniently used as follows: 59 | 60 | for trackletObj in parseXML(trackletFile): 61 | for translation, rotation, state, occlusion, truncation, amtOcclusion, amtBorders, absoluteFrameNumber in trackletObj: 62 | your code here 63 | #end: for all frames 64 | #end: for all tracklets 65 | 66 | absoluteFrameNumber is in range [firstFrame, firstFrame+nFrames[ 67 | amtOcclusion and amtBorders could be None 68 | 69 | You can of course also directly access the fields objType (string), size (len-3 ndarray), firstFrame/nFrames (int), 70 | trans/rots (nFrames x 3 float ndarrays), states/truncs (len-nFrames uint8 ndarrays), occs (nFrames x 2 uint8 ndarray), 71 | and for some tracklets amtOccs (nFrames x 2 float ndarray) and amtBorders (nFrames x 3 float ndarray). 
The last two 72 | can be None if the xml file did not include these fields in poses 73 | """ 74 | 75 | objectType = None 76 | size = None # len-3 float array: (height, width, length) 77 | firstFrame = None 78 | trans = None # n x 3 float array (x,y,z) 79 | rots = None # n x 3 float array (x,y,z) 80 | states = None # len-n uint8 array of states 81 | occs = None # n x 2 uint8 array (occlusion, occlusion_kf) 82 | truncs = None # len-n uint8 array of truncation 83 | amtOccs = None # None or (n x 2) float array (amt_occlusion, amt_occlusion_kf) 84 | amtBorders = None # None (n x 3) float array (amt_border_l / _r / _kf) 85 | nFrames = None 86 | 87 | def __init__(self): 88 | """create Tracklet with no info set """ 89 | self.size = np.nan*np.ones(3, dtype=float) 90 | 91 | def __str__(self): 92 | """ return human-readable string representation of tracklet object 93 | 94 | called implicitly in 95 | print trackletObj 96 | or in 97 | text = str(trackletObj) 98 | """ 99 | return '[Tracklet over {0} frames for {1}]'.format(self.nFrames, self.objectType) 100 | 101 | def __iter__(self): 102 | """ returns an iterator that yields tuple of all the available data for each frame 103 | 104 | called whenever code iterates over a tracklet object, e.g. in 105 | for translation, rotation, state, occlusion, truncation, amtOcclusion, amtBorders, absoluteFrameNumber in trackletObj: 106 | ...do something ... 107 | or 108 | trackDataIter = iter(trackletObj) 109 | """ 110 | if self.amtOccs is None: 111 | return itertools.izip(self.trans, self.rots, self.states, self.occs, self.truncs, \ 112 | itertools.repeat(None), itertools.repeat(None), xrange(self.firstFrame, self.firstFrame+self.nFrames)) 113 | else: 114 | return itertools.izip(self.trans, self.rots, self.states, self.occs, self.truncs, \ 115 | self.amtOccs, self.amtBorders, xrange(self.firstFrame, self.firstFrame+self.nFrames)) 116 | #end: class Tracklet 117 | 118 | 119 | def parseXML(trackletFile): 120 | r""" parse tracklet xml file and convert results to list of Tracklet objects 121 | 122 | :param trackletFile: name of a tracklet xml file 123 | :returns: list of Tracklet objects read from xml file 124 | """ 125 | 126 | # convert tracklet XML data to a tree structure 127 | eTree = ElementTree() 128 | print 'parsing tracklet file', trackletFile 129 | with open(trackletFile) as f: 130 | eTree.parse(f) 131 | 132 | # now convert output to list of Tracklet objects 133 | trackletsElem = eTree.find('tracklets') 134 | tracklets = [] 135 | trackletIdx = 0 136 | nTracklets = None 137 | for trackletElem in trackletsElem: 138 | #print 'track:', trackletElem.tag 139 | if trackletElem.tag == 'count': 140 | nTracklets = int(trackletElem.text) 141 | print 'file contains', nTracklets, 'tracklets' 142 | elif trackletElem.tag == 'item_version': 143 | pass 144 | elif trackletElem.tag == 'item': 145 | #print 'tracklet {0} of {1}'.format(trackletIdx, nTracklets) 146 | # a tracklet 147 | newTrack = Tracklet() 148 | isFinished = False 149 | hasAmt = False 150 | frameIdx = None 151 | for info in trackletElem: 152 | #print 'trackInfo:', info.tag 153 | if isFinished: 154 | raise ValueError('more info on element after finished!') 155 | if info.tag == 'objectType': 156 | newTrack.objectType = info.text 157 | elif info.tag == 'h': 158 | newTrack.size[0] = float(info.text) 159 | elif info.tag == 'w': 160 | newTrack.size[1] = float(info.text) 161 | elif info.tag == 'l': 162 | newTrack.size[2] = float(info.text) 163 | elif info.tag == 'first_frame': 164 | newTrack.firstFrame = int(info.text) 165 
| elif info.tag == 'poses': 166 | # this info is the possibly long list of poses 167 | for pose in info: 168 | #print 'trackInfoPose:', pose.tag 169 | if pose.tag == 'count': # this should come before the others 170 | if newTrack.nFrames is not None: 171 | raise ValueError('there are several pose lists for a single track!') 172 | elif frameIdx is not None: 173 | raise ValueError('?!') 174 | newTrack.nFrames = int(pose.text) 175 | newTrack.trans = np.nan * np.ones((newTrack.nFrames, 3), dtype=float) 176 | newTrack.rots = np.nan * np.ones((newTrack.nFrames, 3), dtype=float) 177 | newTrack.states = np.nan * np.ones(newTrack.nFrames, dtype='uint8') 178 | newTrack.occs = np.nan * np.ones((newTrack.nFrames, 2), dtype='uint8') 179 | newTrack.truncs = np.nan * np.ones(newTrack.nFrames, dtype='uint8') 180 | newTrack.amtOccs = np.nan * np.ones((newTrack.nFrames, 2), dtype=float) 181 | newTrack.amtBorders = np.nan * np.ones((newTrack.nFrames, 3), dtype=float) 182 | frameIdx = 0 183 | elif pose.tag == 'item_version': 184 | pass 185 | elif pose.tag == 'item': 186 | # pose in one frame 187 | if frameIdx is None: 188 | raise ValueError('pose item came before number of poses!') 189 | for poseInfo in pose: 190 | #print 'trackInfoPoseInfo:', poseInfo.tag 191 | if poseInfo.tag == 'tx': 192 | newTrack.trans[frameIdx, 0] = float(poseInfo.text) 193 | elif poseInfo.tag == 'ty': 194 | newTrack.trans[frameIdx, 1] = float(poseInfo.text) 195 | elif poseInfo.tag == 'tz': 196 | newTrack.trans[frameIdx, 2] = float(poseInfo.text) 197 | elif poseInfo.tag == 'rx': 198 | newTrack.rots[frameIdx, 0] = float(poseInfo.text) 199 | elif poseInfo.tag == 'ry': 200 | newTrack.rots[frameIdx, 1] = float(poseInfo.text) 201 | elif poseInfo.tag == 'rz': 202 | newTrack.rots[frameIdx, 2] = float(poseInfo.text) 203 | elif poseInfo.tag == 'state': 204 | newTrack.states[frameIdx] = stateFromText[poseInfo.text] 205 | elif poseInfo.tag == 'occlusion': 206 | newTrack.occs[frameIdx, 0] = occFromText[poseInfo.text] 207 | elif poseInfo.tag == 'occlusion_kf': 208 | newTrack.occs[frameIdx, 1] = occFromText[poseInfo.text] 209 | elif poseInfo.tag == 'truncation': 210 | newTrack.truncs[frameIdx] = truncFromText[poseInfo.text] 211 | elif poseInfo.tag == 'amt_occlusion': 212 | newTrack.amtOccs[frameIdx,0] = float(poseInfo.text) 213 | hasAmt = True 214 | elif poseInfo.tag == 'amt_occlusion_kf': 215 | newTrack.amtOccs[frameIdx,1] = float(poseInfo.text) 216 | hasAmt = True 217 | elif poseInfo.tag == 'amt_border_l': 218 | newTrack.amtBorders[frameIdx,0] = float(poseInfo.text) 219 | hasAmt = True 220 | elif poseInfo.tag == 'amt_border_r': 221 | newTrack.amtBorders[frameIdx,1] = float(poseInfo.text) 222 | hasAmt = True 223 | elif poseInfo.tag == 'amt_border_kf': 224 | newTrack.amtBorders[frameIdx,2] = float(poseInfo.text) 225 | hasAmt = True 226 | else: 227 | raise ValueError('unexpected tag in poses item: {0}!'.format(poseInfo.tag)) 228 | frameIdx += 1 229 | else: 230 | raise ValueError('unexpected pose info: {0}!'.format(pose.tag)) 231 | elif info.tag == 'finished': 232 | isFinished = True 233 | else: 234 | raise ValueError('unexpected tag in tracklets: {0}!'.format(info.tag)) 235 | #end: for all fields in current tracklet 236 | 237 | # some final consistency checks on new tracklet 238 | if not isFinished: 239 | warn('tracklet {0} was not finished!'.format(trackletIdx)) 240 | if newTrack.nFrames is None: 241 | warn('tracklet {0} contains no information!'.format(trackletIdx)) 242 | elif frameIdx != newTrack.nFrames: 243 | warn('tracklet {0} is supposed to 
have {1} frames, but perser found {1}!'.format(\ 244 | trackletIdx, newTrack.nFrames, frameIdx)) 245 | if np.abs(newTrack.rots[:,:2]).sum() > 1e-16: 246 | warn('track contains rotation other than yaw!') 247 | 248 | # if amtOccs / amtBorders are not set, set them to None 249 | if not hasAmt: 250 | newTrack.amtOccs = None 251 | newTrack.amtBorders = None 252 | 253 | # add new tracklet to list 254 | tracklets.append(newTrack) 255 | trackletIdx += 1 256 | 257 | else: 258 | raise ValueError('unexpected tracklet info') 259 | #end: for tracklet list items 260 | 261 | print 'loaded', trackletIdx, 'tracklets' 262 | 263 | # final consistency check 264 | if trackletIdx != nTracklets: 265 | warn('according to xml information the file has {0} tracklets, but parser found {1}!'.format(nTracklets, trackletIdx)) 266 | 267 | return tracklets 268 | #end: function parseXML 269 | 270 | 271 | def example(kittiDir=None, drive=None): 272 | 273 | from os.path import join, expanduser 274 | import readline # makes raw_input behave more fancy 275 | # from xmlParser import parseXML, TRUNC_IN_IMAGE, TRUNC_TRUNCATED 276 | 277 | DEFAULT_DRIVE = '2011_09_26_drive_0001' 278 | twoPi = 2.*np.pi 279 | 280 | # get dir names 281 | if kittiDir is None: 282 | kittiDir = expanduser(raw_input('please enter kitti base dir (e.g. ~/path/to/kitti): ').strip()) 283 | if drive is None: 284 | drive = raw_input('please enter drive name (default {0}): '.format(DEFAULT_DRIVE)).strip() 285 | if len(drive) == 0: 286 | drive = DEFAULT_DRIVE 287 | 288 | # read tracklets from file 289 | myTrackletFile = join(kittiDir, drive, 'tracklet_labels.xml') 290 | tracklets = parseXML(myTrackletFile) 291 | 292 | # loop over tracklets 293 | for iTracklet, tracklet in enumerate(tracklets): 294 | print 'tracklet {0: 3d}: {1}'.format(iTracklet, tracklet) 295 | 296 | # this part is inspired by kitti object development kit matlab code: computeBox3D 297 | h,w,l = tracklet.size 298 | trackletBox = np.array([ # in velodyne coordinates around zero point and without orientation yet\ 299 | [-l/2, -l/2, l/2, l/2, -l/2, -l/2, l/2, l/2], \ 300 | [ w/2, -w/2, -w/2, w/2, w/2, -w/2, -w/2, w/2], \ 301 | [ 0.0, 0.0, 0.0, 0.0, h, h, h, h]]) 302 | 303 | # loop over all data in tracklet 304 | for translation, rotation, state, occlusion, truncation, amtOcclusion, amtBorders, absoluteFrameNumber \ 305 | in tracklet: 306 | 307 | # determine if object is in the image; otherwise continue 308 | if truncation not in (TRUNC_IN_IMAGE, TRUNC_TRUNCATED): 309 | continue 310 | 311 | # re-create 3D bounding box in velodyne coordinate system 312 | yaw = rotation[2] # other rotations are 0 in all xml files I checked 313 | assert np.abs(rotation[:2]).sum() == 0, 'object rotations other than yaw given!' 314 | rotMat = np.array([\ 315 | [np.cos(yaw), -np.sin(yaw), 0.0], \ 316 | [np.sin(yaw), np.cos(yaw), 0.0], \ 317 | [ 0.0, 0.0, 1.0]]) 318 | cornerPosInVelo = np.dot(rotMat, trackletBox) + np.tile(translation, (8,1)).T 319 | 320 | # calc yaw as seen from the camera (i.e. 0 degree = facing away from cam), as opposed to 321 | # car-centered yaw (i.e. 0 degree = same orientation as car). 322 | # makes quite a difference for objects in periphery! 
323 | # Result is in [0, 2pi] 324 | x, y, z = translation 325 | yawVisual = ( yaw - np.arctan2(y, x) ) % twoPi 326 | 327 | #end: for all frames in track 328 | #end: for all tracks 329 | #end: function example 330 | 331 | # when somebody runs this file as a script: 332 | # run example if no arg or only 'example' was given as arg 333 | # otherwise run parseXML 334 | if __name__ == "__main__": 335 | # cmdLineArgs[0] is 'parseTrackletXML.py' 336 | if len(cmdLineArgs) < 2: 337 | example() 338 | elif (len(cmdLineArgs) == 2) and (cmdLineArgs[1] == 'example'): 339 | example() 340 | else: 341 | parseXML(*cmdLineArgs[1:]) 342 | 343 | # (created using vim - the world's best text editor) 344 | 345 | -------------------------------------------------------------------------------- /util/train.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yukitsuji/Faster_RCNN_tensorflow/765c729eaf03cb401ad308a289ec7d8c2bfca474/util/train.py --------------------------------------------------------------------------------
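
The training entry point `util/train.py` is only linked above, not included. As a rough, assumed sketch (not the contents of that file), this is how the KITTI input helpers defined in `util/input_kitti.py` could be driven to produce network inputs; the paths and batch size are placeholders:

```
# Hypothetical usage sketch of the KITTI input pipeline (paths are placeholders).
from input_kitti import get_pathlist, generator__Image_and_label

image_dir = "/path/to/KITTI/training/image_2/*.png"
label_dir = "/path/to/KITTI/training/label_2/*.txt"

image_paths, label_paths = get_pathlist(image_dir, label_dir)
for batch_imgs, batch_g_bboxes in generator__Image_and_label(image_paths, label_paths, batch_size=2):
    # batch_imgs: mean-subtracted, rescaled images (see the preprocessing helper in util/data_util.py)
    # batch_g_bboxes: per-image ndarray of [left, top, right, bottom] boxes for "Car"/"Van" objects
    # a training step (for example the RPN trainer in rpn/rpn.py) would consume these here
    pass
```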