├── .gitignore
├── Notes
│   ├── AnchorsBoxes.ipynb
│   └── data_processing.ipynb
├── README.md
├── Train.py
├── datasets
│   ├── __init__.py
│   ├── data2record.py
│   ├── dataset_utils.py
│   ├── sythtextprovider.py
│   └── testproviderfailed.py
├── deployment
│   ├── __init__.py
│   └── model_deploy.py
├── nets
│   ├── __init__.py
│   ├── custom_layers.py
│   ├── textbox_common.py
│   └── txtbox_300.py
├── processing
│   ├── __init__.py
│   ├── image_processing.py
│   ├── image_processing2.py
│   ├── ssd_vgg_preprocessing.py
│   ├── test_processing.py
│   └── tf_image.py
├── tf_extended
│   ├── __init__.py
│   ├── bboxes.py
│   ├── image.py
│   ├── math.py
│   ├── metrics.py
│   └── tensors.py
└── tf_utils.py

/.gitignore:
--------------------------------------------------------------------------------
1 | *.pyc
2 | .DS_Store
3 | .DS_Store?
4 | .Spotlight-V100
5 | .Trashes
6 | ehthumbs.db
7 | rawdata
8 | Thumbs.db
9 | *.csv
10 | data
11 | # Directories.
12 | datasets/__pycache__/
13 | deployment/__pycache__/
14 | nets/__pycache__/
15 | preprocessing/__pycache__/
16 | Notes/.ipynb_checkpoints/
17 | notebooks/.ipynb_checkpoints/
18 | ssd-tensorflow.sublime-workspace
19 | ssd-tensorflow.sublime-project
20 |
21 | checkpoints/ssd_300_vgg.ckpt.data-00000-of-00001
22 | checkpoints/ssd_300_vgg.ckpt.index
23 |
24 | logs/
25 | .ipynb_checkpoints/
26 | __pycache__/
27 |
28 | *.log
29 |
30 | checkpoints/VGG_VOC0712_SSD_*
--------------------------------------------------------------------------------
/Notes/AnchorsBoxes.ipynb:
--------------------------------------------------------------------------------
1 | {
2 |  "cells": [
3 |   {
4 |    "cell_type": "code",
5 |    "execution_count": null,
6 |    "metadata": {
7 |     "collapsed": true
8 |    },
9 |    "outputs": [],
10 |    "source": [
11 |     "## test \n",
12 |     "## 1. anchor_boxes\n",
13 |     "## 2. groundtruth encode\n",
14 |     "## 3. bboxes decode (not yet finished)"
15 |    ]
16 |   },
17 |   {
18 |    "cell_type": "code",
19 |    "execution_count": 10,
20 |    "metadata": {
21 |     "collapsed": false
22 |    },
23 |    "outputs": [],
24 |    "source": [
25 |     "import numpy as np\n",
26 |     "import math\n",
27 |     "import tensorflow as tf\n",
28 |     "import sys\n",
29 |     "sys.path.insert(0,'../processing/')\n",
30 |     "sys.path.insert(0,'../')\n",
31 |     "from image_processing2 import *\n",
32 |     "import tf_extended as tfe"
33 |    ]
34 |   },
35 |   {
36 |    "cell_type": "code",
37 |    "execution_count": 3,
38 |    "metadata": {
39 |     "collapsed": false
40 |    },
41 |    "outputs": [
42 |     {
43 |      "name": "stdout",
44 |      "output_type": "stream",
45 |      "text": [
46 |       "[0.2, 0.31666666666666665, 0.43333333333333335, 0.55, 0.6666666666666666, 0.7833333333333332, 0.8999999999999999]\n"
47 |      ]
48 |     }
49 |    ],
50 |    "source": [
51 |     "img_shape=(300, 300)\n",
52 |     "num_classes=2\n",
53 |     "feat_layers=['conv_4_3', 'fc7', 'conv6_2', 'conv7_2', 'conv8_2', 'pool6']\n",
54 |     "feat_shapes=[(38, 38), (19, 19), (10, 10), (5, 5), (3, 3), (1, 1)]\n",
55 |     "scale_range=[0.20, 0.90]\n",
56 |     "anchor_ratios=[1,2,3,5,7,10]\n",
57 |     "normalizations=[20, -1, -1, -1, -1, -1]\n",
58 |     "prior_scaling=[0.1, 0.1, 0.2, 0.2]\n",
59 |     "\n",
60 |     "step = (scale_range[1] - scale_range[0]) / len(feat_shapes)\n",
61 |     "scales = [scale_range[0] + i * step for i in range(len(feat_shapes)+1)]\n",
62 |     "print scales"
63 |    ]
64 |   },
65 |   {
66 |    "cell_type": "code",
67 |    "execution_count": 4,
68 |    "metadata": {
69 |     "collapsed": false
70 |    },
71 |    "outputs": [
72 |     {
73 |      "name": "stdout",
74 |      "output_type": "stream",
75 |      "text": [
76 |       "(38, 38, 2, 1)\n",
77 |       "[ 0.02368421  0.01674727  0.01367409  0.0105919   0.00895179  0.0074896 ]\n",
78 |       "(38, 38, 2, 6)\n"
79 |      ]
80 |     }
81 |    ],
82 |    "source": [
83 |     "def textbox_anchor_one_layer(img_shape,\n",
84 |     "                             feat_size,\n",
85 |     "                             ratios,\n",
86 |     "                             scale,\n",
87 |     "                             offset = 0.5,\n",
88 |     "                             dtype=np.float32):\n",
89 |     "    # Follow the paper's scheme:\n",
90 |     "    # 12 anchor boxes, without sk' = sqrt(sk * sk+1)\n",
91 |     "    y, x = np.mgrid[0:feat_size[0], 0:feat_size[1]] + 0.5\n",
92 |     "    y = y.astype(dtype) / feat_size[0]\n",
93 |     "    x = x.astype(dtype) / feat_size[1]\n",
94 |     "\n",
95 |     "    x_offset = x\n",
96 |     "    y_offset = y + offset\n",
97 |     "    x_out = np.stack((x, x_offset), -1)\n",
98 |     "    y_out = np.stack((y, y_offset), -1)\n",
99 |     "    y_out = np.expand_dims(y_out, axis=-1)\n",
100 |     "    x_out = np.expand_dims(x_out, axis=-1)\n",
101 |     "\n",
102 |     "    # \n",
103 |     "    num_anchors = 6\n",
104 |     "    h = np.zeros((num_anchors, ), dtype=dtype)\n",
105 |     "    w = np.zeros((num_anchors, ), dtype=dtype)\n",
106 |     "    for i, r in enumerate(ratios):\n",
107 |     "        h[i] = scale / math.sqrt(r) / feat_size[0]\n",
108 |     "        w[i] = scale * math.sqrt(r) / feat_size[1]\n",
109 |     "    return y_out, x_out, h, w\n",
110 |     "\n",
111 |     "y,x,h,w = textbox_anchor_one_layer((300,300), (38,38),(1,2,3,5,7,10),0.9)\n",
112 |     "print y.shape\n",
113 |     "print h\n",
114 |     "print (y -h).shape"
115 |    ]
116 |   },
117 |   {
118 |    "cell_type": "code",
119 |    "execution_count": 5,
120 |    "metadata": {
121 |     "collapsed": false
122 |    },
123 |    "outputs": [
124 |     {
125 |      "name": "stdout",
126 |      "output_type": "stream",
127 |      "text": [
128 |       "6\n",
129 |       "4\n",
130 |       "(38, 38, 2, 1)\n"
131 |      ]
132 |     }
133 |    ],
134 |    "source": [
135 |     "def textbox_anchor_all_layers(img_shape,\n",
136 |     "                              layers_shape,\n",
137 |     "                              anchor_ratios,\n",
138 |     "                              scales,\n",
139 |     "                              offset=0.5,\n",
140 |     "                              dtype=np.float32):\n",
141 |     "    \"\"\"\n",
142 |     "    Compute anchor boxes for all feature layers.\n",
143 |     "    \"\"\"\n",
144 |     "    layers_anchors = []\n",
145 |     "    for i, s in enumerate(layers_shape):\n",
146 |     "        anchor_bboxes = textbox_anchor_one_layer(img_shape, s,\n",
147 |     "                                                 anchor_ratios,\n",
148 |     "                                                 scales[i],\n",
149 |     "                                                 offset=offset, dtype=dtype)\n",
150 |     "        layers_anchors.append(anchor_bboxes)\n",
151 |     "    return layers_anchors\n",
152 |     "\n",
153 |     "layers_anchors = textbox_anchor_all_layers((300,300), feat_shapes,anchor_ratios,scales)\n",
154 |     "print len(layers_anchors)\n",
155 |     "print len(layers_anchors[0])\n",
156 |     "print layers_anchors[0][0].shape\n"
157 |    ]
158 |   },
159 |   {
160 |    "cell_type": "code",
161 |    "execution_count": 12,
162 |    "metadata": {
163 |     "collapsed": false
164 |    },
165 |    "outputs": [],
166 |    "source": [
167 |     "# =========================================================================== #\n",
168 |     "# TensorFlow implementation of Text Boxes encoding / decoding.\n",
169 |     "# =========================================================================== #\n",
170 |     "\n",
171 |     "def tf_text_bboxes_encode_layer(bboxes,\n",
172 |     "                                anchors_layer,\n",
173 |     "                                matching_threshold=0.5,\n",
174 |     "                                prior_scaling=[0.1, 0.1, 0.2, 0.2],\n",
175 |     "                                dtype=tf.float32):\n",
176 |     "    \n",
177 |     "    \"\"\"\n",
178 |     "    Encode groundtruth labels and bounding boxes using Textbox anchors from\n",
179 |     "    one layer.\n",
180 |     "\n",
181 |     "    Arguments:\n",
182 |     "      bboxes: Nx4 Tensor(float) with bboxes relative coordinates;\n",
183 |     "      anchors_layer: Numpy array with layer anchors;\n",
184 |     "      matching_threshold: Threshold for positive match with groundtruth bboxes;\n",
185 |     "      prior_scaling: Scaling of encoded coordinates.\n",
186 |     "\n",
187 |     "    Return:\n",
188 |     "      (target_localizations, target_scores): Target Tensors.\n",
189 |     "    # this is a binary problem, so target_scores and target_labels are the same.\n",
190 |     "    \"\"\"\n",
191 |     "    # Anchors coordinates and volume.\n",
192 |     "\n",
193 |     "    yref, xref, href, wref = anchors_layer\n",
194 |     "    print yref.shape\n",
195 |     "    print href.shape\n",
196 |     "    print bboxes.shape\n",
197 |     "    ymin = yref - href / 2.\n",
198 |     "    xmin = xref - wref / 2.\n",
199 |     "    ymax = yref + href / 2.\n",
200 |     "    xmax = xref + wref / 2. 
\n", 201 | " vol_anchors = (xmax - xmin) * (ymax - ymin)\n", 202 | " \n", 203 | " # Initialize tensors...\n", 204 | " shape = (yref.shape[0], yref.shape[1], yref.shape[2], href.size)\n", 205 | " # all follow the shape(feat.size, feat.size, 2, 6)\n", 206 | " #feat_labels = tf.zeros(shape, dtype=tf.int64)\n", 207 | " feat_scores = tf.zeros(shape, dtype=dtype)\n", 208 | "\n", 209 | " feat_ymin = tf.zeros(shape, dtype=dtype)\n", 210 | " feat_xmin = tf.zeros(shape, dtype=dtype)\n", 211 | " feat_ymax = tf.ones(shape, dtype=dtype)\n", 212 | " feat_xmax = tf.ones(shape, dtype=dtype)\n", 213 | "\n", 214 | " def jaccard_with_anchors(bbox):\n", 215 | " \"\"\"\n", 216 | " Compute jaccard score between a box and the anchors.\n", 217 | " \"\"\"\n", 218 | " int_ymin = tf.maximum(ymin, bbox[0])\n", 219 | " int_xmin = tf.maximum(xmin, bbox[1])\n", 220 | " int_ymax = tf.minimum(ymax, bbox[2])\n", 221 | " int_xmax = tf.minimum(xmax, bbox[3])\n", 222 | " h = tf.maximum(int_ymax - int_ymin, 0.)\n", 223 | " w = tf.maximum(int_xmax - int_xmin, 0.)\n", 224 | " # Volumes.\n", 225 | " inter_vol = h * w\n", 226 | " union_vol = vol_anchors - inter_vol \\\n", 227 | " + (bbox[2] - bbox[0]) * (bbox[3] - bbox[1])\n", 228 | " jaccard = tf.div(inter_vol, union_vol)\n", 229 | " return jaccard\n", 230 | " \n", 231 | " \"\"\"\n", 232 | " # never use in Textbox\n", 233 | " def intersection_with_anchors(bbox):\n", 234 | " '''\n", 235 | " Compute intersection between score a box and the anchors.\n", 236 | " '''\n", 237 | " int_ymin = tf.maximum(ymin, bbox[0])\n", 238 | " int_xmin = tf.maximum(xmin, bbox[1])\n", 239 | " int_ymax = tf.minimum(ymax, bbox[2])\n", 240 | " int_xmax = tf.minimum(xmax, bbox[3])\n", 241 | " h = tf.maximum(int_ymax - int_ymin, 0.)\n", 242 | " w = tf.maximum(int_xmax - int_xmin, 0.)\n", 243 | " inter_vol = h * w\n", 244 | " scores = tf.div(inter_vol, vol_anchors)\n", 245 | " return scores\n", 246 | " \"\"\"\n", 247 | " \n", 248 | " def condition(i, feat_scores,\n", 249 | " feat_ymin, feat_xmin, feat_ymax, feat_xmax):\n", 250 | " \"\"\"Condition: check label index.\n", 251 | " \"\"\"\n", 252 | " r = tf.less(i, 3)\n", 253 | " return r\n", 254 | "\n", 255 | " def body(i, feat_scores,feat_ymin, feat_xmin, feat_ymax, feat_xmax,bbox):\n", 256 | " \"\"\"Body: update feature labels, scores and bboxes.\n", 257 | " Follow the original SSD paper for that purpose:\n", 258 | " - assign values when jaccard > 0.5;\n", 259 | " - only update if beat the score of other bboxes.\n", 260 | " \"\"\"\n", 261 | " # Jaccard score.\n", 262 | " #bbox = bboxes[i]\n", 263 | " jaccard = jaccard_with_anchors(bbox)\n", 264 | " # Mask: check threshold + scores + no annotations + num_classes.\n", 265 | " mask = tf.greater(jaccard, feat_scores)\n", 266 | " mask = tf.logical_and(mask, tf.greater(jaccard, matching_threshold))\n", 267 | " #mask = tf.logical_and(mask, feat_scores > -0.5)\n", 268 | " #mask = tf.logical_and(mask, label < num_classes)\n", 269 | " imask = tf.cast(mask, tf.int64)\n", 270 | " fmask = tf.cast(mask, dtype)\n", 271 | " # Update values using mask.\n", 272 | " #feat_labels = imask * label + (1 - imask) * feat_labels\n", 273 | " feat_scores = tf.where(mask, jaccard, feat_scores)\n", 274 | "\n", 275 | " feat_ymin = fmask * bbox[0] + (1 - fmask) * feat_ymin\n", 276 | " feat_xmin = fmask * bbox[1] + (1 - fmask) * feat_xmin\n", 277 | " feat_ymax = fmask * bbox[2] + (1 - fmask) * feat_ymax\n", 278 | " feat_xmax = fmask * bbox[3] + (1 - fmask) * feat_xmax\n", 279 | "\n", 280 | " # Check no annotation label: ignore these 
anchors...\n", 281 | " #interscts = intersection_with_anchors(bbox)\n", 282 | " #mask = tf.logical_and(interscts > ignore_threshold,\n", 283 | " # label == no_annotation_label)\n", 284 | " # Replace scores by -1.\n", 285 | " #feat_scores = tf.where(mask, -tf.cast(mask, dtype), feat_scores)\n", 286 | "\n", 287 | " return [i+1, feat_scores,\n", 288 | " feat_ymin, feat_xmin, feat_ymax, feat_xmax]\n", 289 | " # Main loop definition.\n", 290 | " '''\n", 291 | " i = 0\n", 292 | " [i,feat_scores,\n", 293 | " feat_ymin, feat_xmin,\n", 294 | " feat_ymax, feat_xmax] = tf.while_loop(condition, body,\n", 295 | " [i, feat_scores,\n", 296 | " feat_ymin, feat_xmin,\n", 297 | " feat_ymax, feat_xmax])\n", 298 | " '''\n", 299 | " for i, bbox in enumerate(tf.unstack(bboxes, axis=0)):\n", 300 | " [i,feat_scores,feat_ymin, \n", 301 | " feat_xmin, feat_ymax, feat_xmax] = body(i, feat_scores,\n", 302 | " feat_ymin, feat_xmin, \n", 303 | " feat_ymax, feat_xmax,bbox)\n", 304 | " # Transform to center / size.\n", 305 | " feat_cy = (feat_ymax + feat_ymin) / 2.\n", 306 | " feat_cx = (feat_xmax + feat_xmin) / 2.\n", 307 | " feat_h = feat_ymax - feat_ymin\n", 308 | " feat_w = feat_xmax - feat_xmin\n", 309 | " # Encode features.\n", 310 | " feat_cy = (feat_cy - yref) / href / prior_scaling[0]\n", 311 | " feat_cx = (feat_cx - xref) / wref / prior_scaling[1]\n", 312 | " feat_h = tf.log(feat_h / href) / prior_scaling[2]\n", 313 | " feat_w = tf.log(feat_w / wref) / prior_scaling[3]\n", 314 | " # Use SSD ordering: x / y / w / h instead of ours.\n", 315 | " feat_localizations = tf.stack([feat_cx, feat_cy, feat_w, feat_h], axis=-1)\n", 316 | " return feat_localizations, feat_scores\n" 317 | ] 318 | }, 319 | { 320 | "cell_type": "code", 321 | "execution_count": 13, 322 | "metadata": { 323 | "collapsed": true 324 | }, 325 | "outputs": [], 326 | "source": [ 327 | "def tf_text_bboxes_encode(bboxes,\n", 328 | " anchors,\n", 329 | " matching_threshold=0.5,\n", 330 | " prior_scaling=[0.1, 0.1, 0.2, 0.2],\n", 331 | " dtype=tf.float32,\n", 332 | " scope='ssd_bboxes_encode'):\n", 333 | " \"\"\"Encode groundtruth labels and bounding boxes using SSD net anchors.\n", 334 | " Encoding boxes for all feature layers.\n", 335 | "\n", 336 | " Arguments:\n", 337 | " bboxes: Nx4 Tensor(float) with bboxes relative coordinates;\n", 338 | " anchors: List of Numpy array with layer anchors;\n", 339 | " matching_threshold: Threshold for positive match with groundtruth bboxes;\n", 340 | " prior_scaling: Scaling of encoded coordinates.\n", 341 | "\n", 342 | " Return:\n", 343 | " (target_labels, target_localizations, target_scores):\n", 344 | " Each element is a list of target Tensors.\n", 345 | " \"\"\"\n", 346 | " with tf.name_scope(scope):\n", 347 | " target_labels = []\n", 348 | " target_localizations = []\n", 349 | " target_scores = []\n", 350 | " for i, anchors_layer in enumerate(anchors):\n", 351 | " with tf.name_scope('bboxes_encode_block_%i' % i):\n", 352 | " t_loc, t_scores = \\\n", 353 | " tf_text_bboxes_encode_layer(bboxes, anchors_layer,\n", 354 | " matching_threshold,\n", 355 | " prior_scaling, dtype)\n", 356 | " target_localizations.append(t_loc)\n", 357 | " target_scores.append(t_scores)\n", 358 | " return target_localizations, target_scores" 359 | ] 360 | }, 361 | { 362 | "cell_type": "code", 363 | "execution_count": 14, 364 | "metadata": { 365 | "collapsed": false 366 | }, 367 | "outputs": [ 368 | { 369 | "name": "stdout", 370 | "output_type": "stream", 371 | "text": [ 372 | "name 
SparseTensor(indices=Tensor(\"ParseSingleExample_1/Slice_Indices_image/name:0\", shape=(?, 1), dtype=int64), values=Tensor(\"ParseSingleExample_1/ParseExample/ParseExample:6\", shape=(?,), dtype=string), dense_shape=Tensor(\"ParseSingleExample_1/Squeeze_Shape_image/name:0\", shape=(1,), dtype=int64))\n", 373 | "image after decode Tensor(\"decode_jpeg_1/convert_image:0\", shape=(?, ?, 3), dtype=float32)\n", 374 | "labels: Tensor(\"ExpandDims_11:0\", shape=(1, ?), dtype=int64) \n", 375 | "(38, 38, 2, 1)\n", 376 | "(6,)\n", 377 | "(?, 4)\n" 378 | ] 379 | }, 380 | { 381 | "ename": "ValueError", 382 | "evalue": "Cannot infer num from shape (?, 4)", 383 | "output_type": "error", 384 | "traceback": [ 385 | "\u001b[0;31m---------------------------------------------------------------\u001b[0m", 386 | "\u001b[0;31mValueError\u001b[0m Traceback (most recent call last)", 387 | "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m()\u001b[0m\n\u001b[1;32m 7\u001b[0m image,label,bboxes = image_processing(image_buffer, bboxes,label,\n\u001b[1;32m 8\u001b[0m train= True, thread_id = 0)\n\u001b[0;32m----> 9\u001b[0;31m \u001b[0mflocalization\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mfscores\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mtf_text_bboxes_encode\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mbboxes\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0mlayers_anchors\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0mmatching_threshold\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;36m0.1\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 10\u001b[0m \u001b[0;31m#print flocalization.shape\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 11\u001b[0m \u001b[0;31m#print fscores.shape\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", 388 | "\u001b[0;32m\u001b[0m in \u001b[0;36mtf_text_bboxes_encode\u001b[0;34m(bboxes, anchors, matching_threshold, prior_scaling, dtype, scope)\u001b[0m\n\u001b[1;32m 26\u001b[0m t_loc, t_scores = tf_text_bboxes_encode_layer(bboxes, anchors_layer,\n\u001b[1;32m 27\u001b[0m \u001b[0mmatching_threshold\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 28\u001b[0;31m prior_scaling, dtype)\n\u001b[0m\u001b[1;32m 29\u001b[0m \u001b[0mtarget_localizations\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mappend\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mt_loc\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 30\u001b[0m \u001b[0mtarget_scores\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mappend\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mt_scores\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", 389 | "\u001b[0;32m\u001b[0m in \u001b[0;36mtf_text_bboxes_encode_layer\u001b[0;34m(bboxes, anchors_layer, matching_threshold, prior_scaling, dtype)\u001b[0m\n\u001b[1;32m 130\u001b[0m feat_ymax, feat_xmax])\n\u001b[1;32m 131\u001b[0m '''\n\u001b[0;32m--> 132\u001b[0;31m \u001b[0;32mfor\u001b[0m \u001b[0mi\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mbbox\u001b[0m \u001b[0;32min\u001b[0m \u001b[0menumerate\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mtf\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0munstack\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mbboxes\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0maxis\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 133\u001b[0m [i,feat_scores,feat_ymin, \n\u001b[1;32m 134\u001b[0m \u001b[0mfeat_xmin\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mfeat_ymax\u001b[0m\u001b[0;34m,\u001b[0m 
\u001b[0mfeat_xmax\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mbody\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mi\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mfeat_scores\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", 390 | "\u001b[0;32m/Applications/python/anaconda/envs/tensorflow2.7/lib/python2.7/site-packages/tensorflow/python/ops/array_ops.pyc\u001b[0m in \u001b[0;36munstack\u001b[0;34m(value, num, axis, name)\u001b[0m\n\u001b[1;32m 958\u001b[0m \u001b[0mnum\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mvalue_shape\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0maxis\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mvalue\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 959\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mnum\u001b[0m \u001b[0;32mis\u001b[0m \u001b[0mNone\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 960\u001b[0;31m \u001b[0;32mraise\u001b[0m \u001b[0mValueError\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\"Cannot infer num from shape %s\"\u001b[0m \u001b[0;34m%\u001b[0m \u001b[0mvalue_shape\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 961\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0mgen_array_ops\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_unpack\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mvalue\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mnum\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mnum\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0maxis\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0maxis\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mname\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mname\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 962\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n", 391 | "\u001b[0;31mValueError\u001b[0m: Cannot infer num from shape (?, 4)" 392 | ] 393 | } 394 | ], 395 | "source": [ 396 | "data_dir = '/Users/xiaodiu/Documents/github/projecttextbox/TextBoxes-TensorFlow/data/sythtext/'\n", 397 | "file_name = data_dir + '1.tfrecord'\n", 398 | "## test if file_name exists \n", 399 | "\n", 400 | "example = tf.python_io.tf_record_iterator(file_name).next()\n", 401 | "image_buffer, label, bboxes, name= parse_example(example)\n", 402 | "image,label,bboxes = image_processing(image_buffer, bboxes,label,\n", 403 | " train= True, thread_id = 0)\n", 404 | "flocalization, fscores = tf_text_bboxes_encode(bboxes,layers_anchors,matching_threshold=0.1)\n", 405 | "#print flocalization.shape\n", 406 | "#print fscores.shape\n", 407 | "\n", 408 | "\n", 409 | "with tf.Session() as sess:\n", 410 | " sess.run(tf.global_variables_initializer())\n", 411 | " Image, label, bboxes = sess.run([image, label, bboxes])\n", 412 | " flocalization, fscores = sess.run([flocalization,fscores])\n", 413 | " print label.shape\n", 414 | " print bboxes\n", 415 | " #print name\n", 416 | " #print width\n", 417 | " #print height\n", 418 | " print Image.shape\n", 419 | " print flocalization[0].shape\n", 420 | " for i in range(6):\n", 421 | " print np.where(fscores[i] > 0)\n", 422 | " \"\"\"\n", 423 | " visualize_bbox(Image, bboxes)\n", 424 | " skio.imshow(Image)\n", 425 | " skio.show()\n", 426 | " \"\"\"" 427 | ] 428 | }, 429 | { 430 | "cell_type": "code", 431 | "execution_count": null, 432 | "metadata": { 433 | "collapsed": true 434 | }, 435 | "outputs": [], 436 | "source": [] 437 | }, 438 | { 439 | "cell_type": "code", 440 | "execution_count": null, 441 | "metadata": { 442 | "collapsed": true 443 | }, 444 | "outputs": [], 445 | "source": [] 446 | }, 447 | { 448 | "cell_type": "code", 449 | "execution_count": null, 450 | 
"metadata": { 451 | "collapsed": true 452 | }, 453 | "outputs": [], 454 | "source": [] 455 | } 456 | ], 457 | "metadata": { 458 | "kernelspec": { 459 | "display_name": "keras_tf_2.7", 460 | "language": "python", 461 | "name": "tensorflow2.7" 462 | }, 463 | "language_info": { 464 | "codemirror_mode": { 465 | "name": "ipython", 466 | "version": 2 467 | }, 468 | "file_extension": ".py", 469 | "mimetype": "text/x-python", 470 | "name": "python", 471 | "nbconvert_exporter": "python", 472 | "pygments_lexer": "ipython2", 473 | "version": "2.7.13" 474 | } 475 | }, 476 | "nbformat": 4, 477 | "nbformat_minor": 0 478 | } 479 | -------------------------------------------------------------------------------- /Notes/data_processing.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "metadata": { 7 | "collapsed": true 8 | }, 9 | "outputs": [], 10 | "source": [ 11 | "## 1. Transform data to record format\n", 12 | "## First dataset from http://www.robots.ox.ac.uk/~vgg/data/scenetext/\n", 13 | "## This method failed, because " 14 | ] 15 | }, 16 | { 17 | "cell_type": "code", 18 | "execution_count": 1, 19 | "metadata": { 20 | "collapsed": false 21 | }, 22 | "outputs": [ 23 | { 24 | "data": { 25 | "text/plain": [ 26 | "'1.0.1'" 27 | ] 28 | }, 29 | "execution_count": 1, 30 | "metadata": {}, 31 | "output_type": "execute_result" 32 | } 33 | ], 34 | "source": [ 35 | "%matplotlib inline\n", 36 | "import math\n", 37 | "import numpy as np\n", 38 | "import scipy.io as sio\n", 39 | "import gzip\n", 40 | "from zipfile import ZipFile\n", 41 | "import matplotlib.pyplot as plt\n", 42 | "import cv2\n", 43 | "import sys\n", 44 | "sys.path.insert(0,'../')\n", 45 | "import tensorflow as tf\n", 46 | "import skimage.io as skio\n", 47 | "tf.InteractiveSession()\n", 48 | "from PIL import Image\n", 49 | "import re\n", 50 | "import os\n", 51 | "slim = tf.contrib.slim\n", 52 | "tf.__version__" 53 | ] 54 | }, 55 | { 56 | "cell_type": "code", 57 | "execution_count": 2, 58 | "metadata": { 59 | "collapsed": true 60 | }, 61 | "outputs": [], 62 | "source": [ 63 | "def int64_feature(value):\n", 64 | " \"\"\"Wrapper for inserting int64 features into Example proto.\n", 65 | " \"\"\"\n", 66 | " if not isinstance(value, list):\n", 67 | " value = [value]\n", 68 | " return tf.train.Feature(int64_list=tf.train.Int64List(value=value))\n", 69 | "\n", 70 | "\n", 71 | "def float_feature(value):\n", 72 | " \"\"\"Wrapper for inserting float features into Example proto.\n", 73 | " \"\"\"\n", 74 | " if not isinstance(value, list):\n", 75 | " value = [value]\n", 76 | " return tf.train.Feature(float_list=tf.train.FloatList(value=value))\n", 77 | "\n", 78 | "\n", 79 | "def bytes_feature(value):\n", 80 | " \"\"\"Wrapper for inserting bytes features into Example proto.\n", 81 | " \"\"\"\n", 82 | " if not isinstance(value, list):\n", 83 | " value = [value]\n", 84 | " return tf.train.Feature(bytes_list=tf.train.BytesList(value=value))" 85 | ] 86 | }, 87 | { 88 | "cell_type": "code", 89 | "execution_count": null, 90 | "metadata": { 91 | "collapsed": true 92 | }, 93 | "outputs": [], 94 | "source": [ 95 | "def visualize_bbox(image, bboxes):\n", 96 | " \"\"\"\n", 97 | " Input: image (height, width, channels)\n", 98 | " bboxes (numof bboxes, 4) in order(ymin, xmin, ymax, xmax)\n", 99 | " range(0,1) \n", 100 | " \"\"\"\n", 101 | " numofbox = bboxes.shape[0]\n", 102 | " width = image.shape[1]\n", 103 | " height = image.shape[0]\n", 104 | " def 
norm(x):\n",
105 |     "        if x < 0:\n",
106 |     "            x = 0\n",
107 |     "        else:\n",
108 |     "            if x > 1:\n",
109 |     "                x = 1\n",
110 |     "        return x\n",
111 |     "    xmin = [int(norm(i) * width) for i in bboxes[:,1]]\n",
112 |     "    ymin = [int(norm(i) * height) for i in bboxes[:,0]]\n",
113 |     "    ymax = [int(norm(i) * height) for i in bboxes[:,2]]\n",
114 |     "    xmax = [int(norm(i) * width) for i in bboxes[:,3]]\n",
115 |     "\n",
116 |     "    for i in range(numofbox):\n",
117 |     "        image = cv2.rectangle(image,(xmin[i],ymin[i]),\n",
118 |     "                              (xmax[i],ymax[i]),(0,0,0))\n",
119 |     "    skio.imshow(image)\n",
120 |     "    skio.show()"
121 |    ]
122 |   },
123 |   {
124 |    "cell_type": "code",
125 |    "execution_count": null,
126 |    "metadata": {
127 |     "collapsed": true
128 |    },
129 |    "outputs": [],
130 |    "source": [
131 |     "if __name__ == \"__main__\":\n",
132 |     "    data_dir = '/Users/xiaodiu/Documents/github/projecttextbox/TextBoxes-TensorFlow/data/sythtext/'\n",
133 |     "    file_name = data_dir + '1.tfrecord'\n",
134 |     "    ## test if file_name exists \n",
135 |     "\n",
136 |     "    example = tf.python_io.tf_record_iterator(file_name).next()\n",
137 |     "    image_buffer, label, bboxes, name= parse_example(example)\n",
138 |     "    image,label,bboxes = image_processing(image_buffer, bboxes,label,\n",
139 |     "                                          train= True, thread_id = 0)\n",
140 |     "\n",
141 |     "    with tf.Session() as sess:\n",
142 |     "        sess.run(tf.global_variables_initializer())\n",
143 |     "        Image, label, bboxes = sess.run([image, label, bboxes])\n",
144 |     "        print label.shape\n",
145 |     "        print bboxes\n",
146 |     "        #print name\n",
147 |     "        #print width\n",
148 |     "        #print height\n",
149 |     "        print Image.shape\n",
150 |     "        visualize_bbox(Image, bboxes)\n",
151 |     "        skio.imshow(Image)\n",
152 |     "        skio.show()"
153 |    ]
154 |   }
155 |  ],
156 |  "metadata": {
157 |   "kernelspec": {
158 |    "display_name": "keras_tf_2.7",
159 |    "language": "python",
160 |    "name": "tensorflow2.7"
161 |   },
162 |   "language_info": {
163 |    "codemirror_mode": {
164 |     "name": "ipython",
165 |     "version": 2
166 |    },
167 |    "file_extension": ".py",
168 |    "mimetype": "text/x-python",
169 |    "name": "python",
170 |    "nbconvert_exporter": "python",
171 |    "pygments_lexer": "ipython2",
172 |    "version": "2.7.13"
173 |   }
174 |  },
175 |  "nbformat": 4,
176 |  "nbformat_minor": 0
177 | }
178 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # TextBoxes-TensorFlow
2 | TextBoxes re-implementation using TensorFlow.
3 | This project is greatly inspired by the [slim project](https://github.com/tensorflow/models/tree/master/slim),
4 | and many functions are adapted from the [SSD-tensorflow project](https://github.com/balancap/SSD-Tensorflow).
5 | Later, we will rewrite this project to make it more
6 | flexible and modularized.
7 |
8 | Author:
9 | Daitao Xing : dx383@nyu.edu
10 | Jin Huang : jh5442@nyu.edu
11 |
12 | # Progress
13 | 2017/03/14
14 |
15 | Data processing phase finished.
16 | Test:
17 |
18 | 1. Download the dataset and put the 1/ folder and gt.mat under the data/sythtext/ folder (a download script will be written later); the expected layout is sketched after this list
19 | 2. python datasets/data2record.py
20 | 3. python image_processing.py
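
The layout under `data/sythtext/` assumed by `datasets/data2record.py` (paths are taken from that script; only folder `1/` is converted by default, since `NUMoffolder = 1`):

    data/sythtext/
    ├── gt.mat
    └── 1/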
21 |
22 | output: batch_size * 300 * 300 * 3 image
23 |
24 | 2017/03/17
25 |
26 | Finished the design of training (training can now be started):
27 |
28 |     python train.py \
29 |         --train_dir=${TRAIN_DIR} \
30 |         --dataset_dir=${DATASET_DIR} \
31 |         --save_summaries_secs=60 \
32 |         --save_interval_secs=600 \
33 |         --weight_decay=0.0005 \
34 |         --optimizer=adam \
35 |         --learning_rate=0.001 \
36 |         --batch_size=32
37 |
38 | # Problems to be solved:
39 | 1. Need to redesign visualization
40 | 2. image_processing can be improved
41 |
42 | # Next steps:
43 |
44 | 1. Training on other datasets
45 | 2. Fine-tuning
46 | 3. Testing
47 | 4. Automatic downloading of datasets, and so on
48 |
49 |
--------------------------------------------------------------------------------
/Train.py:
--------------------------------------------------------------------------------
1 | # Copyright 2016 Paul Balanca. All Rights Reserved.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | # ==============================================================================
15 | """Generic training script that trains an SSD model using a given dataset."""
16 |
17 | import tensorflow as tf
18 | from tensorflow.python.ops import control_flow_ops
19 |
20 | from datasets import sythtextprovider
21 | from deployment import model_deploy
22 | from nets import txtbox_300
23 | from processing import image_processing
24 | from processing import ssd_vgg_preprocessing
25 | import tf_utils
26 |
27 | slim = tf.contrib.slim
28 |
29 | # =========================================================================== #
30 | # Text Network flags.
31 | # =========================================================================== #
32 | tf.app.flags.DEFINE_float(
33 |     'loss_alpha', 1., 'Alpha parameter in the loss function.')
34 | tf.app.flags.DEFINE_float(
35 |     'negative_ratio', 3., 'Negative ratio in the loss function.')
36 | tf.app.flags.DEFINE_float(
37 |     'match_threshold', 0.1, 'Matching threshold in the loss function.')
38 |
39 | # =========================================================================== #
40 | # General Flags.
41 | # =========================================================================== #
42 | tf.app.flags.DEFINE_string(
43 |     'train_dir', '/tmp/tfmodel/',
44 |     'Directory where checkpoints and event logs are written to.')
45 | tf.app.flags.DEFINE_integer('num_clones', 1,
46 |                             'Number of model clones to deploy.')
47 | tf.app.flags.DEFINE_boolean('clone_on_cpu', False,
48 |                             'Use CPUs to deploy clones.')
49 | tf.app.flags.DEFINE_integer(
50 |     'num_readers', 4,
51 |     'The number of parallel readers that read data from the dataset.')
52 | tf.app.flags.DEFINE_integer(
53 |     'num_preprocessing_threads', 4,
54 |     'The number of threads used to create the batches.')
55 |
56 | tf.app.flags.DEFINE_integer(
57 |     'log_every_n_steps', 10,
58 |     'The frequency with which logs are printed.')
59 | tf.app.flags.DEFINE_integer(
60 |     'save_summaries_secs', 600,
61 |     'The frequency with which summaries are saved, in seconds.')
62 | tf.app.flags.DEFINE_integer(
63 |     'save_interval_secs', 600,
64 |     'The frequency with which the model is saved, in seconds.')
65 | tf.app.flags.DEFINE_float(
66 |     'gpu_memory_fraction', 0.75, 'GPU memory fraction to use.')
67 |
68 | # =========================================================================== #
69 | # Optimization Flags.
70 | # =========================================================================== #
71 | tf.app.flags.DEFINE_float(
72 |     'weight_decay', 0.0005, 'The weight decay on the model weights.')
73 | tf.app.flags.DEFINE_string(
74 |     'optimizer', 'rmsprop',
75 |     'The name of the optimizer, one of "adadelta", "adagrad", "adam",'
76 |     '"ftrl", "momentum", "sgd" or "rmsprop".')
77 | tf.app.flags.DEFINE_float(
78 |     'adadelta_rho', 0.95,
79 |     'The decay rate for adadelta.')
80 | tf.app.flags.DEFINE_float(
81 |     'adagrad_initial_accumulator_value', 0.1,
82 |     'Starting value for the AdaGrad accumulators.')
83 | tf.app.flags.DEFINE_float(
84 |     'adam_beta1', 0.9,
85 |     'The exponential decay rate for the 1st moment estimates.')
86 | tf.app.flags.DEFINE_float(
87 |     'adam_beta2', 0.999,
88 |     'The exponential decay rate for the 2nd moment estimates.')
89 | tf.app.flags.DEFINE_float('opt_epsilon', 1.0, 'Epsilon term for the optimizer.')
90 | tf.app.flags.DEFINE_float('ftrl_learning_rate_power', -0.5,
91 |                           'The learning rate power.')
92 | tf.app.flags.DEFINE_float(
93 |     'ftrl_initial_accumulator_value', 0.1,
94 |     'Starting value for the FTRL accumulators.')
95 | tf.app.flags.DEFINE_float(
96 |     'ftrl_l1', 0.0, 'The FTRL l1 regularization strength.')
97 | tf.app.flags.DEFINE_float(
98 |     'ftrl_l2', 0.0, 'The FTRL l2 regularization strength.')
99 | tf.app.flags.DEFINE_float(
100 |     'momentum', 0.9,
101 |     'The momentum for the MomentumOptimizer and RMSPropOptimizer.')
102 | tf.app.flags.DEFINE_float('rmsprop_momentum', 0.9, 'Momentum.')
103 | tf.app.flags.DEFINE_float('rmsprop_decay', 0.9, 'Decay term for RMSProp.')
104 |
105 | # =========================================================================== #
106 | # Learning Rate Flags.
107 | # =========================================================================== #
108 | tf.app.flags.DEFINE_string(
109 |     'learning_rate_decay_type',
110 |     'fixed',
111 |     'Specifies how the learning rate is decayed. 
One of "fixed", "exponential",' 112 | ' or "polynomial"') 113 | tf.app.flags.DEFINE_float('learning_rate', 0.001, 'Initial learning rate.') 114 | tf.app.flags.DEFINE_float( 115 | 'end_learning_rate', 0.0001, 116 | 'The minimal end learning rate used by a polynomial decay learning rate.') 117 | tf.app.flags.DEFINE_float( 118 | 'label_smoothing', 0.0, 'The amount of label smoothing.') 119 | tf.app.flags.DEFINE_float( 120 | 'learning_rate_decay_factor', 0.1, 'Learning rate decay factor.') 121 | tf.app.flags.DEFINE_float( 122 | 'num_epochs_per_decay', 40000, 123 | 'Number of epochs after which learning rate decays.') 124 | tf.app.flags.DEFINE_float( 125 | 'moving_average_decay', None, 126 | 'The decay to use for the moving average.' 127 | 'If left as None, then moving averages are not used.') 128 | 129 | # =========================================================================== # 130 | # Dataset Flags. 131 | # =========================================================================== # 132 | tf.app.flags.DEFINE_string( 133 | 'dataset_name', 'sythtext', 'The name of the dataset to load.') 134 | tf.app.flags.DEFINE_integer( 135 | 'num_classes', 2, 'Number of classes to use in the dataset.') 136 | tf.app.flags.DEFINE_string( 137 | 'dataset_split_name', 'train', 'The name of the train/test split.') 138 | tf.app.flags.DEFINE_string( 139 | 'dataset_dir', None, 'The directory where the dataset files are stored.') 140 | tf.app.flags.DEFINE_integer( 141 | 'labels_offset', 0, 142 | 'An offset for the labels in the dataset. This flag is primarily used to ' 143 | 'evaluate the VGG and ResNet architectures which do not use a background ' 144 | 'class for the ImageNet dataset.') 145 | tf.app.flags.DEFINE_string( 146 | 'model_name', 'txtbox_300', 'The name of the architecture to train.') 147 | tf.app.flags.DEFINE_string( 148 | 'preprocessing_name', None, 'The name of the preprocessing to use. If left ' 149 | 'as `None`, then the model_name flag is used.') 150 | tf.app.flags.DEFINE_integer( 151 | 'batch_size', 32, 'The number of samples in each batch.') 152 | tf.app.flags.DEFINE_integer( 153 | 'train_image_size', None, 'Train image size') 154 | tf.app.flags.DEFINE_integer('max_number_of_steps', None, 155 | 'The maximum number of training steps.') 156 | # =========================================================================== # 157 | # Fine-Tuning Flags. 158 | # =========================================================================== # 159 | tf.app.flags.DEFINE_string( 160 | 'checkpoint_path', None, 161 | 'The path to a checkpoint from which to fine-tune.') 162 | tf.app.flags.DEFINE_string( 163 | 'checkpoint_model_scope', None, 164 | 'Model scope in the checkpoint. None if the same as the trained model.') 165 | tf.app.flags.DEFINE_string( 166 | 'checkpoint_exclude_scopes', None, 167 | 'Comma-separated list of scopes of variables to exclude when restoring ' 168 | 'from a checkpoint.') 169 | tf.app.flags.DEFINE_string( 170 | 'trainable_scopes', None, 171 | 'Comma-separated list of scopes to filter the set of variables to train.' 172 | 'By default, None would train all the variables.') 173 | tf.app.flags.DEFINE_boolean( 174 | 'ignore_missing_vars', False, 175 | 'When restoring a checkpoint would ignore missing variables.') 176 | 177 | FLAGS = tf.app.flags.FLAGS 178 | 179 | 180 | # =========================================================================== # 181 | # Main training routine. 
182 | # =========================================================================== #
183 | def main(_):
184 |     if not FLAGS.dataset_dir:
185 |         raise ValueError('You must supply the dataset directory with --dataset_dir')
186 |
187 |     tf.logging.set_verbosity(tf.logging.DEBUG)
188 |     with tf.Graph().as_default():
189 |         # Config model_deploy. Keep TF Slim Models structure.
190 |         # Useful if we want to use multiple GPUs and/or servers in the future.
191 |         deploy_config = model_deploy.DeploymentConfig(
192 |             num_clones=FLAGS.num_clones,
193 |             clone_on_cpu=FLAGS.clone_on_cpu,
194 |             replica_id=0,
195 |             num_replicas=1,
196 |             num_ps_tasks=0)
197 |         # Create global_step.
198 |         with tf.device(deploy_config.variables_device()):
199 |             global_step = slim.create_global_step()
200 |
201 |         # Select the dataset.
202 |
203 |         #dataset = dataset_factory.get_dataset(
204 |         #    FLAGS.dataset_name, FLAGS.dataset_split_name, FLAGS.dataset_dir)
205 |         dataset = sythtextprovider.get_datasets(FLAGS.dataset_dir)
206 |         # Get the SSD network and its anchors.
207 |
208 |         #ssd_class = nets_factory.get_network(FLAGS.model_name)
209 |         #ssd_params = ssd_class.default_params._replace(num_classes=FLAGS.num_classes)
210 |         text_net = txtbox_300.TextboxNet()
211 |         text_shape = text_net.params.img_shape
212 |         print 'text_shape ' + str(text_shape)
213 |         text_anchors = text_net.anchors(text_shape)
214 |         print len(text_anchors)
215 |         # Select the preprocessing function.
216 |         '''
217 |         preprocessing_name = FLAGS.preprocessing_name or FLAGS.model_name
218 |         image_preprocessing_fn = preprocessing_factory.get_preprocessing(
219 |             preprocessing_name, is_training=True)
220 |         '''
221 |         #tf_utils.print_configuration(FLAGS.__flags, ssd_params,
222 |         #                             dataset.data_sources, FLAGS.train_dir)
223 |         # =================================================================== #
224 |         # Create a dataset provider and batches.
225 |         # =================================================================== #
226 |         with tf.device(deploy_config.inputs_device()):
227 |             with tf.name_scope(FLAGS.dataset_name + '_data_provider'):
228 |                 provider = slim.dataset_data_provider.DatasetDataProvider(
229 |                     dataset,
230 |                     num_readers=FLAGS.num_readers,
231 |                     common_queue_capacity=20 * FLAGS.batch_size,
232 |                     common_queue_min=10 * FLAGS.batch_size,
233 |                     shuffle=True)
234 |             # Get for SSD network: image, labels, bboxes.
235 |             [image, shape, glabels, gbboxes] = provider.get(['image', 'shape',
236 |                                                              'object/label',
237 |                                                              'object/bbox'])
238 |
239 |             init_op = tf.global_variables_initializer()
240 |
241 |             # Pre-processing image, labels and bboxes.
242 |
243 |             image, glabels, gbboxes = \
244 |                 ssd_vgg_preprocessing.preprocess_image(image, glabels, gbboxes,
245 |                                                        text_shape, is_training=True,
246 |                                                        data_format='NHWC')
247 |
248 |             # Encode groundtruth labels and bboxes.
249 |             print 'bboxes num' + str(gbboxes.get_shape())
250 |             print 'glabels' + str(tf.shape(glabels))
251 |             glocalisations, gscores = \
252 |                 text_net.bboxes_encode(gbboxes, text_anchors)
253 |             batch_shape = [1] + [len(text_anchors)] * 2
254 |
255 |             # Training batches and queue.
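            # (What follows relies on tf_utils.reshape_list: the first call
            # flattens the nested list [image, glocalisations (one per feature
            # layer), gscores (one per feature layer)] so tf.train.batch can
            # consume it, and the second call uses batch_shape = [1, N, N] --
            # with N = len(text_anchors) feature layers -- to regroup the
            # batched tensors into image / localisations / scores again.)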
256 | 257 | r = tf.train.batch( 258 | tf_utils.reshape_list([image, glocalisations, gscores]), 259 | batch_size=FLAGS.batch_size, 260 | num_threads=FLAGS.num_preprocessing_threads, 261 | capacity=5 * FLAGS.batch_size) 262 | print 'r shape' + str(r[0]) + str(r[1]) + str(r[10]) 263 | b_image, b_glocalisations, b_gscores= \ 264 | tf_utils.reshape_list(r, batch_shape) 265 | 266 | 267 | # Intermediate queueing: unique batch computation pipeline for all 268 | # GPUs running the training. 269 | batch_queue = slim.prefetch_queue.prefetch_queue( 270 | tf_utils.reshape_list([b_image, b_glocalisations,b_gscores]), 271 | capacity=2 * deploy_config.num_clones) 272 | 273 | 274 | # =================================================================== # 275 | # Define the model running on every GPU. 276 | # =================================================================== # 277 | def clone_fn(batch_queue): 278 | 279 | #Allows data parallelism by creating multiple 280 | #clones of network_fn. 281 | 282 | # Dequeue batch. 283 | b_image, b_glocalisations, b_gscores = \ 284 | tf_utils.reshape_list(batch_queue.dequeue(), batch_shape) 285 | 286 | # Construct SSD network. 287 | arg_scope = text_net.arg_scope(weight_decay=FLAGS.weight_decay) 288 | with slim.arg_scope(arg_scope): 289 | localisations, logits, end_points = \ 290 | text_net.net(b_image, is_training=True) 291 | # Add loss function. 292 | text_net.losses(logits, localisations, 293 | b_glocalisations, b_gscores, 294 | match_threshold=FLAGS.match_threshold, 295 | negative_ratio=FLAGS.negative_ratio, 296 | alpha=FLAGS.loss_alpha, 297 | label_smoothing=FLAGS.label_smoothing) 298 | return end_points 299 | 300 | # Gather initial summaries. 301 | summaries = set(tf.get_collection(tf.GraphKeys.SUMMARIES)) 302 | 303 | # =================================================================== # 304 | # Add summaries from first clone. 305 | # =================================================================== # 306 | clones = model_deploy.create_clones(deploy_config, clone_fn, [batch_queue]) 307 | first_clone_scope = deploy_config.clone_scope(0) 308 | # Gather update_ops from the first clone. These contain, for example, 309 | # the updates for the batch_norm variables created by network_fn. 310 | update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS, first_clone_scope) 311 | 312 | # Add summaries for end_points. 313 | end_points = clones[0].outputs 314 | for end_point in end_points: 315 | x = end_points[end_point] 316 | summaries.add(tf.summary.histogram('activations/' + end_point, x)) 317 | summaries.add(tf.summary.scalar('sparsity/' + end_point, 318 | tf.nn.zero_fraction(x))) 319 | # Add summaries for losses and extra losses. 320 | for loss in tf.get_collection(tf.GraphKeys.LOSSES, first_clone_scope): 321 | summaries.add(tf.summary.scalar(loss.op.name, loss)) 322 | for loss in tf.get_collection('EXTRA_LOSSES', first_clone_scope): 323 | summaries.add(tf.summary.scalar(loss.op.name, loss)) 324 | 325 | # Add summaries for variables. 326 | for variable in slim.get_model_variables(): 327 | summaries.add(tf.summary.histogram(variable.op.name, variable)) 328 | 329 | # =================================================================== # 330 | # Configure the moving averages. 
331 | # =================================================================== # 332 | if FLAGS.moving_average_decay: 333 | moving_average_variables = slim.get_model_variables() 334 | variable_averages = tf.train.ExponentialMovingAverage( 335 | FLAGS.moving_average_decay, global_step) 336 | else: 337 | moving_average_variables, variable_averages = None, None 338 | 339 | # =================================================================== # 340 | # Configure the optimization procedure. 341 | # =================================================================== # 342 | with tf.device(deploy_config.optimizer_device()): 343 | learning_rate = tf_utils.configure_learning_rate(FLAGS, 344 | dataset.num_samples, 345 | global_step) 346 | optimizer = tf_utils.configure_optimizer(FLAGS, learning_rate) 347 | summaries.add(tf.summary.scalar('learning_rate', learning_rate)) 348 | 349 | if FLAGS.moving_average_decay: 350 | # Update ops executed locally by trainer. 351 | update_ops.append(variable_averages.apply(moving_average_variables)) 352 | 353 | # Variables to train. 354 | variables_to_train = tf_utils.get_variables_to_train(FLAGS) 355 | 356 | # and returns a train_tensor and summary_op 357 | total_loss, clones_gradients = model_deploy.optimize_clones( 358 | clones, 359 | optimizer, 360 | var_list=variables_to_train) 361 | # Add total_loss to summary. 362 | summaries.add(tf.summary.scalar('total_loss', total_loss)) 363 | 364 | # Create gradient updates. 365 | grad_updates = optimizer.apply_gradients(clones_gradients, 366 | global_step=global_step) 367 | update_ops.append(grad_updates) 368 | update_op = tf.group(*update_ops) 369 | train_tensor = control_flow_ops.with_dependencies([update_op], total_loss, 370 | name='train_op') 371 | 372 | # Add the summaries from the first clone. These contain the summaries 373 | summaries |= set(tf.get_collection(tf.GraphKeys.SUMMARIES, 374 | first_clone_scope)) 375 | # Merge all summaries together. 376 | summary_op = tf.summary.merge(list(summaries), name='summary_op') 377 | 378 | # =================================================================== # 379 | # Kicks off the training. 
380 | # =========================================================================== #
381 |     gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=FLAGS.gpu_memory_fraction)
382 |     config = tf.ConfigProto(log_device_placement=False,
383 |                             gpu_options=gpu_options)
384 |     saver = tf.train.Saver(max_to_keep=5,
385 |                            keep_checkpoint_every_n_hours=1.0,
386 |                            write_version=2,
387 |                            pad_step_number=False)
388 |     slim.learning.train(
389 |         train_tensor,
390 |         logdir=FLAGS.train_dir,
391 |         master='',
392 |         is_chief=True,
393 |         init_fn=tf_utils.get_init_fn(FLAGS),
394 |         summary_op=summary_op,
395 |         number_of_steps=FLAGS.max_number_of_steps,
396 |         log_every_n_steps=FLAGS.log_every_n_steps,
397 |         save_summaries_secs=FLAGS.save_summaries_secs,
398 |         saver=saver,
399 |         save_interval_secs=FLAGS.save_interval_secs,
400 |         session_config=config,
401 |         sync_optimizer=None)
402 |
403 |
404 | if __name__ == '__main__':
405 |     tf.app.run()
406 |
--------------------------------------------------------------------------------
/datasets/__init__.py:
--------------------------------------------------------------------------------
1 |
2 |
--------------------------------------------------------------------------------
/datasets/data2record.py:
--------------------------------------------------------------------------------
1 | ## Script that downloads the dataset and transforms it into tf-records.
2 | ## Assume the dataset has been downloaded into the following folders:
3 | ## SynthText dataset (41G)
4 | ## data/sythtext/*
5 |
6 | import numpy as np
7 | import scipy.io as sio
8 | import os
9 | import tensorflow as tf
10 | import re
11 | from datasets.dataset_utils import int64_feature, float_feature, bytes_feature, ImageCoder, norm
12 | import cv2
13 | from PIL import Image
14 |
15 | data_path = 'data/sythtext/'
16 | os.chdir(data_path)
17 | cellname = 'gt'
18 | textname = 'txt'
19 | imcell = 'imnames'
20 | wordname = 'wordBB'
21 | charname = 'charBB'
22 | NUMoffolder = 1
23 |
24 | ## The SynthText dataset is too big to store in one record,
25 | ## so we write one tfrecord per directory name.
26 |
27 |
28 | def _convert_to_example(image_data, shape, bbox, label, imname):
29 |     nbbox = np.array(bbox)
30 |     ymin = list(nbbox[:, 0])
31 |     xmin = list(nbbox[:, 1])
32 |     ymax = list(nbbox[:, 2])
33 |     xmax = list(nbbox[:, 3])
34 |
35 |     print 'shape: {}, height:{}, width:{}'.format(shape, shape[0], shape[1])
36 |     example = tf.train.Example(features=tf.train.Features(feature={
37 |             'image/height': int64_feature(shape[0]),
38 |             'image/width': int64_feature(shape[1]),
39 |             'image/channels': int64_feature(shape[2]),
40 |             'image/shape': int64_feature(shape),
41 |             'image/object/bbox/xmin': float_feature(xmin),
42 |             'image/object/bbox/xmax': float_feature(xmax),
43 |             'image/object/bbox/ymin': float_feature(ymin),
44 |             'image/object/bbox/ymax': float_feature(ymax),
45 |             'image/object/bbox/label': int64_feature(label),
46 |             'image/format': bytes_feature('jpeg'),
47 |             'image/encoded': bytes_feature(image_data),
48 |             'image/name': bytes_feature(imname.tostring()),
49 |             }))
50 |     return example
51 |
52 |
53 | def _processing_image(wordbb, imname, coder):
54 |     image_data = tf.gfile.GFile(imname, 'r').read()
55 |     image = coder.decode_jpeg(image_data)
56 |     #image_data = np.array(Image.open(imname))
57 |     shape = image.shape
58 |     if(len(wordbb.shape) < 3):
59 |         numofbox = 1
60 |     else:
61 |         numofbox = wordbb.shape[2]
62 |     bbox = []
63 |     [xmin, ymin] = np.min(wordbb, 1)
64 |     [xmax, ymax] = np.max(wordbb, 1)
65 |     xmin = np.maximum(xmin, 0)
66 |     ymin = np.maximum(ymin, 0)
67 |     xmax = np.minimum(xmax, shape[1])  # clamp to image width (pixels) before normalizing
68 |     ymax = np.minimum(ymax, shape[0])  # clamp to image height (pixels) before normalizing
69 |     if numofbox > 1:
70 |         bbox = [[ymin[i]/shape[0], xmin[i]/shape[1], ymax[i]/shape[0], xmax[i]/shape[1]] for i in range(numofbox)]
71 |     if numofbox == 1:
72 |         bbox = [[ymin/shape[0], xmin/shape[1], ymax/shape[0], xmax/shape[1]]]
73 |
74 |
75 |     label = [1 for i in range(numofbox)]
76 |     shape = list(shape)
77 |     return image_data, shape, bbox, label, imname
78 |
79 |
80 | def run():
81 |     labels = sio.loadmat('gt.mat')
82 |     print labels.keys()
83 |     texts = labels[textname]
84 |     imnames = labels[imcell]
85 |     wordBB = labels[wordname]
86 |     charBB = labels[charname]
87 |     coder = ImageCoder()
88 |     for i in range(NUMoffolder):
89 |         tf_filename = str(i+1) + '.tfrecord'
90 |         tfrecord_writer = tf.python_io.TFRecordWriter(tf_filename)
91 |         dir = i+1
92 |         pattern = re.compile(r'^{}\/'.format(dir))
93 |         i = 0
94 |         res = [i for i in range(imnames.shape[1]) if pattern.match(imnames[0,i][0]) != None]
95 |         print len(res)
96 |         # shuffle
97 |         res = np.random.permutation(res)
98 |         for j in res:
99 |             wordbb = wordBB[0,j]
100 |             imname = imnames[0,j][0]
101 |             image_data, shape, bbox, label, imname = _processing_image(wordbb, imname, coder)
102 |
103 |             example = _convert_to_example(image_data, shape, bbox, label, imname)
104 |             tfrecord_writer.write(example.SerializeToString())
105 |     print 'Transform to tfrecord finished'
106 |
107 | if __name__ == '__main__':
108 |     run()
109 |
110 |
111 |
112 |
113 |
114 |
--------------------------------------------------------------------------------
/datasets/dataset_utils.py:
--------------------------------------------------------------------------------
1 | # Copyright 2016 The TensorFlow Authors. All Rights Reserved.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License. 
5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | """Contains utilities for downloading and converting datasets.""" 16 | from __future__ import absolute_import 17 | from __future__ import division 18 | from __future__ import print_function 19 | 20 | import os 21 | import sys 22 | import tarfile 23 | 24 | from six.moves import urllib 25 | import tensorflow as tf 26 | 27 | LABELS_FILENAME = 'labels.txt' 28 | def norm(x): 29 | if x < 0: 30 | x = 0 31 | else: 32 | if x > 1: 33 | x = 1 34 | return x 35 | 36 | def int64_feature(value): 37 | """Wrapper for inserting int64 features into Example proto. 38 | """ 39 | if not isinstance(value, list): 40 | value = [value] 41 | return tf.train.Feature(int64_list=tf.train.Int64List(value=value)) 42 | 43 | 44 | def float_feature(value): 45 | """Wrapper for inserting float features into Example proto. 46 | """ 47 | if not isinstance(value, list): 48 | value = [value] 49 | return tf.train.Feature(float_list=tf.train.FloatList(value=value)) 50 | 51 | 52 | def bytes_feature(value): 53 | """Wrapper for inserting bytes features into Example proto. 54 | """ 55 | if not isinstance(value, list): 56 | value = [value] 57 | return tf.train.Feature(bytes_list=tf.train.BytesList(value=value)) 58 | 59 | 60 | def image_to_tfexample(image_data, image_format, height, width, class_id): 61 | return tf.train.Example(features=tf.train.Features(feature={ 62 | 'image/encoded': bytes_feature(image_data), 63 | 'image/format': bytes_feature(image_format), 64 | 'image/class/label': int64_feature(class_id), 65 | 'image/height': int64_feature(height), 66 | 'image/width': int64_feature(width), 67 | })) 68 | 69 | 70 | def download_and_uncompress_tarball(tarball_url, dataset_dir): 71 | """Downloads the `tarball_url` and uncompresses it locally. 72 | 73 | Args: 74 | tarball_url: The URL of a tarball file. 75 | dataset_dir: The directory where the temporary files are stored. 76 | """ 77 | filename = tarball_url.split('/')[-1] 78 | filepath = os.path.join(dataset_dir, filename) 79 | 80 | def _progress(count, block_size, total_size): 81 | sys.stdout.write('\r>> Downloading %s %.1f%%' % ( 82 | filename, float(count * block_size) / float(total_size) * 100.0)) 83 | sys.stdout.flush() 84 | filepath, _ = urllib.request.urlretrieve(tarball_url, filepath, _progress) 85 | print() 86 | statinfo = os.stat(filepath) 87 | print('Successfully downloaded', filename, statinfo.st_size, 'bytes.') 88 | tarfile.open(filepath, 'r:gz').extractall(dataset_dir) 89 | 90 | 91 | def write_label_file(labels_to_class_names, dataset_dir, 92 | filename=LABELS_FILENAME): 93 | """Writes a file with the list of class names. 94 | 95 | Args: 96 | labels_to_class_names: A map of (integer) labels to class names. 97 | dataset_dir: The directory in which the labels file should be written. 98 | filename: The filename where the class names are written. 
99 | """ 100 | labels_filename = os.path.join(dataset_dir, filename) 101 | with tf.gfile.Open(labels_filename, 'w') as f: 102 | for label in labels_to_class_names: 103 | class_name = labels_to_class_names[label] 104 | f.write('%d:%s\n' % (label, class_name)) 105 | 106 | 107 | def has_labels(dataset_dir, filename=LABELS_FILENAME): 108 | """Specifies whether or not the dataset directory contains a label map file. 109 | 110 | Args: 111 | dataset_dir: The directory in which the labels file is found. 112 | filename: The filename where the class names are written. 113 | 114 | Returns: 115 | `True` if the labels file exists and `False` otherwise. 116 | """ 117 | return tf.gfile.Exists(os.path.join(dataset_dir, filename)) 118 | 119 | 120 | def read_label_file(dataset_dir, filename=LABELS_FILENAME): 121 | """Reads the labels file and returns a mapping from ID to class name. 122 | 123 | Args: 124 | dataset_dir: The directory in which the labels file is found. 125 | filename: The filename where the class names are written. 126 | 127 | Returns: 128 | A map from a label (integer) to class name. 129 | """ 130 | labels_filename = os.path.join(dataset_dir, filename) 131 | with tf.gfile.Open(labels_filename, 'rb') as f: 132 | lines = f.read() 133 | lines = lines.split(b'\n') 134 | lines = filter(None, lines) 135 | 136 | labels_to_class_names = {} 137 | for line in lines: 138 | index = line.index(b':') 139 | labels_to_class_names[int(line[:index])] = line[index+1:] 140 | return labels_to_class_names 141 | 142 | 143 | class ImageCoder(object): 144 | """Helper class that provides TensorFlow image coding utilities.""" 145 | 146 | def __init__(self): 147 | # Create a single Session to run all image coding calls. 148 | self._sess = tf.Session() 149 | 150 | # Initializes function that converts PNG to JPEG data. 151 | self._png_data = tf.placeholder(dtype=tf.string) 152 | image = tf.image.decode_png(self._png_data, channels=3) 153 | self._png_to_jpeg = tf.image.encode_jpeg(image, format='rgb', quality=100) 154 | 155 | # Initializes function that converts CMYK JPEG data to RGB JPEG data. 156 | self._cmyk_data = tf.placeholder(dtype=tf.string) 157 | image = tf.image.decode_jpeg(self._cmyk_data, channels=0) 158 | self._cmyk_to_rgb = tf.image.encode_jpeg(image, format='rgb', quality=100) 159 | 160 | # Initializes function that decodes RGB JPEG data. 
161 | self._decode_jpeg_data = tf.placeholder(dtype=tf.string) 162 | self._decode_jpeg = tf.image.decode_jpeg(self._decode_jpeg_data, channels=3) 163 | 164 | def png_to_jpeg(self, image_data): 165 | return self._sess.run(self._png_to_jpeg, 166 | feed_dict={self._png_data: image_data}) 167 | 168 | def cmyk_to_rgb(self, image_data): 169 | return self._sess.run(self._cmyk_to_rgb, 170 | feed_dict={self._cmyk_data: image_data}) 171 | 172 | def decode_jpeg(self, image_data): 173 | image = self._sess.run(self._decode_jpeg, 174 | feed_dict={self._decode_jpeg_data: image_data}) 175 | assert len(image.shape) == 3 176 | assert image.shape[2] == 3 177 | return image 178 | -------------------------------------------------------------------------------- /datasets/sythtextprovider.py: -------------------------------------------------------------------------------- 1 | ## an initial version 2 | ## Transform the tfrecord to slim data provider format 3 | 4 | import numpy 5 | import tensorflow as tf 6 | import os 7 | slim = tf.contrib.slim 8 | 9 | 10 | 11 | 12 | ITEMS_TO_DESCRIPTIONS = { 13 | 'image': 'slim.tfexample_decoder.Image', 14 | 'shape': 'shape', 15 | 'height': 'height', 16 | 'width': 'width', 17 | 'object/bbox': 'box', 18 | 'object/label': 'label' 19 | } 20 | SPLITS_TO_SIZES = { 21 | 'train': 4262, 22 | } 23 | NUM_CLASSES = 2 24 | 25 | 26 | 27 | def get_datasets(data_dir,file_pattern = '*.tfrecord'): 28 | file_patterns = os.path.join(data_dir, file_pattern) 29 | print 'file_path: {}'.format(file_patterns) 30 | reader = tf.TFRecordReader 31 | keys_to_features = { 32 | 'image/height': tf.FixedLenFeature([1], tf.int64), 33 | 'image/width': tf.FixedLenFeature([1], tf.int64), 34 | 'image/channels': tf.FixedLenFeature([1], tf.int64), 35 | 'image/shape': tf.FixedLenFeature([3], tf.int64), 36 | 'image/object/bbox/xmin': tf.VarLenFeature(dtype=tf.float32), 37 | 'image/object/bbox/ymin': tf.VarLenFeature(dtype=tf.float32), 38 | 'image/object/bbox/xmax': tf.VarLenFeature(dtype=tf.float32), 39 | 'image/object/bbox/ymax': tf.VarLenFeature(dtype=tf.float32), 40 | 'image/object/bbox/label': tf.VarLenFeature(dtype=tf.int64), 41 | 'image/format': tf.FixedLenFeature([], tf.string, default_value='jpeg'), 42 | 'image/encoded': tf.FixedLenFeature([], tf.string, default_value=''), 43 | 'image/name': tf.VarLenFeature(dtype = tf.string), 44 | } 45 | 46 | items_to_handlers = { 47 | 'image': slim.tfexample_decoder.Image('image/encoded', 'image/format'), 48 | #'image': slim.tfexample_decoder.Tensor('image/encoded'), 49 | 'shape': slim.tfexample_decoder.Tensor('image/shape'), 50 | 'height': slim.tfexample_decoder.Tensor('image/height'), 51 | 'width': slim.tfexample_decoder.Tensor('image/width'), 52 | 'object/bbox': slim.tfexample_decoder.BoundingBox( 53 | ['xmin', 'ymin', 'xmax', 'ymax'], 'image/object/bbox/'), 54 | 'object/label': slim.tfexample_decoder.Tensor('image/object/bbox/label'), 55 | #'objext/txt': slim.tfexample_decoder.Tensor('image/object/bbox/label_text'), 56 | } 57 | 58 | decoder = slim.tfexample_decoder.TFExampleDecoder( 59 | keys_to_features, items_to_handlers) 60 | 61 | labels_to_names = None 62 | 63 | 64 | return slim.dataset.Dataset( 65 | data_sources=file_patterns, 66 | reader=reader, 67 | decoder=decoder, 68 | num_samples=SPLITS_TO_SIZES['train'], 69 | items_to_descriptions=ITEMS_TO_DESCRIPTIONS, 70 | num_classes=NUM_CLASSES, 71 | labels_to_names=labels_to_names) -------------------------------------------------------------------------------- /datasets/testproviderfailed.py: 
-------------------------------------------------------------------------------- 1 | import datasets.sythtextprovider as sythtext 2 | import tensorflow as tf 3 | slim = tf.contrib.slim 4 | import cv2 5 | #import matplotlib.pyplot as plt 6 | from PIL import Image 7 | from datasets.sythtextprovider import get_datasets 8 | """ 9 | data_dir = '/Users/xiaodiu/Documents/github/projecttextbox/TextBoxes-TensorFlow/data/sythtext/' 10 | file = data_dir + '1.tfrecord' 11 | 12 | 13 | tfrecord_file_queue = tf.train.string_input_producer([file] ,num_epochs = 1,name='queue',shuffle = True) 14 | reader = tf.TFRecordReader() 15 | _, tfrecord_serialized = reader.read(tfrecord_file_queue) 16 | # label and image are stored as bytes but could be stored as 17 | # int64 or float64 values in a serialized tf.Example protobuf. 18 | tfrecord_features = tf.parse_single_example(tfrecord_serialized, 19 | features={ 20 | 'image/height': tf.FixedLenFeature([1], tf.int64), 21 | 'image/width': tf.FixedLenFeature([1], tf.int64), 22 | 'image/channels': tf.FixedLenFeature([1], tf.int64), 23 | 'image/shape': tf.FixedLenFeature([], tf.int64), 24 | 'image/object/bbox/xmin': tf.VarLenFeature(dtype=tf.float32), 25 | 'image/object/bbox/ymin': tf.VarLenFeature(dtype=tf.float32), 26 | 'image/object/bbox/xmax': tf.VarLenFeature(dtype=tf.float32), 27 | 'image/object/bbox/ymax': tf.VarLenFeature(dtype=tf.float32), 28 | 'image/object/bbox/label': tf.VarLenFeature(dtype=tf.int64), 29 | #'image/object/bbox/label_text' : tf.VarLenFeature(dtype=tf.string), 30 | 'image/format': tf.FixedLenFeature((), tf.string, default_value='jpeg'), 31 | 'image/encoded': tf.FixedLenFeature((), tf.string, default_value=''), 32 | }, name='features') 33 | # image was saved as uint8, so we have to decode as uint8. 34 | 35 | image = tf.decode_raw(tfrecord_features['image/encoded'], tf.uint8) 36 | shape = tf.cast(tfrecord_features['image/shape'], tf.int64) 37 | #image = tf.reshape(image, shape) 38 | height = tf.cast(tfrecord_features['image/height'],tf.int64) 39 | width = tf.cast(tfrecord_features['image/width'],tf.int64) 40 | 41 | """ 42 | dataset_dir = '/Users/xiaodiu/Documents/github/projecttextbox/TextBoxes-TensorFlow/data/sythtext/' 43 | dataset = get_datasets(dataset_dir) 44 | 45 | provider = slim.dataset_data_provider.DatasetDataProvider( 46 | dataset, 47 | num_readers=1, 48 | common_queue_capacity=20 * 32, 49 | common_queue_min=10 * 32, 50 | shuffle=True) 51 | # Get for SSD network: image, labels, bboxes. 
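# (A DatasetDataProvider wraps a parallel reader around the slim dataset;
# provider.get() returns the decoded tensors in the same order as the
# requested item names. A minimal sketch of the same pattern, assuming the
# dataset above is valid:
#   image, bboxes = provider.get(['image', 'object/bbox'])
# where bboxes comes back as [num_boxes, 4], sides in the key order that
# was configured in the BoundingBox handler.)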
52 | [image,shape, height, width,glabels, gbboxes,] = provider.get(['image','shape', 'height', 53 | 'width', 54 | 'object/label', 55 | 'object/bbox']) 56 | #image = tf.decode_raw(image, tf.uint8) 57 | #height = tf.cast(features['height'], tf.int32) 58 | #width = tf.cast(features['width'], tf.int32) 59 | #image = tf.reshape(image, tf.pack([height,width,3])) 60 | 61 | 62 | #print image 63 | print shape 64 | print glabels 65 | print gbboxes 66 | with tf.Session() as sess: 67 | 68 | sess.run(tf.global_variables_initializer()) 69 | sess.run(tf.local_variables_initializer()) 70 | coord = tf.train.Coordinator() 71 | threads = tf.train.start_queue_runners(coord=coord) 72 | #print sess.run(shape) 73 | #img = sess.run(image) 74 | #print img.shape 75 | print sess.run([height,width]) 76 | print sess.run(shape) 77 | print sess.run(gbboxes).shape 78 | print sess.run(glabels) 79 | 80 | 81 | coord.request_stop() 82 | coord.join(threads) 83 | 84 | 85 | 86 | -------------------------------------------------------------------------------- /deployment/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /nets/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /nets/custom_layers.py: -------------------------------------------------------------------------------- 1 | # Copyright 2015 Paul Balanca. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | """Implement some custom layers, not provided by TensorFlow. 16 | 17 | Trying to follow as much as possible the style/standards used in 18 | tf.contrib.layers 19 | """ 20 | import tensorflow as tf 21 | 22 | from tensorflow.contrib.framework.python.ops import add_arg_scope 23 | from tensorflow.contrib.layers.python.layers import initializers 24 | from tensorflow.contrib.framework.python.ops import variables 25 | from tensorflow.contrib.layers.python.layers import utils 26 | from tensorflow.python.ops import nn 27 | from tensorflow.python.ops import init_ops 28 | from tensorflow.python.ops import variable_scope 29 | 30 | 31 | def abs_smooth(x): 32 | """Smoothed absolute function. Useful to compute an L1 smooth error. 33 | 34 | Define as: 35 | x^2 / 2 if abs(x) < 1 36 | abs(x) - 0.5 if abs(x) > 1 37 | We use here a differentiable definition using min(x) and abs(x). Clearly 38 | not optimal, but good enough for our purpose! 
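    A quick check of the min/abs form: for abs(x) < 1, minx = abs(x) and
    r = 0.5 * ((abs(x) - 1) * abs(x) + abs(x)) = x^2 / 2; for abs(x) >= 1,
    minx = 1 and r = 0.5 * ((abs(x) - 1) + abs(x)) = abs(x) - 0.5, matching
    the two cases above.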
39 | """ 40 | absx = tf.abs(x) 41 | minx = tf.minimum(absx, 1) 42 | r = 0.5 * ((absx - 1) * minx + absx) 43 | return r 44 | 45 | 46 | @add_arg_scope 47 | def l2_normalization( 48 | inputs, 49 | scaling=False, 50 | scale_initializer=init_ops.ones_initializer(), 51 | reuse=None, 52 | variables_collections=None, 53 | outputs_collections=None, 54 | data_format='NHWC', 55 | trainable=True, 56 | scope=None): 57 | """Implement L2 normalization on every feature (i.e. spatial normalization). 58 | 59 | Should be extended in some near future to other dimensions, providing a more 60 | flexible normalization framework. 61 | 62 | Args: 63 | inputs: a 4-D tensor with dimensions [batch_size, height, width, channels]. 64 | scaling: whether or not to add a post scaling operation along the dimensions 65 | which have been normalized. 66 | scale_initializer: An initializer for the weights. 67 | reuse: whether or not the layer and its variables should be reused. To be 68 | able to reuse the layer scope must be given. 69 | variables_collections: optional list of collections for all the variables or 70 | a dictionary containing a different list of collection per variable. 71 | outputs_collections: collection to add the outputs. 72 | data_format: NHWC or NCHW data format. 73 | trainable: If `True` also add variables to the graph collection 74 | `GraphKeys.TRAINABLE_VARIABLES` (see tf.Variable). 75 | scope: Optional scope for `variable_scope`. 76 | Returns: 77 | A `Tensor` representing the output of the operation. 78 | """ 79 | 80 | with variable_scope.variable_scope( 81 | scope, 'L2Normalization', [inputs], reuse=reuse) as sc: 82 | inputs_shape = inputs.get_shape() 83 | inputs_rank = inputs_shape.ndims 84 | dtype = inputs.dtype.base_dtype 85 | if data_format == 'NHWC': 86 | # norm_dim = tf.range(1, inputs_rank-1) 87 | norm_dim = tf.range(inputs_rank-1, inputs_rank) 88 | params_shape = inputs_shape[-1:] 89 | elif data_format == 'NCHW': 90 | # norm_dim = tf.range(2, inputs_rank) 91 | norm_dim = tf.range(1, 2) 92 | params_shape = (inputs_shape[1]) 93 | 94 | # Normalize along spatial dimensions. 95 | outputs = nn.l2_normalize(inputs, norm_dim, epsilon=1e-12) 96 | # Additional scaling. 97 | if scaling: 98 | scale_collections = utils.get_variable_collections( 99 | variables_collections, 'scale') 100 | scale = variables.model_variable('gamma', 101 | shape=params_shape, 102 | dtype=dtype, 103 | initializer=scale_initializer, 104 | collections=scale_collections, 105 | trainable=trainable) 106 | if data_format == 'NHWC': 107 | outputs = tf.multiply(outputs, scale) 108 | elif data_format == 'NCHW': 109 | scale = tf.expand_dims(scale, axis=-1) 110 | scale = tf.expand_dims(scale, axis=-1) 111 | outputs = tf.multiply(outputs, scale) 112 | # outputs = tf.transpose(outputs, perm=(0, 2, 3, 1)) 113 | 114 | return utils.collect_named_outputs(outputs_collections, 115 | sc.original_name_scope, outputs) 116 | 117 | 118 | @add_arg_scope 119 | def pad2d(inputs, 120 | pad=(0, 0), 121 | mode='CONSTANT', 122 | data_format='NHWC', 123 | trainable=True, 124 | scope=None): 125 | """2D Padding layer, adding a symmetric padding to H and W dimensions. 126 | 127 | Aims to mimic padding in Caffe and MXNet, helping the port of models to 128 | TensorFlow. Tries to follow the naming convention of `tf.contrib.layers`. 129 | 130 | Args: 131 | inputs: 4D input Tensor; 132 | pad: 2-Tuple with padding values for H and W dimensions; 133 | mode: Padding mode. C.f. `tf.pad` 134 | data_format: NHWC or NCHW data format. 
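    For example, pad=(1, 1) on an NHWC tensor of shape [N, 10, 10, C]
    returns shape [N, 12, 12, C], mimicking Caffe's symmetric pad=1 before
    a 'VALID' convolution.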
135 | """ 136 | with tf.name_scope(scope, 'pad2d', [inputs]): 137 | # Padding shape. 138 | if data_format == 'NHWC': 139 | paddings = [[0, 0], [pad[0], pad[0]], [pad[1], pad[1]], [0, 0]] 140 | elif data_format == 'NCHW': 141 | paddings = [[0, 0], [0, 0], [pad[0], pad[0]], [pad[1], pad[1]]] 142 | net = tf.pad(inputs, paddings, mode=mode) 143 | return net 144 | 145 | 146 | @add_arg_scope 147 | def channel_to_last(inputs, 148 | data_format='NHWC', 149 | scope=None): 150 | """Move the channel axis to the last dimension. Allows to 151 | provide a single output format whatever the input data format. 152 | 153 | Args: 154 | inputs: Input Tensor; 155 | data_format: NHWC or NCHW. 156 | Return: 157 | Input in NHWC format. 158 | """ 159 | with tf.name_scope(scope, 'channel_to_last', [inputs]): 160 | if data_format == 'NHWC': 161 | net = inputs 162 | elif data_format == 'NCHW': 163 | net = tf.transpose(inputs, perm=(0, 2, 3, 1)) 164 | return net 165 | -------------------------------------------------------------------------------- /nets/textbox_common.py: -------------------------------------------------------------------------------- 1 | 2 | import tensorflow as tf 3 | import numpy as np 4 | import math 5 | 6 | 7 | 8 | 9 | 10 | # =========================================================================== # 11 | # TensorFlow implementation of Text Boxes encoding / decoding. 12 | # =========================================================================== # 13 | 14 | def tf_text_bboxes_encode_layer(bboxes, 15 | anchors_layer, 16 | matching_threshold=0.1, 17 | prior_scaling=[0.1, 0.1, 0.2, 0.2], 18 | dtype=tf.float32): 19 | 20 | """ 21 | Encode groundtruth labels and bounding boxes using Textbox anchors from 22 | one layer. 23 | 24 | Arguments: 25 | bboxes: Nx4 Tensor(float) with bboxes relative coordinates; 26 | anchors_layer: Numpy array with layer anchors; 27 | matching_threshold: Threshold for positive match with groundtruth bboxes; 28 | prior_scaling: Scaling of encoded coordinates. 29 | 30 | Return: 31 | (target_localizations, target_scores): Target Tensors. 32 | # this is a binary problem, so target_scores and target_labels are the same. 33 | """ 34 | # Anchors coordinates and volume. 35 | 36 | yref, xref, href, wref = anchors_layer 37 | print yref.shape 38 | print href.shape 39 | print bboxes.shape 40 | ymin = yref - href / 2. 41 | xmin = xref - wref / 2. 42 | ymax = yref + href / 2. 43 | xmax = xref + wref / 2. 44 | vol_anchors = (xmax - xmin) * (ymax - ymin) 45 | 46 | # Initialize tensors... 47 | shape = (yref.shape[0], yref.shape[1], yref.shape[2], href.size) 48 | # all follow the shape (feat.size, feat.size, 2, 6) 49 | #feat_labels = tf.zeros(shape, dtype=tf.int64) 50 | feat_scores = tf.zeros(shape, dtype=dtype) 51 | 52 | feat_ymin = tf.zeros(shape, dtype=dtype) 53 | feat_xmin = tf.zeros(shape, dtype=dtype) 54 | feat_ymax = tf.ones(shape, dtype=dtype) 55 | feat_xmax = tf.ones(shape, dtype=dtype) 56 | 57 | def jaccard_with_anchors(bbox): 58 | """ 59 | Compute jaccard score between a box and the anchors. 60 | """ 61 | int_ymin = tf.maximum(ymin, bbox[0]) 62 | int_xmin = tf.maximum(xmin, bbox[1]) 63 | int_ymax = tf.minimum(ymax, bbox[2]) 64 | int_xmax = tf.minimum(xmax, bbox[3]) 65 | h = tf.maximum(int_ymax - int_ymin, 0.) 66 | w = tf.maximum(int_xmax - int_xmin, 0.) 67 | # Volumes.
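# (Worked example: an anchor (0, 0, 0.5, 0.5) against a ground-truth box
# (0.25, 0.25, 0.75, 0.75) intersects over [0.25, 0.5] x [0.25, 0.5], so
# inter_vol = 0.0625, union_vol = 0.25 + 0.25 - 0.0625 = 0.4375 and
# jaccard ~= 0.143, which clears the default matching_threshold of 0.1.)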
68 | inter_vol = h * w 69 | union_vol = vol_anchors - inter_vol \ 70 | + (bbox[2] - bbox[0]) * (bbox[3] - bbox[1]) 71 | jaccard = tf.div(inter_vol, union_vol) 72 | return jaccard 73 | 74 | """ 75 | # never use in Textbox 76 | def intersection_with_anchors(bbox): 77 | ''' 78 | Compute intersection between score a box and the anchors. 79 | ''' 80 | int_ymin = tf.maximum(ymin, bbox[0]) 81 | int_xmin = tf.maximum(xmin, bbox[1]) 82 | int_ymax = tf.minimum(ymax, bbox[2]) 83 | int_xmax = tf.minimum(xmax, bbox[3]) 84 | h = tf.maximum(int_ymax - int_ymin, 0.) 85 | w = tf.maximum(int_xmax - int_xmin, 0.) 86 | inter_vol = h * w 87 | scores = tf.div(inter_vol, vol_anchors) 88 | return scores 89 | """ 90 | 91 | def condition(i, feat_scores, 92 | feat_ymin, feat_xmin, feat_ymax, feat_xmax): 93 | """Condition: check label index. 94 | """ 95 | r = tf.less(i, tf.shape(bboxes)[0]) 96 | return r 97 | 98 | def body(i, feat_scores,feat_ymin, feat_xmin, feat_ymax, feat_xmax): 99 | """Body: update feature labels, scores and bboxes. 100 | Follow the original SSD paper for that purpose: 101 | - assign values when jaccard > 0.5; 102 | - only update if beat the score of other bboxes. 103 | """ 104 | # Jaccard score. 105 | bbox = bboxes[i] 106 | jaccard = jaccard_with_anchors(bbox) 107 | # Mask: check threshold + scores + no annotations + num_classes. 108 | mask = tf.greater(jaccard, feat_scores) 109 | mask = tf.logical_and(mask, tf.greater(jaccard, matching_threshold)) 110 | #mask = tf.logical_and(mask, feat_scores > -0.5) 111 | #mask = tf.logical_and(mask, label < num_classes) 112 | imask = tf.cast(mask, tf.int64) 113 | fmask = tf.cast(mask, dtype) 114 | # Update values using mask. 115 | #feat_labels = imask * label + (1 - imask) * feat_labels 116 | feat_scores = tf.where(mask, jaccard, feat_scores) 117 | 118 | feat_ymin = fmask * bbox[0] + (1 - fmask) * feat_ymin 119 | feat_xmin = fmask * bbox[1] + (1 - fmask) * feat_xmin 120 | feat_ymax = fmask * bbox[2] + (1 - fmask) * feat_ymax 121 | feat_xmax = fmask * bbox[3] + (1 - fmask) * feat_xmax 122 | 123 | # Check no annotation label: ignore these anchors... 124 | #interscts = intersection_with_anchors(bbox) 125 | #mask = tf.logical_and(interscts > ignore_threshold, 126 | # label == no_annotation_label) 127 | # Replace scores by -1. 128 | #feat_scores = tf.where(mask, -tf.cast(mask, dtype), feat_scores) 129 | 130 | return [i+1, feat_scores, 131 | feat_ymin, feat_xmin, feat_ymax, feat_xmax] 132 | # Main loop definition. 133 | 134 | i = 0 135 | [i,feat_scores, 136 | feat_ymin, feat_xmin, 137 | feat_ymax, feat_xmax] = tf.while_loop(condition, body, 138 | [i, feat_scores, 139 | feat_ymin, feat_xmin, 140 | feat_ymax, feat_xmax]) 141 | ''' 142 | for i, bbox in enumerate(tf.unpack(bboxes, axis=0)): 143 | [i,feat_scores,feat_ymin, 144 | feat_xmin, feat_ymax, feat_xmax] = body(i, feat_scores, 145 | feat_ymin, feat_xmin, 146 | feat_ymax, feat_xmax,bbox) 147 | ''' 148 | # Transform to center / size. 149 | feat_cy = (feat_ymax + feat_ymin) / 2. 150 | feat_cx = (feat_xmax + feat_xmin) / 2. 151 | feat_h = feat_ymax - feat_ymin 152 | feat_w = feat_xmax - feat_xmin 153 | # Encode features. 154 | feat_cy = (feat_cy - yref) / href / prior_scaling[0] 155 | feat_cx = (feat_cx - xref) / wref / prior_scaling[1] 156 | feat_h = tf.log(feat_h / href) / prior_scaling[2] 157 | feat_w = tf.log(feat_w / wref) / prior_scaling[3] 158 | # Use SSD ordering: x / y / w / h instead of ours. 
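# (Worked example with prior_scaling=[0.1, 0.1, 0.2, 0.2]: an anchor
# (yref=0.5, xref=0.5, href=0.2, wref=0.4) matched to a ground-truth box of
# center (0.52, 0.54) and size (0.22, 0.36) encodes to
# cy = 0.02/0.2/0.1 = 1.0, cx = 0.04/0.4/0.1 = 1.0,
# h = log(0.22/0.2)/0.2 ~= 0.477, w = log(0.36/0.4)/0.2 ~= -0.527.)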
159 | feat_localizations = tf.stack([feat_cx, feat_cy, feat_w, feat_h], axis=-1) 160 | return feat_localizations, feat_scores 161 | 162 | 163 | 164 | def tf_text_bboxes_encode(bboxes, 165 | anchors, 166 | matching_threshold=0.1, 167 | prior_scaling=[0.1, 0.1, 0.2, 0.2], 168 | dtype=tf.float32, 169 | scope='text_bboxes_encode'): 170 | """Encode groundtruth labels and bounding boxes using SSD net anchors. 171 | Encoding boxes for all feature layers. 172 | 173 | Arguments: 174 | bboxes: Nx4 Tensor(float) with bboxes relative coordinates; 175 | anchors: List of Numpy array with layer anchors; 176 | matching_threshold: Threshold for positive match with groundtruth bboxes; 177 | prior_scaling: Scaling of encoded coordinates. 178 | 179 | Return: 180 | (target_labels, target_localizations, target_scores): 181 | Each element is a list of target Tensors. 182 | """ 183 | 184 | with tf.name_scope('text_bboxes_encode'): 185 | target_labels = [] 186 | target_localizations = [] 187 | target_scores = [] 188 | for i, anchors_layer in enumerate(anchors): 189 | with tf.name_scope('bboxes_encode_block_%i' % i): 190 | t_loc, t_scores = \ 191 | tf_text_bboxes_encode_layer(bboxes, anchors_layer, 192 | matching_threshold, 193 | prior_scaling, dtype) 194 | target_localizations.append(t_loc) 195 | target_scores.append(t_scores) 196 | return target_localizations, target_scores 197 | 198 | 199 | 200 | -------------------------------------------------------------------------------- /nets/txtbox_300.py: -------------------------------------------------------------------------------- 1 | 2 | """ 3 | This framework is based on SSD_tensorlow(https://github.com/balancap/SSD-Tensorflow) 4 | Add descriptions 5 | """ 6 | 7 | import math 8 | from collections import namedtuple 9 | 10 | import numpy as np 11 | import tensorflow as tf 12 | 13 | import tf_extended as tfe 14 | from nets import custom_layers 15 | from nets import textbox_common 16 | 17 | slim = tf.contrib.slim 18 | 19 | # =========================================================================== # 20 | # Text class definition. 21 | # =========================================================================== # 22 | TextboxParams = namedtuple('TextboxParameters', 23 | ['img_shape', 24 | 'num_classes', 25 | 'feat_layers', 26 | 'feat_shapes', 27 | 'scale_range', 28 | 'anchor_ratios', 29 | 'normalizations', 30 | 'prior_scaling', 31 | 'step', 32 | 'scales' 33 | ]) 34 | 35 | class TextboxNet(object): 36 | """ 37 | Implementation of the Textbox 300 network. 38 | 39 | The default features layers with 300x300 image input are: 40 | conv4_3 ==> 38 x 38 41 | fc7 ==> 19 x 19 42 | conv6_2 ==> 10 x 10 43 | conv7_2 ==> 5 x 5 44 | conv8_2 ==> 3 x 3 45 | pool6 ==> 1 x 1 46 | The default image size used to train this network is 300x300. 47 | """ 48 | default_params = TextboxParams( 49 | img_shape=(300, 300), 50 | num_classes=2, 51 | feat_layers=['conv4', 'conv7', 'conv8', 'conv9', 'conv10', 'global'], 52 | feat_shapes=[(38, 38), (19, 19), (10, 10), (5, 5), (3, 3), (1, 1)], 53 | scale_range=[0.20, 0.90], 54 | anchor_ratios=[1,2,3,5,7,10], 55 | normalizations=[20, -1, -1, -1, -1, -1], 56 | prior_scaling=[0.1, 0.1, 0.2, 0.2], 57 | step = 0.14 , 58 | scales = [0.2, 0.34, 0.48, 0.62, 0.76, 0.90] 59 | ) 60 | 61 | def __init__(self, params=None): 62 | """ 63 | Init the Textbox net with some parameters. Use the default ones 64 | if none provided. 
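    Note that the defaults are self-consistent: step = (0.90 - 0.20) / 5
    = 0.14 and scales = [0.20 + 0.14 * i for i in range(6)] reproduce the
    [0.2, 0.34, 0.48, 0.62, 0.76, 0.90] list above.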
65 | """ 66 | if isinstance(params, TextboxParams): 67 | self.params = params 68 | else: 69 | self.params = self.default_params 70 | #self.params.step = (scale_range[1] - scale_range[0])/ 5 71 | #self.params.scales = [scale_range[0] + i* self.params.step for i in range(6)] 72 | 73 | # ======================================================================= # 74 | def net(self, inputs, 75 | is_training=True, 76 | dropout_keep_prob=0.5, 77 | reuse=None, 78 | scope='text_box_300'): 79 | """ 80 | Text network definition. 81 | """ 82 | r = text_net(inputs, 83 | feat_layers=self.params.feat_layers, 84 | normalizations=self.params.normalizations, 85 | is_training=is_training, 86 | dropout_keep_prob=dropout_keep_prob, 87 | reuse=reuse, 88 | scope=scope) 89 | # Update feature shapes (try at least!) 90 | """ 91 | if update_feat_shapes: 92 | shapes = ssd_feat_shapes_from_net(r[0], self.params.feat_shapes) 93 | self.params = self.params._replace(feat_shapes=shapes) 94 | """ 95 | return r 96 | 97 | def arg_scope(self, weight_decay=0.0005, data_format='NHWC'): 98 | """Network arg_scope. 99 | """ 100 | return ssd_arg_scope(weight_decay, data_format=data_format) 101 | 102 | def arg_scope_caffe(self, caffe_scope): 103 | """Caffe arg_scope used for weights importing. 104 | """ 105 | return ssd_arg_scope_caffe(caffe_scope) 106 | 107 | # ======================================================================= # 108 | ''' 109 | def update_feature_shapes(self, predictions): 110 | """Update feature shapes from predictions collection (Tensor or Numpy 111 | array). 112 | """ 113 | shapes = ssd_feat_shapes_from_net(predictions, self.params.feat_shapes) 114 | self.params = self.params._replace(feat_shapes=shapes) 115 | ''' 116 | 117 | def anchors(self, img_shape, dtype=np.float32): 118 | """Compute the default anchor boxes, given an image shape. 119 | """ 120 | return textbox_achor_all_layers(img_shape, 121 | self.params.feat_shapes, 122 | self.params.anchor_ratios, 123 | self.params.scales, 124 | 0.5, 125 | dtype) 126 | 127 | def bboxes_encode(self, bboxes, anchors, 128 | scope='text_bboxes_encode'): 129 | """Encode labels and bounding boxes. 130 | """ 131 | return textbox_common.tf_text_bboxes_encode( 132 | bboxes, anchors, 133 | matching_threshold=0.1, 134 | prior_scaling=self.params.prior_scaling, 135 | scope=scope) 136 | 137 | def losses(self, logits, localisations, 138 | glocalisations, gscores, 139 | match_threshold=0.1, 140 | negative_ratio=3., 141 | alpha=1., 142 | label_smoothing=0., 143 | scope='ssd_losses'): 144 | """Define the SSD network losses. 145 | """ 146 | return ssd_losses(logits, localisations, 147 | glocalisations, gscores, 148 | match_threshold=match_threshold, 149 | negative_ratio=negative_ratio, 150 | alpha=alpha, 151 | label_smoothing=label_smoothing, 152 | scope=scope) 153 | 154 | 155 | 156 | def text_net(inputs, 157 | feat_layers=TextboxNet.default_params.feat_layers, 158 | normalizations=TextboxNet.default_params.normalizations, 159 | is_training=True, 160 | dropout_keep_prob=0.5, 161 | reuse=None, 162 | scope='text_box_300'): 163 | end_points = {} 164 | with tf.variable_scope(scope, 'text_box_300', [inputs], reuse=reuse): 165 | # Original VGG-16 blocks. 166 | net = slim.repeat(inputs, 2, slim.conv2d, 64, [3, 3], scope='conv1') 167 | end_points['conv1'] = net 168 | net = slim.max_pool2d(net, [2, 2], scope='pool1') 169 | # Block 2. 
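# (For a 300x300 input each 2x2/2 max-pool halves the grid:
# 300 -> 150 (pool1) -> 75 (pool2) -> 38 (pool3) -> 19 (pool4), so the
# 'conv4' end point is captured at 38x38 and 'conv7' at 19x19, matching
# feat_shapes in the default parameters.)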
170 | net = slim.repeat(net, 2, slim.conv2d, 128, [3, 3], scope='conv2') 171 | end_points['conv2'] = net 172 | net = slim.max_pool2d(net, [2, 2], scope='pool2') 173 | # Block 3. 174 | net = slim.repeat(net, 3, slim.conv2d, 256, [3, 3], scope='conv3') 175 | end_points['conv3'] = net 176 | net = slim.max_pool2d(net, [2, 2], scope='pool3') 177 | # Block 4. 178 | net = slim.repeat(net, 3, slim.conv2d, 512, [3, 3], scope='conv4') 179 | end_points['conv4'] = net 180 | net = slim.max_pool2d(net, [2, 2], scope='pool4') 181 | # Block 5. 182 | net = slim.repeat(net, 3, slim.conv2d, 512, [3, 3], scope='conv5') 183 | end_points['conv5'] = net 184 | net = slim.max_pool2d(net, [3, 3], stride=1, scope='pool5') 185 | 186 | # Additional SSD blocks. 187 | # Block 6: dilated 3x3 convolution (rate=6). 188 | net = slim.conv2d(net, 1024, [3, 3], rate=6, scope='conv6') 189 | end_points['conv6'] = net 190 | # Block 7: 1x1 conv. 191 | net = slim.conv2d(net, 1024, [1, 1], scope='conv7') 192 | end_points['conv7'] = net 193 | 194 | # Block 8/9/10/11: 1x1 and 3x3 convolutions stride 2 (except the last ones). 195 | end_point = 'conv8' 196 | with tf.variable_scope(end_point): 197 | net = slim.conv2d(net, 256, [1, 1], scope='conv1x1') 198 | net = custom_layers.pad2d(net, pad=(1, 1)) 199 | net = slim.conv2d(net, 512, [3, 3], stride=2, scope='conv3x3', padding='VALID') 200 | end_points[end_point] = net 201 | end_point = 'conv9' 202 | with tf.variable_scope(end_point): 203 | net = slim.conv2d(net, 128, [1, 1], scope='conv1x1') 204 | net = custom_layers.pad2d(net, pad=(1, 1)) 205 | net = slim.conv2d(net, 256, [3, 3], stride=2, scope='conv3x3', padding='VALID') 206 | end_points[end_point] = net 207 | end_point = 'conv10' 208 | with tf.variable_scope(end_point): 209 | net = slim.conv2d(net, 128, [1, 1], scope='conv1x1') 210 | net = slim.conv2d(net, 256, [3, 3], scope='conv3x3', padding='VALID') 211 | end_points[end_point] = net 212 | end_point = 'global' 213 | with tf.variable_scope(end_point): 214 | net = slim.conv2d(net, 128, [1, 1], scope='conv1x1') 215 | net = slim.conv2d(net, 256, [3, 3], scope='conv3x3', padding='VALID') 216 | end_points[end_point] = net 217 | 218 | # Prediction and localisations layers. 219 | predictions = [] 220 | logits = [] 221 | localisations = [] 222 | for i, layer in enumerate(feat_layers): 223 | with tf.variable_scope(layer + '_box'): 224 | p, l = text_multibox_layer(layer, 225 | end_points[layer], 226 | normalizations[i]) 227 | #predictions.append(prediction_fn(p)) 228 | logits.append(p) 229 | localisations.append(l) 230 | 231 | return localisations, logits, end_points 232 | 233 | 234 | def text_multibox_layer(layer, 235 | inputs, 236 | normalization=-1): 237 | """ 238 | Construct a multibox layer, return a class and localization predictions. 239 | The main difference between textbox and ssd is the prediction shape: 240 | textbox has a prediction score shape of (38,38,2,6) 241 | and a location shape of (38,38,2,6,4). 242 | Besides, the kernel for the first 5 layers is 1*5 with padding (0,2); 243 | the kernel for the last layer is 1*1 with padding 0. 244 | """ 245 | net = inputs 246 | if normalization > 0: 247 | net = custom_layers.l2_normalization(net, scaling=True) 248 | # Number of anchors. 249 | num_anchors = 6 250 | num_classes = 2 251 | # Location.
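# (Sizing note: every feature-map cell predicts 2 vertical offsets x
# 6 aspect ratios = 12 default boxes, so the localization conv below emits
# 2*6*4 = 48 channels and the score conv 2*6*2 = 24 channels before the
# reshapes to (..., 2, 6, 4) and (..., 2, 6, 2).)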
252 | num_loc_pred = 2*num_anchors * 4 253 | if(layer == 'global'): 254 | loc_pred = slim.conv2d(net, num_loc_pred, [1, 1], activation_fn=None, padding = 'VALID', 255 | scope='conv_loc') 256 | else: 257 | loc_pred = slim.conv2d(net, num_loc_pred, [1, 5], activation_fn=None, padding = 'SAME', 258 | scope='conv_loc') 259 | #loc_pred = custom_layers.channel_to_last(loc_pred) 260 | loc_pred = tf.reshape(loc_pred, loc_pred.get_shape().as_list()[:-1] + [2,num_anchors,4]) 261 | # Class prediction. 262 | scores_pred = 2 * num_anchors * num_classes 263 | if(layer == 'global'): 264 | sco_pred = slim.conv2d(net, scores_pred, [1, 1], activation_fn=None, padding = 'VALID', 265 | scope='conv_cls') 266 | else: 267 | sco_pred = slim.conv2d(net, scores_pred, [1, 5], activation_fn=None, padding = 'SAME', 268 | scope='conv_cls') 269 | #cls_pred = custom_layers.channel_to_last(cls_pred) 270 | sco_pred = tf.reshape(sco_pred, sco_pred.get_shape().as_list()[:-1] + [2,num_anchors,num_classes]) 271 | return sco_pred, loc_pred 272 | 273 | 274 | 275 | ## produce anchor for one layer 276 | # each feature point has 12 default textboxes(6 boxes + 6 offsets boxes) 277 | # aspect ratios = (1,2,3,5,7,10) 278 | # feat_size : 279 | # conv4_3 ==> 38 x 38 280 | # fc7 ==> 19 x 19 281 | # conv6_2 ==> 10 x 10 282 | # conv7_2 ==> 5 x 5 283 | # conv8_2 ==> 3 x 3 284 | # pool6 ==> 1 x 1 285 | 286 | def textbox_anchor_one_layer(img_shape, 287 | feat_size, 288 | ratios, 289 | scale, 290 | offset = 0.5, 291 | dtype=np.float32): 292 | # Follow the papers scheme 293 | # 12 ahchor boxes with out sk' = sqrt(sk * sk+1) 294 | y, x = np.mgrid[0:feat_size[0], 0:feat_size[1]] + 0.5 295 | y = y.astype(dtype) / feat_size[0] 296 | x = x.astype(dtype) / feat_size[1] 297 | x_offset = x 298 | y_offset = y + offset 299 | x_out = np.stack((x, x_offset), -1) 300 | y_out = np.stack((y, y_offset), -1) 301 | y_out = np.expand_dims(y_out, axis=-1) 302 | x_out = np.expand_dims(x_out, axis=-1) 303 | 304 | 305 | # 306 | num_anchors = 6 307 | h = np.zeros((num_anchors, ), dtype=dtype) 308 | w = np.zeros((num_anchors, ), dtype=dtype) 309 | for i ,r in enumerate(ratios): 310 | h[i] = scale / math.sqrt(r) / feat_size[0] 311 | w[i] = scale * math.sqrt(r) / feat_size[1] 312 | return y_out, x_out, h, w 313 | 314 | 315 | 316 | ## produce anchor for all layers 317 | def textbox_achor_all_layers(img_shape, 318 | layers_shape, 319 | anchor_ratios, 320 | scales, 321 | offset=0.5, 322 | dtype=np.float32): 323 | """ 324 | Compute anchor boxes for all feature layers. 325 | """ 326 | layers_anchors = [] 327 | for i, s in enumerate(layers_shape): 328 | anchor_bboxes = textbox_anchor_one_layer(img_shape, s, 329 | anchor_ratios, 330 | scales[i], 331 | offset=offset, dtype=dtype) 332 | layers_anchors.append(anchor_bboxes) 333 | return layers_anchors 334 | 335 | def ssd_arg_scope(weight_decay=0.0005, data_format='NHWC'): 336 | """Defines the VGG arg scope. 337 | 338 | Args: 339 | weight_decay: The l2 regularization coefficient. 340 | 341 | Returns: 342 | An arg_scope. 
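    The scope nests three arg_scopes: L2 weight decay with Xavier
    initialization for conv/fc layers, 'SAME' padding for conv/pool, and a
    shared data_format for the custom pad2d, l2_normalization and
    channel_to_last layers, so callers wrap the network definition once.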
343 | """ 344 | with slim.arg_scope([slim.conv2d, slim.fully_connected], 345 | activation_fn=tf.nn.relu, 346 | weights_regularizer=slim.l2_regularizer(weight_decay), 347 | weights_initializer=tf.contrib.layers.xavier_initializer(), 348 | biases_initializer=tf.zeros_initializer()): 349 | with slim.arg_scope([slim.conv2d, slim.max_pool2d], 350 | padding='SAME', 351 | data_format=data_format): 352 | with slim.arg_scope([custom_layers.pad2d, 353 | custom_layers.l2_normalization, 354 | custom_layers.channel_to_last], 355 | data_format=data_format) as sc: 356 | return sc 357 | 358 | 359 | # =========================================================================== # 360 | # Caffe scope: importing weights at initialization. 361 | # =========================================================================== # 362 | def ssd_arg_scope_caffe(caffe_scope): 363 | """Caffe scope definition. 364 | 365 | Args: 366 | caffe_scope: Caffe scope object with loaded weights. 367 | 368 | Returns: 369 | An arg_scope. 370 | """ 371 | # Default network arg scope. 372 | with slim.arg_scope([slim.conv2d], 373 | activation_fn=tf.nn.relu, 374 | weights_initializer=caffe_scope.conv_weights_init(), 375 | biases_initializer=caffe_scope.conv_biases_init()): 376 | with slim.arg_scope([slim.fully_connected], 377 | activation_fn=tf.nn.relu): 378 | with slim.arg_scope([custom_layers.l2_normalization], 379 | scale_initializer=caffe_scope.l2_norm_scale_init()): 380 | with slim.arg_scope([slim.conv2d, slim.max_pool2d], 381 | padding='SAME') as sc: 382 | return sc 383 | 384 | 385 | # =========================================================================== # 386 | # Text loss function. 387 | # =========================================================================== # 388 | def ssd_losses(logits, localisations, 389 | glocalisations, gscores, 390 | match_threshold=0.1, 391 | negative_ratio=3., 392 | alpha=1., 393 | label_smoothing=0., 394 | scope=None): 395 | """Loss functions for training the text box network. 396 | 397 | 398 | Arguments: 399 | logits: (list of) predictions logits Tensors; 400 | localisations: (list of) localisations Tensors; 401 | glocalisations: (list of) groundtruth localisations Tensors; 402 | gscores: (list of) groundtruth score Tensors; 403 | """ 404 | with tf.name_scope(scope, 'text_loss'): 405 | l_cross_pos = [] 406 | l_cross_neg = [] 407 | l_loc = [] 408 | for i in range(len(logits)): 409 | dtype = logits[i].dtype 410 | with tf.name_scope('block_%i' % i): 411 | # Determine weights Tensor. 412 | pmask = gscores[i] > match_threshold 413 | ipmask = tf.cast(pmask, tf.int32) 414 | fpmask = tf.cast(pmask, dtype) 415 | n_positives = tf.reduce_sum(fpmask) 416 | 417 | # Negative mask 418 | # Number of negative entries to select. 419 | n_neg = tf.cast(negative_ratio * n_positives, tf.int32) 420 | 421 | nvalues = tf.where(tf.cast(1-ipmask,tf.bool), gscores[i], np.zeros(gscores[i].shape)) 422 | nvalues_flat = tf.reshape(nvalues, [-1]) 423 | val, idxes = tf.nn.top_k(nvalues_flat, k=n_neg) 424 | minval = val[-1] 425 | # Final negative mask. 426 | nmask = nvalues > minval 427 | fnmask = tf.cast(nmask, dtype) 428 | inmask = tf.cast(nmask, tf.int32) 429 | # Add cross-entropy loss. 
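# (Hard negative mining, illustrative numbers: with negative_ratio=3, a
# batch yielding n_positives = 100 matched anchors keeps only the
# n_neg = 300 highest-scoring negatives selected by tf.nn.top_k above;
# everything below minval is masked out of the negative loss.)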
430 | with tf.name_scope('cross_entropy_pos'): 431 | loss = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits[i], 432 | labels=ipmask) 433 | loss = tf.losses.compute_weighted_loss(loss, fpmask) 434 | l_cross_pos.append(loss) 435 | 436 | with tf.name_scope('cross_entropy_neg'): 437 | loss = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits[i], 438 | labels=inmask) 439 | loss = tf.losses.compute_weighted_loss(loss, fnmask) 440 | l_cross_neg.append(loss) 441 | 442 | # Add localization loss: smooth L1, L2, ... 443 | with tf.name_scope('localization'): 444 | # Weights Tensor: positive mask + random negative. 445 | weights = tf.expand_dims(alpha * fpmask, axis=-1) 446 | loss = custom_layers.abs_smooth(localisations[i] - glocalisations[i]) 447 | loss = tf.losses.compute_weighted_loss(loss, weights) 448 | l_loc.append(loss) 449 | 450 | # Additional total losses... 451 | with tf.name_scope('total'): 452 | total_cross_pos = tf.add_n(l_cross_pos, 'cross_entropy_pos') 453 | total_cross_neg = tf.add_n(l_cross_neg, 'cross_entropy_neg') 454 | total_cross = tf.add(total_cross_pos, total_cross_neg, 'cross_entropy') 455 | total_loc = tf.add_n(l_loc, 'localization') 456 | 457 | # Add to EXTRA LOSSES TF.collection 458 | tf.add_to_collection('EXTRA_LOSSES', total_cross_pos) 459 | tf.add_to_collection('EXTRA_LOSSES', total_cross_neg) 460 | tf.add_to_collection('EXTRA_LOSSES', total_cross) 461 | tf.add_to_collection('EXTRA_LOSSES', total_loc) 462 | 463 | 464 | 465 | 466 | 467 | 468 | 469 | 470 | 471 | 472 | 473 | -------------------------------------------------------------------------------- /processing/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /processing/image_processing.py: -------------------------------------------------------------------------------- 1 | 2 | import tensorflow as tf 3 | import tf_extended as tfe 4 | import os 5 | import matplotlib.pyplot as plt 6 | import skimage.io as skio 7 | import cv2 8 | import numpy as np 9 | 10 | 11 | def image_processing(image, bbox,labels, text_shape,train = True): 12 | Height = text_shape[0] 13 | Width = text_shape[1] 14 | image = tf.image.convert_image_dtype(image, dtype=tf.float32) 15 | if train: 16 | image,labels,bbox = distorted_image(image, Height,labels,Width,bbox) 17 | else: 18 | image = eval_image(image, Height, Width) 19 | 20 | return image, labels, bbox 21 | 22 | def distorted_image(image, height,labels,width,bbox,scope = None): 23 | # Each bounding box has shape [1, num_boxes, box coords] and 24 | # the coordinates are ordered [ymin, xmin, ymax, xmax]. 25 | 26 | # Display the bounding box in the first thread only. 27 | with tf.name_scope(scope, 'distorted_bounding_box_crop', 28 | [image, bbox,height,width]): 29 | 30 | bbox_begin, bbox_size, distort_bbox = tf.image.sample_distorted_bounding_box( 31 | tf.shape(image), 32 | bounding_boxes=bbox, 33 | min_object_covered=0.1, 34 | aspect_ratio_range=(0.9,1.1), 35 | area_range=(0.1,1.0), 36 | max_attempts=200, 37 | use_image_if_no_bounding_boxes=True) 38 | 39 | distort_bbox = distort_bbox[0, 0] 40 | 41 | # Crop the image to the specified bounding box. 42 | cropped_image = tf.slice(image, bbox_begin, bbox_size) 43 | # Restore the shape since the dynamic slice loses 3rd dimension. 
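# (tf.slice on the sampled crop loses the static shape; the bilinear
# resize below restores a fixed height x width and set_shape then pins
# [height, width, 3] so the images can be batched.)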
44 | 45 | distorted_image = tf.image.resize_images(cropped_image, [height, width], 46 | method=tf.image.ResizeMethod.BILINEAR) 47 | distorted_image.set_shape([height, width, 3]) 48 | 49 | distorted_image = tf.image.random_flip_left_right(distorted_image) 50 | # Randomly distort the colors. 51 | distorted_image = distort_color(distorted_image) 52 | 53 | 54 | bboxes = tfe.bboxes_resize(distort_bbox, bbox) 55 | print "labels: %s " % (labels) 56 | label, bboxes = tfe.bboxes_filter_overlap(labels, bboxes,threshold = 0.4) 57 | print "bboxes: %s " % (bboxes) 58 | return distorted_image, label, bboxes 59 | 60 | 61 | 62 | def eval_image(image, height, width, scope=None): 63 | """Prepare one image for evaluation. 64 | 65 | Args: 66 | image: 3-D float Tensor 67 | height: integer 68 | width: integer 69 | scope: Optional scope for op_scope. 70 | Returns: 71 | 3-D float Tensor of prepared image. 72 | """ 73 | with tf.name_scope(scope, 'eval_image',[image, height, width]): 74 | # Crop the central region of the image with an area containing 87.5% of 75 | # the original image. 76 | image = tf.image.central_crop(image, central_fraction=0.875) 77 | 78 | # Resize the image to the original height and width. 79 | image = tf.expand_dims(image, 0) 80 | image = tf.image.resize_bilinear(image, [height, width], 81 | align_corners=False) 82 | image = tf.squeeze(image, [0]) 83 | return image 84 | 85 | 86 | def distort_color(image, scope=None): 87 | """Distort the color of the image. 88 | 89 | Each color distortion is non-commutative and thus ordering of the color ops 90 | matters. Ideally we would randomly permute the ordering of the color ops. 91 | Rather then adding that level of complication, we select a distinct ordering 92 | of color ops for each preprocessing thread. 93 | 94 | Args: 95 | image: Tensor containing single image. 96 | thread_id: preprocessing thread ID. 97 | scope: Optional scope for op_scope. 98 | Returns: 99 | color-distorted image 100 | """ 101 | color_ordering = np.random.randint(2) 102 | if color_ordering == 0: 103 | image = tf.image.random_brightness(image, max_delta=32. / 255.) 104 | image = tf.image.random_saturation(image, lower=0.5, upper=1.5) 105 | image = tf.image.random_hue(image, max_delta=0.2) 106 | image = tf.image.random_contrast(image, lower=0.5, upper=1.5) 107 | elif color_ordering == 1: 108 | image = tf.image.random_brightness(image, max_delta=32. / 255.) 109 | image = tf.image.random_contrast(image, lower=0.5, upper=1.5) 110 | image = tf.image.random_saturation(image, lower=0.5, upper=1.5) 111 | image = tf.image.random_hue(image, max_delta=0.2) 112 | 113 | # The random_* ops do not necessarily clamp. 114 | image = tf.clip_by_value(image, 0.0, 1.0) 115 | return image 116 | -------------------------------------------------------------------------------- /processing/image_processing2.py: -------------------------------------------------------------------------------- 1 | """ 2 | This file pre-process images from a datasets and 3 | output batch iamges and labels(bboxes) 4 | 5 | parse examples from tfrecord 6 | 1.parse_example 7 | 8 | Pre-processing images : 9 | 1. crop and pad images randomly 10 | 2. crop and pad bbox 11 | 3. 
Transform images and bboxes to input/output vectors 12 | 13 | """ 14 | 15 | import tensorflow as tf 16 | import tf_extended as tfe 17 | import os 18 | import matplotlib.pyplot as plt 19 | import skimage.io as skio 20 | import cv2 21 | 22 | 23 | FLAGS = tf.app.flags.FLAGS 24 | 25 | tf.app.flags.DEFINE_integer('batch_size', 1, 26 | """Number of images to process in a batch.""") 27 | tf.app.flags.DEFINE_integer('Height', 300, 28 | """Provide square images of this size.""") 29 | tf.app.flags.DEFINE_integer('Width', 300, 30 | """Provide square images of this size.""") 31 | tf.app.flags.DEFINE_integer('num_preprocess_threads', 4, 32 | """Number of preprocessing threads per tower. """ 33 | """Please make this a multiple of 4.""") 34 | tf.app.flags.DEFINE_integer('num_readers', 1, 35 | """Number of parallel readers during train.""") 36 | 37 | # Images are preprocessed asynchronously using multiple threads specified by 38 | # --num_preprocss_threads and the resulting processed images are stored in a 39 | # random shuffling queue. The shuffling queue dequeues --batch_size images 40 | # for processing on a given Inception tower. A larger shuffling queue guarantees 41 | # better mixing across examples within a batch and results in slightly higher 42 | # predictive performance in a trained model. Empirically, 43 | # --input_queue_memory_factor=16 works well. A value of 16 implies a queue size 44 | # of 1024*16 images. Assuming RGB 299x299 images, this implies a queue size of 45 | # 16GB. If the machine is memory limited, then decrease this factor to 46 | # decrease the CPU memory footprint, accordingly. 47 | 48 | tf.app.flags.DEFINE_integer('input_queue_memory_factor', 1, 49 | """Size of the queue of preprocessed images. """ 50 | """Default is ideal but try smaller values, e.g. """ 51 | """4, 2 or 1, if host memory is constrained. See """ 52 | """comments in code for more details.""") 53 | 54 | 55 | def distorted_inputs(data_files, batch_size=None, num_preprocess_threads=None): 56 | """Generate batches of distorted versions of ImageNet images. 57 | 58 | Use this function as the inputs for training a network. 59 | 60 | Distorting images provides a useful technique for augmenting the data 61 | set during training in order to make the network invariant to aspects 62 | of the image that do not effect the label. 63 | 64 | Args: 65 | dataset: instance of Dataset class specifying the dataset. 66 | batch_size: integer, number of examples in batch 67 | num_preprocess_threads: integer, total number of preprocessing threads but 68 | None defaults to FLAGS.num_preprocess_threads. 69 | 70 | Returns: 71 | images: Images. 4D tensor of size [batch_size, FLAGS.image_size, 72 | FLAGS.image_size, 3]. 73 | labels: 1-D integer Tensor of [batch_size]. 74 | """ 75 | if not batch_size: 76 | batch_size = FLAGS.batch_size 77 | 78 | # Force all input processing onto CPU in order to reserve the GPU for 79 | # the forward inference and back-propagation. 
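# (Keeping decode, distortion and queueing on /cpu:0 means only the final
# dequeued batches are copied to the accelerator; a multi-tower trainer
# would share these batches rather than rebuild the pipeline per GPU.)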
80 | with tf.device('/cpu:0'): 81 | images, box, name = batch_inputs( 82 | data_files, batch_size, train=True, 83 | num_preprocess_threads=num_preprocess_threads, 84 | num_readers=FLAGS.num_readers) 85 | return images, box, name 86 | 87 | def parse_example(example_serialized): 88 | """ 89 | One example proto contains the following fields: 90 | 'image/height': int64_feature(shape[0]), 91 | 'image/width': int64_feature(shape[1]), 92 | 'image/channels': int64_feature(shape[2]), 93 | 'image/shape': int64_feature(shape), 94 | 'image/object/bbox/xmin': float_feature(xmin), 95 | 'image/object/bbox/xmax': float_feature(xmax), 96 | 'image/object/bbox/ymin': float_feature(ymin), 97 | 'image/object/bbox/ymax': float_feature(ymax), 98 | 'image/object/bbox/label': int64_feature(label), 99 | 'image/format': bytes_feature('jpeg'), 100 | 'image/encoded': bytes_feature(image_data.tostring()), 101 | 102 | Input: example_serialized 103 | 104 | Output: 105 | Image_buffer, label, bboxes, name 106 | """ 107 | feature_map = { 108 | 'image/height': tf.FixedLenFeature([1], tf.int64), 109 | 'image/width': tf.FixedLenFeature([1], tf.int64), 110 | 'image/channels': tf.FixedLenFeature([1], tf.int64), 111 | 'image/shape': tf.FixedLenFeature([3], tf.int64), 112 | 'image/object/bbox/xmin': tf.VarLenFeature(dtype=tf.float32), 113 | 'image/object/bbox/ymin': tf.VarLenFeature(dtype=tf.float32), 114 | 'image/object/bbox/xmax': tf.VarLenFeature(dtype=tf.float32), 115 | 'image/object/bbox/ymax': tf.VarLenFeature(dtype=tf.float32), 116 | 'image/object/bbox/label': tf.VarLenFeature(dtype=tf.int64), 117 | 'image/format': tf.FixedLenFeature([], tf.string, default_value='jpeg'), 118 | 'image/encoded': tf.FixedLenFeature([], tf.string, default_value=''), 119 | 'image/name': tf.VarLenFeature(dtype = tf.string), 120 | } 121 | features = tf.parse_single_example(example_serialized, feature_map) 122 | #image = tf.decode_raw(features['image/encoded'], tf.uint8) 123 | xmin = tf.expand_dims(features['image/object/bbox/xmin'].values, 0) 124 | ymin = tf.expand_dims(features['image/object/bbox/ymin'].values, 0) 125 | xmax = tf.expand_dims(features['image/object/bbox/xmax'].values, 0) 126 | ymax = tf.expand_dims(features['image/object/bbox/ymax'].values, 0) 127 | bboxes = tf.concat([ymin, xmin, ymax, xmax],0) 128 | bboxes = tf.expand_dims(bboxes,0) 129 | bboxes = tf.transpose(bboxes, [0,2,1]) 130 | Image_buffer = features['image/encoded'] 131 | label = tf.expand_dims(features['image/object/bbox/label'].values, 0) 132 | height = tf.cast(features['image/height'], dtype=tf.int64) 133 | width = tf.cast(features['image/width'], dtype=tf.int64) 134 | name = tf.cast(features['image/name'], dtype = tf.string) 135 | print "name %s" % (name) 136 | return Image_buffer, label, bboxes, name 137 | 138 | 139 | 140 | def image_processing(image_buffer, bbox,labels, train,thread_id = 0): 141 | image = decode_jpeg(image_buffer) 142 | Height = FLAGS.Height 143 | Width = FLAGS.Width 144 | 145 | if train: 146 | image,labels,bbox = distorted_image(image, Height,labels,Width,bbox,thread_id) 147 | else: 148 | image = eval_image(image, Height, Width) 149 | 150 | return image, labels, bbox 151 | 152 | def distorted_image(image, height,labels,width,bbox,thread_id,scope = None): 153 | # Each bounding box has shape [1, num_boxes, box coords] and 154 | # the coordinates are ordered [ymin, xmin, ymax, xmax]. 155 | 156 | # Display the bounding box in the first thread only.
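# (Only thread_id == 0 emits the image summaries below, so TensorBoard
# shows a single annotated example instead of num_preprocess_threads
# duplicates; all threads still run identical distortion ops.)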
157 | with tf.name_scope(scope, 'distorted_bounding_box_crop', 158 | [image, bbox,height,width]): 159 | if not thread_id: 160 | image_with_box = tf.image.draw_bounding_boxes(tf.expand_dims(image, 0), 161 | bbox) 162 | tf.summary.image('image_with_bounding_boxes', image_with_box) 163 | 164 | 165 | bbox_begin, bbox_size, distort_bbox = tf.image.sample_distorted_bounding_box( 166 | tf.shape(image), 167 | bounding_boxes=bbox, 168 | min_object_covered=0.1, 169 | aspect_ratio_range=(0.9,1.1), 170 | area_range=(0.1,1.0), 171 | max_attempts=200, 172 | use_image_if_no_bounding_boxes=True) 173 | 174 | if not thread_id: 175 | image_with_distorted_box = tf.image.draw_bounding_boxes( 176 | tf.expand_dims(image, 0), distort_bbox) 177 | tf.summary.image('images_with_distorted_bounding_box', 178 | image_with_distorted_box) 179 | 180 | distort_bbox = distort_bbox[0, 0] 181 | 182 | # Crop the image to the specified bounding box. 183 | cropped_image = tf.slice(image, bbox_begin, bbox_size) 184 | # Restore the shape since the dynamic slice loses 3rd dimension. 185 | 186 | distorted_image = tf.image.resize_images(cropped_image, [height, width], 187 | method=tf.image.ResizeMethod.BILINEAR) 188 | distorted_image.set_shape([height, width, 3]) 189 | if not thread_id: 190 | tf.summary.image('cropped_resized_image', 191 | tf.expand_dims(distorted_image, 0)) 192 | distorted_image = tf.image.random_flip_left_right(distorted_image) 193 | # Randomly distort the colors. 194 | distorted_image = distort_color(distorted_image, thread_id) 195 | 196 | if not thread_id: 197 | tf.summary.image('final_distorted_image', 198 | tf.expand_dims(distorted_image, 0)) 199 | # Update bounding boxes: resize and filter out. 200 | 201 | bboxes = tfe.bboxes_resize(distort_bbox, bbox) 202 | print "labels: %s " % (labels) 203 | label, bboxes = tfe.bboxes_filter_overlap(labels, bboxes,threshold = 0.4) 204 | 205 | return distorted_image, label, bboxes 206 | 207 | 208 | 209 | def decode_jpeg(image_buffer, scope=None): 210 | """Decode a JPEG string into one 3-D float image Tensor. 211 | 212 | Args: 213 | image_buffer: scalar string Tensor. 214 | scope: Optional scope for op_scope. 215 | Returns: 216 | 3-D float Tensor with values ranging from [0, 1). 217 | """ 218 | with tf.name_scope(scope, 'decode_jpeg',[image_buffer]): 219 | # Decode the string as an RGB JPEG. 220 | # Note that the resulting image contains an unknown height and width 221 | # that is set dynamically by decode_jpeg. In other words, the height 222 | # and width of image is unknown at compile-time. 223 | image = tf.image.decode_jpeg(image_buffer, channels=3) 224 | # After this point, all image pixels reside in [0,1) 225 | # until the very end, when they're rescaled to (-1, 1). The various 226 | # adjust_* ops all require this range for dtype float. 227 | image = tf.image.convert_image_dtype(image, dtype=tf.float32) 228 | print 'image after decode %s' % (image) 229 | return image 230 | 231 | 232 | def eval_image(image, height, width, scope=None): 233 | """Prepare one image for evaluation. 234 | 235 | Args: 236 | image: 3-D float Tensor 237 | height: integer 238 | width: integer 239 | scope: Optional scope for op_scope. 240 | Returns: 241 | 3-D float Tensor of prepared image. 242 | """ 243 | with tf.name_scope(scope, 'eval_image',[image, height, width]): 244 | # Crop the central region of the image with an area containing 87.5% of 245 | # the original image. 
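# (central_fraction=0.875 keeps 87.5% of each side, e.g. a 400x400 input
# becomes 350x350 before the bilinear resize back to (height, width).)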
246 | image = tf.image.central_crop(image, central_fraction=0.875) 247 | 248 | # Resize the image to the original height and width. 249 | image = tf.expand_dims(image, 0) 250 | image = tf.image.resize_bilinear(image, [height, width], 251 | align_corners=False) 252 | image = tf.squeeze(image, [0]) 253 | return image 254 | 255 | 256 | def distort_color(image, thread_id=0, scope=None): 257 | """Distort the color of the image. 258 | 259 | Each color distortion is non-commutative and thus ordering of the color ops 260 | matters. Ideally we would randomly permute the ordering of the color ops. 261 | Rather then adding that level of complication, we select a distinct ordering 262 | of color ops for each preprocessing thread. 263 | 264 | Args: 265 | image: Tensor containing single image. 266 | thread_id: preprocessing thread ID. 267 | scope: Optional scope for op_scope. 268 | Returns: 269 | color-distorted image 270 | """ 271 | with tf.name_scope( scope, 'distort_color',[image]): 272 | color_ordering = thread_id % 2 273 | 274 | if color_ordering == 0: 275 | image = tf.image.random_brightness(image, max_delta=32. / 255.) 276 | image = tf.image.random_saturation(image, lower=0.5, upper=1.5) 277 | image = tf.image.random_hue(image, max_delta=0.2) 278 | image = tf.image.random_contrast(image, lower=0.5, upper=1.5) 279 | elif color_ordering == 1: 280 | image = tf.image.random_brightness(image, max_delta=32. / 255.) 281 | image = tf.image.random_contrast(image, lower=0.5, upper=1.5) 282 | image = tf.image.random_saturation(image, lower=0.5, upper=1.5) 283 | image = tf.image.random_hue(image, max_delta=0.2) 284 | 285 | # The random_* ops do not necessarily clamp. 286 | image = tf.clip_by_value(image, 0.0, 1.0) 287 | return image 288 | 289 | 290 | def batch_inputs(data_files, batch_size, train, num_preprocess_threads=None,num_readers=4): 291 | 292 | """Contruct batches of training or evaluation examples from the image dataset. 293 | Args: 294 | dataset: instance of Dataset class specifying the dataset. 295 | See dataset.py for details. 296 | batch_size: integer 297 | train: boolean 298 | num_preprocess_threads: integer, total number of preprocessing threads 299 | num_readers: integer, number of parallel readers 300 | 301 | Returns: 302 | images: 4-D float Tensor of a batch of images 303 | labels: 1-D integer Tensor of [batch_size]. 304 | 305 | Raises: 306 | ValueError: if data is not found 307 | """ 308 | 309 | #print 1 310 | with tf.name_scope('batch_processing'): 311 | if data_files is None: 312 | raise ValueError('No data files found for this dataset') 313 | 314 | # Create filename_queue 315 | if train: 316 | filename_queue = tf.train.string_input_producer(data_files,num_epochs = 2, 317 | shuffle=True, 318 | capacity=16) 319 | else: 320 | filename_queue = tf.train.string_input_producer(data_files, num_epochs = 2, 321 | shuffle=False, 322 | capacity=1) 323 | if num_preprocess_threads is None: 324 | num_preprocess_threads = FLAGS.num_preprocess_threads 325 | 326 | if num_preprocess_threads % 4: 327 | raise ValueError('Please make num_preprocess_threads a multiple ' 328 | 'of 4 (%d % 4 != 0).', num_preprocess_threads) 329 | 330 | if num_readers is None: 331 | num_readers = FLAGS.num_readers 332 | 333 | if num_readers < 1: 334 | raise ValueError('Please make num_readers at least 1') 335 | 336 | # Approximate number of examples per shard. 
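# (Queue sizing with the defaults above: examples_per_shard = 512 and
# --input_queue_memory_factor=1 give min_queue_examples = 512, so the
# training RandomShuffleQueue holds 512 + 3 * batch_size serialized
# examples; raise the factor for better shuffling at more host memory.)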
337 | 338 | examples_per_shard = 512 339 | 340 | # Size the random shuffle queue to balance between good global 341 | # mixing (more examples) and memory use (fewer examples). 342 | # 1 image uses 299*299*3*4 bytes = 1MB 343 | # The default input_queue_memory_factor is 16 implying a shuffling queue 344 | # size: examples_per_shard * 16 * 1MB = 17.6GB 345 | 346 | min_queue_examples = examples_per_shard * FLAGS.input_queue_memory_factor 347 | if train: 348 | examples_queue = tf.RandomShuffleQueue( 349 | capacity=min_queue_examples + 3 * batch_size, 350 | min_after_dequeue=min_queue_examples, 351 | dtypes=[tf.string]) 352 | else: 353 | examples_queue = tf.FIFOQueue( 354 | capacity=examples_per_shard + 3 * batch_size, 355 | dtypes=[tf.string]) 356 | 357 | # Create multiple readers to populate the queue of examples. 358 | if num_readers > 1: 359 | enqueue_ops = [] 360 | for _ in range(num_readers): 361 | reader = tf.TFRecordReader() 362 | _, value = reader.read(filename_queue) 363 | enqueue_ops.append(examples_queue.enqueue([value])) 364 | 365 | tf.train.queue_runner.add_queue_runner( 366 | tf.train.queue_runner.QueueRunner(examples_queue, enqueue_ops)) 367 | example_serialized = examples_queue.dequeue() 368 | else: 369 | reader = tf.TFRecordReader() 370 | _, example_serialized = reader.read(filename_queue) 371 | 372 | images_and_labels = [] 373 | for thread_id in range(num_preprocess_threads): 374 | # Parse a serialized Example proto to extract the image and metadata. 375 | image_buffer, label_index, bbox, name= parse_example(example_serialized) 376 | image,labels,bbox = image_processing(image_buffer, bbox,label_index, 377 | train, thread_id) 378 | 379 | images_and_labels.append([image, bbox[1,:],name]) 380 | 381 | images ,box,names= tf.train.batch_join( 382 | images_and_labels, 383 | batch_size=batch_size, 384 | capacity=2 * num_preprocess_threads * batch_size) 385 | print 'box shape %s' % (box.shape) 386 | 387 | # Reshape images into these desired dimensions. 388 | 389 | print 'image batch phase %s' % (images) 390 | height = FLAGS.Height 391 | width = FLAGS.Width 392 | depth = 3 393 | 394 | #images = tf.cast(images, tf.float32) 395 | #images = tf.reshape(images, shape=[batch_size, height, width, depth]) 396 | 397 | print 'image reshape %s' % (images) 398 | 399 | # Display the training images in the visualizer. 
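# (images is already the batched [batch_size, 300, 300, 3] float tensor
# from batch_join; the commented-out reshape above is redundant because
# each per-thread image carried a static 300x300x3 shape.)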
400 | 
401 |         tf.summary.image('images', images)
402 | 
403 |         return images, box, names
404 | 
405 | 
406 | 
407 | def main(_):
408 |     data_dir = '/Users/xiaodiu/Documents/github/projecttextbox/TextBoxes-TensorFlow/data/sythtext/'
409 |     tf_record_pattern = os.path.join(data_dir, '*.tfrecord')
410 |     data_files = tf.gfile.Glob(tf_record_pattern)
411 |     print data_files
412 |     images, box, name = distorted_inputs(data_files)
413 |     print images.shape
414 | 
415 |     with tf.Session() as sess:
416 |         sess.run(tf.global_variables_initializer())
417 |         sess.run(tf.local_variables_initializer())
418 |         coord = tf.train.Coordinator()
419 |         threads = tf.train.start_queue_runners(coord=coord)
420 |         # Fetch image, box and name tensors in a single run() call so all
421 |         # three come from the same dequeued batch; separate run() calls each
422 |         # advance the input queue and would return mismatched examples.
423 |         img, boxb, name = sess.run([images, box, name])
424 |         print name
425 |         print img.shape
426 |         print img[0, :, :, :]
427 |         #skio.imshow(img[1,:,:,:])
428 |         image = img[0, :, :, :]
429 |         xmin = int(boxb[0, 1] * 300)
430 |         ymin = int(boxb[0, 0] * 300)
431 |         xmax = int(boxb[0, 3] * 300)
432 |         ymax = int(boxb[0, 2] * 300)
433 |         skio.imshow(cv2.rectangle(image, (xmin, ymin), (xmax, ymax), (0, 0, 0)))
434 |         skio.show()
435 |         skio.imshow(skio.imread(data_dir + name[0]))
436 |         skio.show()
437 |         coord.request_stop()
438 |         coord.join(threads)
439 | 
440 | if __name__ == '__main__':
441 |     tf.app.run()
442 | 
443 | 
444 | 
445 | 
446 | 
--------------------------------------------------------------------------------
/processing/ssd_vgg_preprocessing.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2015 Paul Balanca. All Rights Reserved.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | # ==============================================================================
15 | """Pre-processing images for SSD-type networks.
16 | """
17 | from enum import Enum, IntEnum
18 | import numpy as np
19 | 
20 | import tensorflow as tf
21 | import tf_extended as tfe
22 | 
23 | from tensorflow.python.ops import control_flow_ops
24 | 
25 | from processing import tf_image
26 | 
27 | 
28 | slim = tf.contrib.slim
29 | 
30 | # Resizing strategies.
31 | Resize = IntEnum('Resize', ('NONE',             # Nothing!
32 |                             'CENTRAL_CROP',     # Crop (and pad if necessary).
33 |                             'PAD_AND_RESIZE',   # Pad, and resize to output shape.
34 |                             'WARP_RESIZE'))     # Warp resize.
35 | 
36 | # VGG mean parameters.
37 | _R_MEAN = 123.
38 | _G_MEAN = 117.
39 | _B_MEAN = 104.
40 | 
41 | # Some training pre-processing parameters.
42 | BBOX_CROP_OVERLAP = 0.4         # Minimum overlap to keep a bbox after cropping.
43 | CROP_RATIO_RANGE = (0.8, 1.2)   # Distortion ratio during cropping.
44 | EVAL_SIZE = (300, 300)
45 | 
46 | 
47 | def tf_image_whitened(image, means=[_R_MEAN, _G_MEAN, _B_MEAN]):
48 |     """Subtracts the given means from each image channel.
49 | 
50 |     Returns:
51 |         the centered image.
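 
    Example (a minimal sketch; any float HxWx3 image tensor works):
        img = tf.to_float(tf.ones([300, 300, 3]) * 128.)
        img = tf_image_whitened(img)     # channels are now centered on 0
        img = tf_image_unwhitened(img)   # recovers the original values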
52 | """ 53 | if image.get_shape().ndims != 3: 54 | raise ValueError('Input must be of size [height, width, C>0]') 55 | num_channels = image.get_shape().as_list()[-1] 56 | if len(means) != num_channels: 57 | raise ValueError('len(means) must match the number of channels') 58 | 59 | mean = tf.constant(means, dtype=image.dtype) 60 | image = image - mean 61 | return image 62 | 63 | 64 | def tf_image_unwhitened(image, means=[_R_MEAN, _G_MEAN, _B_MEAN], to_int=True): 65 | """Re-convert to original image distribution, and convert to int if 66 | necessary. 67 | 68 | Returns: 69 | Centered image. 70 | """ 71 | mean = tf.constant(means, dtype=image.dtype) 72 | image = image + mean 73 | if to_int: 74 | image = tf.cast(image, tf.int32) 75 | return image 76 | 77 | 78 | def np_image_unwhitened(image, means=[_R_MEAN, _G_MEAN, _B_MEAN], to_int=True): 79 | """Re-convert to original image distribution, and convert to int if 80 | necessary. Numpy version. 81 | 82 | Returns: 83 | Centered image. 84 | """ 85 | img = np.copy(image) 86 | img += np.array(means, dtype=img.dtype) 87 | if to_int: 88 | img = img.astype(np.uint8) 89 | return img 90 | 91 | 92 | def tf_summary_image(image, bboxes, name='image', unwhitened=False): 93 | """Add image with bounding boxes to summary. 94 | """ 95 | if unwhitened: 96 | image = tf_image_unwhitened(image) 97 | image = tf.expand_dims(image, 0) 98 | bboxes = tf.expand_dims(bboxes, 0) 99 | image_with_box = tf.image.draw_bounding_boxes(image, bboxes) 100 | tf.summary.image(name, image_with_box) 101 | 102 | 103 | def apply_with_random_selector(x, func, num_cases): 104 | """Computes func(x, sel), with sel sampled from [0...num_cases-1]. 105 | 106 | Args: 107 | x: input Tensor. 108 | func: Python function to apply. 109 | num_cases: Python int32, number of cases to sample sel from. 110 | 111 | Returns: 112 | The result of func(x, sel), where func receives the value of the 113 | selector as a python integer, but sel is sampled dynamically. 114 | """ 115 | sel = tf.random_uniform([], maxval=num_cases, dtype=tf.int32) 116 | # Pass the real x only to one of the func calls. 117 | return control_flow_ops.merge([ 118 | func(control_flow_ops.switch(x, tf.equal(sel, case))[1], case) 119 | for case in range(num_cases)])[0] 120 | 121 | 122 | def distort_color(image, color_ordering=0, fast_mode=True, scope=None): 123 | """Distort the color of a Tensor image. 124 | 125 | Each color distortion is non-commutative and thus ordering of the color ops 126 | matters. Ideally we would randomly permute the ordering of the color ops. 127 | Rather then adding that level of complication, we select a distinct ordering 128 | of color ops for each preprocessing thread. 129 | 130 | Args: 131 | image: 3-D Tensor containing single image in [0, 1]. 132 | color_ordering: Python int, a type of distortion (valid values: 0-3). 133 | fast_mode: Avoids slower ops (random_hue and random_contrast) 134 | scope: Optional scope for name_scope. 135 | Returns: 136 | 3-D Tensor color-distorted image on range [0, 1] 137 | Raises: 138 | ValueError: if color_ordering not in [0, 3] 139 | """ 140 | with tf.name_scope(scope, 'distort_color', [image]): 141 | if fast_mode: 142 | if color_ordering == 0: 143 | image = tf.image.random_brightness(image, max_delta=32. / 255.) 144 | image = tf.image.random_saturation(image, lower=0.5, upper=1.5) 145 | else: 146 | image = tf.image.random_saturation(image, lower=0.5, upper=1.5) 147 | image = tf.image.random_brightness(image, max_delta=32. / 255.) 
148 |         else:
149 |             if color_ordering == 0:
150 |                 image = tf.image.random_brightness(image, max_delta=32. / 255.)
151 |                 image = tf.image.random_saturation(image, lower=0.5, upper=1.5)
152 |                 image = tf.image.random_hue(image, max_delta=0.2)
153 |                 image = tf.image.random_contrast(image, lower=0.5, upper=1.5)
154 |             elif color_ordering == 1:
155 |                 image = tf.image.random_saturation(image, lower=0.5, upper=1.5)
156 |                 image = tf.image.random_brightness(image, max_delta=32. / 255.)
157 |                 image = tf.image.random_contrast(image, lower=0.5, upper=1.5)
158 |                 image = tf.image.random_hue(image, max_delta=0.2)
159 |             elif color_ordering == 2:
160 |                 image = tf.image.random_contrast(image, lower=0.5, upper=1.5)
161 |                 image = tf.image.random_hue(image, max_delta=0.2)
162 |                 image = tf.image.random_brightness(image, max_delta=32. / 255.)
163 |                 image = tf.image.random_saturation(image, lower=0.5, upper=1.5)
164 |             elif color_ordering == 3:
165 |                 image = tf.image.random_hue(image, max_delta=0.2)
166 |                 image = tf.image.random_saturation(image, lower=0.5, upper=1.5)
167 |                 image = tf.image.random_contrast(image, lower=0.5, upper=1.5)
168 |                 image = tf.image.random_brightness(image, max_delta=32. / 255.)
169 |             else:
170 |                 raise ValueError('color_ordering must be in [0, 3]')
171 |         # The random_* ops do not necessarily clamp.
172 |         return tf.clip_by_value(image, 0.0, 1.0)
173 | 
174 | 
175 | def distorted_bounding_box_crop(image,
176 |                                 labels,
177 |                                 bboxes,
178 |                                 min_object_covered=0.05,
179 |                                 aspect_ratio_range=(0.9, 1.1),
180 |                                 area_range=(0.1, 1.0),
181 |                                 max_attempts=200,
182 |                                 scope=None):
183 |     """Generates a cropped_image using one of the bboxes, randomly distorted.
184 | 
185 |     See `tf.image.sample_distorted_bounding_box` for more documentation.
186 | 
187 |     Args:
188 |         image: 3-D Tensor of image (it will be converted to floats in [0, 1]).
189 |         bboxes: Nx4 float Tensor of bounding boxes, where each coordinate is
190 |             in [0, 1) and the coordinates are arranged as
191 |             [ymin, xmin, ymax, xmax]. If N is 0 then the whole image is
192 |             used.
193 |         min_object_covered: An optional `float`. Defaults to `0.05`. The cropped
194 |             area of the image must contain at least this fraction of any bounding box
195 |             supplied.
196 |         aspect_ratio_range: An optional list of `floats`. The cropped area of the
197 |             image must have an aspect ratio = width / height within this range.
198 |         area_range: An optional list of `floats`. The cropped area of the image
199 |             must contain a fraction of the supplied image within this range.
200 |         max_attempts: An optional `int`. Number of attempts at generating a cropped
201 |             region of the image of the specified constraints. After `max_attempts`
202 |             failures, return the entire image.
203 |         scope: Optional scope for name_scope.
204 |     Returns:
205 |         A tuple, a 3-D Tensor cropped_image and the distorted bbox
206 |     """
207 |     with tf.name_scope(scope, 'distorted_bounding_box_crop', [image, bboxes]):
208 |         # Each bounding box has shape [1, num_boxes, box coords] and
209 |         # the coordinates are ordered [ymin, xmin, ymax, xmax].
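        # tf.image.sample_distorted_bounding_box returns (bbox_begin, bbox_size,
        # distort_bbox): the first two feed straight into tf.slice below, while
        # distort_bbox is a [1, 1, 4] Tensor holding the sampled crop window in
        # relative [ymin, xmin, ymax, xmax] coordinates; e.g. a crop of the
        # right half of the image comes back roughly as [[[0.0, 0.5, 1.0, 1.0]]].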
210 |         bbox_begin, bbox_size, distort_bbox = tf.image.sample_distorted_bounding_box(
211 |             tf.shape(image),
212 |             bounding_boxes=tf.expand_dims(bboxes, 0),
213 |             min_object_covered=min_object_covered,
214 |             aspect_ratio_range=aspect_ratio_range,
215 |             area_range=area_range,
216 |             max_attempts=max_attempts,
217 |             use_image_if_no_bounding_boxes=True)
218 |         distort_bbox = distort_bbox[0, 0]
219 | 
220 |         # Crop the image to the specified bounding box.
221 |         cropped_image = tf.slice(image, bbox_begin, bbox_size)
222 |         # Restore the shape since the dynamic slice loses 3rd dimension.
223 |         cropped_image.set_shape([None, None, 3])
224 | 
225 |         # Update bounding boxes: resize and filter out.
226 |         bboxes = tfe.bboxes_resize(distort_bbox, bboxes)
227 |         labels, bboxes = tfe.bboxes_filter_overlap(labels, bboxes,
228 |                                                    BBOX_CROP_OVERLAP)
229 |         return cropped_image, labels, bboxes, distort_bbox
230 | 
231 | 
232 | def preprocess_for_train(image, labels, bboxes,
233 |                          out_shape, data_format='NHWC',
234 |                          scope='ssd_preprocessing_train'):
235 |     """Preprocesses the given image for training.
236 | 
237 |     The image is randomly cropped around its bounding boxes, warp-resized to
238 |     `out_shape`, randomly flipped, and color-distorted.
239 | 
240 |     Args:
241 |         image: A `Tensor` representing an image of arbitrary size.
242 |         labels: 1-D Tensor of object labels matching `bboxes`.
243 |         bboxes: Nx4 Tensor of bounding boxes, in relative coordinates ordered
244 |             as [ymin, xmin, ymax, xmax].
245 |         out_shape: Output shape (height, width) after preprocessing.
246 |         data_format: 'NHWC' or 'NCHW'.
247 |         scope: Optional scope for name_scope.
248 | 
249 |     Returns:
250 |         A preprocessed image, with the matching labels and bboxes.
251 |     """
252 |     fast_mode = False
253 |     with tf.name_scope(scope, 'ssd_preprocessing_train', [image, labels, bboxes]):
254 |         if image.get_shape().ndims != 3:
255 |             raise ValueError('Input must be of size [height, width, C>0]')
256 |         # Convert to float scaled [0, 1].
257 |         if image.dtype != tf.float32:
258 |             image = tf.image.convert_image_dtype(image, dtype=tf.float32)
259 |         tf_summary_image(image, bboxes, 'image_with_bboxes')
260 | 
261 |         # # Remove DontCare labels.
262 |         # labels, bboxes = ssd_common.tf_bboxes_filter_labels(out_label,
263 |         #                                                     labels,
264 |         #                                                     bboxes)
265 | 
266 |         # Distort image and bounding boxes.
267 |         dst_image = image
268 |         dst_image, labels, bboxes, distort_bbox = \
269 |             distorted_bounding_box_crop(image, labels, bboxes,
270 |                                         aspect_ratio_range=CROP_RATIO_RANGE)
271 |         # Resize image to output size.
272 |         dst_image = tf_image.resize_image(dst_image, out_shape,
273 |                                           method=tf.image.ResizeMethod.BILINEAR,
274 |                                           align_corners=False)
275 |         tf_summary_image(dst_image, bboxes, 'image_shape_distorted')
276 | 
277 |         # Randomly flip the image horizontally.
278 |         dst_image, bboxes = tf_image.random_flip_left_right(dst_image, bboxes)
279 | 
280 |         # Randomly distort the colors. There are 4 ways to do it.
281 |         dst_image = apply_with_random_selector(
282 |             dst_image,
283 |             lambda x, ordering: distort_color(x, ordering, fast_mode),
284 |             num_cases=4)
285 |         tf_summary_image(dst_image, bboxes, 'image_color_distorted')
286 | 
287 |         # Rescale to VGG input scale.
288 |         image = dst_image * 255.
289 |         image = tf_image_whitened(image, [_R_MEAN, _G_MEAN, _B_MEAN])
290 |         # Image data format.
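        # For a single 3-D image, NHWC -> NCHW is just moving the channel axis
        # to the front: e.g. a [300, 300, 3] tensor becomes [3, 300, 300] under
        # perm=(2, 0, 1).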
291 |         if data_format == 'NCHW':
292 |             image = tf.transpose(image, perm=(2, 0, 1))
293 |         return image, labels, bboxes
294 | 
295 | 
296 | def preprocess_for_eval(image, labels, bboxes,
297 |                         out_shape=EVAL_SIZE, data_format='NHWC',
298 |                         difficults=None, resize=Resize.WARP_RESIZE,
299 |                         scope='ssd_preprocessing_eval'):
300 |     """Preprocess an image for evaluation.
301 | 
302 |     Args:
303 |         image: A `Tensor` representing an image of arbitrary size.
304 |         out_shape: Output shape after pre-processing (if resize != None)
305 |         resize: Resize strategy.
306 | 
307 |     Returns:
308 |         A preprocessed image.
309 |     """
310 |     with tf.name_scope(scope):
311 |         if image.get_shape().ndims != 3:
312 |             raise ValueError('Input must be of size [height, width, C>0]')
313 | 
314 |         image = tf.to_float(image)
315 |         image = tf_image_whitened(image, [_R_MEAN, _G_MEAN, _B_MEAN])
316 | 
317 |         # Add image rectangle to bboxes.
318 |         bbox_img = tf.constant([[0., 0., 1., 1.]])
319 |         if bboxes is None:
320 |             bboxes = bbox_img
321 |         else:
322 |             bboxes = tf.concat([bbox_img, bboxes], axis=0)
323 | 
324 |         if resize == Resize.NONE:
325 |             # No resizing...
326 |             pass
327 |         elif resize == Resize.CENTRAL_CROP:
328 |             # Central cropping of the image.
329 |             image, bboxes = tf_image.resize_image_bboxes_with_crop_or_pad(
330 |                 image, bboxes, out_shape[0], out_shape[1])
331 |         elif resize == Resize.PAD_AND_RESIZE:
332 |             # Resize image first: find the correct factor (float division, so the ratio is not floored to 0).
333 |             shape = tf.shape(image)
334 |             factor = tf.minimum(tf.to_double(1.0),
335 |                                 tf.minimum(tf.to_double(out_shape[0]) / tf.to_double(shape[0]),
336 |                                            tf.to_double(out_shape[1]) / tf.to_double(shape[1])))
337 |             resize_shape = factor * tf.to_double(shape[0:2])
338 |             resize_shape = tf.cast(tf.floor(resize_shape), tf.int32)
339 | 
340 |             image = tf_image.resize_image(image, resize_shape,
341 |                                           method=tf.image.ResizeMethod.BILINEAR,
342 |                                           align_corners=False)
343 |             # Pad to expected size.
344 |             image, bboxes = tf_image.resize_image_bboxes_with_crop_or_pad(
345 |                 image, bboxes, out_shape[0], out_shape[1])
346 |         elif resize == Resize.WARP_RESIZE:
347 |             # Warp resize of the image.
348 |             image = tf_image.resize_image(image, out_shape,
349 |                                           method=tf.image.ResizeMethod.BILINEAR,
350 |                                           align_corners=False)
351 | 
352 |         # Split back bounding boxes.
353 |         bbox_img = bboxes[0]
354 |         bboxes = bboxes[1:]
355 |         # Remove difficult boxes.
356 |         if difficults is not None:
357 |             mask = tf.logical_not(tf.cast(difficults, tf.bool))
358 |             labels = tf.boolean_mask(labels, mask)
359 |             bboxes = tf.boolean_mask(bboxes, mask)
360 |         # Image data format.
361 |         if data_format == 'NCHW':
362 |             image = tf.transpose(image, perm=(2, 0, 1))
363 |         return image, labels, bboxes, bbox_img
364 | 
365 | 
366 | def preprocess_image(image,
367 |                      labels,
368 |                      bboxes,
369 |                      out_shape,
370 |                      data_format,
371 |                      is_training=False,
372 |                      **kwargs):
373 |     """Pre-process a given image.
374 | 
375 |     Args:
376 |         image: A `Tensor` representing an image of arbitrary size.
377 |         output_height: The height of the image after preprocessing.
378 |         output_width: The width of the image after preprocessing.
379 |         is_training: `True` if we're preprocessing the image for training and
380 |             `False` otherwise.
381 |         resize_side_min: The lower bound for the smallest side of the image for
382 |             aspect-preserving resizing. If `is_training` is `False`, then this value
383 |             is used for rescaling.
384 |         resize_side_max: The upper bound for the smallest side of the image for
385 |             aspect-preserving resizing. If `is_training` is `False`, this value is
386 |             ignored.
Otherwise, the resize side is sampled from 387 | [resize_size_min, resize_size_max]. 388 | 389 | Returns: 390 | A preprocessed image. 391 | """ 392 | if is_training: 393 | return preprocess_for_train(image, labels, bboxes, 394 | out_shape=out_shape, 395 | data_format=data_format) 396 | else: 397 | return preprocess_for_eval(image, labels, bboxes, 398 | out_shape=out_shape, 399 | data_format=data_format, 400 | **kwargs) 401 | -------------------------------------------------------------------------------- /processing/test_processing.py: -------------------------------------------------------------------------------- 1 | """ 2 | This script will test all functions and scripts in data pre-processing phase 3 | Test functions includes: 4 | image_processing. 5 | data <- tfrecord 6 | image_buffer, label_index, bbox, name <- parse_example 7 | image,labels,bbox <- image_processing 8 | image <- distorted_image 9 | """ 10 | 11 | import tensorflow as tf 12 | import matplotlib.pyplot as plt 13 | import numpy as np 14 | import skimage.io as skio 15 | 16 | import tf_extended as tfe 17 | from image_processing import * 18 | import cv2 19 | 20 | def visualize_bbox(image, bboxes): 21 | """ 22 | Input: image (height, width, channels) 23 | bboxes (numof bboxes, 4) in order(ymin, xmin, ymax, xmax) 24 | range(0,1) 25 | """ 26 | numofbox = bboxes.shape[0] 27 | width = image.shape[1] 28 | height = image.shape[0] 29 | def norm(x): 30 | if x < 0: 31 | x = 0 32 | else: 33 | if x > 1: 34 | x = 1 35 | return x 36 | xmin = [int(norm(i) * width) for i in bboxes[:,1]] 37 | ymin = [int(norm(i) * height) for i in bboxes[:,0]] 38 | ymax = [int(norm(i) * height) for i in bboxes[:,2]] 39 | xmax = [int(norm(i) * width) for i in bboxes[:,3]] 40 | 41 | for i in range(numofbox): 42 | image = cv2.rectangle(image,(xmin[i],ymin[i]), 43 | (xmax[i],ymax[i]),(0,0,0)) 44 | skio.imshow(image) 45 | skio.show() 46 | 47 | 48 | 49 | 50 | if __name__ == "__main__": 51 | data_dir = '/Users/xiaodiu/Documents/github/projecttextbox/TextBoxes-TensorFlow/data/sythtext/' 52 | file_name = data_dir + '1.tfrecord' 53 | ## test if file_name exists 54 | 55 | example = tf.python_io.tf_record_iterator(file_name).next() 56 | image_buffer, label, bboxes, name= parse_example(example) 57 | image,label,bboxes = image_processing(image_buffer, bboxes,label, 58 | train= True, thread_id = 0) 59 | 60 | with tf.Session() as sess: 61 | sess.run(tf.global_variables_initializer()) 62 | Image, label, bboxes = sess.run([image, label, bboxes]) 63 | print label.shape 64 | print bboxes 65 | #print name 66 | #print width 67 | #print height 68 | print Image.shape 69 | visualize_bbox(Image, bboxes) 70 | skio.imshow(Image) 71 | skio.show() 72 | 73 | -------------------------------------------------------------------------------- /processing/tf_image.py: -------------------------------------------------------------------------------- 1 | # Copyright 2015 The TensorFlow Authors and Paul Balanca. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | """Custom image operations. 16 | Most of the following methods extend TensorFlow image library, and part of 17 | the code is shameless copy-paste of the former! 18 | """ 19 | import tensorflow as tf 20 | 21 | from tensorflow.python.framework import constant_op 22 | from tensorflow.python.framework import dtypes 23 | from tensorflow.python.framework import ops 24 | from tensorflow.python.framework import tensor_shape 25 | from tensorflow.python.framework import tensor_util 26 | from tensorflow.python.ops import array_ops 27 | from tensorflow.python.ops import check_ops 28 | from tensorflow.python.ops import clip_ops 29 | from tensorflow.python.ops import control_flow_ops 30 | from tensorflow.python.ops import gen_image_ops 31 | from tensorflow.python.ops import gen_nn_ops 32 | from tensorflow.python.ops import string_ops 33 | from tensorflow.python.ops import math_ops 34 | from tensorflow.python.ops import random_ops 35 | from tensorflow.python.ops import variables 36 | 37 | 38 | # =========================================================================== # 39 | # Modification of TensorFlow image routines. 40 | # =========================================================================== # 41 | def _assert(cond, ex_type, msg): 42 | """A polymorphic assert, works with tensors and boolean expressions. 43 | If `cond` is not a tensor, behave like an ordinary assert statement, except 44 | that a empty list is returned. If `cond` is a tensor, return a list 45 | containing a single TensorFlow assert op. 46 | Args: 47 | cond: Something evaluates to a boolean value. May be a tensor. 48 | ex_type: The exception class to use. 49 | msg: The error message. 50 | Returns: 51 | A list, containing at most one assert op. 52 | """ 53 | if _is_tensor(cond): 54 | return [control_flow_ops.Assert(cond, [msg])] 55 | else: 56 | if not cond: 57 | raise ex_type(msg) 58 | else: 59 | return [] 60 | 61 | 62 | def _is_tensor(x): 63 | """Returns `True` if `x` is a symbolic tensor-like object. 64 | Args: 65 | x: A python object to check. 66 | Returns: 67 | `True` if `x` is a `tf.Tensor` or `tf.Variable`, otherwise `False`. 68 | """ 69 | return isinstance(x, (ops.Tensor, variables.Variable)) 70 | 71 | 72 | def _ImageDimensions(image): 73 | """Returns the dimensions of an image tensor. 74 | Args: 75 | image: A 3-D Tensor of shape `[height, width, channels]`. 76 | Returns: 77 | A list of `[height, width, channels]` corresponding to the dimensions of the 78 | input image. Dimensions that are statically known are python integers, 79 | otherwise they are integer scalar tensors. 80 | """ 81 | if image.get_shape().is_fully_defined(): 82 | return image.get_shape().as_list() 83 | else: 84 | static_shape = image.get_shape().with_rank(3).as_list() 85 | dynamic_shape = array_ops.unstack(array_ops.shape(image), 3) 86 | return [s if s is not None else d 87 | for s, d in zip(static_shape, dynamic_shape)] 88 | 89 | 90 | def _Check3DImage(image, require_static=True): 91 | """Assert that we are working with properly shaped image. 92 | Args: 93 | image: 3-D Tensor of shape [height, width, channels] 94 | require_static: If `True`, requires that all dimensions of `image` are 95 | known and non-zero. 96 | Raises: 97 | ValueError: if `image.shape` is not a 3-vector. 98 | Returns: 99 | An empty list, if `image` has fully defined dimensions. 
Otherwise, a list 100 | containing an assert op is returned. 101 | """ 102 | try: 103 | image_shape = image.get_shape().with_rank(3) 104 | except ValueError: 105 | raise ValueError("'image' must be three-dimensional.") 106 | if require_static and not image_shape.is_fully_defined(): 107 | raise ValueError("'image' must be fully defined.") 108 | if any(x == 0 for x in image_shape): 109 | raise ValueError("all dims of 'image.shape' must be > 0: %s" % 110 | image_shape) 111 | if not image_shape.is_fully_defined(): 112 | return [check_ops.assert_positive(array_ops.shape(image), 113 | ["all dims of 'image.shape' " 114 | "must be > 0."])] 115 | else: 116 | return [] 117 | 118 | 119 | def fix_image_flip_shape(image, result): 120 | """Set the shape to 3 dimensional if we don't know anything else. 121 | Args: 122 | image: original image size 123 | result: flipped or transformed image 124 | Returns: 125 | An image whose shape is at least None,None,None. 126 | """ 127 | image_shape = image.get_shape() 128 | if image_shape == tensor_shape.unknown_shape(): 129 | result.set_shape([None, None, None]) 130 | else: 131 | result.set_shape(image_shape) 132 | return result 133 | 134 | 135 | # =========================================================================== # 136 | # Image + BBoxes methods: cropping, resizing, flipping, ... 137 | # =========================================================================== # 138 | def bboxes_crop_or_pad(bboxes, 139 | height, width, 140 | offset_y, offset_x, 141 | target_height, target_width): 142 | """Adapt bounding boxes to crop or pad operations. 143 | Coordinates are always supposed to be relative to the image. 144 | 145 | Arguments: 146 | bboxes: Tensor Nx4 with bboxes coordinates [y_min, x_min, y_max, x_max]; 147 | height, width: Original image dimension; 148 | offset_y, offset_x: Offset to apply, 149 | negative if cropping, positive if padding; 150 | target_height, target_width: Target dimension after cropping / padding. 151 | """ 152 | with tf.name_scope('bboxes_crop_or_pad'): 153 | # Rescale bounding boxes in pixels. 154 | scale = tf.cast(tf.stack([height, width, height, width]), bboxes.dtype) 155 | bboxes = bboxes * scale 156 | # Add offset. 157 | offset = tf.cast(tf.stack([offset_y, offset_x, offset_y, offset_x]), bboxes.dtype) 158 | bboxes = bboxes + offset 159 | # Rescale to target dimension. 160 | scale = tf.cast(tf.stack([target_height, target_width, 161 | target_height, target_width]), bboxes.dtype) 162 | bboxes = bboxes / scale 163 | return bboxes 164 | 165 | 166 | def resize_image_bboxes_with_crop_or_pad(image, bboxes, 167 | target_height, target_width): 168 | """Crops and/or pads an image to a target width and height. 169 | Resizes an image to a target width and height by either centrally 170 | cropping the image or padding it evenly with zeros. 171 | 172 | If `width` or `height` is greater than the specified `target_width` or 173 | `target_height` respectively, this op centrally crops along that dimension. 174 | If `width` or `height` is smaller than the specified `target_width` or 175 | `target_height` respectively, this op centrally pads with 0 along that 176 | dimension. 177 | Args: 178 | image: 3-D tensor of shape `[height, width, channels]` 179 | target_height: Target height. 180 | target_width: Target width. 181 | Raises: 182 | ValueError: if `target_height` or `target_width` are zero or negative. 
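 
    Example (a sketch: center-cropping a 400x600 image to 300x300):
        image = tf.zeros([400, 600, 3])
        bboxes = tf.constant([[0.25, 0.25, 0.75, 0.75]])
        out_img, out_boxes = resize_image_bboxes_with_crop_or_pad(
            image, bboxes, 300, 300)
        # out_img is [300, 300, 3]; out_boxes is re-expressed relative to
        # the cropped frame (clipping may still be needed afterwards).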
183 | Returns: 184 | Cropped and/or padded image of shape 185 | `[target_height, target_width, channels]` 186 | """ 187 | with tf.name_scope('resize_with_crop_or_pad'): 188 | image = ops.convert_to_tensor(image, name='image') 189 | 190 | assert_ops = [] 191 | assert_ops += _Check3DImage(image, require_static=False) 192 | assert_ops += _assert(target_width > 0, ValueError, 193 | 'target_width must be > 0.') 194 | assert_ops += _assert(target_height > 0, ValueError, 195 | 'target_height must be > 0.') 196 | 197 | image = control_flow_ops.with_dependencies(assert_ops, image) 198 | # `crop_to_bounding_box` and `pad_to_bounding_box` have their own checks. 199 | # Make sure our checks come first, so that error messages are clearer. 200 | if _is_tensor(target_height): 201 | target_height = control_flow_ops.with_dependencies( 202 | assert_ops, target_height) 203 | if _is_tensor(target_width): 204 | target_width = control_flow_ops.with_dependencies(assert_ops, target_width) 205 | 206 | def max_(x, y): 207 | if _is_tensor(x) or _is_tensor(y): 208 | return math_ops.maximum(x, y) 209 | else: 210 | return max(x, y) 211 | 212 | def min_(x, y): 213 | if _is_tensor(x) or _is_tensor(y): 214 | return math_ops.minimum(x, y) 215 | else: 216 | return min(x, y) 217 | 218 | def equal_(x, y): 219 | if _is_tensor(x) or _is_tensor(y): 220 | return math_ops.equal(x, y) 221 | else: 222 | return x == y 223 | 224 | height, width, _ = _ImageDimensions(image) 225 | width_diff = target_width - width 226 | offset_crop_width = max_(-width_diff // 2, 0) 227 | offset_pad_width = max_(width_diff // 2, 0) 228 | 229 | height_diff = target_height - height 230 | offset_crop_height = max_(-height_diff // 2, 0) 231 | offset_pad_height = max_(height_diff // 2, 0) 232 | 233 | # Maybe crop if needed. 234 | height_crop = min_(target_height, height) 235 | width_crop = min_(target_width, width) 236 | cropped = tf.image.crop_to_bounding_box(image, offset_crop_height, offset_crop_width, 237 | height_crop, width_crop) 238 | bboxes = bboxes_crop_or_pad(bboxes, 239 | height, width, 240 | -offset_crop_height, -offset_crop_width, 241 | height_crop, width_crop) 242 | # Maybe pad if needed. 243 | resized = tf.image.pad_to_bounding_box(cropped, offset_pad_height, offset_pad_width, 244 | target_height, target_width) 245 | bboxes = bboxes_crop_or_pad(bboxes, 246 | height_crop, width_crop, 247 | offset_pad_height, offset_pad_width, 248 | target_height, target_width) 249 | 250 | # In theory all the checks below are redundant. 251 | if resized.get_shape().ndims is None: 252 | raise ValueError('resized contains no shape.') 253 | 254 | resized_height, resized_width, _ = _ImageDimensions(resized) 255 | 256 | assert_ops = [] 257 | assert_ops += _assert(equal_(resized_height, target_height), ValueError, 258 | 'resized height is not correct.') 259 | assert_ops += _assert(equal_(resized_width, target_width), ValueError, 260 | 'resized width is not correct.') 261 | 262 | resized = control_flow_ops.with_dependencies(assert_ops, resized) 263 | return resized, bboxes 264 | 265 | 266 | def resize_image(image, size, 267 | method=tf.image.ResizeMethod.BILINEAR, 268 | align_corners=False): 269 | """Resize an image and bounding boxes. 270 | """ 271 | # Resize image. 
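    # The image is expanded to a [1, H, W, C] batch for tf.image.resize_images
    # and then reshaped back to 3-D; reshaping with the stacked shape also pins
    # down the output's static dimensions when `size` is known, e.g. a
    # (300, 300) `size` yields a [300, 300, C] tensor later graph code can use.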
272 | with tf.name_scope('resize_image'): 273 | height, width, channels = _ImageDimensions(image) 274 | image = tf.expand_dims(image, 0) 275 | image = tf.image.resize_images(image, size, 276 | method, align_corners) 277 | image = tf.reshape(image, tf.stack([size[0], size[1], channels])) 278 | return image 279 | 280 | 281 | def random_flip_left_right(image, bboxes, seed=None): 282 | """Random flip left-right of an image and its bounding boxes. 283 | """ 284 | def flip_bboxes(bboxes): 285 | """Flip bounding boxes coordinates. 286 | """ 287 | bboxes = tf.stack([bboxes[:, 0], 1 - bboxes[:, 3], 288 | bboxes[:, 2], 1 - bboxes[:, 1]], axis=-1) 289 | return bboxes 290 | 291 | # Random flip. Tensorflow implementation. 292 | with tf.name_scope('random_flip_left_right'): 293 | image = ops.convert_to_tensor(image, name='image') 294 | _Check3DImage(image, require_static=False) 295 | uniform_random = random_ops.random_uniform([], 0, 1.0, seed=seed) 296 | mirror_cond = math_ops.less(uniform_random, .5) 297 | # Flip image. 298 | result = control_flow_ops.cond(mirror_cond, 299 | lambda: array_ops.reverse_v2(image, [1]), 300 | lambda: image) 301 | # Flip bboxes. 302 | bboxes = control_flow_ops.cond(mirror_cond, 303 | lambda: flip_bboxes(bboxes), 304 | lambda: bboxes) 305 | return fix_image_flip_shape(image, result), bboxes 306 | 307 | -------------------------------------------------------------------------------- /tf_extended/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2017 Paul Balanca. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | """TF Extended: additional metrics. 16 | """ 17 | 18 | # pylint: disable=unused-import,line-too-long,g-importing-member,wildcard-import 19 | from tf_extended.metrics import * 20 | from tf_extended.tensors import * 21 | from tf_extended.bboxes import * 22 | from tf_extended.image import * 23 | from tf_extended.math import * 24 | 25 | -------------------------------------------------------------------------------- /tf_extended/bboxes.py: -------------------------------------------------------------------------------- 1 | # Copyright 2017 Paul Balanca. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | # ============================================================================== 15 | """TF Extended: additional bounding boxes methods. 16 | """ 17 | import numpy as np 18 | import tensorflow as tf 19 | 20 | from tf_extended import tensors as tfe_tensors 21 | from tf_extended import math as tfe_math 22 | 23 | 24 | # =========================================================================== # 25 | # Standard boxes algorithms. 26 | # =========================================================================== # 27 | def bboxes_sort_all_classes(classes, scores, bboxes, top_k=400, scope=None): 28 | """Sort bounding boxes by decreasing order and keep only the top_k. 29 | Assume the input Tensors mix-up objects with different classes. 30 | Assume a batch-type input. 31 | 32 | Args: 33 | classes: Batch x N Tensor containing integer classes. 34 | scores: Batch x N Tensor containing float scores. 35 | bboxes: Batch x N x 4 Tensor containing boxes coordinates. 36 | top_k: Top_k boxes to keep. 37 | Return: 38 | classes, scores, bboxes: Sorted tensors of shape Batch x Top_k. 39 | """ 40 | with tf.name_scope(scope, 'bboxes_sort', [classes, scores, bboxes]): 41 | scores, idxes = tf.nn.top_k(scores, k=top_k, sorted=True) 42 | 43 | # Trick to be able to use tf.gather: map for each element in the batch. 44 | def fn_gather(classes, bboxes, idxes): 45 | cl = tf.gather(classes, idxes) 46 | bb = tf.gather(bboxes, idxes) 47 | return [cl, bb] 48 | r = tf.map_fn(lambda x: fn_gather(x[0], x[1], x[2]), 49 | [classes, bboxes, idxes], 50 | dtype=[classes.dtype, bboxes.dtype], 51 | parallel_iterations=10, 52 | back_prop=False, 53 | swap_memory=False, 54 | infer_shape=True) 55 | classes = r[0] 56 | bboxes = r[1] 57 | return classes, scores, bboxes 58 | 59 | 60 | def bboxes_sort(scores, bboxes, top_k=400, scope=None): 61 | """Sort bounding boxes by decreasing order and keep only the top_k. 62 | If inputs are dictionnaries, assume every key is a different class. 63 | Assume a batch-type input. 64 | 65 | Args: 66 | scores: Batch x N Tensor/Dictionary containing float scores. 67 | bboxes: Batch x N x 4 Tensor/Dictionary containing boxes coordinates. 68 | top_k: Top_k boxes to keep. 69 | Return: 70 | scores, bboxes: Sorted Tensors/Dictionaries of shape Batch x Top_k x 1|4. 71 | """ 72 | # Dictionaries as inputs. 73 | if isinstance(scores, dict) or isinstance(bboxes, dict): 74 | with tf.name_scope(scope, 'bboxes_sort_dict'): 75 | d_scores = {} 76 | d_bboxes = {} 77 | for c in scores.keys(): 78 | s, b = bboxes_sort(scores[c], bboxes[c], top_k=top_k) 79 | d_scores[c] = s 80 | d_bboxes[c] = b 81 | return d_scores, d_bboxes 82 | 83 | # Tensors inputs. 84 | with tf.name_scope(scope, 'bboxes_sort', [scores, bboxes]): 85 | # Sort scores... 86 | scores, idxes = tf.nn.top_k(scores, k=top_k, sorted=True) 87 | 88 | # Trick to be able to use tf.gather: map for each element in the first dim. 89 | def fn_gather(bboxes, idxes): 90 | bb = tf.gather(bboxes, idxes) 91 | return [bb] 92 | r = tf.map_fn(lambda x: fn_gather(x[0], x[1]), 93 | [bboxes, idxes], 94 | dtype=[bboxes.dtype], 95 | parallel_iterations=10, 96 | back_prop=False, 97 | swap_memory=False, 98 | infer_shape=True) 99 | bboxes = r[0] 100 | return scores, bboxes 101 | 102 | 103 | def bboxes_clip(bbox_ref, bboxes, scope=None): 104 | """Clip bounding boxes to a reference box. 105 | Batch-compatible if the first dimension of `bbox_ref` and `bboxes` 106 | can be broadcasted. 107 | 108 | Args: 109 | bbox_ref: Reference bounding box. 
Nx4 or 4 shaped-Tensor; 110 | bboxes: Bounding boxes to clip. Nx4 or 4 shaped-Tensor or dictionary. 111 | Return: 112 | Clipped bboxes. 113 | """ 114 | # Bboxes is dictionary. 115 | if isinstance(bboxes, dict): 116 | with tf.name_scope(scope, 'bboxes_clip_dict'): 117 | d_bboxes = {} 118 | for c in bboxes.keys(): 119 | d_bboxes[c] = bboxes_clip(bbox_ref, bboxes[c]) 120 | return d_bboxes 121 | 122 | # Tensors inputs. 123 | with tf.name_scope(scope, 'bboxes_clip'): 124 | # Easier with transposed bboxes. Especially for broadcasting. 125 | bbox_ref = tf.transpose(bbox_ref) 126 | bboxes = tf.transpose(bboxes) 127 | # Intersection bboxes and reference bbox. 128 | ymin = tf.maximum(bboxes[0], bbox_ref[0]) 129 | xmin = tf.maximum(bboxes[1], bbox_ref[1]) 130 | ymax = tf.minimum(bboxes[2], bbox_ref[2]) 131 | xmax = tf.minimum(bboxes[3], bbox_ref[3]) 132 | bboxes = tf.transpose(tf.stack([ymin, xmin, ymax, xmax], axis=0)) 133 | return bboxes 134 | 135 | 136 | def bboxes_resize(bbox_ref, bboxes, name=None): 137 | """Resize bounding boxes based on a reference bounding box, 138 | assuming that the latter is [0, 0, 1, 1] after transform. Useful for 139 | updating a collection of boxes after cropping an image. 140 | """ 141 | # Bboxes is dictionary. 142 | if isinstance(bboxes, dict): 143 | with tf.name_scope(name, 'bboxes_resize_dict'): 144 | d_bboxes = {} 145 | for c in bboxes.keys(): 146 | d_bboxes[c] = bboxes_resize(bbox_ref, bboxes[c]) 147 | return d_bboxes 148 | 149 | # Tensors inputs. 150 | with tf.name_scope(name, 'bboxes_resize'): 151 | # Translate. 152 | v = tf.stack([bbox_ref[0], bbox_ref[1], bbox_ref[0], bbox_ref[1]]) 153 | bboxes = bboxes - v 154 | # Scale. 155 | s = tf.stack([bbox_ref[2] - bbox_ref[0], 156 | bbox_ref[3] - bbox_ref[1], 157 | bbox_ref[2] - bbox_ref[0], 158 | bbox_ref[3] - bbox_ref[1]]) 159 | bboxes = bboxes / s 160 | return bboxes 161 | 162 | 163 | def bboxes_nms(scores, bboxes, nms_threshold=0.5, keep_top_k=200, scope=None): 164 | """Apply non-maximum selection to bounding boxes. In comparison to TF 165 | implementation, use classes information for matching. 166 | Should only be used on single-entries. Use batch version otherwise. 167 | 168 | Args: 169 | scores: N Tensor containing float scores. 170 | bboxes: N x 4 Tensor containing boxes coordinates. 171 | nms_threshold: Matching threshold in NMS algorithm; 172 | keep_top_k: Number of total object to keep after NMS. 173 | Return: 174 | classes, scores, bboxes Tensors, sorted by score. 175 | Padded with zero if necessary. 176 | """ 177 | with tf.name_scope(scope, 'bboxes_nms_single', [scores, bboxes]): 178 | # Apply NMS algorithm. 179 | idxes = tf.image.non_max_suppression(bboxes, scores, 180 | keep_top_k, nms_threshold) 181 | scores = tf.gather(scores, idxes) 182 | bboxes = tf.gather(bboxes, idxes) 183 | # Pad results. 184 | scores = tfe_tensors.pad_axis(scores, 0, keep_top_k, axis=0) 185 | bboxes = tfe_tensors.pad_axis(bboxes, 0, keep_top_k, axis=0) 186 | return scores, bboxes 187 | 188 | 189 | def bboxes_nms_batch(scores, bboxes, nms_threshold=0.5, keep_top_k=200, 190 | scope=None): 191 | """Apply non-maximum selection to bounding boxes. In comparison to TF 192 | implementation, use classes information for matching. 193 | Use only on batched-inputs. Use zero-padding in order to batch output 194 | results. 195 | 196 | Args: 197 | scores: Batch x N Tensor/Dictionary containing float scores. 198 | bboxes: Batch x N x 4 Tensor/Dictionary containing boxes coordinates. 
199 |         nms_threshold: Matching threshold in NMS algorithm;
200 |         keep_top_k: Number of total object to keep after NMS.
201 |     Return:
202 |         scores, bboxes Tensors/Dictionaries, sorted by score.
203 |             Padded with zero if necessary.
204 |     """
205 |     # Dictionaries as inputs.
206 |     if isinstance(scores, dict) or isinstance(bboxes, dict):
207 |         with tf.name_scope(scope, 'bboxes_nms_batch_dict'):
208 |             d_scores = {}
209 |             d_bboxes = {}
210 |             for c in scores.keys():
211 |                 s, b = bboxes_nms_batch(scores[c], bboxes[c],
212 |                                         nms_threshold=nms_threshold,
213 |                                         keep_top_k=keep_top_k)
214 |                 d_scores[c] = s
215 |                 d_bboxes[c] = b
216 |             return d_scores, d_bboxes
217 | 
218 |     # Tensors inputs.
219 |     with tf.name_scope(scope, 'bboxes_nms_batch'):
220 |         r = tf.map_fn(lambda x: bboxes_nms(x[0], x[1],
221 |                                            nms_threshold, keep_top_k),
222 |                       (scores, bboxes),
223 |                       dtype=(scores.dtype, bboxes.dtype),
224 |                       parallel_iterations=10,
225 |                       back_prop=False,
226 |                       swap_memory=False,
227 |                       infer_shape=True)
228 |         scores, bboxes = r
229 |         return scores, bboxes
230 | 
231 | 
232 | # def bboxes_fast_nms(classes, scores, bboxes,
233 | #                     nms_threshold=0.5, eta=3., num_classes=21,
234 | #                     pad_output=True, scope=None):
235 | #     with tf.name_scope(scope, 'bboxes_fast_nms',
236 | #                        [classes, scores, bboxes]):
237 | 
238 | #         nms_classes = tf.zeros((0,), dtype=classes.dtype)
239 | #         nms_scores = tf.zeros((0,), dtype=scores.dtype)
240 | #         nms_bboxes = tf.zeros((0, 4), dtype=bboxes.dtype)
241 | 
242 | 
243 | def bboxes_matching(label, scores, bboxes,
244 |                     glabels, gbboxes, gdifficults,
245 |                     matching_threshold=0.5, scope=None):
246 |     """Matching a collection of detected boxes with groundtruth values.
247 |     Does not accept batched-inputs.
248 |     The algorithm goes as follows: for every detected box, check
249 |     whether a groundtruth box matches it. If none does, the detection counts
250 |     as a False Positive. If the groundtruth box is already matched to another
251 |     detection, it also counts as a False Positive. We refer to the Pascal VOC
252 |     documentation for the details.
253 | 
254 |     Args:
255 |         label, scores, bboxes: N(x4) Tensors. Detected objects, sorted by score;
256 |         glabels, gbboxes: Groundtruth bounding boxes. May be zero padded, hence
257 |             zero-class objects are ignored.
258 |         matching_threshold: Threshold for a positive match.
259 |     Return: Tuple of:
260 |         n_gbboxes: Scalar Tensor with number of groundtruth boxes (may differ
261 |             from size because of zero padding).
262 |         tp_match: (N,)-shaped boolean Tensor containing the True Positives.
263 |         fp_match: (N,)-shaped boolean Tensor containing the False Positives.
264 |     """
265 |     with tf.name_scope(scope, 'bboxes_matching_single',
266 |                        [scores, bboxes, glabels, gbboxes]):
267 |         rsize = tf.size(scores)
268 |         rshape = tf.shape(scores)
269 |         rlabel = tf.cast(label, glabels.dtype)
270 |         # Number of groundtruth boxes.
271 |         gdifficults = tf.cast(gdifficults, tf.bool)
272 |         n_gbboxes = tf.count_nonzero(tf.logical_and(tf.equal(glabels, label),
273 |                                                     tf.logical_not(gdifficults)))
274 |         # Groundtruth matching arrays.
275 |         gmatch = tf.zeros(tf.shape(glabels), dtype=tf.bool)
276 |         grange = tf.range(tf.size(glabels), dtype=tf.int32)
277 |         # True/False positive matching TensorArrays.
278 |         sdtype = tf.bool
279 |         ta_tp_bool = tf.TensorArray(sdtype, size=rsize, dynamic_size=False, infer_shape=True)
280 |         ta_fp_bool = tf.TensorArray(sdtype, size=rsize, dynamic_size=False, infer_shape=True)
281 | 
282 |         # Loop over returned objects.
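        # The while_loop below walks detections one at a time, in descending
        # score order; in pseudo-Python it does (a sketch of m_body, not extra
        # graph code):
        #   for i in range(num_detections):
        #       j = argmax(jaccard(bboxes[i], gbboxes))   # best groundtruth
        #       TP if jaccard[j] > threshold and gmatch[j] was False;
        #       FP if below threshold or groundtruth j already matched;
        #       difficult groundtruth boxes count as neither, and gmatch[j]
        #       is set once a groundtruth box is claimed.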
282 |         def m_condition(i, ta_tp, ta_fp, gmatch):
283 |             r = tf.less(i, rsize)
284 |             return r
285 | 
286 |         def m_body(i, ta_tp, ta_fp, gmatch):
287 |             # Jaccard score with groundtruth bboxes.
288 |             rbbox = bboxes[i]
289 |             jaccard = bboxes_jaccard(rbbox, gbboxes)
290 |             jaccard = jaccard * tf.cast(tf.equal(glabels, rlabel), dtype=jaccard.dtype)
291 | 
292 |             # Best fit, checking it's above threshold.
293 |             idxmax = tf.cast(tf.argmax(jaccard, axis=0), tf.int32)
294 |             jcdmax = jaccard[idxmax]
295 |             match = jcdmax > matching_threshold
296 |             existing_match = gmatch[idxmax]
297 |             not_difficult = tf.logical_not(gdifficults[idxmax])
298 | 
299 |             # TP: match & no previous match and FP: previous match | no match.
300 |             # If difficult: no record, i.e. FP=False and TP=False.
301 |             tp = tf.logical_and(not_difficult,
302 |                                 tf.logical_and(match, tf.logical_not(existing_match)))
303 |             ta_tp = ta_tp.write(i, tp)
304 |             fp = tf.logical_and(not_difficult,
305 |                                 tf.logical_or(existing_match, tf.logical_not(match)))
306 |             ta_fp = ta_fp.write(i, fp)
307 |             # Update groundtruth match.
308 |             mask = tf.logical_and(tf.equal(grange, idxmax),
309 |                                   tf.logical_and(not_difficult, match))
310 |             gmatch = tf.logical_or(gmatch, mask)
311 | 
312 |             return [i+1, ta_tp, ta_fp, gmatch]
313 |         # Main loop definition.
314 |         i = 0
315 |         [i, ta_tp_bool, ta_fp_bool, gmatch] = \
316 |             tf.while_loop(m_condition, m_body,
317 |                           [i, ta_tp_bool, ta_fp_bool, gmatch],
318 |                           parallel_iterations=1,
319 |                           back_prop=False)
320 |         # TensorArrays to Tensors and reshape.
321 |         tp_match = tf.reshape(ta_tp_bool.stack(), rshape)
322 |         fp_match = tf.reshape(ta_fp_bool.stack(), rshape)
323 | 
324 |         # Some debugging information...
325 |         # tp_match = tf.Print(tp_match,
326 |         #                     [n_gbboxes,
327 |         #                      tf.reduce_sum(tf.cast(tp_match, tf.int64)),
328 |         #                      tf.reduce_sum(tf.cast(fp_match, tf.int64)),
329 |         #                      tf.reduce_sum(tf.cast(gmatch, tf.int64))],
330 |         #                     'Matching (NG, TP, FP, GM): ')
331 |         return n_gbboxes, tp_match, fp_match
332 | 
333 | 
334 | def bboxes_matching_batch(labels, scores, bboxes,
335 |                           glabels, gbboxes, gdifficults,
336 |                           matching_threshold=0.5, scope=None):
337 |     """Matching a collection of detected boxes with groundtruth values.
338 |     Batched-inputs version.
339 | 
340 |     Args:
341 |         labels, scores, bboxes: BxN(x4) Tensors. Detected objects, sorted by score;
342 |         glabels, gbboxes: Groundtruth bounding boxes. May be zero padded, hence
343 |             zero-class objects are ignored.
344 |         matching_threshold: Threshold for a positive match.
345 |     Return: Tuple or Dictionaries with:
346 |         n_gbboxes: Scalar Tensor with number of groundtruth boxes (may differ
347 |             from size because of zero padding).
348 |         tp: (B, N)-shaped boolean Tensor containing the True Positives.
349 |         fp: (B, N)-shaped boolean Tensor containing the False Positives.
350 |     """
351 |     # Dictionaries as inputs.
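    # tf_extended metrics accept either plain Tensors or per-class dictionaries
    # {class: Tensor}; the dict branch below simply recurses per key. A usage
    # sketch, assuming per-class dicts d_scores / d_bboxes from the detection
    # head:
    #   n, tp, fp, sc = bboxes_matching_batch(d_scores.keys(), d_scores,
    #                                         d_bboxes, glabels, gbboxes,
    #                                         gdifficults)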
352 |     if isinstance(scores, dict) or isinstance(bboxes, dict):
353 |         with tf.name_scope(scope, 'bboxes_matching_batch_dict'):
354 |             d_n_gbboxes = {}
355 |             d_tp = {}
356 |             d_fp = {}
357 |             for c in labels:
358 |                 n, tp, fp, _ = bboxes_matching_batch(c, scores[c], bboxes[c],
359 |                                                      glabels, gbboxes, gdifficults,
360 |                                                      matching_threshold)
361 |                 d_n_gbboxes[c] = n
362 |                 d_tp[c] = tp
363 |                 d_fp[c] = fp
364 |             return d_n_gbboxes, d_tp, d_fp, scores
365 | 
366 |     with tf.name_scope(scope, 'bboxes_matching_batch',
367 |                        [scores, bboxes, glabels, gbboxes]):
368 |         r = tf.map_fn(lambda x: bboxes_matching(labels, x[0], x[1],
369 |                                                 x[2], x[3], x[4],
370 |                                                 matching_threshold),
371 |                       (scores, bboxes, glabels, gbboxes, gdifficults),
372 |                       dtype=(tf.int64, tf.bool, tf.bool),
373 |                       parallel_iterations=10,
374 |                       back_prop=False,
375 |                       swap_memory=True,
376 |                       infer_shape=True)
377 |         return r[0], r[1], r[2], scores
378 | 
379 | 
380 | # =========================================================================== #
381 | # Some filtering methods.
382 | # =========================================================================== #
383 | def bboxes_filter_center(labels, bboxes, margins=[0., 0., 0., 0.],
384 |                          scope=None):
385 |     """Filter out bounding boxes whose center are not in
386 |     the rectangle [0, 0, 1, 1] + margins. The margin Tensor
387 |     can be used to enforce or loosen this condition.
388 | 
389 |     Return:
390 |         labels, bboxes: Filtered elements.
391 |     """
392 |     with tf.name_scope(scope, 'bboxes_filter', [labels, bboxes]):
393 |         cy = (bboxes[:, 0] + bboxes[:, 2]) / 2.
394 |         cx = (bboxes[:, 1] + bboxes[:, 3]) / 2.
395 |         mask = tf.greater(cy, margins[0])
396 |         mask = tf.logical_and(mask, tf.greater(cx, margins[1]))
397 |         mask = tf.logical_and(mask, tf.less(cy, 1. + margins[2]))
398 |         mask = tf.logical_and(mask, tf.less(cx, 1. + margins[3]))
399 |         # Boolean masking...
400 |         labels = tf.boolean_mask(labels, mask)
401 |         bboxes = tf.boolean_mask(bboxes, mask)
402 |         return labels, bboxes
403 | 
404 | 
405 | def bboxes_filter_overlap(labels, bboxes, threshold=0.5,
406 |                           scope=None):
407 |     """Filter out bounding boxes based on overlap with reference
408 |     box [0, 0, 1, 1].
409 | 
410 |     Return:
411 |         labels, bboxes: Filtered elements.
412 |     """
413 |     with tf.name_scope(scope, 'bboxes_filter', [labels, bboxes]):
414 |         scores = bboxes_intersection(tf.constant([0, 0, 1, 1], bboxes.dtype),
415 |                                      bboxes)
416 |         mask = scores > threshold
417 |         labels = tf.boolean_mask(labels, mask)
418 |         bboxes = tf.boolean_mask(bboxes, mask)
419 |         return labels, bboxes
420 | 
421 | 
422 | def bboxes_filter_labels(labels, bboxes,
423 |                          out_labels=[], num_classes=np.inf,
424 |                          scope=None):
425 |     """Filter out labels from a collection. Typically used to get rid
426 |     of DontCare elements. Also remove elements based on the number of classes.
427 | 
428 |     Return:
429 |         labels, bboxes: Filtered elements.
430 |     """
431 |     with tf.name_scope(scope, 'bboxes_filter_labels', [labels, bboxes]):
432 |         mask = tf.less(labels, num_classes)
433 |         for l in out_labels:
434 |             mask = tf.logical_and(mask, tf.not_equal(labels, l))
435 |         labels = tf.boolean_mask(labels, mask)
436 |         bboxes = tf.boolean_mask(bboxes, mask)
437 |         return labels, bboxes
438 | 
439 | 
440 | # =========================================================================== #
441 | # Standard boxes computation.
442 | # =========================================================================== # 443 | def bboxes_jaccard(bbox_ref, bboxes, name=None): 444 | """Compute jaccard score between a reference box and a collection 445 | of bounding boxes. 446 | 447 | Args: 448 | bbox_ref: (N, 4) or (4,) Tensor with reference bounding box(es). 449 | bboxes: (N, 4) Tensor, collection of bounding boxes. 450 | Return: 451 | (N,) Tensor with Jaccard scores. 452 | """ 453 | with tf.name_scope(name, 'bboxes_jaccard'): 454 | # Should be more efficient to first transpose. 455 | bboxes = tf.transpose(bboxes) 456 | bbox_ref = tf.transpose(bbox_ref) 457 | # Intersection bbox and volume. 458 | int_ymin = tf.maximum(bboxes[0], bbox_ref[0]) 459 | int_xmin = tf.maximum(bboxes[1], bbox_ref[1]) 460 | int_ymax = tf.minimum(bboxes[2], bbox_ref[2]) 461 | int_xmax = tf.minimum(bboxes[3], bbox_ref[3]) 462 | h = tf.maximum(int_ymax - int_ymin, 0.) 463 | w = tf.maximum(int_xmax - int_xmin, 0.) 464 | # Volumes. 465 | inter_vol = h * w 466 | union_vol = -inter_vol \ 467 | + (bboxes[2] - bboxes[0]) * (bboxes[3] - bboxes[1]) \ 468 | + (bbox_ref[2] - bbox_ref[0]) * (bbox_ref[3] - bbox_ref[1]) 469 | jaccard = tfe_math.safe_divide(inter_vol, union_vol, 'jaccard') 470 | return jaccard 471 | 472 | 473 | def bboxes_intersection(bbox_ref, bboxes, name=None): 474 | """Compute relative intersection between a reference box and a 475 | collection of bounding boxes. Namely, compute the quotient between 476 | intersection area and box area. 477 | 478 | Args: 479 | bbox_ref: (N, 4) or (4,) Tensor with reference bounding box(es). 480 | bboxes: (N, 4) Tensor, collection of bounding boxes. 481 | Return: 482 | (N,) Tensor with relative intersection. 483 | """ 484 | with tf.name_scope(name, 'bboxes_intersection'): 485 | # Should be more efficient to first transpose. 486 | bboxes = tf.transpose(bboxes) 487 | bbox_ref = tf.transpose(bbox_ref) 488 | # Intersection bbox and volume. 489 | int_ymin = tf.maximum(bboxes[0], bbox_ref[0]) 490 | int_xmin = tf.maximum(bboxes[1], bbox_ref[1]) 491 | int_ymax = tf.minimum(bboxes[2], bbox_ref[2]) 492 | int_xmax = tf.minimum(bboxes[3], bbox_ref[3]) 493 | h = tf.maximum(int_ymax - int_ymin, 0.) 494 | w = tf.maximum(int_xmax - int_xmin, 0.) 495 | # Volumes. 496 | inter_vol = h * w 497 | bboxes_vol = (bboxes[2] - bboxes[0]) * (bboxes[3] - bboxes[1]) 498 | scores = tfe_math.safe_divide(inter_vol, bboxes_vol, 'intersection') 499 | return scores 500 | -------------------------------------------------------------------------------- /tf_extended/image.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gxd1994/TextBoxes-TensorFlow/7ae19de6c4e7bccaa5695762bd1f0864b9f4e593/tf_extended/image.py -------------------------------------------------------------------------------- /tf_extended/math.py: -------------------------------------------------------------------------------- 1 | # Copyright 2017 Paul Balanca. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | """TF Extended: additional math functions. 16 | """ 17 | import tensorflow as tf 18 | 19 | from tensorflow.python.ops import array_ops 20 | from tensorflow.python.ops import math_ops 21 | from tensorflow.python.framework import dtypes 22 | from tensorflow.python.framework import ops 23 | 24 | 25 | def safe_divide(numerator, denominator, name): 26 | """Divides two values, returning 0 if the denominator is <= 0. 27 | Args: 28 | numerator: A real `Tensor`. 29 | denominator: A real `Tensor`, with dtype matching `numerator`. 30 | name: Name for the returned op. 31 | Returns: 32 | 0 if `denominator` <= 0, else `numerator` / `denominator` 33 | """ 34 | return tf.where( 35 | math_ops.greater(denominator, 0), 36 | math_ops.divide(numerator, denominator), 37 | tf.zeros_like(numerator), 38 | name=name) 39 | 40 | 41 | def cummax(x, reverse=False, name=None): 42 | """Compute the cumulative maximum of the tensor `x` along `axis`. This 43 | operation is similar to the more classic `cumsum`. Only support 1D Tensor 44 | for now. 45 | 46 | Args: 47 | x: A `Tensor`. Must be one of the following types: `float32`, `float64`, 48 | `int64`, `int32`, `uint8`, `uint16`, `int16`, `int8`, `complex64`, 49 | `complex128`, `qint8`, `quint8`, `qint32`, `half`. 50 | axis: A `Tensor` of type `int32` (default: 0). 51 | reverse: A `bool` (default: False). 52 | name: A name for the operation (optional). 53 | Returns: 54 | A `Tensor`. Has the same type as `x`. 55 | """ 56 | with ops.name_scope(name, "Cummax", [x]) as name: 57 | x = ops.convert_to_tensor(x, name="x") 58 | # Not very optimal: should directly integrate reverse into tf.scan. 59 | if reverse: 60 | x = tf.reverse(x, axis=[0]) 61 | # 'Accumlating' maximum: ensure it is always increasing. 62 | cmax = tf.scan(lambda a, y: tf.maximum(a, y), x, 63 | initializer=None, parallel_iterations=1, 64 | back_prop=False, swap_memory=False) 65 | if reverse: 66 | cmax = tf.reverse(cmax, axis=[0]) 67 | return cmax 68 | -------------------------------------------------------------------------------- /tf_extended/metrics.py: -------------------------------------------------------------------------------- 1 | # Copyright 2017 Paul Balanca. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | """TF Extended: additional metrics. 
/tf_extended/metrics.py:
--------------------------------------------------------------------------------
1 | # Copyright 2017 Paul Balanca. All Rights Reserved.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | #     http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | # ==============================================================================
15 | """TF Extended: additional metrics.
16 | """
17 | import tensorflow as tf
18 | import numpy as np
19 | 
20 | from tensorflow.contrib.framework.python.ops import variables as contrib_variables
21 | from tensorflow.python.framework import dtypes
22 | from tensorflow.python.framework import ops
23 | from tensorflow.python.ops import array_ops
24 | from tensorflow.python.ops import math_ops
25 | from tensorflow.python.ops import nn
26 | from tensorflow.python.ops import state_ops
27 | from tensorflow.python.ops import variable_scope
28 | from tensorflow.python.ops import variables
29 | 
30 | from tf_extended import math as tfe_math
31 | 
32 | 
33 | # =========================================================================== #
34 | # TensorFlow utils
35 | # =========================================================================== #
36 | def _create_local(name, shape, collections=None, validate_shape=True,
37 |                   dtype=dtypes.float32):
38 |     """Creates a new local variable.
39 |     Args:
40 |       name: The name of the new or existing variable.
41 |       shape: Shape of the new or existing variable.
42 |       collections: A list of collection names to which the Variable will be added.
43 |       validate_shape: Whether to validate the shape of the variable.
44 |       dtype: Data type of the variables.
45 |     Returns:
46 |       The created variable.
47 |     """
48 |     # Make sure local variables are added to tf.GraphKeys.LOCAL_VARIABLES
49 |     collections = list(collections or [])
50 |     collections += [ops.GraphKeys.LOCAL_VARIABLES]
51 |     return variables.Variable(
52 |         initial_value=array_ops.zeros(shape, dtype=dtype),
53 |         name=name,
54 |         trainable=False,
55 |         collections=collections,
56 |         validate_shape=validate_shape)
57 | 
58 | 
59 | def _safe_div(numerator, denominator, name):
60 |     """Divides two values, returning 0 if the denominator is <= 0.
61 |     Args:
62 |       numerator: A real `Tensor`.
63 |       denominator: A real `Tensor`, with dtype matching `numerator`.
64 |       name: Name for the returned op.
65 |     Returns:
66 |       0 if `denominator` <= 0, else `numerator` / `denominator`
67 |     """
68 |     return tf.where(
69 |         math_ops.greater(denominator, 0),
70 |         math_ops.divide(numerator, denominator),
71 |         tf.zeros_like(numerator),
72 |         name=name)
73 | 
74 | 
75 | def _broadcast_weights(weights, values):
76 |     """Broadcast `weights` to the same shape as `values`.
77 |     This returns a version of `weights` following the same broadcast rules as
78 |     `mul(weights, values)`. When computing a weighted average, use this function
79 |     to broadcast `weights` before summing them; e.g.,
80 |     `reduce_sum(w * v) / reduce_sum(_broadcast_weights(w, v))`.
81 |     Args:
82 |       weights: `Tensor` whose shape is broadcastable to `values`.
83 |       values: `Tensor` of any shape.
84 |     Returns:
85 |       `weights` broadcast to `values` shape.
86 |     """
87 |     weights_shape = weights.get_shape()
88 |     values_shape = values.get_shape()
89 |     if (weights_shape.is_fully_defined() and
90 |             values_shape.is_fully_defined() and
91 |             weights_shape.is_compatible_with(values_shape)):
92 |         return weights
93 |     return math_ops.multiply(
94 |         weights, array_ops.ones_like(values), name='broadcast_weights')
95 | 
96 | 
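# These three helpers are the scaffolding for the streaming metrics below:
# _create_local builds the growing accumulator variables, _safe_div guards
# the precision/recall ratios, and _broadcast_weights aligns a weight tensor
# with the values it scales. A quick sketch of the broadcast behaviour
# (invented shapes): a (2, 1) weight column against (2, 3) values.
#
#     w = tf.constant([[1.], [2.]])
#     v = tf.zeros((2, 3))
#     bw = _broadcast_weights(w, v)    # -> (2, 3), rows weighted 1. and 2.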
97 | # =========================================================================== #
98 | # TF Extended metrics: TP and FP arrays.
99 | # =========================================================================== #
100 | def precision_recall(num_gbboxes, num_detections, tp, fp, scores,
101 |                      dtype=tf.float64, scope=None):
102 |     """Compute precision and recall from scores, true positive and false
103 |     positive boolean arrays.
104 |     """
105 |     # Input dictionaries: dict outputs as streaming metrics.
106 |     if isinstance(scores, dict):
107 |         d_precision = {}
108 |         d_recall = {}
109 |         for c in num_gbboxes.keys():
110 |             scope = 'precision_recall_%s' % c
111 |             p, r = precision_recall(num_gbboxes[c], num_detections[c],
112 |                                     tp[c], fp[c], scores[c],
113 |                                     dtype, scope)
114 |             d_precision[c] = p
115 |             d_recall[c] = r
116 |         return d_precision, d_recall
117 | 
118 |     # Sort by score.
119 |     with tf.name_scope(scope, 'precision_recall',
120 |                        [num_gbboxes, num_detections, tp, fp, scores]):
121 |         # Sort detections by score.
122 |         scores, idxes = tf.nn.top_k(scores, k=num_detections, sorted=True)
123 |         tp = tf.gather(tp, idxes)
124 |         fp = tf.gather(fp, idxes)
125 |         # Compute recall and precision.
126 |         tp = tf.cumsum(tf.cast(tp, dtype), axis=0)
127 |         fp = tf.cumsum(tf.cast(fp, dtype), axis=0)
128 |         recall = _safe_div(tp, tf.cast(num_gbboxes, dtype), 'recall')
129 |         precision = _safe_div(tp, tp + fp, 'precision')
130 |         return tf.tuple([precision, recall])
131 | 
132 | 
133 | def streaming_tp_fp_arrays(num_gbboxes, tp, fp, scores,
134 |                            remove_zero_scores=True,
135 |                            metrics_collections=None,
136 |                            updates_collections=None,
137 |                            name=None):
138 |     """Streaming computation of True and False Positive arrays. This metric
139 |     also keeps track of scores and the number of groundtruth objects.
140 |     """
141 |     # Input dictionaries: dict outputs as streaming metrics.
142 |     if isinstance(scores, dict) or isinstance(fp, dict):
143 |         d_values = {}
144 |         d_update_ops = {}
145 |         for c in num_gbboxes.keys():
146 |             scope = 'streaming_tp_fp_%s' % c
147 |             v, up = streaming_tp_fp_arrays(num_gbboxes[c], tp[c], fp[c], scores[c],
148 |                                            remove_zero_scores,
149 |                                            metrics_collections,
150 |                                            updates_collections,
151 |                                            name=scope)
152 |             d_values[c] = v
153 |             d_update_ops[c] = up
154 |         return d_values, d_update_ops
155 | 
156 |     # Input Tensors...
157 |     with variable_scope.variable_scope(name, 'streaming_tp_fp',
158 |                                        [num_gbboxes, tp, fp, scores]):
159 |         num_gbboxes = math_ops.to_int64(num_gbboxes)
160 |         scores = math_ops.to_float(scores)
161 |         stype = tf.bool
162 |         tp = tf.cast(tp, stype)
163 |         fp = tf.cast(fp, stype)
164 |         # Reshape TP and FP tensors and clean away 0 class values.
165 |         scores = tf.reshape(scores, [-1])
166 |         tp = tf.reshape(tp, [-1])
167 |         fp = tf.reshape(fp, [-1])
168 |         # Remove entries where TP and FP are both false.
169 |         mask = tf.logical_or(tp, fp)
170 |         if remove_zero_scores:
171 |             rm_threshold = 1e-4
172 |             mask = tf.logical_and(mask, tf.greater(scores, rm_threshold))
173 |             scores = tf.boolean_mask(scores, mask)
174 |             tp = tf.boolean_mask(tp, mask)
175 |             fp = tf.boolean_mask(fp, mask)
176 | 
177 |         # Local variables accumulating information over batches.
178 |         v_nobjects = _create_local('v_num_gbboxes', shape=[], dtype=tf.int64)
179 |         v_ndetections = _create_local('v_num_detections', shape=[], dtype=tf.int32)
180 |         v_scores = _create_local('v_scores', shape=[0, ])
181 |         v_tp = _create_local('v_tp', shape=[0, ], dtype=stype)
182 |         v_fp = _create_local('v_fp', shape=[0, ], dtype=stype)
183 | 
184 |         # Update operations.
185 |         nobjects_op = state_ops.assign_add(v_nobjects,
186 |                                            tf.reduce_sum(num_gbboxes))
187 |         ndetections_op = state_ops.assign_add(v_ndetections,
188 |                                               tf.size(scores, out_type=tf.int32))
189 |         scores_op = state_ops.assign(v_scores, tf.concat([v_scores, scores], axis=0),
190 |                                      validate_shape=False)
191 |         tp_op = state_ops.assign(v_tp, tf.concat([v_tp, tp], axis=0),
192 |                                  validate_shape=False)
193 |         fp_op = state_ops.assign(v_fp, tf.concat([v_fp, fp], axis=0),
194 |                                  validate_shape=False)
195 | 
196 |         # Value and update ops.
197 |         val = (v_nobjects, v_ndetections, v_tp, v_fp, v_scores)
198 |         with ops.control_dependencies([nobjects_op, ndetections_op,
199 |                                        scores_op, tp_op, fp_op]):
200 |             update_op = (nobjects_op, ndetections_op, tp_op, fp_op, scores_op)
201 | 
202 |         if metrics_collections:
203 |             ops.add_to_collections(metrics_collections, val)
204 |         if updates_collections:
205 |             ops.add_to_collections(updates_collections, update_op)
206 |         return val, update_op
207 | 
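# Typical evaluation wiring for the two functions above (a sketch; the match
# step producing the per-detection tp/fp booleans is assumed to exist):
#
#     val, update_op = streaming_tp_fp_arrays(n_gt, tp_match, fp_match, scores)
#     n_gt_acc, n_det_acc, v_tp, v_fp, v_scores = val
#     prec, rec = precision_recall(n_gt_acc, n_det_acc, v_tp, v_fp, v_scores)
#
# Run update_op once per evaluation batch, then evaluate prec/rec (and the
# average precision ops below) after the last batch.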
208 | 
209 | # =========================================================================== #
210 | # Average precision computations.
211 | # =========================================================================== #
212 | def average_precision_voc12(precision, recall, name=None):
213 |     """Compute (interpolated) average precision from precision and recall Tensors.
214 | 
215 |     The implementation follows Pascal 2012 and ILSVRC guidelines.
216 |     See also: https://sanchom.wordpress.com/tag/average-precision/
217 |     """
218 |     with tf.name_scope(name, 'average_precision_voc12', [precision, recall]):
219 |         # Convert to float64 to decrease error on Riemann sums.
220 |         precision = tf.cast(precision, dtype=tf.float64)
221 |         recall = tf.cast(recall, dtype=tf.float64)
222 | 
223 |         # Add bounds values to precision and recall.
224 |         precision = tf.concat([[0.], precision, [0.]], axis=0)
225 |         recall = tf.concat([[0.], recall, [1.]], axis=0)
226 |         # Ensures precision is increasing in reverse order.
227 |         precision = tfe_math.cummax(precision, reverse=True)
228 | 
229 |         # Riemann sums for estimating the integral.
230 |         # mean_pre = (precision[1:] + precision[:-1]) / 2.
231 |         mean_pre = precision[1:]
232 |         diff_rec = recall[1:] - recall[:-1]
233 |         ap = tf.reduce_sum(mean_pre * diff_rec)
234 |         return ap
235 | 
236 | 
237 | def average_precision_voc07(precision, recall, name=None):
238 |     """Compute (interpolated) average precision from precision and recall Tensors.
239 | 
240 |     The implementation follows Pascal 2007 guidelines.
241 |     See also: https://sanchom.wordpress.com/tag/average-precision/
242 |     """
243 |     with tf.name_scope(name, 'average_precision_voc07', [precision, recall]):
244 |         # Convert to float64 to decrease error on cumulated sums.
245 |         precision = tf.cast(precision, dtype=tf.float64)
246 |         recall = tf.cast(recall, dtype=tf.float64)
247 |         # Add zero-limit value to avoid any boundary problem...
248 |         precision = tf.concat([precision, [0.]], axis=0)
249 |         recall = tf.concat([recall, [np.inf]], axis=0)
250 | 
251 |         # Sample the curve at 11 recall points (0.0 to 1.0).
252 |         l_aps = []
253 |         for t in np.arange(0., 1.1, 0.1):
254 |             mask = tf.greater_equal(recall, t)
255 |             v = tf.reduce_max(tf.boolean_mask(precision, mask))
256 |             l_aps.append(v / 11.)
257 |         ap = tf.add_n(l_aps)
258 |         return ap
259 | 
260 | 
261 | def precision_recall_values(xvals, precision, recall, name=None):
262 |     """Compute values on the precision/recall curve.
263 | 
264 |     Args:
265 |       xvals: Python list of recall values (floats);
266 |       precision: 1D Tensor, decreasing.
267 |       recall: 1D Tensor, increasing.
268 |     Return:
269 |       list of precision values.
270 |     """
271 |     with ops.name_scope(name, "precision_recall_values",
272 |                         [precision, recall]) as name:
273 |         # Add bounds values to precision and recall.
274 |         precision = tf.concat([[0.], precision, [0.]], axis=0)
275 |         recall = tf.concat([[0.], recall, [1.]], axis=0)
276 |         precision = tfe_math.cummax(precision, reverse=True)
277 | 
278 |         prec_values = []
279 |         for x in xvals:
280 |             mask = tf.less_equal(recall, x)
281 |             val = tf.reduce_min(tf.boolean_mask(precision, mask))
282 |             prec_values.append(val)
283 |         return tf.tuple(prec_values)
284 | 
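# A toy check of the two AP variants on precision [1.0, 0.5] at recall
# [0.5, 1.0] (invented values; run in a TF 1.x session):
#
#     ap12 = average_precision_voc12(tf.constant([1.0, 0.5]),
#                                    tf.constant([0.5, 1.0]))
#     # Integral of the interpolated curve: 0.5*1.0 + 0.5*0.5 = 0.75.
#     ap07 = average_precision_voc07(tf.constant([1.0, 0.5]),
#                                    tf.constant([0.5, 1.0]))
#     # 11-point average: (6*1.0 + 5*0.5) / 11 ~= 0.773.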
285 | 
286 | # =========================================================================== #
287 | # TF Extended metrics: old stuff!
288 | # =========================================================================== #
289 | def _precision_recall(n_gbboxes, n_detections, scores, tp, fp, scope=None):
290 |     """Compute precision and recall from scores, true positive and false
291 |     positive boolean arrays.
292 |     """
293 |     # Sort by score.
294 |     with tf.name_scope(scope, 'prec_rec', [n_gbboxes, scores, tp, fp]):
295 |         # Sort detections by score.
296 |         scores, idxes = tf.nn.top_k(scores, k=n_detections, sorted=True)
297 |         tp = tf.gather(tp, idxes)
298 |         fp = tf.gather(fp, idxes)
299 |         # Compute recall and precision.
300 |         dtype = tf.float64
301 |         tp = tf.cumsum(tf.cast(tp, dtype), axis=0)
302 |         fp = tf.cumsum(tf.cast(fp, dtype), axis=0)
303 |         recall = _safe_div(tp, tf.cast(n_gbboxes, dtype), 'recall')
304 |         precision = _safe_div(tp, tp + fp, 'precision')
305 | 
306 |         return tf.tuple([precision, recall])
307 | 
308 | 
309 | def streaming_precision_recall_arrays(n_gbboxes, rclasses, rscores,
310 |                                       tp_tensor, fp_tensor,
311 |                                       remove_zero_labels=True,
312 |                                       metrics_collections=None,
313 |                                       updates_collections=None,
314 |                                       name=None):
315 |     """Streaming computation of precision / recall arrays. This metric
316 |     keeps track of boolean True positive and False positive arrays.
317 |     """
318 |     with variable_scope.variable_scope(name, 'stream_precision_recall',
319 |                                        [n_gbboxes, rclasses, tp_tensor, fp_tensor]):
320 |         n_gbboxes = math_ops.to_int64(n_gbboxes)
321 |         rclasses = math_ops.to_int64(rclasses)
322 |         rscores = math_ops.to_float(rscores)
323 | 
324 |         stype = tf.int32
325 |         tp_tensor = tf.cast(tp_tensor, stype)
326 |         fp_tensor = tf.cast(fp_tensor, stype)
327 | 
328 |         # Reshape TP and FP tensors and clean away 0 class values.
329 |         rclasses = tf.reshape(rclasses, [-1])
330 |         rscores = tf.reshape(rscores, [-1])
331 |         tp_tensor = tf.reshape(tp_tensor, [-1])
332 |         fp_tensor = tf.reshape(fp_tensor, [-1])
333 |         if remove_zero_labels:
334 |             mask = tf.greater(rclasses, 0)
335 |             rclasses = tf.boolean_mask(rclasses, mask)
336 |             rscores = tf.boolean_mask(rscores, mask)
337 |             tp_tensor = tf.boolean_mask(tp_tensor, mask)
338 |             fp_tensor = tf.boolean_mask(fp_tensor, mask)
339 | 
340 |         # Local variables accumulating information over batches.
341 |         v_nobjects = _create_local('v_nobjects', shape=[], dtype=tf.int64)
342 |         v_ndetections = _create_local('v_ndetections', shape=[], dtype=tf.int32)
343 |         v_scores = _create_local('v_scores', shape=[0, ])
344 |         v_tp = _create_local('v_tp', shape=[0, ], dtype=stype)
345 |         v_fp = _create_local('v_fp', shape=[0, ], dtype=stype)
346 | 
347 |         # Update operations.
348 |         nobjects_op = state_ops.assign_add(v_nobjects,
349 |                                            tf.reduce_sum(n_gbboxes))
350 |         ndetections_op = state_ops.assign_add(v_ndetections,
351 |                                               tf.size(rscores, out_type=tf.int32))
352 |         scores_op = state_ops.assign(v_scores, tf.concat([v_scores, rscores], axis=0),
353 |                                      validate_shape=False)
354 |         tp_op = state_ops.assign(v_tp, tf.concat([v_tp, tp_tensor], axis=0),
355 |                                  validate_shape=False)
356 |         fp_op = state_ops.assign(v_fp, tf.concat([v_fp, fp_tensor], axis=0),
357 |                                  validate_shape=False)
358 | 
359 |         # Precision and recall computations.
360 |         # r = _precision_recall(nobjects_op, scores_op, tp_op, fp_op, 'value')
361 |         r = _precision_recall(v_nobjects, v_ndetections, v_scores,
362 |                               v_tp, v_fp, 'value')
363 | 
364 |         with ops.control_dependencies([nobjects_op, ndetections_op,
365 |                                        scores_op, tp_op, fp_op]):
366 |             update_op = _precision_recall(nobjects_op, ndetections_op,
367 |                                           scores_op, tp_op, fp_op, 'update_op')
368 | 
369 |         # update_op = tf.Print(update_op,
370 |         #                      [tf.reduce_sum(tf.cast(mask, tf.int64)),
371 |         #                       tf.reduce_sum(tf.cast(mask2, tf.int64)),
372 |         #                       tf.reduce_min(rscores),
373 |         #                       tf.reduce_sum(n_gbboxes)],
374 |         #                      'Metric: ')
375 |         # Some debugging stuff!
376 |         # update_op = tf.Print(update_op,
377 |         #                      [tf.shape(tp_op),
378 |         #                       tf.reduce_sum(tf.cast(tp_op, tf.int64), axis=0)],
379 |         #                      'TP and FP shape: ')
380 |         # update_op[0] = tf.Print(update_op,
381 |         #                      [nobjects_op],
382 |         #                      '# Groundtruth bboxes: ')
383 |         # update_op = tf.Print(update_op,
384 |         #                      [update_op[0][0],
385 |         #                       update_op[0][-1],
386 |         #                       tf.reduce_min(update_op[0]),
387 |         #                       tf.reduce_max(update_op[0]),
388 |         #                       tf.reduce_min(update_op[1]),
389 |         #                       tf.reduce_max(update_op[1])],
390 |         #                      'Precision and recall :')
391 | 
392 |         if metrics_collections:
393 |             ops.add_to_collections(metrics_collections, r)
394 |         if updates_collections:
395 |             ops.add_to_collections(updates_collections, update_op)
396 |         return r, update_op
397 | 
398 | 
--------------------------------------------------------------------------------
/tf_extended/tensors.py:
--------------------------------------------------------------------------------
1 | # Copyright 2017 Paul Balanca. All Rights Reserved.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | #     http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | # ==============================================================================
15 | """TF Extended: additional tensors operations.
16 | """
17 | import tensorflow as tf
18 | 
19 | from tensorflow.contrib.framework.python.ops import variables as contrib_variables
20 | from tensorflow.contrib.metrics.python.ops import set_ops
21 | from tensorflow.python.framework import dtypes
22 | from tensorflow.python.framework import ops
23 | from tensorflow.python.framework import sparse_tensor
24 | from tensorflow.python.ops import array_ops
25 | from tensorflow.python.ops import check_ops
26 | from tensorflow.python.ops import control_flow_ops
27 | from tensorflow.python.ops import math_ops
28 | from tensorflow.python.ops import nn
29 | from tensorflow.python.ops import state_ops
30 | from tensorflow.python.ops import variable_scope
31 | from tensorflow.python.ops import variables
32 | 
33 | 
34 | def get_shape(x, rank=None):
35 |     """Returns the dimensions of a Tensor as a list of integers or scalar tensors.
36 | 
37 |     Args:
38 |       x: N-d Tensor;
39 |       rank: Rank of the Tensor. If None, will try to guess it.
40 |     Returns:
41 |       A list of `[d1, d2, ..., dN]` corresponding to the dimensions of the
42 |       input tensor. Dimensions that are statically known are python integers,
43 |       otherwise they are integer scalar tensors.
44 |     """
45 |     if x.get_shape().is_fully_defined():
46 |         return x.get_shape().as_list()
47 |     else:
48 |         static_shape = x.get_shape()
49 |         if rank is None:
50 |             static_shape = static_shape.as_list()
51 |             rank = len(static_shape)
52 |         else:
53 |             static_shape = x.get_shape().with_rank(rank).as_list()
54 |         dynamic_shape = tf.unstack(tf.shape(x), rank)
55 |         return [s if s is not None else d
56 |                 for s, d in zip(static_shape, dynamic_shape)]
57 | 
58 | 
59 | def pad_axis(x, offset, size, axis=0, name=None):
60 |     """Pad a tensor on an axis, with a given offset and output size.
61 |     The tensor is padded with zero (i.e. CONSTANT mode). Note that if
62 |     `size` is smaller than the existing size plus `offset`, the output
63 |     keeps the larger dimension.
64 | 
65 |     Args:
66 |       x: Tensor to pad;
67 |       offset: Offset to add on the dimension chosen;
68 |       size: Final size of the dimension.
69 |     Return:
70 |       Padded tensor whose dimension on `axis` is `size`, or greater if
71 |       the input vector was larger.
72 |     """
73 |     with tf.name_scope(name, 'pad_axis'):
74 |         shape = get_shape(x)
75 |         rank = len(shape)
76 |         # Padding description.
77 |         new_size = tf.maximum(size-offset-shape[axis], 0)
78 |         pad1 = tf.stack([0]*axis + [offset] + [0]*(rank-axis-1))
79 |         pad2 = tf.stack([0]*axis + [new_size] + [0]*(rank-axis-1))
80 |         paddings = tf.stack([pad1, pad2], axis=1)
81 |         x = tf.pad(x, paddings, mode='CONSTANT')
82 |         # Reshape, to get fully defined shape if possible.
83 |         # TODO: fix with tf.slice
84 |         shape[axis] = size
85 |         x = tf.reshape(x, tf.stack(shape))
86 |         return x
87 | 
88 | 
89 | # def select_at_index(idx, val, t):
90 | #     """Return a tensor.
91 | #     """
92 | #     idx = tf.expand_dims(tf.expand_dims(idx, 0), 0)
93 | #     val = tf.expand_dims(val, 0)
94 | #     t = t + tf.scatter_nd(idx, val, tf.shape(t))
95 | #     return t
96 | 
--------------------------------------------------------------------------------
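pad_axis is the helper that forces a variable number of boxes into fixed-size
tensors so they can be batched. A small sketch with made-up boxes (TF 1.x
session API):

    import tensorflow as tf
    from tf_extended import tensors as tfe_tensors

    boxes = tf.constant([[0.1, 0.1, 0.4, 0.4],
                         [0.2, 0.2, 0.6, 0.6],
                         [0.5, 0.5, 0.9, 0.9]])
    # Pad the first axis out to a fixed budget of 5 rows.
    padded = tfe_tensors.pad_axis(boxes, offset=0, size=5, axis=0)

    with tf.Session() as sess:
        print(sess.run(padded).shape)  # (5, 4): two all-zero rows appended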
/tf_utils.py:
--------------------------------------------------------------------------------
1 | # Copyright 2016 Paul Balanca. All Rights Reserved.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | #     http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | # ==============================================================================
15 | """Diverse TensorFlow utils, for training, evaluation and so on!
16 | """
17 | from __future__ import absolute_import
18 | from __future__ import division
19 | from __future__ import print_function
20 | import os
21 | from pprint import pprint
22 | 
23 | import tensorflow as tf
24 | from tensorflow.contrib.slim.python.slim.data import parallel_reader
25 | 
26 | slim = tf.contrib.slim
27 | 
28 | 
29 | # =========================================================================== #
30 | # General tools.
31 | # =========================================================================== #
32 | def reshape_list(l, shape=None):
33 |     """Reshape list of (list): 1D to 2D or the other way around.
34 | 
35 |     Args:
36 |       l: List or List of list.
37 |       shape: 1D or 2D shape.
38 |     Return:
39 |       Reshaped list.
40 |     """
41 |     r = []
42 |     if shape is None:
43 |         # Flatten everything.
44 |         for a in l:
45 |             if isinstance(a, (list, tuple)):
46 |                 r = r + list(a)
47 |             else:
48 |                 r.append(a)
49 |     else:
50 |         # Reshape to list of list.
51 |         i = 0
52 |         for s in shape:
53 |             if s == 1:
54 |                 r.append(l[i])
55 |             else:
56 |                 r.append(l[i:i+s])
57 |             i += s
58 |     return r
59 | 
60 | 
61 | # =========================================================================== #
62 | # Training utils.
63 | # =========================================================================== #
64 | def print_configuration(flags, ssd_params, data_sources, save_dir=None):
65 |     """Print the training configuration.
66 |     """
67 |     def print_config(stream=None):
68 |         print('\n# =========================================================================== #', file=stream)
69 |         print('# Training | Evaluation flags:', file=stream)
70 |         print('# =========================================================================== #', file=stream)
71 |         pprint(flags, stream=stream)
72 | 
73 |         print('\n# =========================================================================== #', file=stream)
74 |         print('# SSD net parameters:', file=stream)
75 |         print('# =========================================================================== #', file=stream)
76 |         pprint(dict(ssd_params._asdict()), stream=stream)
77 | 
78 |         print('\n# =========================================================================== #', file=stream)
79 |         print('# Training | Evaluation dataset files:', file=stream)
80 |         print('# =========================================================================== #', file=stream)
81 |         data_files = parallel_reader.get_data_files(data_sources)
82 |         pprint(data_files, stream=stream)
83 |         print('', file=stream)
84 | 
85 |     print_config(None)
86 |     # Save to a text file as well.
87 |     if save_dir is not None:
88 |         if not os.path.exists(save_dir):
89 |             os.makedirs(save_dir)
90 |         path = os.path.join(save_dir, 'training_config.txt')
91 |         with open(path, "w") as out:
92 |             print_config(out)
93 | 
94 | 
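# reshape_list keeps the batching plumbing readable: flatten a nested list of
# (predictions, localisations, ...) tensors before tf.train.batch, then
# rebuild the nesting from the recorded shape afterwards. A quick sketch with
# invented values:
#
#     reshape_list([1, [2, 3], [4, 5, 6]])           # -> [1, 2, 3, 4, 5, 6]
#     reshape_list([1, 2, 3, 4, 5, 6], [1, 2, 3])    # -> [1, [2, 3], [4, 5, 6]]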
95 | def configure_learning_rate(flags, num_samples_per_epoch, global_step):
96 |     """Configures the learning rate.
97 | 
98 |     Args:
99 |       num_samples_per_epoch: The number of samples in each epoch of training.
100 |       global_step: The global_step tensor.
101 |     Returns:
102 |       A `Tensor` representing the learning rate.
103 |     """
104 |     decay_steps = int(num_samples_per_epoch / flags.batch_size *
105 |                       flags.num_epochs_per_decay)
106 | 
107 |     if flags.learning_rate_decay_type == 'exponential':
108 |         return tf.train.exponential_decay(flags.learning_rate,
109 |                                           global_step,
110 |                                           decay_steps,
111 |                                           flags.learning_rate_decay_factor,
112 |                                           staircase=True,
113 |                                           name='exponential_decay_learning_rate')
114 |     elif flags.learning_rate_decay_type == 'fixed':
115 |         return tf.constant(flags.learning_rate, name='fixed_learning_rate')
116 |     elif flags.learning_rate_decay_type == 'polynomial':
117 |         return tf.train.polynomial_decay(flags.learning_rate,
118 |                                          global_step,
119 |                                          decay_steps,
120 |                                          flags.end_learning_rate,
121 |                                          power=1.0,
122 |                                          cycle=False,
123 |                                          name='polynomial_decay_learning_rate')
124 |     else:
125 |         raise ValueError('learning_rate_decay_type [%s] was not recognized' %
126 |                          flags.learning_rate_decay_type)
127 | 
128 | 
129 | def configure_optimizer(flags, learning_rate):
130 |     """Configures the optimizer used for training.
131 | 
132 |     Args:
133 |       learning_rate: A scalar or `Tensor` learning rate.
134 |     Returns:
135 |       An instance of an optimizer.
136 |     """
137 |     if flags.optimizer == 'adadelta':
138 |         optimizer = tf.train.AdadeltaOptimizer(
139 |             learning_rate,
140 |             rho=flags.adadelta_rho,
141 |             epsilon=flags.opt_epsilon)
142 |     elif flags.optimizer == 'adagrad':
143 |         optimizer = tf.train.AdagradOptimizer(
144 |             learning_rate,
145 |             initial_accumulator_value=flags.adagrad_initial_accumulator_value)
146 |     elif flags.optimizer == 'adam':
147 |         optimizer = tf.train.AdamOptimizer(
148 |             learning_rate,
149 |             beta1=flags.adam_beta1,
150 |             beta2=flags.adam_beta2,
151 |             epsilon=flags.opt_epsilon)
152 |     elif flags.optimizer == 'ftrl':
153 |         optimizer = tf.train.FtrlOptimizer(
154 |             learning_rate,
155 |             learning_rate_power=flags.ftrl_learning_rate_power,
156 |             initial_accumulator_value=flags.ftrl_initial_accumulator_value,
157 |             l1_regularization_strength=flags.ftrl_l1,
158 |             l2_regularization_strength=flags.ftrl_l2)
159 |     elif flags.optimizer == 'momentum':
160 |         optimizer = tf.train.MomentumOptimizer(
161 |             learning_rate,
162 |             momentum=flags.momentum,
163 |             name='Momentum')
164 |     elif flags.optimizer == 'rmsprop':
165 |         optimizer = tf.train.RMSPropOptimizer(
166 |             learning_rate,
167 |             decay=flags.rmsprop_decay,
168 |             momentum=flags.rmsprop_momentum,
169 |             epsilon=flags.opt_epsilon)
170 |     elif flags.optimizer == 'sgd':
171 |         optimizer = tf.train.GradientDescentOptimizer(learning_rate)
172 |     else:
173 |         raise ValueError('Optimizer [%s] was not recognized' % flags.optimizer)
174 |     return optimizer
175 | 
176 | 
177 | def add_variables_summaries(learning_rate):
178 |     summaries = []
179 |     for variable in slim.get_model_variables():
180 |         summaries.append(tf.summary.histogram(variable.op.name, variable))
181 |     summaries.append(tf.summary.scalar('training/Learning Rate', learning_rate))
182 |     return summaries
183 | 
184 | 
185 | def update_model_scope(var, ckpt_scope, new_scope):
186 |     # Note: ckpt_scope is unused; the checkpoint scope is hard-coded to 'vgg_16'.
187 |     return var.op.name.replace(new_scope, 'vgg_16')
188 | 
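# Both helpers are driven by command-line flags; a typical call site looks
# like the sketch below (flag container and loss tensor assumed):
#
#     global_step = slim.create_global_step()
#     learning_rate = configure_learning_rate(FLAGS, num_samples, global_step)
#     optimizer = configure_optimizer(FLAGS, learning_rate)
#     train_op = optimizer.minimize(total_loss, global_step=global_step)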
189 | def get_init_fn(flags):
190 |     """Returns a function run by the chief worker to warm-start the training.
191 |     Note that the init_fn is only run when initializing the model during the very
192 |     first global step.
193 | 
194 |     Returns:
195 |       An init function run by the supervisor.
196 |     """
197 |     if flags.checkpoint_path is None:
198 |         return None
199 |     # Warn the user if a checkpoint exists in the train_dir. Then ignore.
200 |     if tf.train.latest_checkpoint(flags.train_dir):
201 |         tf.logging.info(
202 |             'Ignoring --checkpoint_path because a checkpoint already exists in %s'
203 |             % flags.train_dir)
204 |         return None
205 | 
206 |     exclusions = []
207 |     if flags.checkpoint_exclude_scopes:
208 |         exclusions = [scope.strip()
209 |                       for scope in flags.checkpoint_exclude_scopes.split(',')]
210 | 
211 |     # TODO(sguada) variables.filter_variables()
212 |     variables_to_restore = []
213 |     for var in slim.get_model_variables():
214 |         excluded = False
215 |         for exclusion in exclusions:
216 |             if var.op.name.startswith(exclusion):
217 |                 excluded = True
218 |                 break
219 |         if not excluded:
220 |             variables_to_restore.append(var)
221 |     # Change model scope if necessary.
222 |     if flags.checkpoint_model_scope is not None:
223 |         variables_to_restore = \
224 |             {var.op.name.replace(flags.model_name,
225 |                                  flags.checkpoint_model_scope): var
226 |              for var in variables_to_restore}
227 | 
228 | 
229 |     if tf.gfile.IsDirectory(flags.checkpoint_path):
230 |         checkpoint_path = tf.train.latest_checkpoint(flags.checkpoint_path)
231 |     else:
232 |         checkpoint_path = flags.checkpoint_path
233 |     tf.logging.info('Fine-tuning from %s' % checkpoint_path)
234 | 
235 |     return slim.assign_from_checkpoint_fn(
236 |         checkpoint_path,
237 |         variables_to_restore,
238 |         ignore_missing_vars=flags.ignore_missing_vars)
239 | 
240 | 
241 | def get_variables_to_train(flags):
242 |     """Returns a list of variables to train.
243 | 
244 |     Returns:
245 |       A list of variables to train by the optimizer.
246 |     """
247 |     if flags.trainable_scopes is None:
248 |         return tf.trainable_variables()
249 |     else:
250 |         scopes = [scope.strip() for scope in flags.trainable_scopes.split(',')]
251 | 
252 |     variables_to_train = []
253 |     for scope in scopes:
254 |         variables = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope)
255 |         variables_to_train.extend(variables)
256 |     return variables_to_train
257 | 
258 | 
259 | # =========================================================================== #
260 | # Evaluation utils.
261 | # =========================================================================== #
262 | 
--------------------------------------------------------------------------------