├── .gitignore
├── LICENSE
├── README.md
├── __init__.py
├── data
    ├── __init__.py
    ├── data_augment.ipynb
    ├── data_generators.ipynb
    ├── pascal_voc_parser-pyfile.py
    └── pascal_voc_parser.ipynb
├── importing notebooks.ipynb
└── keras_frcnn
    ├── __init__.py
    ├── fixed_batch_normalization.ipynb
    ├── resnet.ipynb
    ├── roi_pooling_conv.ipynb
    └── vgg.ipynb


/.gitignore:
--------------------------------------------------------------------------------
  1 | # Byte-compiled / optimized / DLL files
  2 | __pycache__/
  3 | *.py[cod]
  4 | *$py.class
  5 | 
  6 | # C extensions
  7 | *.so
  8 | 
  9 | # Distribution / packaging
 10 | .Python
 11 | env/
 12 | build/
 13 | develop-eggs/
 14 | dist/
 15 | downloads/
 16 | eggs/
 17 | .eggs/
 18 | lib/
 19 | lib64/
 20 | parts/
 21 | sdist/
 22 | var/
 23 | wheels/
 24 | *.egg-info/
 25 | .installed.cfg
 26 | *.egg
 27 | 
 28 | # PyInstaller
 29 | #  Usually these files are written by a python script from a template
 30 | #  before PyInstaller builds the exe, so as to inject date/other infos into it.
 31 | *.manifest
 32 | *.spec
 33 | 
 34 | # Installer logs
 35 | pip-log.txt
 36 | pip-delete-this-directory.txt
 37 | 
 38 | # Unit test / coverage reports
 39 | htmlcov/
 40 | .tox/
 41 | .coverage
 42 | .coverage.*
 43 | .cache
 44 | nosetests.xml
 45 | coverage.xml
 46 | *.cover
 47 | .hypothesis/
 48 | 
 49 | # Translations
 50 | *.mo
 51 | *.pot
 52 | 
 53 | # Django stuff:
 54 | *.log
 55 | local_settings.py
 56 | 
 57 | # Flask stuff:
 58 | instance/
 59 | .webassets-cache
 60 | 
 61 | # Scrapy stuff:
 62 | .scrapy
 63 | 
 64 | # Sphinx documentation
 65 | docs/_build/
 66 | 
 67 | # PyBuilder
 68 | target/
 69 | 
 70 | # Jupyter Notebook
 71 | .ipynb_checkpoints
 72 | 
 73 | # pyenv
 74 | .python-version
 75 | 
 76 | # celery beat schedule file
 77 | celerybeat-schedule
 78 | 
 79 | # SageMath parsed files
 80 | *.sage.py
 81 | 
 82 | # dotenv
 83 | .env
 84 | 
 85 | # virtualenv
 86 | .venv
 87 | venv/
 88 | ENV/
 89 | 
 90 | # Spyder project settings
 91 | .spyderproject
 92 | .spyproject
 93 | 
 94 | # Rope project settings
 95 | .ropeproject
 96 | 
 97 | # mkdocs documentation
 98 | /site
 99 | 
100 | # mypy
101 | .mypy_cache/
102 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | MIT License
 2 | 
 3 | Copyright (c) 2017 Anderson Banihirwe
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Keras - Faster R-CNN: Towards Real-Time Object Detection with Region Proposal Networks
2 | 
3 | Keras implementation of the paper: Shaoqing Ren et al. [Faster R-CNN: Towards Real-Time Object Detection with Region Proposal Networks](https://arxiv.org/abs/1506.01497).
4 | 
5 | 
6 | 
7 | 


--------------------------------------------------------------------------------
/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/andersy005/keras-faster-rcnn/fbceef68d390cca3ee1e77c26189b6b72968448e/__init__.py


--------------------------------------------------------------------------------
/data/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/andersy005/keras-faster-rcnn/fbceef68d390cca3ee1e77c26189b6b72968448e/data/__init__.py


--------------------------------------------------------------------------------
/data/data_augment.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "code",
  5 |    "execution_count": 1,
  6 |    "metadata": {},
  7 |    "outputs": [],
  8 |    "source": [
  9 |     "import cv2\n",
 10 |     "import numpy as np\n",
 11 |     "import copy"
 12 |    ]
 13 |   },
 14 |   {
 15 |    "cell_type": "code",
 16 |    "execution_count": 2,
 17 |    "metadata": {},
 18 |    "outputs": [],
 19 |    "source": [
 20 |     "def augment(img_data, config, augment=True):\n",
 21 |     "    assert 'filepath' in img_data\n",
 22 |     "    assert 'bboxes' in img_data\n",
 23 |     "    assert 'width' in img_data\n",
 24 |     "    assert 'height' in img_data\n",
 25 |     "    \n",
 26 |     "    img_data_aug = copy.deepcopy(img_data)\n",
 27 |     "    \n",
 28 |     "    img = cv2.imread(img_data_aug['filepath'])\n",
 29 |     "    \n",
 30 |     "    if augment:\n",
 31 |     "        rows, cols = img.shape[:2]\n",
 32 |     "        \n",
 33 |     "        if config.use_horizontal_flips and np.random.randint(0, 2) == 0:\n",
 34 |     "            img = cv2.flip(img, 1)\n",
 35 |     "            for bbox in img_data_aug['bboxes']:\n",
 36 |     "                x1 = bbox['x1']\n",
 37 |     "                x2 = bbox['x2']\n",
 38 |     "                bbox['x2'] = cols - x1\n",
 39 |     "                bbox['x1'] = cols - x2\n",
 40 |     "                \n",
 41 |     "        if config.use_vertical_flips and np.random.randint(0, 2) == 0:\n",
 42 |     "            img = cv2.flip(img, 0)\n",
 43 |     "            for bbox in img_data_aug['bboxes']:\n",
 44 |     "                y1 = bbox['y1']\n",
 45 |     "                y2 = bbox['y2']\n",
 46 |     "                bbox['y2'] = rows - y1\n",
 47 |     "                bbox['y1'] = rows - y2\n",
 48 |     "                \n",
 49 |     "        if config.rot_90:\n",
 50 |     "            angle = np.random.choice([0, 90, 180, 270], 1)[0]\n",
 51 |     "            if angle == 270:\n",
 52 |     "                img = np.transpose(img, (1, 0, 2))\n",
 53 |     "                img = cv2.flip(img, 0)\n",
 54 |     "            elif angle == 180:\n",
 55 |     "                img = cv2.flip(img, -1)\n",
 56 |     "            elif angle == 90:\n",
 57 |     "                img = np.transpose(img, (1, 0, 2))\n",
 58 |     "                img = cv2.flip(img, 1)\n",
 59 |     "                \n",
 60 |     "            elif angle == 0:\n",
 61 |     "                pass\n",
 62 |     "            \n",
 63 |     "            for bbox in img_data_aug['bboxes']:\n",
 64 |     "                x1 = bbox['x1']\n",
 65 |     "                x2 = bbox['x2']\n",
 66 |     "                y1 = bbox['y1']\n",
 67 |     "                y2 = bbox['y2']\n",
 68 |     "                \n",
 69 |     "                if angle == 270:\n",
 70 |     "                    bbox['x1'] = y1\n",
 71 |     "                    bbox['x2'] = y2\n",
 72 |     "                    bbox['y1'] = cols - x2\n",
 73 |     "                    bbox['y2'] = cols - x1\n",
 74 |     "                    \n",
 75 |     "                elif angle == 180:\n",
 76 |     "                    bbox['x2'] = cols - x1\n",
 77 |     "                    bbox['x1'] = cols - x2\n",
 78 |     "                    bbox['y2'] = rows - y1\n",
 79 |     "                    bbox['y1'] = rows - y2\n",
 80 |     "                    \n",
 81 |     "                elif angle == 90:\n",
 82 |     "                    bbox['x1'] = rows - y2\n",
 83 |     "                    bbox['x2'] = rows - y1\n",
 84 |     "                    bbox['y1'] = x1\n",
 85 |     "                    bbox['y2'] = x2 \n",
 86 |     "                    \n",
 87 |     "                elif angle == 0:\n",
 88 |     "                    pass\n",
 89 |     "                \n",
 90 |     "    img_data_aug['width'] = img.shape[1]\n",
 91 |     "    img_data_aug['height'] = img.shape[0]\n",
 92 |     "    return img_data_aug, img\n",
 93 |     "                "
 94 |    ]
 95 |   }
 96 |  ],
 97 |  "metadata": {
 98 |   "kernelspec": {
 99 |    "display_name": "Python 3",
100 |    "language": "python",
101 |    "name": "python3"
102 |   },
103 |   "language_info": {
104 |    "codemirror_mode": {
105 |     "name": "ipython",
106 |     "version": 3
107 |    },
108 |    "file_extension": ".py",
109 |    "mimetype": "text/x-python",
110 |    "name": "python",
111 |    "nbconvert_exporter": "python",
112 |    "pygments_lexer": "ipython3",
113 |    "version": "3.6.3"
114 |   }
115 |  },
116 |  "nbformat": 4,
117 |  "nbformat_minor": 2
118 | }
119 | 


--------------------------------------------------------------------------------
/data/data_generators.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "code",
  5 |    "execution_count": 5,
  6 |    "metadata": {},
  7 |    "outputs": [
  8 |     {
  9 |      "name": "stdout",
 10 |      "output_type": "stream",
 11 |      "text": [
 12 |       "importing Jupyter notebook from data_augment.ipynb\n"
 13 |      ]
 14 |     }
 15 |    ],
 16 |    "source": [
 17 |     "from __future__ import absolute_import\n",
 18 |     "import numpy as np\n",
 19 |     "import cv2\n",
 20 |     "import random\n",
 21 |     "import copy\n",
 22 |     "import sys\n",
 23 |     "import os\n",
 24 |     "from themachine.nbfinder import NotebookFinder\n",
 25 |     "sys.meta_path.append('.')\n",
 26 |     "sys.meta_path.append(NotebookFinder())\n",
 27 |     "import data_augment\n",
 28 |     "import threading\n",
 29 |     "import itertools\n"
 30 |    ]
 31 |   },
 32 |   {
 33 |    "cell_type": "code",
 34 |    "execution_count": 6,
 35 |    "metadata": {},
 36 |    "outputs": [],
 37 |    "source": [
 38 |     "def union(au, bu, area_intersection):\n",
 39 |     "    area_a = (au[2] - au[0]) * (au[3] - au[1])\n",
 40 |     "    area_b = (bu[2] - bu[0]) * (bu[3] - bu[1])\n",
 41 |     "    area_union = area_a + area_b - area_intersection\n",
  42 |     "    return area_union"
 43 |    ]
 44 |   },
 45 |   {
 46 |    "cell_type": "code",
 47 |    "execution_count": 7,
 48 |    "metadata": {},
 49 |    "outputs": [],
 50 |    "source": [
 51 |     "def intersection(ai, bi):\n",
 52 |     "    x = max(ai[0], bi[0])\n",
 53 |     "    y = max(ai[1], bi[1])\n",
 54 |     "    w = min(ai[2], bi[2]) - x\n",
 55 |     "    h = min(ai[3], bi[3]) - y\n",
 56 |     "    if w < 0 or h < 0:\n",
 57 |     "        return 0\n",
 58 |     "    return w*h"
 59 |    ]
 60 |   },
 61 |   {
 62 |    "cell_type": "code",
 63 |    "execution_count": 9,
 64 |    "metadata": {},
 65 |    "outputs": [],
 66 |    "source": [
 67 |     "def iou(a, b):\n",
 68 |     "    # a and b should be (x1,y1,x2,y2)\n",
 69 |     "    if a[0] >= a[2] or a[1] >= a[3] or b[0] >= b[2] or b[1] >= b[3]:\n",
 70 |     "        return 0.0\n",
 71 |     "    \n",
 72 |     "    area_i = intersection(a, b)\n",
 73 |     "    area_u = union(a, b, area_i)\n",
 74 |     "\n",
 75 |     "    return float(area_i) / float(area_u + 1e-6)\n"
 76 |    ]
 77 |   },
 78 |   {
 79 |    "cell_type": "code",
 80 |    "execution_count": 10,
 81 |    "metadata": {},
 82 |    "outputs": [],
 83 |    "source": [
 84 |     "def get_new_img_size(width, height, img_min_side=600):\n",
 85 |     "    if width <= height:\n",
 86 |     "        f = float(img_min_side) / width\n",
 87 |     "        resized_height = int(f * height)\n",
 88 |     "        resized_width = img_min_side\n",
 89 |     "        \n",
 90 |     "    else:\n",
 91 |     "        f = float(img_min_side) / height\n",
 92 |     "        resized_width = int(f * width)\n",
 93 |     "        resized_height = img_min_side\n",
 94 |     "        \n",
 95 |     "    return resized_width, resized_height"
 96 |    ]
 97 |   },
 98 |   {
 99 |    "cell_type": "code",
100 |    "execution_count": null,
101 |    "metadata": {},
102 |    "outputs": [],
103 |    "source": []
104 |   }
105 |  ],
106 |  "metadata": {
107 |   "kernelspec": {
108 |    "display_name": "Python 3",
109 |    "language": "python",
110 |    "name": "python3"
111 |   },
112 |   "language_info": {
113 |    "codemirror_mode": {
114 |     "name": "ipython",
115 |     "version": 3
116 |    },
117 |    "file_extension": ".py",
118 |    "mimetype": "text/x-python",
119 |    "name": "python",
120 |    "nbconvert_exporter": "python",
121 |    "pygments_lexer": "ipython3",
122 |    "version": "3.6.3"
123 |   }
124 |  },
125 |  "nbformat": 4,
126 |  "nbformat_minor": 2
127 | }
128 | 


--------------------------------------------------------------------------------
/data/pascal_voc_parser-pyfile.py:
--------------------------------------------------------------------------------
  1 | # %load ../pascal_voc_parser.py
  2 | import os
  3 | import cv2
  4 | import xml.etree.ElementTree as ET
  5 | import numpy as np
  6 | def get_data(input_path):
  7 |     all_imgs = []
  8 | 
  9 |     classes_count = {}
 10 | 
 11 |     class_mapping = {}
 12 | 
 13 |     visualise = False
 14 | 
 15 |     data_paths = [os.path.join(input_path,s) for s in ['VOC2007', 'VOC2012']]
 16 | 
 17 | 
 18 |     print('Parsing annotation files')
 19 | 
 20 |     for data_path in data_paths:
 21 | 
 22 |         annot_path = os.path.join(data_path, 'Annotations')
 23 |         imgs_path = os.path.join(data_path, 'JPEGImages')
 24 |         imgsets_path_trainval = os.path.join(data_path, 'ImageSets','Main','trainval.txt')
 25 |         imgsets_path_test = os.path.join(data_path, 'ImageSets','Main','test.txt')
 26 | 
 27 |         trainval_files = []
 28 |         test_files = []
 29 |         try:
 30 |             with open(imgsets_path_trainval) as f:
 31 |                 for line in f:
 32 |                     trainval_files.append(line.strip() + '.jpg')
 33 |         except Exception as e:
 34 |             print(e)
 35 | 
 36 |         try:
 37 |             with open(imgsets_path_test) as f:
 38 |                 for line in f:
 39 |                     test_files.append(line.strip() + '.jpg')
 40 |         except Exception as e:
 41 |             if data_path[-7:] == 'VOC2012':
 42 |                 # this is expected, most pascal voc distibutions dont have the test.txt file
 43 |                 pass
 44 |             else:
 45 |                 print(e)
 46 | 
 47 |         annots = [os.path.join(annot_path, s) for s in os.listdir(annot_path)]
 48 |         idx = 0
 49 |         for annot in annots:
 50 |             try:
 51 |                 idx += 1
 52 | 
 53 |                 et = ET.parse(annot)
 54 |                 element = et.getroot()
 55 | 
 56 |                 element_objs = element.findall('object')
 57 |                 element_filename = element.find('filename').text
 58 |                 element_width = int(element.find('size').find('width').text)
 59 |                 element_height = int(element.find('size').find('height').text)
 60 | 
 61 |                 if len(element_objs) > 0:
 62 |                     annotation_data = {'filepath': os.path.join(imgs_path, element_filename), 'width': element_width,
 63 |                                        'height': element_height, 'bboxes': []}
 64 | 
 65 |                     if element_filename in trainval_files:
 66 |                         annotation_data['imageset'] = 'trainval'
 67 |                     elif element_filename in test_files:
 68 |                         annotation_data['imageset'] = 'test'
 69 |                     else:
 70 |                         annotation_data['imageset'] = 'trainval'
 71 | 
 72 |                 for element_obj in element_objs:
 73 |                     class_name = element_obj.find('name').text
 74 |                     if class_name not in classes_count:
 75 |                         classes_count[class_name] = 1
 76 |                     else:
 77 |                         classes_count[class_name] += 1
 78 | 
 79 |                     if class_name not in class_mapping:
 80 |                         class_mapping[class_name] = len(class_mapping)
 81 | 
 82 |                     obj_bbox = element_obj.find('bndbox')
 83 |                     x1 = int(round(float(obj_bbox.find('xmin').text)))
 84 |                     y1 = int(round(float(obj_bbox.find('ymin').text)))
 85 |                     x2 = int(round(float(obj_bbox.find('xmax').text)))
 86 |                     y2 = int(round(float(obj_bbox.find('ymax').text)))
 87 |                     difficulty = int(element_obj.find('difficult').text) == 1
 88 |                     annotation_data['bboxes'].append(
 89 |                         {'class': class_name, 'x1': x1, 'x2': x2, 'y1': y1, 'y2': y2, 'difficult': difficulty})
 90 |                 all_imgs.append(annotation_data)
 91 | 
 92 |                 if visualise:
 93 |                     img = cv2.imread(annotation_data['filepath'])
 94 |                     for bbox in annotation_data['bboxes']:
 95 |                         cv2.rectangle(img, (bbox['x1'], bbox['y1']), (bbox[
 96 |                                         'x2'], bbox['y2']), (0, 0, 255))
 97 |                     cv2.imshow('img', img)
 98 |                     cv2.waitKey(0)
 99 | 
100 |             except Exception as e:
101 |                 print(e)
102 |                 continue
103 |     return all_imgs, classes_count, class_mapping
104 | 


--------------------------------------------------------------------------------
/data/pascal_voc_parser.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "markdown",
  5 |    "metadata": {
  6 |     "toc": "true"
  7 |    },
  8 |    "source": [
  9 |     " # Table of Contents\n",
 10 |     "<div class=\"toc\" style=\"margin-top: 1em;\"><ul class=\"toc-item\" id=\"toc-level0\"></ul></div>"
 11 |    ]
 12 |   },
 13 |   {
 14 |    "cell_type": "code",
 15 |    "execution_count": 1,
 16 |    "metadata": {
 17 |     "collapsed": true
 18 |    },
 19 |    "outputs": [],
 20 |    "source": [
 21 |     "# %load ../pascal_voc_parser.py\n",
 22 |     "import os\n",
 23 |     "import cv2\n",
 24 |     "import xml.etree.ElementTree as ET\n",
 25 |     "import numpy as np\n",
 26 |     "def get_data(input_path, visualise=False):\n",
 27 |     "    \"\"\"Load data from an input file.\n",
 28 |     "      https://github.com/yhenon/keras-frcnn/blob/master/keras_frcnn/pascal_voc_parser.py#L19\n",
 29 |     "    Args: \n",
 30 |     "          input_path     (string)    :  path for the input file\n",
 31 |     "          visualise      (bool)      :  show images with annotation if True\n",
 32 |     "          \n",
 33 |     "    Returns:\n",
 34 |     "         all_imgs         (list)     : list of images\n",
  35 |     "         classes_count    (dict)     : dictionary containing classes information\n",
 36 |     "         class_mapping    (dict)     : dictionary containing class mapping\n",
 37 |     "    \"\"\"\n",
 38 |     "    all_imgs = []\n",
 39 |     "\n",
 40 |     "    classes_count = {}\n",
 41 |     "\n",
 42 |     "    class_mapping = {}\n",
 43 |     "\n",
 44 |     "    data_paths = [os.path.join(input_path,s) for s in ['VOC2012']]\n",
 45 |     "\n",
 46 |     "\n",
 47 |     "    print('Parsing annotation files....')\n",
 48 |     "\n",
 49 |     "    for data_path in data_paths:\n",
 50 |     "\n",
 51 |     "        annot_path = os.path.join(data_path, 'Annotations')\n",
 52 |     "        imgs_path = os.path.join(data_path, 'JPEGImages')\n",
 53 |     "        imgsets_path_trainval = os.path.join(data_path, 'ImageSets','Main','trainval.txt')\n",
 54 |     "        imgsets_path_test = os.path.join(data_path, 'ImageSets','Main','test.txt')\n",
 55 |     "\n",
 56 |     "        trainval_files = []\n",
 57 |     "        test_files = []\n",
 58 |     "        try:\n",
 59 |     "            with open(imgsets_path_trainval) as f:\n",
 60 |     "                for line in f:\n",
 61 |     "                    trainval_files.append(line.strip() + '.jpg')\n",
 62 |     "        except Exception as e:\n",
 63 |     "            print(e)\n",
 64 |     "\n",
 65 |     "        try:\n",
 66 |     "            with open(imgsets_path_test) as f:\n",
 67 |     "                for line in f:\n",
 68 |     "                    test_files.append(line.strip() + '.jpg')\n",
 69 |     "        except Exception as e:\n",
 70 |     "            if data_path[-7:] == 'VOC2012':\n",
  71 |     "                # this is expected, most pascal voc distributions don't have the test.txt file\n",
 72 |     "                pass\n",
 73 |     "            else:\n",
 74 |     "                print(e)\n",
 75 |     "\n",
 76 |     "        annots = [os.path.join(annot_path, s) for s in os.listdir(annot_path)]\n",
 77 |     "        idx = 0\n",
 78 |     "        for annot in annots:\n",
 79 |     "            try:\n",
 80 |     "                idx += 1\n",
 81 |     "\n",
 82 |     "                et = ET.parse(annot)\n",
 83 |     "                element = et.getroot()\n",
 84 |     "\n",
 85 |     "                element_objs = element.findall('object')\n",
 86 |     "                element_filename = element.find('filename').text\n",
 87 |     "                element_width = int(element.find('size').find('width').text)\n",
 88 |     "                element_height = int(element.find('size').find('height').text)\n",
 89 |     "\n",
 90 |     "                if len(element_objs) > 0:\n",
 91 |     "                    annotation_data = {'filepath': os.path.join(imgs_path, element_filename), 'width': element_width,\n",
 92 |     "                                       'height': element_height, 'bboxes': []}\n",
 93 |     "\n",
 94 |     "                    if element_filename in trainval_files:\n",
 95 |     "                        annotation_data['imageset'] = 'trainval'\n",
 96 |     "                    elif element_filename in test_files:\n",
 97 |     "                        annotation_data['imageset'] = 'test'\n",
 98 |     "                    else:\n",
 99 |     "                        annotation_data['imageset'] = 'trainval'\n",
100 |     "\n",
101 |     "                for element_obj in element_objs:\n",
102 |     "                    class_name = element_obj.find('name').text\n",
103 |     "                    if class_name not in classes_count:\n",
104 |     "                        classes_count[class_name] = 1\n",
105 |     "                    else:\n",
106 |     "                        classes_count[class_name] += 1\n",
107 |     "\n",
108 |     "                    if class_name not in class_mapping:\n",
109 |     "                        class_mapping[class_name] = len(class_mapping)\n",
110 |     "\n",
111 |     "                    obj_bbox = element_obj.find('bndbox')\n",
112 |     "                    x1 = int(round(float(obj_bbox.find('xmin').text)))\n",
113 |     "                    y1 = int(round(float(obj_bbox.find('ymin').text)))\n",
114 |     "                    x2 = int(round(float(obj_bbox.find('xmax').text)))\n",
115 |     "                    y2 = int(round(float(obj_bbox.find('ymax').text)))\n",
116 |     "                    difficulty = int(element_obj.find('difficult').text) == 1\n",
117 |     "                    annotation_data['bboxes'].append(\n",
118 |     "                        {'class': class_name, 'x1': x1, 'x2': x2, 'y1': y1, 'y2': y2, 'difficult': difficulty})\n",
119 |     "                all_imgs.append(annotation_data)\n",
120 |     "\n",
121 |     "                if visualise:\n",
122 |     "                    img = cv2.imread(annotation_data['filepath'])\n",
123 |     "                    for bbox in annotation_data['bboxes']:\n",
124 |     "                        cv2.rectangle(img, (bbox['x1'], bbox['y1']), (bbox[\n",
125 |     "                                        'x2'], bbox['y2']), (0, 0, 255))\n",
126 |     "                    cv2.imshow('img', img)\n",
127 |     "                    cv2.waitKey(0)\n",
128 |     "\n",
129 |     "            except Exception as e:\n",
130 |     "                print(e)\n",
131 |     "                continue\n",
132 |     "    if 'bg' not in classes_count:\n",
133 |     "        classes_count['bg'] = 0\n",
134 |     "        class_mapping['bg'] = len(class_mapping)\n",
135 |     "        \n",
136 |     "        \n",
137 |     "    print(\"Parsing annotation files Finished without error!\")\n",
138 |     "    return all_imgs, classes_count, class_mapping\n"
139 |    ]
140 |   },
141 |   {
142 |    "cell_type": "code",
143 |    "execution_count": null,
144 |    "metadata": {},
145 |    "outputs": [],
146 |    "source": [
147 |     "if __name__ == '__main__':\n",
148 |     "    pass\n",
149 |     "    #all_imgs, classes_count, class_mapping = get_data('/home/abanihi/Documents/deep-data/VOCdevkit/')\n",
150 |     "    #print(classes_count)\n",
151 |     "    #print(class_mapping)\n",
152 |     "    "
153 |    ]
154 |   },
155 |   {
156 |    "cell_type": "code",
157 |    "execution_count": null,
158 |    "metadata": {
159 |     "collapsed": true
160 |    },
161 |    "outputs": [],
162 |    "source": []
163 |   }
164 |  ],
165 |  "metadata": {
166 |   "kernelspec": {
167 |    "display_name": "Python 3",
168 |    "language": "python",
169 |    "name": "python3"
170 |   },
171 |   "language_info": {
172 |    "codemirror_mode": {
173 |     "name": "ipython",
174 |     "version": 3
175 |    },
176 |    "file_extension": ".py",
177 |    "mimetype": "text/x-python",
178 |    "name": "python",
179 |    "nbconvert_exporter": "python",
180 |    "pygments_lexer": "ipython3",
181 |    "version": "3.6.3"
182 |   },
183 |   "toc": {
184 |    "nav_menu": {},
185 |    "number_sections": true,
186 |    "sideBar": true,
187 |    "skip_h1_title": false,
188 |    "toc_cell": true,
189 |    "toc_position": {},
190 |    "toc_section_display": "block",
191 |    "toc_window_display": false
192 |   }
193 |  },
194 |  "nbformat": 4,
195 |  "nbformat_minor": 2
196 | }
197 | 


--------------------------------------------------------------------------------
/importing notebooks.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "markdown",
  5 |    "metadata": {
  6 |     "toc": "true"
  7 |    },
  8 |    "source": [
  9 |     " # Table of Contents\n",
 10 |     "<div class=\"toc\" style=\"margin-top: 1em;\"><ul class=\"toc-item\" id=\"toc-level0\"><li><span><a href=\"http://localhost:8888/notebooks/importing%20notebooks.ipynb#Importing-Jupyter-Notebooks-as-Modules\" data-toc-modified-id=\"Importing-Jupyter-Notebooks-as-Modules-1\"><span class=\"toc-item-num\">1&nbsp;&nbsp;</span>Importing Jupyter Notebooks as Modules</a></span><ul class=\"toc-item\"><li><span><a href=\"http://localhost:8888/notebooks/importing%20notebooks.ipynb#Notebook-Loader\" data-toc-modified-id=\"Notebook-Loader-1.1\"><span class=\"toc-item-num\">1.1&nbsp;&nbsp;</span>Notebook Loader</a></span></li><li><span><a href=\"http://localhost:8888/notebooks/importing%20notebooks.ipynb#The-Module-Finder\" data-toc-modified-id=\"The-Module-Finder-1.2\"><span class=\"toc-item-num\">1.2&nbsp;&nbsp;</span>The Module Finder</a></span></li><li><span><a href=\"http://localhost:8888/notebooks/importing%20notebooks.ipynb#Register-the-hook\" data-toc-modified-id=\"Register-the-hook-1.3\"><span class=\"toc-item-num\">1.3&nbsp;&nbsp;</span>Register the hook</a></span></li><li><span><a href=\"http://localhost:8888/notebooks/importing%20notebooks.ipynb#Aside:-displaying-notebooks\" data-toc-modified-id=\"Aside:-displaying-notebooks-1.4\"><span class=\"toc-item-num\">1.4&nbsp;&nbsp;</span>Aside: displaying notebooks</a></span></li></ul></li></ul></div>"
 11 |    ]
 12 |   },
 13 |   {
 14 |    "cell_type": "markdown",
 15 |    "metadata": {},
 16 |    "source": [
 17 |     "# Importing Jupyter Notebooks as Modules"
 18 |    ]
 19 |   },
 20 |   {
 21 |    "cell_type": "markdown",
 22 |    "metadata": {},
 23 |    "source": [
 24 |     "http://nbviewer.jupyter.org/github/jupyter/notebook/blob/master/docs/source/examples/Notebook/Importing%20Notebooks.ipynb"
 25 |    ]
 26 |   },
 27 |   {
 28 |    "cell_type": "code",
 29 |    "execution_count": 1,
 30 |    "metadata": {
 31 |     "collapsed": true
 32 |    },
 33 |    "outputs": [],
 34 |    "source": [
 35 |     "import io, os, sys, types"
 36 |    ]
 37 |   },
 38 |   {
 39 |    "cell_type": "code",
 40 |    "execution_count": 2,
 41 |    "metadata": {
 42 |     "collapsed": true
 43 |    },
 44 |    "outputs": [],
 45 |    "source": [
 46 |     "from IPython import get_ipython\n",
 47 |     "from nbformat import read\n",
 48 |     "from IPython.core.interactiveshell import InteractiveShell"
 49 |    ]
 50 |   },
 51 |   {
 52 |    "cell_type": "markdown",
 53 |    "metadata": {},
 54 |    "source": [
 55 |     "Import hooks typically take the form of two objects:\n",
 56 |     "\n",
 57 |     "1. a Module **Loader**, which takes a module name (e.g. 'IPython.display'), and returns a Module\n",
 58 |     "2. a Module **Finder**, which figures out whether a module might exist, and tells Python what **Loader** to use\n",
 59 |     "\n"
 60 |    ]
 61 |   },
 62 |   {
 63 |    "cell_type": "code",
 64 |    "execution_count": 3,
 65 |    "metadata": {
 66 |     "collapsed": true
 67 |    },
 68 |    "outputs": [],
 69 |    "source": [
 70 |     "def find_notebook(fullname, path=None):\n",
 71 |     "    \"\"\"find a notebook, given its fully qualified name and an optional path\n",
 72 |     "    \n",
 73 |     "    This turns \"foo.bar\" into \"foo/bar.ipynb\"\n",
 74 |     "    and tries turning \"Foo_Bar\" into \"Foo Bar\" if Foo_Bar\n",
 75 |     "    does not exist.\n",
 76 |     "    \"\"\"\n",
 77 |     "    name = fullname.rsplit('.', 1)[-1]\n",
 78 |     "    if not path:\n",
 79 |     "        path = ['']\n",
 80 |     "    for d in path:\n",
 81 |     "        nb_path = os.path.join(d, name + \".ipynb\")\n",
 82 |     "        if os.path.isfile(nb_path):\n",
 83 |     "            return nb_path\n",
 84 |     "        # let import Notebook_Name find \"Notebook Name.ipynb\"\n",
 85 |     "        nb_path = nb_path.replace(\"_\", \" \")\n",
 86 |     "        if os.path.isfile(nb_path):\n",
 87 |     "            return nb_path\n",
 88 |     "            \n"
 89 |    ]
 90 |   },
 91 |   {
 92 |    "cell_type": "markdown",
 93 |    "metadata": {},
 94 |    "source": [
 95 |     "## Notebook Loader\n",
 96 |     "\n",
 97 |     "Here we have our Notebook Loader. It's actually quite simple - once we figure out the filename of the module, all it does is:\n",
 98 |     "\n",
 99 |     "   1. load the notebook document into memory\n",
100 |     "   2. create an empty Module\n",
101 |     "   3. execute every cell in the Module namespace\n",
102 |     "\n",
103 |     "Since IPython cells can have extended syntax, the IPython transform is applied to turn each of these cells into their pure-Python counterparts before executing them. If all of your notebook cells are pure-Python, this step is unnecessary.\n"
104 |    ]
105 |   },
106 |   {
107 |    "cell_type": "code",
108 |    "execution_count": 4,
109 |    "metadata": {
110 |     "collapsed": true
111 |    },
112 |    "outputs": [],
113 |    "source": [
114 |     "class NotebookLoader(object):\n",
115 |     "    \"\"\"Module Loader for Jupyter Notebooks\"\"\"\n",
116 |     "    def __init__(self, path=None):\n",
117 |     "        self.shell = InteractiveShell.instance()\n",
118 |     "        self.path = path\n",
119 |     "    \n",
120 |     "    def load_module(self, fullname):\n",
121 |     "        \"\"\"import a notebook as a module\"\"\"\n",
122 |     "        path = find_notebook(fullname, self.path)\n",
123 |     "        \n",
124 |     "        print (\"importing Jupyter notebook from %s\" % path)\n",
125 |     "                                       \n",
126 |     "        # load the notebook object\n",
127 |     "        with io.open(path, 'r', encoding='utf-8') as f:\n",
128 |     "            nb = read(f, 4)\n",
129 |     "        \n",
130 |     "        \n",
131 |     "        # create the module and add it to sys.modules\n",
132 |     "        # if name in sys.modules:\n",
133 |     "        #    return sys.modules[name]\n",
134 |     "        mod = types.ModuleType(fullname)\n",
135 |     "        mod.__file__ = path\n",
136 |     "        mod.__loader__ = self\n",
137 |     "        mod.__dict__['get_ipython'] = get_ipython\n",
138 |     "        sys.modules[fullname] = mod\n",
139 |     "        \n",
140 |     "        # extra work to ensure that magics that would affect the user_ns\n",
141 |     "        # actually affect the notebook module's ns\n",
142 |     "        save_user_ns = self.shell.user_ns\n",
143 |     "        self.shell.user_ns = mod.__dict__\n",
144 |     "        \n",
145 |     "        try:\n",
146 |     "            \n",
147 |     "              for cell in nb.cells:\n",
148 |     "                    \n",
149 |     "                    if cell.cell_type == 'code':\n",
150 |     "                        # transform the input to executable Python\n",
151 |     "                        code = self.shell.input_transformer_manager.transform_cell(cell.source)\n",
152 |     "                        # run the code in themodule\n",
153 |     "                    exec(code, mod.__dict__)\n",
154 |     "        finally:\n",
155 |     "            self.shell.user_ns = save_user_ns\n",
156 |     "        return mod\n"
157 |    ]
158 |   },
159 |   {
160 |    "cell_type": "markdown",
161 |    "metadata": {},
162 |    "source": [
163 |     "## The Module Finder\n",
164 |     "\n",
165 |     "The finder is a simple object that tells you whether a name can be imported, and returns the appropriate loader. All this one does is check, when you do:\n",
166 |     "\n",
167 |     "```python \n",
168 |     "import mynotebook\n",
169 |     "```\n",
170 |     "\n",
171 |     "it checks whether ```mynotebook.ipynb``` exists. If a notebook is found, then it returns a NotebookLoader.\n",
172 |     "\n",
173 |     "Any extra logic is just for resolving paths within packages.\n"
174 |    ]
175 |   },
176 |   {
177 |    "cell_type": "code",
178 |    "execution_count": 5,
179 |    "metadata": {
180 |     "collapsed": true
181 |    },
182 |    "outputs": [],
183 |    "source": [
184 |     "class NotebookFinder(object):\n",
185 |     "    \"\"\"Module finder that locates Jupyter Notebooks\"\"\"\n",
186 |     "    def __init__(self):\n",
187 |     "        self.loaders = {}\n",
188 |     "    \n",
189 |     "    def find_module(self, fullname, path=None):\n",
190 |     "        nb_path = find_notebook(fullname, path)\n",
191 |     "        if not nb_path:\n",
192 |     "            return\n",
193 |     "        \n",
194 |     "        key = path\n",
195 |     "        if path:\n",
196 |     "            # lists aren't hashable\n",
197 |     "            key = os.path.sep.join(path)\n",
198 |     "        \n",
199 |     "        if key not in self.loaders:\n",
200 |     "            self.loaders[key] = NotebookLoader(path)\n",
201 |     "        return self.loaders[key]\n"
202 |    ]
203 |   },
204 |   {
205 |    "cell_type": "markdown",
206 |    "metadata": {},
207 |    "source": [
208 |     "## Register the hook"
209 |    ]
210 |   },
211 |   {
212 |    "cell_type": "markdown",
213 |    "metadata": {},
214 |    "source": [
215 |     "Now we register the NotebookFinder with ```sys.meta_path```"
216 |    ]
217 |   },
218 |   {
219 |    "cell_type": "code",
220 |    "execution_count": 6,
221 |    "metadata": {
222 |     "collapsed": true
223 |    },
224 |    "outputs": [],
225 |    "source": [
226 |     "sys.meta_path.append(NotebookFinder())"
227 |    ]
228 |   },
229 |   {
230 |    "cell_type": "markdown",
231 |    "metadata": {},
232 |    "source": [
233 |     "After this point, my notebooks should be importable.\n",
234 |     "\n",
 235 |     "Let's look at what we have in the `data/` directory:\n"
236 |    ]
237 |   },
238 |   {
239 |    "cell_type": "code",
240 |    "execution_count": 7,
241 |    "metadata": {},
242 |    "outputs": [
243 |     {
244 |      "name": "stdout",
245 |      "output_type": "stream",
246 |      "text": [
247 |       "__init__.py  pascal_voc_parser.ipynb  pascal_voc_parser-pyfile.py  \u001b[0m\u001b[01;34m__pycache__\u001b[0m/\r\n"
248 |      ]
249 |     }
250 |    ],
251 |    "source": [
252 |     "ls data/"
253 |    ]
254 |   },
255 |   {
256 |    "cell_type": "code",
257 |    "execution_count": 8,
258 |    "metadata": {},
259 |    "outputs": [
260 |     {
261 |      "name": "stdout",
262 |      "output_type": "stream",
263 |      "text": [
264 |       "importing Jupyter notebook from /home/abanihi/Documents/Github/keras-faster-rcnn/data/pascal_voc_parser.ipynb\n",
265 |       "Parsing annotation files....\n",
266 |       "Parsing annotation files Finished without error!\n"
267 |      ]
268 |     }
269 |    ],
270 |    "source": [
271 |     "from data import pascal_voc_parser as p"
272 |    ]
273 |   },
274 |   {
275 |    "cell_type": "markdown",
276 |    "metadata": {},
277 |    "source": [
278 |     "\n",
279 |     "## Aside: displaying notebooks\n",
280 |     "\n",
281 |     "Here is some simple code to display the contents of a notebook with syntax highlighting, etc.\n"
282 |    ]
283 |   },
284 |   {
285 |    "cell_type": "code",
286 |    "execution_count": 10,
287 |    "metadata": {},
288 |    "outputs": [
289 |     {
290 |      "data": {
291 |       "text/html": [
292 |        "\n",
293 |        "<style type='text/css'>\n",
294 |        ".hll { background-color: #ffffcc }\n",
295 |        ".c { color: #408080; font-style: italic } /* Comment */\n",
296 |        ".err { border: 1px solid #FF0000 } /* Error */\n",
297 |        ".k { color: #008000; font-weight: bold } /* Keyword */\n",
298 |        ".o { color: #666666 } /* Operator */\n",
299 |        ".ch { color: #408080; font-style: italic } /* Comment.Hashbang */\n",
300 |        ".cm { color: #408080; font-style: italic } /* Comment.Multiline */\n",
301 |        ".cp { color: #BC7A00 } /* Comment.Preproc */\n",
302 |        ".cpf { color: #408080; font-style: italic } /* Comment.PreprocFile */\n",
303 |        ".c1 { color: #408080; font-style: italic } /* Comment.Single */\n",
304 |        ".cs { color: #408080; font-style: italic } /* Comment.Special */\n",
305 |        ".gd { color: #A00000 } /* Generic.Deleted */\n",
306 |        ".ge { font-style: italic } /* Generic.Emph */\n",
307 |        ".gr { color: #FF0000 } /* Generic.Error */\n",
308 |        ".gh { color: #000080; font-weight: bold } /* Generic.Heading */\n",
309 |        ".gi { color: #00A000 } /* Generic.Inserted */\n",
310 |        ".go { color: #888888 } /* Generic.Output */\n",
311 |        ".gp { color: #000080; font-weight: bold } /* Generic.Prompt */\n",
312 |        ".gs { font-weight: bold } /* Generic.Strong */\n",
313 |        ".gu { color: #800080; font-weight: bold } /* Generic.Subheading */\n",
314 |        ".gt { color: #0044DD } /* Generic.Traceback */\n",
315 |        ".kc { color: #008000; font-weight: bold } /* Keyword.Constant */\n",
316 |        ".kd { color: #008000; font-weight: bold } /* Keyword.Declaration */\n",
317 |        ".kn { color: #008000; font-weight: bold } /* Keyword.Namespace */\n",
318 |        ".kp { color: #008000 } /* Keyword.Pseudo */\n",
319 |        ".kr { color: #008000; font-weight: bold } /* Keyword.Reserved */\n",
320 |        ".kt { color: #B00040 } /* Keyword.Type */\n",
321 |        ".m { color: #666666 } /* Literal.Number */\n",
322 |        ".s { color: #BA2121 } /* Literal.String */\n",
323 |        ".na { color: #7D9029 } /* Name.Attribute */\n",
324 |        ".nb { color: #008000 } /* Name.Builtin */\n",
325 |        ".nc { color: #0000FF; font-weight: bold } /* Name.Class */\n",
326 |        ".no { color: #880000 } /* Name.Constant */\n",
327 |        ".nd { color: #AA22FF } /* Name.Decorator */\n",
328 |        ".ni { color: #999999; font-weight: bold } /* Name.Entity */\n",
329 |        ".ne { color: #D2413A; font-weight: bold } /* Name.Exception */\n",
330 |        ".nf { color: #0000FF } /* Name.Function */\n",
331 |        ".nl { color: #A0A000 } /* Name.Label */\n",
332 |        ".nn { color: #0000FF; font-weight: bold } /* Name.Namespace */\n",
333 |        ".nt { color: #008000; font-weight: bold } /* Name.Tag */\n",
334 |        ".nv { color: #19177C } /* Name.Variable */\n",
335 |        ".ow { color: #AA22FF; font-weight: bold } /* Operator.Word */\n",
336 |        ".w { color: #bbbbbb } /* Text.Whitespace */\n",
337 |        ".mb { color: #666666 } /* Literal.Number.Bin */\n",
338 |        ".mf { color: #666666 } /* Literal.Number.Float */\n",
339 |        ".mh { color: #666666 } /* Literal.Number.Hex */\n",
340 |        ".mi { color: #666666 } /* Literal.Number.Integer */\n",
341 |        ".mo { color: #666666 } /* Literal.Number.Oct */\n",
342 |        ".sa { color: #BA2121 } /* Literal.String.Affix */\n",
343 |        ".sb { color: #BA2121 } /* Literal.String.Backtick */\n",
344 |        ".sc { color: #BA2121 } /* Literal.String.Char */\n",
345 |        ".dl { color: #BA2121 } /* Literal.String.Delimiter */\n",
346 |        ".sd { color: #BA2121; font-style: italic } /* Literal.String.Doc */\n",
347 |        ".s2 { color: #BA2121 } /* Literal.String.Double */\n",
348 |        ".se { color: #BB6622; font-weight: bold } /* Literal.String.Escape */\n",
349 |        ".sh { color: #BA2121 } /* Literal.String.Heredoc */\n",
350 |        ".si { color: #BB6688; font-weight: bold } /* Literal.String.Interpol */\n",
351 |        ".sx { color: #008000 } /* Literal.String.Other */\n",
352 |        ".sr { color: #BB6688 } /* Literal.String.Regex */\n",
353 |        ".s1 { color: #BA2121 } /* Literal.String.Single */\n",
354 |        ".ss { color: #19177C } /* Literal.String.Symbol */\n",
355 |        ".bp { color: #008000 } /* Name.Builtin.Pseudo */\n",
356 |        ".fm { color: #0000FF } /* Name.Function.Magic */\n",
357 |        ".vc { color: #19177C } /* Name.Variable.Class */\n",
358 |        ".vg { color: #19177C } /* Name.Variable.Global */\n",
359 |        ".vi { color: #19177C } /* Name.Variable.Instance */\n",
360 |        ".vm { color: #19177C } /* Name.Variable.Magic */\n",
361 |        ".il { color: #666666 } /* Literal.Number.Integer.Long */\n",
362 |        "</style>\n"
363 |       ],
364 |       "text/plain": [
365 |        "<IPython.core.display.HTML object>"
366 |       ]
367 |      },
368 |      "metadata": {},
369 |      "output_type": "display_data"
370 |     }
371 |    ],
372 |    "source": [
373 |     "\n",
374 |     "\n",
375 |     "from pygments import highlight\n",
376 |     "from pygments.lexers import PythonLexer\n",
377 |     "from pygments.formatters import HtmlFormatter\n",
378 |     "\n",
379 |     "from IPython.display import display, HTML\n",
380 |     "\n",
381 |     "formatter = HtmlFormatter()\n",
382 |     "lexer = PythonLexer()\n",
383 |     "\n",
384 |     "# publish the CSS for pygments highlighting\n",
385 |     "display(HTML(\"\"\"\n",
386 |     "<style type='text/css'>\n",
387 |     "%s\n",
388 |     "</style>\n",
389 |     "\"\"\" % formatter.get_style_defs()\n",
390 |     "))\n",
391 |     "\n"
392 |    ]
393 |   },
394 |   {
395 |    "cell_type": "code",
396 |    "execution_count": 11,
397 |    "metadata": {
398 |     "collapsed": true
399 |    },
400 |    "outputs": [],
401 |    "source": [
402 |     "def show_notebook(fname):\n",
403 |     "    \"\"\"display a short summary of the cells of a notebook\"\"\"\n",
404 |     "    with io.open(fname, 'r', encoding='utf-8') as f:\n",
405 |     "        nb = read(f, 4)\n",
406 |     "    html = []\n",
407 |     "    for cell in nb.cells:\n",
408 |     "        html.append(\"<h4>%s cell</h4>\" % cell.cell_type)\n",
409 |     "        if cell.cell_type == 'code':\n",
410 |     "            html.append(highlight(cell.source, lexer, formatter))\n",
411 |     "        else:\n",
412 |     "            html.append(\"<pre>%s</pre>\" % cell.source)\n",
413 |     "    display(HTML('\\n'.join(html)))"
414 |    ]
415 |   },
416 |   {
417 |    "cell_type": "code",
418 |    "execution_count": 12,
419 |    "metadata": {},
420 |    "outputs": [
421 |     {
422 |      "data": {
423 |       "text/html": [
424 |        "<h4>markdown cell</h4>\n",
425 |        "<pre> # Table of Contents\n",
426 |        "<div class=\"toc\" style=\"margin-top: 1em;\"><ul class=\"toc-item\" id=\"toc-level0\"></ul></div></pre>\n",
427 |        "<h4>code cell</h4>\n",
428 |        "<div class=\"highlight\"><pre><span></span><span class=\"c1\"># %load ../pascal_voc_parser.py</span>\n",
429 |        "<span class=\"kn\">import</span> <span class=\"nn\">os</span>\n",
430 |        "<span class=\"kn\">import</span> <span class=\"nn\">cv2</span>\n",
431 |        "<span class=\"kn\">import</span> <span class=\"nn\">xml.etree.ElementTree</span> <span class=\"kn\">as</span> <span class=\"nn\">ET</span>\n",
432 |        "<span class=\"kn\">import</span> <span class=\"nn\">numpy</span> <span class=\"kn\">as</span> <span class=\"nn\">np</span>\n",
433 |        "<span class=\"k\">def</span> <span class=\"nf\">get_data</span><span class=\"p\">(</span><span class=\"n\">input_path</span><span class=\"p\">,</span> <span class=\"n\">visualise</span><span class=\"o\">=</span><span class=\"bp\">False</span><span class=\"p\">):</span>\n",
434 |        "    <span class=\"sd\">&quot;&quot;&quot;Load data from an input file.</span>\n",
435 |        "<span class=\"sd\">      https://github.com/yhenon/keras-frcnn/blob/master/keras_frcnn/pascal_voc_parser.py#L19</span>\n",
436 |        "<span class=\"sd\">      </span>\n",
437 |        "<span class=\"sd\">    &quot;&quot;&quot;</span>\n",
438 |        "    <span class=\"n\">all_imgs</span> <span class=\"o\">=</span> <span class=\"p\">[]</span>\n",
439 |        "\n",
440 |        "    <span class=\"n\">classes_count</span> <span class=\"o\">=</span> <span class=\"p\">{}</span>\n",
441 |        "\n",
442 |        "    <span class=\"n\">class_mapping</span> <span class=\"o\">=</span> <span class=\"p\">{}</span>\n",
443 |        "\n",
444 |        "    <span class=\"n\">data_paths</span> <span class=\"o\">=</span> <span class=\"p\">[</span><span class=\"n\">os</span><span class=\"o\">.</span><span class=\"n\">path</span><span class=\"o\">.</span><span class=\"n\">join</span><span class=\"p\">(</span><span class=\"n\">input_path</span><span class=\"p\">,</span><span class=\"n\">s</span><span class=\"p\">)</span> <span class=\"k\">for</span> <span class=\"n\">s</span> <span class=\"ow\">in</span> <span class=\"p\">[</span><span class=\"s1\">&#39;VOC2012&#39;</span><span class=\"p\">]]</span>\n",
445 |        "\n",
446 |        "\n",
447 |        "    <span class=\"k\">print</span><span class=\"p\">(</span><span class=\"s1\">&#39;Parsing annotation files....&#39;</span><span class=\"p\">)</span>\n",
448 |        "\n",
449 |        "    <span class=\"k\">for</span> <span class=\"n\">data_path</span> <span class=\"ow\">in</span> <span class=\"n\">data_paths</span><span class=\"p\">:</span>\n",
450 |        "\n",
451 |        "        <span class=\"n\">annot_path</span> <span class=\"o\">=</span> <span class=\"n\">os</span><span class=\"o\">.</span><span class=\"n\">path</span><span class=\"o\">.</span><span class=\"n\">join</span><span class=\"p\">(</span><span class=\"n\">data_path</span><span class=\"p\">,</span> <span class=\"s1\">&#39;Annotations&#39;</span><span class=\"p\">)</span>\n",
452 |        "        <span class=\"n\">imgs_path</span> <span class=\"o\">=</span> <span class=\"n\">os</span><span class=\"o\">.</span><span class=\"n\">path</span><span class=\"o\">.</span><span class=\"n\">join</span><span class=\"p\">(</span><span class=\"n\">data_path</span><span class=\"p\">,</span> <span class=\"s1\">&#39;JPEGImages&#39;</span><span class=\"p\">)</span>\n",
453 |        "        <span class=\"n\">imgsets_path_trainval</span> <span class=\"o\">=</span> <span class=\"n\">os</span><span class=\"o\">.</span><span class=\"n\">path</span><span class=\"o\">.</span><span class=\"n\">join</span><span class=\"p\">(</span><span class=\"n\">data_path</span><span class=\"p\">,</span> <span class=\"s1\">&#39;ImageSets&#39;</span><span class=\"p\">,</span><span class=\"s1\">&#39;Main&#39;</span><span class=\"p\">,</span><span class=\"s1\">&#39;trainval.txt&#39;</span><span class=\"p\">)</span>\n",
454 |        "        <span class=\"n\">imgsets_path_test</span> <span class=\"o\">=</span> <span class=\"n\">os</span><span class=\"o\">.</span><span class=\"n\">path</span><span class=\"o\">.</span><span class=\"n\">join</span><span class=\"p\">(</span><span class=\"n\">data_path</span><span class=\"p\">,</span> <span class=\"s1\">&#39;ImageSets&#39;</span><span class=\"p\">,</span><span class=\"s1\">&#39;Main&#39;</span><span class=\"p\">,</span><span class=\"s1\">&#39;test.txt&#39;</span><span class=\"p\">)</span>\n",
455 |        "\n",
456 |        "        <span class=\"n\">trainval_files</span> <span class=\"o\">=</span> <span class=\"p\">[]</span>\n",
457 |        "        <span class=\"n\">test_files</span> <span class=\"o\">=</span> <span class=\"p\">[]</span>\n",
458 |        "        <span class=\"k\">try</span><span class=\"p\">:</span>\n",
459 |        "            <span class=\"k\">with</span> <span class=\"nb\">open</span><span class=\"p\">(</span><span class=\"n\">imgsets_path_trainval</span><span class=\"p\">)</span> <span class=\"k\">as</span> <span class=\"n\">f</span><span class=\"p\">:</span>\n",
460 |        "                <span class=\"k\">for</span> <span class=\"n\">line</span> <span class=\"ow\">in</span> <span class=\"n\">f</span><span class=\"p\">:</span>\n",
461 |        "                    <span class=\"n\">trainval_files</span><span class=\"o\">.</span><span class=\"n\">append</span><span class=\"p\">(</span><span class=\"n\">line</span><span class=\"o\">.</span><span class=\"n\">strip</span><span class=\"p\">()</span> <span class=\"o\">+</span> <span class=\"s1\">&#39;.jpg&#39;</span><span class=\"p\">)</span>\n",
462 |        "        <span class=\"k\">except</span> <span class=\"ne\">Exception</span> <span class=\"k\">as</span> <span class=\"n\">e</span><span class=\"p\">:</span>\n",
463 |        "            <span class=\"k\">print</span><span class=\"p\">(</span><span class=\"n\">e</span><span class=\"p\">)</span>\n",
464 |        "\n",
465 |        "        <span class=\"k\">try</span><span class=\"p\">:</span>\n",
466 |        "            <span class=\"k\">with</span> <span class=\"nb\">open</span><span class=\"p\">(</span><span class=\"n\">imgsets_path_test</span><span class=\"p\">)</span> <span class=\"k\">as</span> <span class=\"n\">f</span><span class=\"p\">:</span>\n",
467 |        "                <span class=\"k\">for</span> <span class=\"n\">line</span> <span class=\"ow\">in</span> <span class=\"n\">f</span><span class=\"p\">:</span>\n",
468 |        "                    <span class=\"n\">test_files</span><span class=\"o\">.</span><span class=\"n\">append</span><span class=\"p\">(</span><span class=\"n\">line</span><span class=\"o\">.</span><span class=\"n\">strip</span><span class=\"p\">()</span> <span class=\"o\">+</span> <span class=\"s1\">&#39;.jpg&#39;</span><span class=\"p\">)</span>\n",
469 |        "        <span class=\"k\">except</span> <span class=\"ne\">Exception</span> <span class=\"k\">as</span> <span class=\"n\">e</span><span class=\"p\">:</span>\n",
470 |        "            <span class=\"k\">if</span> <span class=\"n\">data_path</span><span class=\"p\">[</span><span class=\"o\">-</span><span class=\"mi\">7</span><span class=\"p\">:]</span> <span class=\"o\">==</span> <span class=\"s1\">&#39;VOC2012&#39;</span><span class=\"p\">:</span>\n",
471 |        "                <span class=\"c1\"># this is expected, most pascal voc distibutions dont have the test.txt file</span>\n",
472 |        "                <span class=\"k\">pass</span>\n",
473 |        "            <span class=\"k\">else</span><span class=\"p\">:</span>\n",
474 |        "                <span class=\"k\">print</span><span class=\"p\">(</span><span class=\"n\">e</span><span class=\"p\">)</span>\n",
475 |        "\n",
476 |        "        <span class=\"n\">annots</span> <span class=\"o\">=</span> <span class=\"p\">[</span><span class=\"n\">os</span><span class=\"o\">.</span><span class=\"n\">path</span><span class=\"o\">.</span><span class=\"n\">join</span><span class=\"p\">(</span><span class=\"n\">annot_path</span><span class=\"p\">,</span> <span class=\"n\">s</span><span class=\"p\">)</span> <span class=\"k\">for</span> <span class=\"n\">s</span> <span class=\"ow\">in</span> <span class=\"n\">os</span><span class=\"o\">.</span><span class=\"n\">listdir</span><span class=\"p\">(</span><span class=\"n\">annot_path</span><span class=\"p\">)]</span>\n",
477 |        "        <span class=\"n\">idx</span> <span class=\"o\">=</span> <span class=\"mi\">0</span>\n",
478 |        "        <span class=\"k\">for</span> <span class=\"n\">annot</span> <span class=\"ow\">in</span> <span class=\"n\">annots</span><span class=\"p\">:</span>\n",
479 |        "            <span class=\"k\">try</span><span class=\"p\">:</span>\n",
480 |        "                <span class=\"n\">idx</span> <span class=\"o\">+=</span> <span class=\"mi\">1</span>\n",
481 |        "\n",
482 |        "                <span class=\"n\">et</span> <span class=\"o\">=</span> <span class=\"n\">ET</span><span class=\"o\">.</span><span class=\"n\">parse</span><span class=\"p\">(</span><span class=\"n\">annot</span><span class=\"p\">)</span>\n",
483 |        "                <span class=\"n\">element</span> <span class=\"o\">=</span> <span class=\"n\">et</span><span class=\"o\">.</span><span class=\"n\">getroot</span><span class=\"p\">()</span>\n",
484 |        "\n",
485 |        "                <span class=\"n\">element_objs</span> <span class=\"o\">=</span> <span class=\"n\">element</span><span class=\"o\">.</span><span class=\"n\">findall</span><span class=\"p\">(</span><span class=\"s1\">&#39;object&#39;</span><span class=\"p\">)</span>\n",
486 |        "                <span class=\"n\">element_filename</span> <span class=\"o\">=</span> <span class=\"n\">element</span><span class=\"o\">.</span><span class=\"n\">find</span><span class=\"p\">(</span><span class=\"s1\">&#39;filename&#39;</span><span class=\"p\">)</span><span class=\"o\">.</span><span class=\"n\">text</span>\n",
487 |        "                <span class=\"n\">element_width</span> <span class=\"o\">=</span> <span class=\"nb\">int</span><span class=\"p\">(</span><span class=\"n\">element</span><span class=\"o\">.</span><span class=\"n\">find</span><span class=\"p\">(</span><span class=\"s1\">&#39;size&#39;</span><span class=\"p\">)</span><span class=\"o\">.</span><span class=\"n\">find</span><span class=\"p\">(</span><span class=\"s1\">&#39;width&#39;</span><span class=\"p\">)</span><span class=\"o\">.</span><span class=\"n\">text</span><span class=\"p\">)</span>\n",
488 |        "                <span class=\"n\">element_height</span> <span class=\"o\">=</span> <span class=\"nb\">int</span><span class=\"p\">(</span><span class=\"n\">element</span><span class=\"o\">.</span><span class=\"n\">find</span><span class=\"p\">(</span><span class=\"s1\">&#39;size&#39;</span><span class=\"p\">)</span><span class=\"o\">.</span><span class=\"n\">find</span><span class=\"p\">(</span><span class=\"s1\">&#39;height&#39;</span><span class=\"p\">)</span><span class=\"o\">.</span><span class=\"n\">text</span><span class=\"p\">)</span>\n",
489 |        "\n",
490 |        "                <span class=\"k\">if</span> <span class=\"nb\">len</span><span class=\"p\">(</span><span class=\"n\">element_objs</span><span class=\"p\">)</span> <span class=\"o\">&gt;</span> <span class=\"mi\">0</span><span class=\"p\">:</span>\n",
491 |        "                    <span class=\"n\">annotation_data</span> <span class=\"o\">=</span> <span class=\"p\">{</span><span class=\"s1\">&#39;filepath&#39;</span><span class=\"p\">:</span> <span class=\"n\">os</span><span class=\"o\">.</span><span class=\"n\">path</span><span class=\"o\">.</span><span class=\"n\">join</span><span class=\"p\">(</span><span class=\"n\">imgs_path</span><span class=\"p\">,</span> <span class=\"n\">element_filename</span><span class=\"p\">),</span> <span class=\"s1\">&#39;width&#39;</span><span class=\"p\">:</span> <span class=\"n\">element_width</span><span class=\"p\">,</span>\n",
492 |        "                                       <span class=\"s1\">&#39;height&#39;</span><span class=\"p\">:</span> <span class=\"n\">element_height</span><span class=\"p\">,</span> <span class=\"s1\">&#39;bboxes&#39;</span><span class=\"p\">:</span> <span class=\"p\">[]}</span>\n",
493 |        "\n",
494 |        "                    <span class=\"k\">if</span> <span class=\"n\">element_filename</span> <span class=\"ow\">in</span> <span class=\"n\">trainval_files</span><span class=\"p\">:</span>\n",
495 |        "                        <span class=\"n\">annotation_data</span><span class=\"p\">[</span><span class=\"s1\">&#39;imageset&#39;</span><span class=\"p\">]</span> <span class=\"o\">=</span> <span class=\"s1\">&#39;trainval&#39;</span>\n",
496 |        "                    <span class=\"k\">elif</span> <span class=\"n\">element_filename</span> <span class=\"ow\">in</span> <span class=\"n\">test_files</span><span class=\"p\">:</span>\n",
497 |        "                        <span class=\"n\">annotation_data</span><span class=\"p\">[</span><span class=\"s1\">&#39;imageset&#39;</span><span class=\"p\">]</span> <span class=\"o\">=</span> <span class=\"s1\">&#39;test&#39;</span>\n",
498 |        "                    <span class=\"k\">else</span><span class=\"p\">:</span>\n",
499 |        "                        <span class=\"n\">annotation_data</span><span class=\"p\">[</span><span class=\"s1\">&#39;imageset&#39;</span><span class=\"p\">]</span> <span class=\"o\">=</span> <span class=\"s1\">&#39;trainval&#39;</span>\n",
500 |        "\n",
501 |        "                <span class=\"k\">for</span> <span class=\"n\">element_obj</span> <span class=\"ow\">in</span> <span class=\"n\">element_objs</span><span class=\"p\">:</span>\n",
502 |        "                    <span class=\"n\">class_name</span> <span class=\"o\">=</span> <span class=\"n\">element_obj</span><span class=\"o\">.</span><span class=\"n\">find</span><span class=\"p\">(</span><span class=\"s1\">&#39;name&#39;</span><span class=\"p\">)</span><span class=\"o\">.</span><span class=\"n\">text</span>\n",
503 |        "                    <span class=\"k\">if</span> <span class=\"n\">class_name</span> <span class=\"ow\">not</span> <span class=\"ow\">in</span> <span class=\"n\">classes_count</span><span class=\"p\">:</span>\n",
504 |        "                        <span class=\"n\">classes_count</span><span class=\"p\">[</span><span class=\"n\">class_name</span><span class=\"p\">]</span> <span class=\"o\">=</span> <span class=\"mi\">1</span>\n",
505 |        "                    <span class=\"k\">else</span><span class=\"p\">:</span>\n",
506 |        "                        <span class=\"n\">classes_count</span><span class=\"p\">[</span><span class=\"n\">class_name</span><span class=\"p\">]</span> <span class=\"o\">+=</span> <span class=\"mi\">1</span>\n",
507 |        "\n",
508 |        "                    <span class=\"k\">if</span> <span class=\"n\">class_name</span> <span class=\"ow\">not</span> <span class=\"ow\">in</span> <span class=\"n\">class_mapping</span><span class=\"p\">:</span>\n",
509 |        "                        <span class=\"n\">class_mapping</span><span class=\"p\">[</span><span class=\"n\">class_name</span><span class=\"p\">]</span> <span class=\"o\">=</span> <span class=\"nb\">len</span><span class=\"p\">(</span><span class=\"n\">class_mapping</span><span class=\"p\">)</span>\n",
510 |        "\n",
511 |        "                    <span class=\"n\">obj_bbox</span> <span class=\"o\">=</span> <span class=\"n\">element_obj</span><span class=\"o\">.</span><span class=\"n\">find</span><span class=\"p\">(</span><span class=\"s1\">&#39;bndbox&#39;</span><span class=\"p\">)</span>\n",
512 |        "                    <span class=\"n\">x1</span> <span class=\"o\">=</span> <span class=\"nb\">int</span><span class=\"p\">(</span><span class=\"nb\">round</span><span class=\"p\">(</span><span class=\"nb\">float</span><span class=\"p\">(</span><span class=\"n\">obj_bbox</span><span class=\"o\">.</span><span class=\"n\">find</span><span class=\"p\">(</span><span class=\"s1\">&#39;xmin&#39;</span><span class=\"p\">)</span><span class=\"o\">.</span><span class=\"n\">text</span><span class=\"p\">)))</span>\n",
513 |        "                    <span class=\"n\">y1</span> <span class=\"o\">=</span> <span class=\"nb\">int</span><span class=\"p\">(</span><span class=\"nb\">round</span><span class=\"p\">(</span><span class=\"nb\">float</span><span class=\"p\">(</span><span class=\"n\">obj_bbox</span><span class=\"o\">.</span><span class=\"n\">find</span><span class=\"p\">(</span><span class=\"s1\">&#39;ymin&#39;</span><span class=\"p\">)</span><span class=\"o\">.</span><span class=\"n\">text</span><span class=\"p\">)))</span>\n",
514 |        "                    <span class=\"n\">x2</span> <span class=\"o\">=</span> <span class=\"nb\">int</span><span class=\"p\">(</span><span class=\"nb\">round</span><span class=\"p\">(</span><span class=\"nb\">float</span><span class=\"p\">(</span><span class=\"n\">obj_bbox</span><span class=\"o\">.</span><span class=\"n\">find</span><span class=\"p\">(</span><span class=\"s1\">&#39;xmax&#39;</span><span class=\"p\">)</span><span class=\"o\">.</span><span class=\"n\">text</span><span class=\"p\">)))</span>\n",
515 |        "                    <span class=\"n\">y2</span> <span class=\"o\">=</span> <span class=\"nb\">int</span><span class=\"p\">(</span><span class=\"nb\">round</span><span class=\"p\">(</span><span class=\"nb\">float</span><span class=\"p\">(</span><span class=\"n\">obj_bbox</span><span class=\"o\">.</span><span class=\"n\">find</span><span class=\"p\">(</span><span class=\"s1\">&#39;ymax&#39;</span><span class=\"p\">)</span><span class=\"o\">.</span><span class=\"n\">text</span><span class=\"p\">)))</span>\n",
516 |        "                    <span class=\"n\">difficulty</span> <span class=\"o\">=</span> <span class=\"nb\">int</span><span class=\"p\">(</span><span class=\"n\">element_obj</span><span class=\"o\">.</span><span class=\"n\">find</span><span class=\"p\">(</span><span class=\"s1\">&#39;difficult&#39;</span><span class=\"p\">)</span><span class=\"o\">.</span><span class=\"n\">text</span><span class=\"p\">)</span> <span class=\"o\">==</span> <span class=\"mi\">1</span>\n",
517 |        "                    <span class=\"n\">annotation_data</span><span class=\"p\">[</span><span class=\"s1\">&#39;bboxes&#39;</span><span class=\"p\">]</span><span class=\"o\">.</span><span class=\"n\">append</span><span class=\"p\">(</span>\n",
518 |        "                        <span class=\"p\">{</span><span class=\"s1\">&#39;class&#39;</span><span class=\"p\">:</span> <span class=\"n\">class_name</span><span class=\"p\">,</span> <span class=\"s1\">&#39;x1&#39;</span><span class=\"p\">:</span> <span class=\"n\">x1</span><span class=\"p\">,</span> <span class=\"s1\">&#39;x2&#39;</span><span class=\"p\">:</span> <span class=\"n\">x2</span><span class=\"p\">,</span> <span class=\"s1\">&#39;y1&#39;</span><span class=\"p\">:</span> <span class=\"n\">y1</span><span class=\"p\">,</span> <span class=\"s1\">&#39;y2&#39;</span><span class=\"p\">:</span> <span class=\"n\">y2</span><span class=\"p\">,</span> <span class=\"s1\">&#39;difficult&#39;</span><span class=\"p\">:</span> <span class=\"n\">difficulty</span><span class=\"p\">})</span>\n",
519 |        "                <span class=\"n\">all_imgs</span><span class=\"o\">.</span><span class=\"n\">append</span><span class=\"p\">(</span><span class=\"n\">annotation_data</span><span class=\"p\">)</span>\n",
520 |        "\n",
521 |        "                <span class=\"k\">if</span> <span class=\"n\">visualise</span><span class=\"p\">:</span>\n",
522 |        "                    <span class=\"n\">img</span> <span class=\"o\">=</span> <span class=\"n\">cv2</span><span class=\"o\">.</span><span class=\"n\">imread</span><span class=\"p\">(</span><span class=\"n\">annotation_data</span><span class=\"p\">[</span><span class=\"s1\">&#39;filepath&#39;</span><span class=\"p\">])</span>\n",
523 |        "                    <span class=\"k\">for</span> <span class=\"n\">bbox</span> <span class=\"ow\">in</span> <span class=\"n\">annotation_data</span><span class=\"p\">[</span><span class=\"s1\">&#39;bboxes&#39;</span><span class=\"p\">]:</span>\n",
524 |        "                        <span class=\"n\">cv2</span><span class=\"o\">.</span><span class=\"n\">rectangle</span><span class=\"p\">(</span><span class=\"n\">img</span><span class=\"p\">,</span> <span class=\"p\">(</span><span class=\"n\">bbox</span><span class=\"p\">[</span><span class=\"s1\">&#39;x1&#39;</span><span class=\"p\">],</span> <span class=\"n\">bbox</span><span class=\"p\">[</span><span class=\"s1\">&#39;y1&#39;</span><span class=\"p\">]),</span> <span class=\"p\">(</span><span class=\"n\">bbox</span><span class=\"p\">[</span>\n",
525 |        "                                        <span class=\"s1\">&#39;x2&#39;</span><span class=\"p\">],</span> <span class=\"n\">bbox</span><span class=\"p\">[</span><span class=\"s1\">&#39;y2&#39;</span><span class=\"p\">]),</span> <span class=\"p\">(</span><span class=\"mi\">0</span><span class=\"p\">,</span> <span class=\"mi\">0</span><span class=\"p\">,</span> <span class=\"mi\">255</span><span class=\"p\">))</span>\n",
526 |        "                    <span class=\"n\">cv2</span><span class=\"o\">.</span><span class=\"n\">imshow</span><span class=\"p\">(</span><span class=\"s1\">&#39;img&#39;</span><span class=\"p\">,</span> <span class=\"n\">img</span><span class=\"p\">)</span>\n",
527 |        "                    <span class=\"n\">cv2</span><span class=\"o\">.</span><span class=\"n\">waitKey</span><span class=\"p\">(</span><span class=\"mi\">0</span><span class=\"p\">)</span>\n",
528 |        "\n",
529 |        "            <span class=\"k\">except</span> <span class=\"ne\">Exception</span> <span class=\"k\">as</span> <span class=\"n\">e</span><span class=\"p\">:</span>\n",
530 |        "                <span class=\"k\">print</span><span class=\"p\">(</span><span class=\"n\">e</span><span class=\"p\">)</span>\n",
531 |        "                <span class=\"k\">continue</span>\n",
532 |        "                \n",
533 |        "    <span class=\"k\">print</span><span class=\"p\">(</span><span class=\"s2\">&quot;Parsing annotation files Finished without error!&quot;</span><span class=\"p\">)</span>\n",
534 |        "    <span class=\"k\">return</span> <span class=\"n\">all_imgs</span><span class=\"p\">,</span> <span class=\"n\">classes_count</span><span class=\"p\">,</span> <span class=\"n\">class_mapping</span>\n",
535 |        "</pre></div>\n",
536 |        "\n",
537 |        "<h4>code cell</h4>\n",
538 |        "<div class=\"highlight\"><pre><span></span><span class=\"n\">all_imgs</span><span class=\"p\">,</span> <span class=\"n\">classes_count</span><span class=\"p\">,</span> <span class=\"n\">class_mapping</span> <span class=\"o\">=</span> <span class=\"n\">get_data</span><span class=\"p\">(</span><span class=\"s1\">&#39;/home/abanihi/Documents/deep-data/VOCdevkit/&#39;</span><span class=\"p\">)</span>\n",
539 |        "</pre></div>\n",
540 |        "\n",
541 |        "<h4>code cell</h4>\n",
542 |        "<div class=\"highlight\"><pre><span></span><span class=\"n\">classes_count</span>\n",
543 |        "</pre></div>\n",
544 |        "\n",
545 |        "<h4>code cell</h4>\n",
546 |        "<div class=\"highlight\"><pre><span></span><span class=\"n\">class_mapping</span>\n",
547 |        "</pre></div>\n",
548 |        "\n",
549 |        "<h4>code cell</h4>\n",
550 |        "<div class=\"highlight\"><pre><span></span><span class=\"nb\">type</span><span class=\"p\">(</span><span class=\"n\">all_imgs</span><span class=\"p\">)</span>\n",
551 |        "</pre></div>\n",
552 |        "\n",
553 |        "<h4>code cell</h4>\n",
554 |        "<div class=\"highlight\"><pre><span></span><span class=\"n\">all_imgs</span><span class=\"p\">[</span><span class=\"mi\">0</span><span class=\"p\">]</span>\n",
555 |        "</pre></div>\n",
556 |        "\n",
557 |        "<h4>code cell</h4>\n",
558 |        "<div class=\"highlight\"><pre><span></span><span class=\"k\">if</span> <span class=\"s1\">&#39;bg&#39;</span> <span class=\"ow\">not</span> <span class=\"ow\">in</span> <span class=\"n\">classes_count</span><span class=\"p\">:</span>\n",
559 |        "    <span class=\"n\">classes_count</span><span class=\"p\">[</span><span class=\"s1\">&#39;bg&#39;</span><span class=\"p\">]</span> <span class=\"o\">=</span> <span class=\"mi\">0</span>\n",
560 |        "    <span class=\"n\">class_mapping</span><span class=\"p\">[</span><span class=\"s1\">&#39;bg&#39;</span><span class=\"p\">]</span> <span class=\"o\">=</span> <span class=\"nb\">len</span><span class=\"p\">(</span><span class=\"n\">class_mapping</span><span class=\"p\">)</span>\n",
561 |        "</pre></div>\n",
562 |        "\n",
563 |        "<h4>code cell</h4>\n",
564 |        "<div class=\"highlight\"><pre><span></span><span class=\"n\">classes_count</span>\n",
565 |        "</pre></div>\n",
566 |        "\n",
567 |        "<h4>code cell</h4>\n",
568 |        "<div class=\"highlight\"><pre><span></span>\n",
569 |        "</pre></div>\n"
570 |       ],
571 |       "text/plain": [
572 |        "<IPython.core.display.HTML object>"
573 |       ]
574 |      },
575 |      "metadata": {},
576 |      "output_type": "display_data"
577 |     }
578 |    ],
579 |    "source": [
580 |     "show_notebook(\"data/pascal_voc_parser.ipynb\")"
581 |    ]
582 |   }
583 |  ],
584 |  "metadata": {
585 |   "kernelspec": {
586 |    "display_name": "Python 3",
587 |    "language": "python",
588 |    "name": "python3"
589 |   },
590 |   "language_info": {
591 |    "codemirror_mode": {
592 |     "name": "ipython",
593 |     "version": 3
594 |    },
595 |    "file_extension": ".py",
596 |    "mimetype": "text/x-python",
597 |    "name": "python",
598 |    "nbconvert_exporter": "python",
599 |    "pygments_lexer": "ipython3",
600 |    "version": "3.6.2"
601 |   },
602 |   "toc": {
603 |    "nav_menu": {},
604 |    "number_sections": true,
605 |    "sideBar": true,
606 |    "skip_h1_title": false,
607 |    "toc_cell": true,
608 |    "toc_position": {},
609 |    "toc_section_display": "block",
610 |    "toc_window_display": true
611 |   }
612 |  },
613 |  "nbformat": 4,
614 |  "nbformat_minor": 2
615 | }
616 | 


--------------------------------------------------------------------------------
/keras_frcnn/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/andersy005/keras-faster-rcnn/fbceef68d390cca3ee1e77c26189b6b72968448e/keras_frcnn/__init__.py


--------------------------------------------------------------------------------
/keras_frcnn/fixed_batch_normalization.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "markdown",
  5 |    "metadata": {},
  6 |    "source": [
  7 |     "Reference: https://github.com/yhenon/keras-frcnn/blob/master/keras_frcnn/FixedBatchNormalization.py"
  8 |    ]
  9 |   },
 10 |   {
 11 |    "cell_type": "code",
 12 |    "execution_count": 1,
 13 |    "metadata": {},
 14 |    "outputs": [
 15 |     {
 16 |      "name": "stderr",
 17 |      "output_type": "stream",
 18 |      "text": [
 19 |       "Using TensorFlow backend.\n"
 20 |      ]
 21 |     }
 22 |    ],
 23 |    "source": [
 24 |     "from keras.engine import Layer, InputSpec\n",
 25 |     "from keras import initializers, regularizers\n",
 26 |     "from keras import backend as K"
 27 |    ]
 28 |   },
 29 |   {
 30 |    "cell_type": "code",
 31 |    "execution_count": 3,
 32 |    "metadata": {},
 33 |    "outputs": [],
 34 |    "source": [
 35 |     "class FixedBatchNormalization(Layer):\n",
 36 |     "    \n",
 37 |     "    def __init__(self, epsilon=1e-3, axis=-1,\n",
 38 |     "                 weights=None, beta_init='zero', gamma_init='one',\n",
 39 |     "                 gamma_regularizer=None, beta_regularizer=None, **kwargs):\n",
 40 |     "        \n",
 41 |     "        self.supports_masking = True\n",
 42 |     "        self.beta_init = initializers.get(beta_init)\n",
 43 |     "        self.gamma_init = initializers.get(gamma_init)\n",
 44 |     "        self.epsilon = epsilon\n",
 45 |     "        self.axis = axis\n",
 46 |     "        self.gamma_regularizer = regularizers.get(gamma_regularizer)\n",
 47 |     "        self.beta_regularizer = regularizers.get(beta_regularizer)\n",
 48 |     "        self.initial_weights = weights\n",
 49 |     "        super(FixedBatchNormalization, self).__init__(**kwargs)\n",
 50 |     "        \n",
 51 |     "    def build(self, input_shape):\n",
 52 |     "        self.input_spec = [InputSpec(shape=input_shape)]\n",
 53 |     "        shape = (input_shape[self.axis], )\n",
 54 |     "        \n",
 55 |     "        self.gamma = self.add_weight(shape,\n",
 56 |     "                                     initializer = self.gamma_init,\n",
 57 |     "                                     regularizer = self.gamma_regularizer,\n",
 58 |     "                                     name = '{}_gamma'.format(self.name),\n",
 59 |     "                                     trainable = False)\n",
 60 |     "        self.beta = self.add_weight(shape,\n",
 61 |     "                                    initializer = self.beta_init,\n",
 62 |     "                                    regularizer = self.beta_regularizer,\n",
 63 |     "                                    name = '{}_beta'.format(self.name),\n",
 64 |     "                                    trainable = False)\n",
 65 |     "        \n",
 66 |     "        self.running_mean = self.add_weight(shape,\n",
 67 |     "                                           initializer = 'zero',\n",
 68 |     "                                           name = '{}_running_mean'.format(self.name),\n",
 69 |     "                                           trainable = False)\n",
 70 |     "        \n",
 71 |     "        self.running_std = self.add_weight(shape,\n",
 72 |     "                                           initializer = 'one',\n",
 73 |     "                                          name = '{}_running_std'.format(self.name),\n",
 74 |     "                                          trainable = False)\n",
 75 |     "        \n",
 76 |     "        if self.initial_weights is not None:\n",
 77 |     "            self.set_weights(self.initial_weights)\n",
 78 |     "            del self.initial_weights\n",
 79 |     "            \n",
 80 |     "        self.built = True\n",
 81 |     "        \n",
 82 |     "    def call(self, x, mask=None):\n",
 83 |     "        \n",
 84 |     "        assert self.built, 'Layer must be built before being called'\n",
 85 |     "        input_shape = K.int_shape(x)\n",
 86 |     "        \n",
 87 |     "        reduction_axes = list(range(len(input_shape)))\n",
 88 |     "        del reduction_axes[self.axis]\n",
 89 |     "        \n",
 90 |     "        broadcast_shape = [1] * len(input_shape)\n",
 91 |     "        broadcast_shape[self.axis] = input_shape[self.axis]\n",
 92 |     "        \n",
 93 |     "        if sorted(reduction_axes) == list(range(K.ndim(x)))[:-1]:\n",
 94 |     "            x_normed = K.batch_normalization(\n",
 95 |     "                x, self.running_mean, self.running_std,\n",
 96 |     "                self.beta, self.gamma, epsilon=self.epsilon)\n",
 97 |     "            \n",
 98 |     "        else:\n",
 99 |     "            # need  broadcasting\n",
100 |     "            broadcast_running_mean = K.reshape(self.running_mean, broadcast_shape)\n",
101 |     "            broadcast_running_std  = K.reshape(self.running_std, broadcast_shape)\n",
102 |     "            broadcast_beta = K.reshape(self.beta, broadcast_shape)\n",
103 |     "            broadcast_gamma = K.reshape(self.gamma, broadcast_shape)\n",
104 |     "            x_normed = K.batch_normalization(\n",
105 |     "                x, broadcast_running_mean, broadcast_running_std,\n",
106 |     "                broadcast_beta, broadcast_gamma, epsilon=self.epsilon)\n",
107 |     "            \n",
108 |     "        return x_normed\n",
109 |     "    \n",
110 |     "    def get_config(self):\n",
111 |     "        \n",
112 |     "        config = {'epsilon': self.epsilon,\n",
113 |     "                  'axis': self.axis,\n",
114 |     "                  'gamma_regularizer': self.gamma_regularizer.get_config() if self.gamma_regularizer else None,\n",
115 |     "                  'beta_regularizer': self.beta_regularizer.get_config() if self.beta_regularizer else None}\n",
116 |     "        \n",
117 |     "        base_config = super(FixedBatchNormalization, self).get_config()\n",
118 |     "        return dict(list(base_config.items()) + list(config.items()))"
119 |    ]
120 |   }
121 |  ],
122 |  "metadata": {
123 |   "kernelspec": {
124 |    "display_name": "Python 3",
125 |    "language": "python",
126 |    "name": "python3"
127 |   },
128 |   "language_info": {
129 |    "codemirror_mode": {
130 |     "name": "ipython",
131 |     "version": 3
132 |    },
133 |    "file_extension": ".py",
134 |    "mimetype": "text/x-python",
135 |    "name": "python",
136 |    "nbconvert_exporter": "python",
137 |    "pygments_lexer": "ipython3",
138 |    "version": "3.6.3"
139 |   }
140 |  },
141 |  "nbformat": 4,
142 |  "nbformat_minor": 2
143 | }
144 | 


--------------------------------------------------------------------------------
/keras_frcnn/resnet.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "markdown",
  5 |    "metadata": {},
  6 |    "source": [
  7 |     "# ResNet50 model for Keras.\n",
  8 |     " Reference:\n",
  9 |     "- [Deep Residual Learning for Image Recognition](https://arxiv.org/abs/1512.03385) Adapted from code contributed by BigMoyan\n",
 10 |     "- https://github.com/yhenon/keras-frcnn/blob/master/keras_frcnn/resnet.py"
 11 |    ]
 12 |   },
 13 |   {
 14 |    "cell_type": "code",
 15 |    "execution_count": 1,
 16 |    "metadata": {},
 17 |    "outputs": [
 18 |     {
 19 |      "name": "stderr",
 20 |      "output_type": "stream",
 21 |      "text": [
 22 |       "Using TensorFlow backend.\n"
 23 |      ]
 24 |     }
 25 |    ],
 26 |    "source": [
 27 |     "from __future__ import print_function\n",
 28 |     "from __future__ import absolute_import\n",
 29 |     "import sys\n",
 30 |     "import os\n",
 31 |     "from themachine.nbfinder import NotebookFinder\n",
 32 |     "sys.meta_path.append(NotebookFinder())\n",
 33 |     "from keras.layers import Input, Add, Dense, Activation, Flatten, Convolution2D, MaxPooling2D, ZeroPadding2D, \\\n",
 34 |     "    AveragePooling2D, TimeDistributed\n",
 35 |     "\n",
 36 |     "from keras import backend as K\n",
 37 |     "\n",
 38 |     "#from keras_frcnn.RoiPoolingConv import RoiPoolingConv\n",
 39 |     "#from keras_frcnn.FixedBatchNormalization import FixedBatchNormalization\n"
 40 |    ]
 41 |   },
 42 |   {
 43 |    "cell_type": "code",
 44 |    "execution_count": 2,
 45 |    "metadata": {},
 46 |    "outputs": [
 47 |     {
 48 |      "name": "stdout",
 49 |      "output_type": "stream",
 50 |      "text": [
 51 |       "importing Jupyter notebook from roi_pooling_conv.ipynb\n",
 52 |       "importing Jupyter notebook from fixed_batch_normalization.ipynb\n"
 53 |      ]
 54 |     }
 55 |    ],
 56 |    "source": [
 57 |     "from roi_pooling_conv import RoiPoolingConv\n",
 58 |     "from fixed_batch_normalization import FixedBatchNormalization"
 59 |    ]
 60 |   },
 61 |   {
 62 |    "cell_type": "code",
 63 |    "execution_count": 8,
 64 |    "metadata": {},
 65 |    "outputs": [],
 66 |    "source": [
 67 |     "def get_weight_path():\n",
 68 |     "    if K.image_dim_ordering() == 'th':\n",
 69 |     "        return 'resnet50_weights_th_dim_ordering_th_kernels_notop.h5'\n",
 70 |     "    else:\n",
 71 |     "        return 'resnet50_weights_tf_dim_ordering_tf_kernels.h5'\n",
 72 |     "    \n",
 73 |     "def get_img_output_length(width, height):\n",
 74 |     "    def get_output_length(input_length):\n",
 75 |     "        # zero_pad\n",
 76 |     "        input_length += 6\n",
 77 |     "        \n",
 78 |     "        # apply 4 strided convolutions\n",
 79 |     "        filter_sizes = [7, 3, 1, 1]\n",
 80 |     "        \n",
 81 |     "        stride = 2\n",
 82 |     "        \n",
 83 |     "        for filter_size in filter_sizes:\n",
 84 |     "            input_length = (input_length - filter_size + stride) // stride\n",
 85 |     "            \n",
 86 |     "        return input_length\n",
 87 |     "    \n",
 88 |     "    return get_output_length(width), get_output_length(height)\n"
 89 |    ]
 90 |   },
 91 |   {
 92 |    "cell_type": "code",
 93 |    "execution_count": 7,
 94 |    "metadata": {},
 95 |    "outputs": [],
 96 |    "source": [
 97 |     "def identity_block(input_tensor, kernel_size, filters, stage, block, trainable=True):\n",
 98 |     "\n",
 99 |     "    nb_filter1, nb_filter2, nb_filter3 = filters\n",
100 |     "    \n",
101 |     "    if K.image_dim_ordering() == 'tf':\n",
102 |     "        bn_axis = 3\n",
103 |     "    else:\n",
104 |     "        bn_axis = 1\n",
105 |     "\n",
106 |     "    conv_name_base = 'res' + str(stage) + block + '_branch'\n",
107 |     "    bn_name_base = 'bn' + str(stage) + block + '_branch'\n",
108 |     "\n",
109 |     "    x = Convolution2D(nb_filter1, (1, 1), name=conv_name_base + '2a', trainable=trainable)(input_tensor)\n",
110 |     "    x = FixedBatchNormalization(axis=bn_axis, name=bn_name_base + '2a')(x)\n",
111 |     "    x = Activation('relu')(x)\n",
112 |     "\n",
113 |     "    x = Convolution2D(nb_filter2, (kernel_size, kernel_size), padding='same', name=conv_name_base + '2b', trainable=trainable)(x)\n",
114 |     "    x = FixedBatchNormalization(axis=bn_axis, name=bn_name_base + '2b')(x)\n",
115 |     "    x = Activation('relu')(x)\n",
116 |     "\n",
117 |     "    x = Convolution2D(nb_filter3, (1, 1), name=conv_name_base + '2c', trainable=trainable)(x)\n",
118 |     "    x = FixedBatchNormalization(axis=bn_axis, name=bn_name_base + '2c')(x)\n",
119 |     "\n",
120 |     "    x = Add()([x, input_tensor])\n",
121 |     "    x = Activation('relu')(x)\n",
122 |     "    return x\n",
123 |     "\n",
124 |     "\n",
125 |     "def identity_block_td(input_tensor, kernel_size, filters, stage, block, trainable=True):\n",
126 |     "\n",
127 |     "    # identity block time distributed\n",
128 |     "\n",
129 |     "    nb_filter1, nb_filter2, nb_filter3 = filters\n",
130 |     "    if K.image_dim_ordering() == 'tf':\n",
131 |     "        bn_axis = 3\n",
132 |     "    else:\n",
133 |     "        bn_axis = 1\n",
134 |     "\n",
135 |     "    conv_name_base = 'res' + str(stage) + block + '_branch'\n",
136 |     "    bn_name_base = 'bn' + str(stage) + block + '_branch'\n",
137 |     "\n",
138 |     "    x = TimeDistributed(Convolution2D(nb_filter1, (1, 1), trainable=trainable, kernel_initializer='normal'), name=conv_name_base + '2a')(input_tensor)\n",
139 |     "    x = TimeDistributed(FixedBatchNormalization(axis=bn_axis), name=bn_name_base + '2a')(x)\n",
140 |     "    x = Activation('relu')(x)\n",
141 |     "\n",
142 |     "    x = TimeDistributed(Convolution2D(nb_filter2, (kernel_size, kernel_size), trainable=trainable, kernel_initializer='normal',padding='same'), name=conv_name_base + '2b')(x)\n",
143 |     "    x = TimeDistributed(FixedBatchNormalization(axis=bn_axis), name=bn_name_base + '2b')(x)\n",
144 |     "    x = Activation('relu')(x)\n",
145 |     "\n",
146 |     "    x = TimeDistributed(Convolution2D(nb_filter3, (1, 1), trainable=trainable, kernel_initializer='normal'), name=conv_name_base + '2c')(x)\n",
147 |     "    x = TimeDistributed(FixedBatchNormalization(axis=bn_axis), name=bn_name_base + '2c')(x)\n",
148 |     "\n",
149 |     "    x = Add()([x, input_tensor])\n",
150 |     "    x = Activation('relu')(x)\n",
151 |     "\n",
152 |     "    return x\n"
153 |    ]
154 |   },
155 |   {
156 |    "cell_type": "code",
157 |    "execution_count": 10,
158 |    "metadata": {},
159 |    "outputs": [],
160 |    "source": [
161 |     "def conv_block(input_tensor, kernel_size, filters, stage, block, strides=(2, 2), trainable=True):\n",
162 |     "\n",
163 |     "    nb_filter1, nb_filter2, nb_filter3 = filters\n",
164 |     "    if K.image_dim_ordering() == 'tf':\n",
165 |     "        bn_axis = 3\n",
166 |     "    else:\n",
167 |     "        bn_axis = 1\n",
168 |     "\n",
169 |     "    conv_name_base = 'res' + str(stage) + block + '_branch'\n",
170 |     "    bn_name_base = 'bn' + str(stage) + block + '_branch'\n",
171 |     "\n",
172 |     "    x = Convolution2D(nb_filter1, (1, 1), strides=strides, name=conv_name_base + '2a', trainable=trainable)(input_tensor)\n",
173 |     "    x = FixedBatchNormalization(axis=bn_axis, name=bn_name_base + '2a')(x)\n",
174 |     "    x = Activation('relu')(x)\n",
175 |     "\n",
176 |     "    x = Convolution2D(nb_filter2, (kernel_size, kernel_size), padding='same', name=conv_name_base + '2b', trainable=trainable)(x)\n",
177 |     "    x = FixedBatchNormalization(axis=bn_axis, name=bn_name_base + '2b')(x)\n",
178 |     "    x = Activation('relu')(x)\n",
179 |     "\n",
180 |     "    x = Convolution2D(nb_filter3, (1, 1), name=conv_name_base + '2c', trainable=trainable)(x)\n",
181 |     "    x = FixedBatchNormalization(axis=bn_axis, name=bn_name_base + '2c')(x)\n",
182 |     "\n",
183 |     "    shortcut = Convolution2D(nb_filter3, (1, 1), strides=strides, name=conv_name_base + '1', trainable=trainable)(input_tensor)\n",
184 |     "    shortcut = FixedBatchNormalization(axis=bn_axis, name=bn_name_base + '1')(shortcut)\n",
185 |     "\n",
186 |     "    x = Add()([x, shortcut])\n",
187 |     "    x = Activation('relu')(x)\n",
188 |     "    return x\n",
189 |     "\n",
190 |     "\n",
191 |     "def conv_block_td(input_tensor, kernel_size, filters, stage, block, input_shape, strides=(2, 2), trainable=True):\n",
192 |     "\n",
193 |     "    # conv block time distributed\n",
194 |     "\n",
195 |     "    nb_filter1, nb_filter2, nb_filter3 = filters\n",
196 |     "    if K.image_dim_ordering() == 'tf':\n",
197 |     "        bn_axis = 3\n",
198 |     "    else:\n",
199 |     "        bn_axis = 1\n",
200 |     "\n",
201 |     "    conv_name_base = 'res' + str(stage) + block + '_branch'\n",
202 |     "    bn_name_base = 'bn' + str(stage) + block + '_branch'\n",
203 |     "\n",
204 |     "    x = TimeDistributed(Convolution2D(nb_filter1, (1, 1), strides=strides, trainable=trainable, kernel_initializer='normal'), input_shape=input_shape, name=conv_name_base + '2a')(input_tensor)\n",
205 |     "    x = TimeDistributed(FixedBatchNormalization(axis=bn_axis), name=bn_name_base + '2a')(x)\n",
206 |     "    x = Activation('relu')(x)\n",
207 |     "\n",
208 |     "    x = TimeDistributed(Convolution2D(nb_filter2, (kernel_size, kernel_size), padding='same', trainable=trainable, kernel_initializer='normal'), name=conv_name_base + '2b')(x)\n",
209 |     "    x = TimeDistributed(FixedBatchNormalization(axis=bn_axis), name=bn_name_base + '2b')(x)\n",
210 |     "    x = Activation('relu')(x)\n",
211 |     "\n",
212 |     "    x = TimeDistributed(Convolution2D(nb_filter3, (1, 1), kernel_initializer='normal'), name=conv_name_base + '2c', trainable=trainable)(x)\n",
213 |     "    x = TimeDistributed(FixedBatchNormalization(axis=bn_axis), name=bn_name_base + '2c')(x)\n",
214 |     "\n",
215 |     "    shortcut = TimeDistributed(Convolution2D(nb_filter3, (1, 1), strides=strides, trainable=trainable, kernel_initializer='normal'), name=conv_name_base + '1')(input_tensor)\n",
216 |     "    shortcut = TimeDistributed(FixedBatchNormalization(axis=bn_axis), name=bn_name_base + '1')(shortcut)\n",
217 |     "\n",
218 |     "    x = Add()([x, shortcut])\n",
219 |     "    x = Activation('relu')(x)\n",
220 |     "    return x"
221 |    ]
222 |   },
223 |   {
224 |    "cell_type": "code",
225 |    "execution_count": 11,
226 |    "metadata": {},
227 |    "outputs": [],
228 |    "source": [
229 |     "def nn_base(input_tensor=None, trainable=False):\n",
230 |     "\n",
231 |     "    # Determine proper input shape\n",
232 |     "    if K.image_dim_ordering() == 'th':\n",
233 |     "        input_shape = (3, None, None)\n",
234 |     "    else:\n",
235 |     "        input_shape = (None, None, 3)\n",
236 |     "\n",
237 |     "    if input_tensor is None:\n",
238 |     "        img_input = Input(shape=input_shape)\n",
239 |     "    else:\n",
240 |     "        if not K.is_keras_tensor(input_tensor):\n",
241 |     "            img_input = Input(tensor=input_tensor, shape=input_shape)\n",
242 |     "        else:\n",
243 |     "            img_input = input_tensor\n",
244 |     "\n",
245 |     "    if K.image_dim_ordering() == 'tf':\n",
246 |     "        bn_axis = 3\n",
247 |     "    else:\n",
248 |     "        bn_axis = 1\n",
249 |     "\n",
250 |     "    x = ZeroPadding2D((3, 3))(img_input)\n",
251 |     "\n",
252 |     "    x = Convolution2D(64, (7, 7), strides=(2, 2), name='conv1', trainable = trainable)(x)\n",
253 |     "    x = FixedBatchNormalization(axis=bn_axis, name='bn_conv1')(x)\n",
254 |     "    x = Activation('relu')(x)\n",
255 |     "    x = MaxPooling2D((3, 3), strides=(2, 2))(x)\n",
256 |     "\n",
257 |     "    x = conv_block(x, 3, [64, 64, 256], stage=2, block='a', strides=(1, 1), trainable = trainable)\n",
258 |     "    x = identity_block(x, 3, [64, 64, 256], stage=2, block='b', trainable = trainable)\n",
259 |     "    x = identity_block(x, 3, [64, 64, 256], stage=2, block='c', trainable = trainable)\n",
260 |     "\n",
261 |     "    x = conv_block(x, 3, [128, 128, 512], stage=3, block='a', trainable = trainable)\n",
262 |     "    x = identity_block(x, 3, [128, 128, 512], stage=3, block='b', trainable = trainable)\n",
263 |     "    x = identity_block(x, 3, [128, 128, 512], stage=3, block='c', trainable = trainable)\n",
264 |     "    x = identity_block(x, 3, [128, 128, 512], stage=3, block='d', trainable = trainable)\n",
265 |     "\n",
266 |     "    x = conv_block(x, 3, [256, 256, 1024], stage=4, block='a', trainable = trainable)\n",
267 |     "    x = identity_block(x, 3, [256, 256, 1024], stage=4, block='b', trainable = trainable)\n",
268 |     "    x = identity_block(x, 3, [256, 256, 1024], stage=4, block='c', trainable = trainable)\n",
269 |     "    x = identity_block(x, 3, [256, 256, 1024], stage=4, block='d', trainable = trainable)\n",
270 |     "    x = identity_block(x, 3, [256, 256, 1024], stage=4, block='e', trainable = trainable)\n",
271 |     "    x = identity_block(x, 3, [256, 256, 1024], stage=4, block='f', trainable = trainable)\n",
272 |     "\n",
273 |     "    return x\n",
274 |     "\n"
275 |    ]
276 |   },
277 |   {
278 |    "cell_type": "code",
279 |    "execution_count": 12,
280 |    "metadata": {},
281 |    "outputs": [],
282 |    "source": [
283 |     "def classifier_layers(x, input_shape, trainable=False):\n",
284 |     "\n",
285 |     "    # compile times on theano tend to be very high, so we use smaller ROI pooling regions to workaround\n",
286 |     "    # (hence a smaller stride in the region that follows the ROI pool)\n",
287 |     "    if K.backend() == 'tensorflow':\n",
288 |     "        x = conv_block_td(x, 3, [512, 512, 2048], stage=5, block='a', input_shape=input_shape, strides=(2, 2), trainable=trainable)\n",
289 |     "    elif K.backend() == 'theano':\n",
290 |     "        x = conv_block_td(x, 3, [512, 512, 2048], stage=5, block='a', input_shape=input_shape, strides=(1, 1), trainable=trainable)\n",
291 |     "\n",
292 |     "    x = identity_block_td(x, 3, [512, 512, 2048], stage=5, block='b', trainable=trainable)\n",
293 |     "    x = identity_block_td(x, 3, [512, 512, 2048], stage=5, block='c', trainable=trainable)\n",
294 |     "    x = TimeDistributed(AveragePooling2D((7, 7)), name='avg_pool')(x)\n",
295 |     "\n",
296 |     "    return x"
297 |    ]
298 |   },
299 |   {
300 |    "cell_type": "code",
301 |    "execution_count": 13,
302 |    "metadata": {},
303 |    "outputs": [],
304 |    "source": [
305 |     "def rpn(base_layers,num_anchors):\n",
306 |     "\n",
307 |     "    x = Convolution2D(512, (3, 3), padding='same', activation='relu', kernel_initializer='normal', name='rpn_conv1')(base_layers)\n",
308 |     "\n",
309 |     "    x_class = Convolution2D(num_anchors, (1, 1), activation='sigmoid', kernel_initializer='uniform', name='rpn_out_class')(x)\n",
310 |     "    x_regr = Convolution2D(num_anchors * 4, (1, 1), activation='linear', kernel_initializer='zero', name='rpn_out_regress')(x)\n",
311 |     "\n",
312 |     "    return [x_class, x_regr, base_layers]\n",
313 |     "\n",
314 |     "def classifier(base_layers, input_rois, num_rois, nb_classes = 21, trainable=False):\n",
315 |     "\n",
316 |     "    # compile times on theano tend to be very high, so we use smaller ROI pooling regions to workaround\n",
317 |     "\n",
318 |     "    if K.backend() == 'tensorflow':\n",
319 |     "        pooling_regions = 14\n",
320 |     "        input_shape = (num_rois,14,14,1024)\n",
321 |     "    elif K.backend() == 'theano':\n",
322 |     "        pooling_regions = 7\n",
323 |     "        input_shape = (num_rois,1024,7,7)\n",
324 |     "\n",
325 |     "    out_roi_pool = RoiPoolingConv(pooling_regions, num_rois)([base_layers, input_rois])\n",
326 |     "    out = classifier_layers(out_roi_pool, input_shape=input_shape, trainable=True)\n",
327 |     "\n",
328 |     "    out = TimeDistributed(Flatten())(out)\n",
329 |     "\n",
330 |     "    out_class = TimeDistributed(Dense(nb_classes, activation='softmax', kernel_initializer='zero'), name='dense_class_{}'.format(nb_classes))(out)\n",
331 |     "    # note: no regression target for bg class\n",
332 |     "    out_regr = TimeDistributed(Dense(4 * (nb_classes-1), activation='linear', kernel_initializer='zero'), name='dense_regress_{}'.format(nb_classes))(out)\n",
333 |     "    return [out_class, out_regr]"
334 |    ]
335 |   }
336 |  ],
337 |  "metadata": {
338 |   "kernelspec": {
339 |    "display_name": "Python 3",
340 |    "language": "python",
341 |    "name": "python3"
342 |   },
343 |   "language_info": {
344 |    "codemirror_mode": {
345 |     "name": "ipython",
346 |     "version": 3
347 |    },
348 |    "file_extension": ".py",
349 |    "mimetype": "text/x-python",
350 |    "name": "python",
351 |    "nbconvert_exporter": "python",
352 |    "pygments_lexer": "ipython3",
353 |    "version": "3.6.3"
354 |   }
355 |  },
356 |  "nbformat": 4,
357 |  "nbformat_minor": 2
358 | }
359 | 


--------------------------------------------------------------------------------
/keras_frcnn/roi_pooling_conv.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "markdown",
  5 |    "metadata": {},
  6 |    "source": [
  7 |     "Reference: https://github.com/yhenon/keras-frcnn/blob/master/keras_frcnn/RoiPoolingConv.py"
  8 |    ]
  9 |   },
 10 |   {
 11 |    "cell_type": "code",
 12 |    "execution_count": 1,
 13 |    "metadata": {},
 14 |    "outputs": [
 15 |     {
 16 |      "name": "stderr",
 17 |      "output_type": "stream",
 18 |      "text": [
 19 |       "Using TensorFlow backend.\n"
 20 |      ]
 21 |     }
 22 |    ],
 23 |    "source": [
 24 |     "from keras.engine.topology import Layer\n",
 25 |     "import keras.backend as K"
 26 |    ]
 27 |   },
 28 |   {
 29 |    "cell_type": "code",
 30 |    "execution_count": 2,
 31 |    "metadata": {},
 32 |    "outputs": [],
 33 |    "source": [
 34 |     "if K.backend() == 'tensorflow':\n",
 35 |     "    import tensorflow as tf"
 36 |    ]
 37 |   },
 38 |   {
 39 |    "cell_type": "code",
 40 |    "execution_count": 5,
 41 |    "metadata": {},
 42 |    "outputs": [],
 43 |    "source": [
 44 |     "class RoiPoolingConv(Layer):\n",
 45 |     "    \"\"\"ROI pooling layer for 2D inputs.\n",
 46 |     "    See Spatial Pyramid pooling in Deep Convolutional Networks for Visual\n",
 47 |     "    Recognition, K. He, X. Zhang, S. Ren, J. Sun\n",
 48 |     "    \n",
 49 |     "    # Arguments\n",
 50 |     "    pool_size: int\n",
 51 |     "            size of pooling region to use, pool_size = 7 will result in a 7x7 region.\n",
 52 |     "    num_rois: number of regions of interest to be used.\n",
 53 |     "    \n",
 54 |     "    # Input shape\n",
 55 |     "        list of two tensors [X_img, X_roi] with shapes:\n",
 56 |     "        \n",
 57 |     "    X_img: 4D tensor with shape:\n",
 58 |     "        `(1, channels, rows, cols)` if dim_ordering='th'\n",
 59 |     "        or\n",
 60 |     "        `(1, rows, cols, channels)` if dim_ordering='tf'.\n",
 61 |     "    X_roi: 3D tensor with shape:\n",
 62 |     "        `(1, num_rois, 4)` list of rois, with ordering (x, y, w, h)\n",
 63 |     "        \n",
 64 |     "    # Output shape\n",
 65 |     "        5D tensor with shape:\n",
 66 |     "        `(1, num_rois, channels, pool_size, pool_size)` if dim_ordering='th' or `(1, num_rois, pool_size, pool_size, channels)` if dim_ordering='tf'.\n",
 67 |     "    \"\"\"\n",
 68 |     "    \n",
 69 |     "    def __init__(self, pool_size, num_rois, **kwargs):\n",
 70 |     "        \n",
 71 |     "        self.dim_ordering = K.image_dim_ordering()\n",
 72 |     "        assert self.dim_ordering in {'tf', 'th'}, 'dim_ordering must be in {tf, th}'\n",
 73 |     "        \n",
 74 |     "        self.pool_size = pool_size\n",
 75 |     "        self.num_rois = num_rois\n",
 76 |     "        \n",
 77 |     "        super(RoiPoolingConv, self).__init__(**kwargs)\n",
 78 |     "        \n",
 79 |     "        \n",
 80 |     "    def build(self, input_shape):\n",
 81 |     "        if self.dim_ordering == 'th':\n",
 82 |     "            self.nb_channels = input_shape[0][1]\n",
 83 |     "            \n",
 84 |     "        elif self.dim_ordering == 'tf':\n",
 85 |     "            self.nb_channels = input_shape[0][3]\n",
 86 |     "            \n",
 87 |     "    def compute_output_shape(self, input_shape):\n",
 88 |     "        if self.dim_ordering == 'th':\n",
 89 |     "            return None, self.num_rois, self.nb_channels, self.pool_size, self.pool_size\n",
 90 |     "        \n",
 91 |     "        else:\n",
 92 |     "            return None, self.num_rois, self.pool_size, self.pool_size, self.nb_channels\n",
 93 |     "        \n",
 94 |     "     \n",
 95 |     "    def call(self, x, mask=None):\n",
 96 |     "        assert(len(x) == 2)\n",
 97 |     "        \n",
 98 |     "        img = x[0]\n",
 99 |     "        rois = x[1]\n",
100 |     "        \n",
101 |     "        input_shape = K.shape(img)\n",
102 |     "        \n",
103 |     "        outputs = []\n",
104 |     "        \n",
105 |     "        for roi_idx in range(self.num_rois):\n",
106 |     "            \n",
107 |     "            x = rois[0, roi_idx, 0]\n",
108 |     "            y = rois[0, roi_idx, 1]\n",
109 |     "            w = rois[0, roi_idx, 2]\n",
110 |     "            h = rois[0, roi_idx, 3]\n",
111 |     "            \n",
112 |     "            row_length = w / float(self.pool_size)\n",
113 |     "            col_length = h / float(self.pool_size)\n",
114 |     "            \n",
115 |     "            num_pool_regions = self.pool_size\n",
116 |     "            \n",
117 |     "            #NOTE: the RoiPooling implementation differs between theano and tensorflow due to the lack of a resize op\n",
118 |     "            # in theano. The theano implementation is much less efficient and leads to long compile times\n",
119 |     "\n",
120 |     "            if self.dim_ordering == 'th':\n",
121 |     "                for jy in range(num_pool_regions):\n",
122 |     "                    for ix in range(num_pool_regions):\n",
123 |     "                        x1 = x + ix * row_length\n",
124 |     "                        x2 = x1 + row_length\n",
125 |     "                        y1 = y + jy * col_length\n",
126 |     "                        y2 = y1 + col_length\n",
127 |     "                        \n",
128 |     "                        x1 = K.cast(x1, 'int32')\n",
129 |     "                        x2 = K.cast(x2, 'int32')\n",
130 |     "                        y1 = K.cast(y1, 'int32')\n",
131 |     "                        y2 = K.cast(y2, 'int32')\n",
132 |     "                        \n",
133 |     "                        x2 = x1 + K.maximum(1, x2-x1)\n",
134 |     "                        y2 = y1 + K.maximum(1, y2-y1)\n",
135 |     "                        \n",
136 |     "                        new_shape = [input_shape[0], input_shape[1],\n",
137 |     "                                    y2 - y1, x2 - x1]\n",
138 |     "                        \n",
139 |     "                        x_crop = img[:, :, y1:y2, x1:x2]\n",
140 |     "                        xm = K.reshape(x_crop, new_shape)\n",
141 |     "                        pooled_val = K.max(xm, axis=(2, 3))\n",
142 |     "                        outputs.append(pooled_val)\n",
143 |     "                        \n",
144 |     "            elif self.dim_ordering == 'tf':\n",
145 |     "                x = K.cast(x, 'int32')\n",
146 |     "                y = K.cast(y, 'int32')\n",
147 |     "                w = K.cast(w, 'int32')\n",
148 |     "                h = K.cast(h, 'int32')\n",
149 |     "                \n",
150 |     "                rs = tf.image.resize_images(img[:, y:y+h, x:x+w, :], (self.pool_size, self.pool_size))\n",
151 |     "                outputs.append(rs)\n",
152 |     "                \n",
153 |     "        final_output = K.concatenate(outputs, axis=0)\n",
154 |     "        final_output = K.reshape(final_output, (1, self.num_rois, self.pool_size, self.pool_size, self.nb_channels))\n",
155 |     "        \n",
156 |     "        if self.dim_ordering == 'th':\n",
157 |     "            final_output = K.permute_dimensions(final_output, (0, 1, 4, 2, 3))\n",
158 |     "            \n",
159 |     "        else:\n",
160 |     "            final_output = K.permute_dimensions(final_output, (0, 1, 2, 3, 4))\n",
161 |     "            \n",
162 |     "        return final_output\n",
163 |     "        \n",
164 |     "    def get_config(self):\n",
165 |     "        config = {'pool_size': self.pool_size,\n",
166 |     "                  'num_rois': self.num_rois}\n",
167 |     "        base_config = super(RoiPoolingConv, self).get_config()\n",
168 |     "        return dict(list(base_config.items()) + list(config.items()))"
169 |    ]
170 |   }
171 |  ],
172 |  "metadata": {
173 |   "kernelspec": {
174 |    "display_name": "Python 3",
175 |    "language": "python",
176 |    "name": "python3"
177 |   },
178 |   "language_info": {
179 |    "codemirror_mode": {
180 |     "name": "ipython",
181 |     "version": 3
182 |    },
183 |    "file_extension": ".py",
184 |    "mimetype": "text/x-python",
185 |    "name": "python",
186 |    "nbconvert_exporter": "python",
187 |    "pygments_lexer": "ipython3",
188 |    "version": "3.6.3"
189 |   }
190 |  },
191 |  "nbformat": 4,
192 |  "nbformat_minor": 2
193 | }
194 | 


--------------------------------------------------------------------------------
/keras_frcnn/vgg.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "markdown",
  5 |    "metadata": {},
  6 |    "source": [
  7 |     "# VGG16 model for Keras.\n",
  8 |     "\n",
  9 |     "Reference\n",
 10 |     "- [Very Deep Convolutional Networks for Large-Scale Image Recognition](https://arxiv.org/abs/1409.1556)"
 11 |    ]
 12 |   },
 13 |   {
 14 |    "cell_type": "code",
 15 |    "execution_count": 1,
 16 |    "metadata": {},
 17 |    "outputs": [
 18 |     {
 19 |      "name": "stderr",
 20 |      "output_type": "stream",
 21 |      "text": [
 22 |       "Using TensorFlow backend.\n"
 23 |      ]
 24 |     }
 25 |    ],
 26 |    "source": [
 27 |     "import warnings\n",
 28 |     "warnings.filterwarnings('ignore')\n",
 29 |     "from __future__ import print_function\n",
 30 |     "from __future__ import absolute_import\n",
 31 |     "import sys\n",
 32 |     "import os\n",
 33 |     "from themachine.nbfinder import NotebookFinder\n",
 34 |     "sys.meta_path.append(NotebookFinder())\n",
 35 |     "from keras.models import Model\n",
 36 |     "from keras.layers import Flatten, Dense, Input, Conv2D, MaxPooling2D, Dropout\n",
 37 |     "from keras.layers import GlobalAveragePooling2D, GlobalMaxPooling2D, TimeDistributed\n",
 38 |     "from keras.engine.topology import get_source_inputs\n",
 39 |     "from keras.utils import layer_utils\n",
 40 |     "from keras.utils.data_utils import get_file\n",
 41 |     "from keras import backend as K"
 42 |    ]
 43 |   },
 44 |   {
 45 |    "cell_type": "code",
 46 |    "execution_count": 2,
 47 |    "metadata": {},
 48 |    "outputs": [
 49 |     {
 50 |      "name": "stdout",
 51 |      "output_type": "stream",
 52 |      "text": [
 53 |       "importing Jupyter notebook from roi_pooling_conv.ipynb\n"
 54 |      ]
 55 |     }
 56 |    ],
 57 |    "source": [
 58 |     "from roi_pooling_conv import RoiPoolingConv"
 59 |    ]
 60 |   },
 61 |   {
 62 |    "cell_type": "code",
 63 |    "execution_count": 4,
 64 |    "metadata": {},
 65 |    "outputs": [],
 66 |    "source": [
 67 |     "def get_weight_path():\n",
 68 |     "    if K.image_dim_ordering() == 'th':\n",
 69 |     "        print('pretrained weights not available for VGG with theano backend')\n",
 70 |     "        return\n",
 71 |     "    else:\n",
 72 |     "        return 'vgg16_weights_tf_dim_ordering_tf_kernels.h5'\n",
 73 |     "\n",
 74 |     "\n",
 75 |     "def get_img_output_length(width, height):\n",
 76 |     "    def get_output_length(input_length):\n",
 77 |     "        return input_length//16\n",
 78 |     "\n",
 79 |     "    return get_output_length(width), get_output_length(height) "
 80 |    ]
 81 |   },
 82 |   {
 83 |    "cell_type": "code",
 84 |    "execution_count": 5,
 85 |    "metadata": {},
 86 |    "outputs": [],
 87 |    "source": [
 88 |     "def nn_base(input_tensor=None, trainable=False):\n",
 89 |     "\n",
 90 |     "\n",
 91 |     "    # Determine proper input shape\n",
 92 |     "    if K.image_dim_ordering() == 'th':\n",
 93 |     "        input_shape = (3, None, None)\n",
 94 |     "    else:\n",
 95 |     "        input_shape = (None, None, 3)\n",
 96 |     "\n",
 97 |     "    if input_tensor is None:\n",
 98 |     "        img_input = Input(shape=input_shape)\n",
 99 |     "    else:\n",
100 |     "        if not K.is_keras_tensor(input_tensor):\n",
101 |     "            img_input = Input(tensor=input_tensor, shape=input_shape)\n",
102 |     "        else:\n",
103 |     "            img_input = input_tensor\n",
104 |     "\n",
105 |     "    if K.image_dim_ordering() == 'tf':\n",
106 |     "        bn_axis = 3\n",
107 |     "    else:\n",
108 |     "        bn_axis = 1\n",
109 |     "\n",
110 |     "    # Block 1\n",
111 |     "    x = Conv2D(64, (3, 3), activation='relu', padding='same', name='block1_conv1')(img_input)\n",
112 |     "    x = Conv2D(64, (3, 3), activation='relu', padding='same', name='block1_conv2')(x)\n",
113 |     "    x = MaxPooling2D((2, 2), strides=(2, 2), name='block1_pool')(x)\n",
114 |     "\n",
115 |     "    # Block 2\n",
116 |     "    x = Conv2D(128, (3, 3), activation='relu', padding='same', name='block2_conv1')(x)\n",
117 |     "    x = Conv2D(128, (3, 3), activation='relu', padding='same', name='block2_conv2')(x)\n",
118 |     "    x = MaxPooling2D((2, 2), strides=(2, 2), name='block2_pool')(x)\n",
119 |     "\n",
120 |     "    # Block 3\n",
121 |     "    x = Conv2D(256, (3, 3), activation='relu', padding='same', name='block3_conv1')(x)\n",
122 |     "    x = Conv2D(256, (3, 3), activation='relu', padding='same', name='block3_conv2')(x)\n",
123 |     "    x = Conv2D(256, (3, 3), activation='relu', padding='same', name='block3_conv3')(x)\n",
124 |     "    x = MaxPooling2D((2, 2), strides=(2, 2), name='block3_pool')(x)\n",
125 |     "\n",
126 |     "    # Block 4\n",
127 |     "    x = Conv2D(512, (3, 3), activation='relu', padding='same', name='block4_conv1')(x)\n",
128 |     "    x = Conv2D(512, (3, 3), activation='relu', padding='same', name='block4_conv2')(x)\n",
129 |     "    x = Conv2D(512, (3, 3), activation='relu', padding='same', name='block4_conv3')(x)\n",
130 |     "    x = MaxPooling2D((2, 2), strides=(2, 2), name='block4_pool')(x)\n",
131 |     "\n",
132 |     "    # Block 5\n",
133 |     "    x = Conv2D(512, (3, 3), activation='relu', padding='same', name='block5_conv1')(x)\n",
134 |     "    x = Conv2D(512, (3, 3), activation='relu', padding='same', name='block5_conv2')(x)\n",
135 |     "    x = Conv2D(512, (3, 3), activation='relu', padding='same', name='block5_conv3')(x)\n",
136 |     "    # x = MaxPooling2D((2, 2), strides=(2, 2), name='block5_pool')(x)\n",
137 |     "\n",
138 |     "    return x\n"
139 |    ]
140 |   },
141 |   {
142 |    "cell_type": "code",
143 |    "execution_count": 6,
144 |    "metadata": {},
145 |    "outputs": [],
146 |    "source": [
147 |     "def rpn(base_layers, num_anchors):\n",
148 |     "\n",
149 |     "    x = Conv2D(512, (3, 3), padding='same', activation='relu', kernel_initializer='normal', name='rpn_conv1')(base_layers)\n",
150 |     "\n",
151 |     "    x_class = Conv2D(num_anchors, (1, 1), activation='sigmoid', kernel_initializer='uniform', name='rpn_out_class')(x)\n",
152 |     "    x_regr = Conv2D(num_anchors * 4, (1, 1), activation='linear', kernel_initializer='zero', name='rpn_out_regress')(x)\n",
153 |     "\n",
154 |     "    return [x_class, x_regr, base_layers]\n",
155 |     "\n",
156 |     "\n",
157 |     "def classifier(base_layers, input_rois, num_rois, nb_classes = 21, trainable=False):\n",
158 |     "\n",
159 |     "    # both backends use 7x7 ROI pooling regions here; only the axis order of input_shape differs (channels-last for tensorflow, channels-first for theano)\n",
160 |     "\n",
161 |     "    if K.backend() == 'tensorflow':\n",
162 |     "        pooling_regions = 7\n",
163 |     "        input_shape = (num_rois,7,7,512)\n",
164 |     "    elif K.backend() == 'theano':\n",
165 |     "        pooling_regions = 7\n",
166 |     "        input_shape = (num_rois,512,7,7)\n",
167 |     "\n",
168 |     "    out_roi_pool = RoiPoolingConv(pooling_regions, num_rois)([base_layers, input_rois])\n",
169 |     "\n",
170 |     "    out = TimeDistributed(Flatten(name='flatten'))(out_roi_pool)\n",
171 |     "    out = TimeDistributed(Dense(4096, activation='relu', name='fc1'))(out)\n",
172 |     "    out = TimeDistributed(Dropout(0.5))(out)\n",
173 |     "    out = TimeDistributed(Dense(4096, activation='relu', name='fc2'))(out)\n",
174 |     "    out = TimeDistributed(Dropout(0.5))(out)\n",
175 |     "\n",
176 |     "    out_class = TimeDistributed(Dense(nb_classes, activation='softmax', kernel_initializer='zero'), name='dense_class_{}'.format(nb_classes))(out)\n",
177 |     "    # note: no regression target for bg class\n",
178 |     "    out_regr = TimeDistributed(Dense(4 * (nb_classes-1), activation='linear', kernel_initializer='zero'), name='dense_regress_{}'.format(nb_classes))(out)\n",
179 |     "\n",
180 |     "    return [out_class, out_regr]\n",
181 |     "\n"
182 |    ]
183 |   }
184 |  ],
185 |  "metadata": {
186 |   "kernelspec": {
187 |    "display_name": "Python 3",
188 |    "language": "python",
189 |    "name": "python3"
190 |   },
191 |   "language_info": {
192 |    "codemirror_mode": {
193 |     "name": "ipython",
194 |     "version": 3
195 |    },
196 |    "file_extension": ".py",
197 |    "mimetype": "text/x-python",
198 |    "name": "python",
199 |    "nbconvert_exporter": "python",
200 |    "pygments_lexer": "ipython3",
201 |    "version": "3.6.3"
202 |   }
203 |  },
204 |  "nbformat": 4,
205 |  "nbformat_minor": 2
206 | }
207 | 


--------------------------------------------------------------------------------