├── .gitignore
├── LICENSE
├── README.md
├── __init__.py
├── data
│   ├── __init__.py
│   ├── data_augment.ipynb
│   ├── data_generators.ipynb
│   ├── pascal_voc_parser-pyfile.py
│   └── pascal_voc_parser.ipynb
├── importing notebooks.ipynb
└── keras_frcnn
    ├── __init__.py
    ├── fixed_batch_normalization.ipynb
    ├── resnet.ipynb
    ├── roi_pooling_conv.ipynb
    └── vgg.ipynb
/.gitignore:
--------------------------------------------------------------------------------
1 | # Byte-compiled / optimized / DLL files
2 | __pycache__/
3 | *.py[cod]
4 | *$py.class
5 |
6 | # C extensions
7 | *.so
8 |
9 | # Distribution / packaging
10 | .Python
11 | env/
12 | build/
13 | develop-eggs/
14 | dist/
15 | downloads/
16 | eggs/
17 | .eggs/
18 | lib/
19 | lib64/
20 | parts/
21 | sdist/
22 | var/
23 | wheels/
24 | *.egg-info/
25 | .installed.cfg
26 | *.egg
27 |
28 | # PyInstaller
29 | # Usually these files are written by a python script from a template
30 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
31 | *.manifest
32 | *.spec
33 |
34 | # Installer logs
35 | pip-log.txt
36 | pip-delete-this-directory.txt
37 |
38 | # Unit test / coverage reports
39 | htmlcov/
40 | .tox/
41 | .coverage
42 | .coverage.*
43 | .cache
44 | nosetests.xml
45 | coverage.xml
46 | *.cover
47 | .hypothesis/
48 |
49 | # Translations
50 | *.mo
51 | *.pot
52 |
53 | # Django stuff:
54 | *.log
55 | local_settings.py
56 |
57 | # Flask stuff:
58 | instance/
59 | .webassets-cache
60 |
61 | # Scrapy stuff:
62 | .scrapy
63 |
64 | # Sphinx documentation
65 | docs/_build/
66 |
67 | # PyBuilder
68 | target/
69 |
70 | # Jupyter Notebook
71 | .ipynb_checkpoints
72 |
73 | # pyenv
74 | .python-version
75 |
76 | # celery beat schedule file
77 | celerybeat-schedule
78 |
79 | # SageMath parsed files
80 | *.sage.py
81 |
82 | # dotenv
83 | .env
84 |
85 | # virtualenv
86 | .venv
87 | venv/
88 | ENV/
89 |
90 | # Spyder project settings
91 | .spyderproject
92 | .spyproject
93 |
94 | # Rope project settings
95 | .ropeproject
96 |
97 | # mkdocs documentation
98 | /site
99 |
100 | # mypy
101 | .mypy_cache/
102 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) 2017 Anderson Banihirwe
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Keras - Faster R-CNN: Towards Real-Time Object Detection with Region Proposal Networks
2 |
3 | A Keras implementation of Shaoqing Ren et al., [Faster R-CNN: Towards Real-Time Object Detection with Region Proposal Networks](https://arxiv.org/abs/1506.01497).
4 |
5 |
6 |
7 |
--------------------------------------------------------------------------------
/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/andersy005/keras-faster-rcnn/fbceef68d390cca3ee1e77c26189b6b72968448e/__init__.py
--------------------------------------------------------------------------------
/data/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/andersy005/keras-faster-rcnn/fbceef68d390cca3ee1e77c26189b6b72968448e/data/__init__.py
--------------------------------------------------------------------------------
/data/data_augment.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "code",
5 | "execution_count": 1,
6 | "metadata": {},
7 | "outputs": [],
8 | "source": [
9 | "import cv2\n",
10 | "import numpy as np\n",
11 | "import copy"
12 | ]
13 | },
14 | {
15 | "cell_type": "code",
16 | "execution_count": 2,
17 | "metadata": {},
18 | "outputs": [],
19 | "source": [
20 | "def augment(img_data, config, augment=True):\n",
21 | " assert 'filepath' in img_data\n",
22 | " assert 'bboxes' in img_data\n",
23 | " assert 'width' in img_data\n",
24 | " assert 'height' in img_data\n",
25 | " \n",
26 | " img_data_aug = copy.deepcopy(img_data)\n",
27 | " \n",
28 | " img = cv2.imread(img_data_aug['filepath'])\n",
29 | " \n",
30 | " if augment:\n",
31 | " rows, cols = img.shape[:2]\n",
32 | " \n",
33 | " if config.use_horizontal_flips and np.random.randint(0, 2) == 0:\n",
34 | " img = cv2.flip(img, 1)\n",
35 | " for bbox in img_data_aug['bboxes']:\n",
36 | " x1 = bbox['x1']\n",
37 | " x2 = bbox['x2']\n",
38 | " bbox['x2'] = cols - x1\n",
39 | " bbox['x1'] = cols - x2\n",
40 | " \n",
41 | " if config.use_vertical_flips and np.random.randint(0, 2) == 0:\n",
42 | " img = cv2.flip(img, 0)\n",
43 | " for bbox in img_data_aug['bboxes']:\n",
44 | " y1 = bbox['y1']\n",
45 | " y2 = bbox['y2']\n",
46 | " bbox['y2'] = rows - y1\n",
47 | " bbox['y1'] = rows - y2\n",
48 | " \n",
49 | " if config.rot_90:\n",
50 | " angle = np.random.choice([0, 90, 180, 270], 1)[0]\n",
51 | " if angle == 270:\n",
52 | " img = np.transpose(img, (1, 0, 2))\n",
53 | " img = cv2.flip(img, 0)\n",
54 | " elif angle == 180:\n",
55 | " img = cv2.flip(img, -1)\n",
56 | " elif angle == 90:\n",
57 | " img = np.transpose(img, (1, 0, 2))\n",
58 | " img = cv2.flip(img, 1)\n",
59 | " \n",
60 | " elif angle == 0:\n",
61 | " pass\n",
62 | " \n",
63 | " for bbox in img_data_aug['bboxes']:\n",
64 | " x1 = bbox['x1']\n",
65 | " x2 = bbox['x2']\n",
66 | " y1 = bbox['y1']\n",
67 | " y2 = bbox['y2']\n",
68 | " \n",
69 | " if angle == 270:\n",
70 | " bbox['x1'] = y1\n",
71 | " bbox['x2'] = y2\n",
72 | " bbox['y1'] = cols - x2\n",
73 | " bbox['y2'] = cols - x1\n",
74 | " \n",
75 | " elif angle == 180:\n",
76 | " bbox['x2'] = cols - x1\n",
77 | " bbox['x1'] = cols - x2\n",
78 | " bbox['y2'] = rows - y1\n",
79 | " bbox['y1'] = rows - y2\n",
80 | " \n",
81 | " elif angle == 90:\n",
82 | " bbox['x1'] = rows - y2\n",
83 | " bbox['x2'] = rows - y1\n",
84 | " bbox['y1'] = x1\n",
85 | " bbox['y2'] = x2 \n",
86 | " \n",
87 | " elif angle == 0:\n",
88 | " pass\n",
89 | " \n",
90 | " img_data_aug['width'] = img.shape[1]\n",
91 | " img_data_aug['height'] = img.shape[0]\n",
92 | " return img_data_aug, img\n",
93 | " "
94 | ]
95 | }
96 | ],
97 | "metadata": {
98 | "kernelspec": {
99 | "display_name": "Python 3",
100 | "language": "python",
101 | "name": "python3"
102 | },
103 | "language_info": {
104 | "codemirror_mode": {
105 | "name": "ipython",
106 | "version": 3
107 | },
108 | "file_extension": ".py",
109 | "mimetype": "text/x-python",
110 | "name": "python",
111 | "nbconvert_exporter": "python",
112 | "pygments_lexer": "ipython3",
113 | "version": "3.6.3"
114 | }
115 | },
116 | "nbformat": 4,
117 | "nbformat_minor": 2
118 | }
119 |
--------------------------------------------------------------------------------
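A minimal usage sketch for the `augment` function defined in this notebook, assuming `augment` is in scope. `SimpleConfig` and the temporary image path are hypothetical stand-ins for the project's real config object and data; only the three flags that `augment` actually reads are supplied.

```python
# Hypothetical smoke test for augment(); SimpleConfig and the image path
# are illustrative stand-ins, not part of the repository.
import cv2
import numpy as np

class SimpleConfig:
    use_horizontal_flips = True
    use_vertical_flips = False
    rot_90 = True

# Write a synthetic 100x200 image to disk so cv2.imread() can load it.
cv2.imwrite('/tmp/sample.jpg', np.zeros((100, 200, 3), dtype=np.uint8))

img_data = {
    'filepath': '/tmp/sample.jpg',
    'width': 200,
    'height': 100,
    'bboxes': [{'class': 'person', 'x1': 10, 'y1': 20, 'x2': 60, 'y2': 80}],
}

img_data_aug, img = augment(img_data, SimpleConfig(), augment=True)
# Width/height are swapped whenever a 90/270 degree rotation was drawn.
print(img_data_aug['width'], img_data_aug['height'])
```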
/data/data_generators.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "code",
5 | "execution_count": 5,
6 | "metadata": {},
7 | "outputs": [
8 | {
9 | "name": "stdout",
10 | "output_type": "stream",
11 | "text": [
12 | "importing Jupyter notebook from data_augment.ipynb\n"
13 | ]
14 | }
15 | ],
16 | "source": [
17 | "from __future__ import absolute_import\n",
18 | "import numpy as np\n",
19 | "import cv2\n",
20 | "import random\n",
21 | "import copy\n",
22 | "import sys\n",
23 | "import os\n",
24 | "from themachine.nbfinder import NotebookFinder\n",
25 |     "sys.path.append('.')\n",
26 | "sys.meta_path.append(NotebookFinder())\n",
27 | "import data_augment\n",
28 | "import threading\n",
29 | "import itertools\n"
30 | ]
31 | },
32 | {
33 | "cell_type": "code",
34 | "execution_count": 6,
35 | "metadata": {},
36 | "outputs": [],
37 | "source": [
38 | "def union(au, bu, area_intersection):\n",
39 | " area_a = (au[2] - au[0]) * (au[3] - au[1])\n",
40 | " area_b = (bu[2] - bu[0]) * (bu[3] - bu[1])\n",
41 | " area_union = area_a + area_b - area_intersection\n",
42 |     "    return area_union"
43 | ]
44 | },
45 | {
46 | "cell_type": "code",
47 | "execution_count": 7,
48 | "metadata": {},
49 | "outputs": [],
50 | "source": [
51 | "def intersection(ai, bi):\n",
52 | " x = max(ai[0], bi[0])\n",
53 | " y = max(ai[1], bi[1])\n",
54 | " w = min(ai[2], bi[2]) - x\n",
55 | " h = min(ai[3], bi[3]) - y\n",
56 | " if w < 0 or h < 0:\n",
57 | " return 0\n",
58 | " return w*h"
59 | ]
60 | },
61 | {
62 | "cell_type": "code",
63 | "execution_count": 9,
64 | "metadata": {},
65 | "outputs": [],
66 | "source": [
67 | "def iou(a, b):\n",
68 | " # a and b should be (x1,y1,x2,y2)\n",
69 | " if a[0] >= a[2] or a[1] >= a[3] or b[0] >= b[2] or b[1] >= b[3]:\n",
70 | " return 0.0\n",
71 | " \n",
72 | " area_i = intersection(a, b)\n",
73 | " area_u = union(a, b, area_i)\n",
74 | "\n",
75 | " return float(area_i) / float(area_u + 1e-6)\n"
76 | ]
77 | },
78 | {
79 | "cell_type": "code",
80 | "execution_count": 10,
81 | "metadata": {},
82 | "outputs": [],
83 | "source": [
84 | "def get_new_img_size(width, height, img_min_side=600):\n",
85 | " if width <= height:\n",
86 | " f = float(img_min_side) / width\n",
87 | " resized_height = int(f * height)\n",
88 | " resized_width = img_min_side\n",
89 | " \n",
90 | " else:\n",
91 | " f = float(img_min_side) / height\n",
92 | " resized_width = int(f * width)\n",
93 | " resized_height = img_min_side\n",
94 | " \n",
95 | " return resized_width, resized_height"
96 | ]
97 | },
98 | {
99 | "cell_type": "code",
100 | "execution_count": null,
101 | "metadata": {},
102 | "outputs": [],
103 | "source": []
104 | }
105 | ],
106 | "metadata": {
107 | "kernelspec": {
108 | "display_name": "Python 3",
109 | "language": "python",
110 | "name": "python3"
111 | },
112 | "language_info": {
113 | "codemirror_mode": {
114 | "name": "ipython",
115 | "version": 3
116 | },
117 | "file_extension": ".py",
118 | "mimetype": "text/x-python",
119 | "name": "python",
120 | "nbconvert_exporter": "python",
121 | "pygments_lexer": "ipython3",
122 | "version": "3.6.3"
123 | }
124 | },
125 | "nbformat": 4,
126 | "nbformat_minor": 2
127 | }
128 |
--------------------------------------------------------------------------------
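A quick worked example of the overlap helpers above (with `union` returning `area_union` as fixed): two 10×10 boxes offset by 5 pixels intersect in a 5×5 region, so IoU = 25 / (100 + 100 − 25) ≈ 0.143. A self-contained sketch:

```python
# Self-contained check of the box-overlap helpers defined in this notebook.
def intersection(ai, bi):
    x = max(ai[0], bi[0])
    y = max(ai[1], bi[1])
    w = min(ai[2], bi[2]) - x
    h = min(ai[3], bi[3]) - y
    if w < 0 or h < 0:
        return 0
    return w * h

def union(au, bu, area_intersection):
    area_a = (au[2] - au[0]) * (au[3] - au[1])
    area_b = (bu[2] - bu[0]) * (bu[3] - bu[1])
    return area_a + area_b - area_intersection

def iou(a, b):
    # boxes are (x1, y1, x2, y2)
    if a[0] >= a[2] or a[1] >= a[3] or b[0] >= b[2] or b[1] >= b[3]:
        return 0.0
    area_i = intersection(a, b)
    return float(area_i) / float(union(a, b, area_i) + 1e-6)

a, b = (0, 0, 10, 10), (5, 5, 15, 15)
print(intersection(a, b))   # 25
print(union(a, b, 25))      # 175
print(round(iou(a, b), 3))  # 0.143
```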
/data/pascal_voc_parser-pyfile.py:
--------------------------------------------------------------------------------
1 | # %load ../pascal_voc_parser.py
2 | import os
3 | import cv2
4 | import xml.etree.ElementTree as ET
5 | import numpy as np
6 | def get_data(input_path):
7 | all_imgs = []
8 |
9 | classes_count = {}
10 |
11 | class_mapping = {}
12 |
13 | visualise = False
14 |
15 | data_paths = [os.path.join(input_path,s) for s in ['VOC2007', 'VOC2012']]
16 |
17 |
18 | print('Parsing annotation files')
19 |
20 | for data_path in data_paths:
21 |
22 | annot_path = os.path.join(data_path, 'Annotations')
23 | imgs_path = os.path.join(data_path, 'JPEGImages')
24 | imgsets_path_trainval = os.path.join(data_path, 'ImageSets','Main','trainval.txt')
25 | imgsets_path_test = os.path.join(data_path, 'ImageSets','Main','test.txt')
26 |
27 | trainval_files = []
28 | test_files = []
29 | try:
30 | with open(imgsets_path_trainval) as f:
31 | for line in f:
32 | trainval_files.append(line.strip() + '.jpg')
33 | except Exception as e:
34 | print(e)
35 |
36 | try:
37 | with open(imgsets_path_test) as f:
38 | for line in f:
39 | test_files.append(line.strip() + '.jpg')
40 | except Exception as e:
41 | if data_path[-7:] == 'VOC2012':
42 |                 # this is expected; most PASCAL VOC distributions don't have the test.txt file
43 | pass
44 | else:
45 | print(e)
46 |
47 | annots = [os.path.join(annot_path, s) for s in os.listdir(annot_path)]
48 | idx = 0
49 | for annot in annots:
50 | try:
51 | idx += 1
52 |
53 | et = ET.parse(annot)
54 | element = et.getroot()
55 |
56 | element_objs = element.findall('object')
57 | element_filename = element.find('filename').text
58 | element_width = int(element.find('size').find('width').text)
59 | element_height = int(element.find('size').find('height').text)
60 |
61 | if len(element_objs) > 0:
62 | annotation_data = {'filepath': os.path.join(imgs_path, element_filename), 'width': element_width,
63 | 'height': element_height, 'bboxes': []}
64 |
65 | if element_filename in trainval_files:
66 | annotation_data['imageset'] = 'trainval'
67 | elif element_filename in test_files:
68 | annotation_data['imageset'] = 'test'
69 | else:
70 | annotation_data['imageset'] = 'trainval'
71 |
72 | for element_obj in element_objs:
73 | class_name = element_obj.find('name').text
74 | if class_name not in classes_count:
75 | classes_count[class_name] = 1
76 | else:
77 | classes_count[class_name] += 1
78 |
79 | if class_name not in class_mapping:
80 | class_mapping[class_name] = len(class_mapping)
81 |
82 | obj_bbox = element_obj.find('bndbox')
83 | x1 = int(round(float(obj_bbox.find('xmin').text)))
84 | y1 = int(round(float(obj_bbox.find('ymin').text)))
85 | x2 = int(round(float(obj_bbox.find('xmax').text)))
86 | y2 = int(round(float(obj_bbox.find('ymax').text)))
87 | difficulty = int(element_obj.find('difficult').text) == 1
88 | annotation_data['bboxes'].append(
89 | {'class': class_name, 'x1': x1, 'x2': x2, 'y1': y1, 'y2': y2, 'difficult': difficulty})
90 | all_imgs.append(annotation_data)
91 |
92 | if visualise:
93 | img = cv2.imread(annotation_data['filepath'])
94 | for bbox in annotation_data['bboxes']:
95 | cv2.rectangle(img, (bbox['x1'], bbox['y1']), (bbox[
96 | 'x2'], bbox['y2']), (0, 0, 255))
97 | cv2.imshow('img', img)
98 | cv2.waitKey(0)
99 |
100 | except Exception as e:
101 | print(e)
102 | continue
103 | return all_imgs, classes_count, class_mapping
104 |
--------------------------------------------------------------------------------
/data/pascal_voc_parser.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {
6 | "toc": "true"
7 | },
8 | "source": [
9 | " # Table of Contents\n",
10 |     "<div id=\"toc\"></div>"
11 | ]
12 | },
13 | {
14 | "cell_type": "code",
15 | "execution_count": 1,
16 | "metadata": {
17 | "collapsed": true
18 | },
19 | "outputs": [],
20 | "source": [
21 | "# %load ../pascal_voc_parser.py\n",
22 | "import os\n",
23 | "import cv2\n",
24 | "import xml.etree.ElementTree as ET\n",
25 | "import numpy as np\n",
26 | "def get_data(input_path, visualise=False):\n",
27 | " \"\"\"Load data from an input file.\n",
28 | " https://github.com/yhenon/keras-frcnn/blob/master/keras_frcnn/pascal_voc_parser.py#L19\n",
29 | " Args: \n",
30 | " input_path (string) : path for the input file\n",
31 | " visualise (bool) : show images with annotation if True\n",
32 | " \n",
33 | " Returns:\n",
34 | " all_imgs (list) : list of images\n",
35 |     "        classes_count (dict) : dictionary containing class counts\n",
36 | " class_mapping (dict) : dictionary containing class mapping\n",
37 | " \"\"\"\n",
38 | " all_imgs = []\n",
39 | "\n",
40 | " classes_count = {}\n",
41 | "\n",
42 | " class_mapping = {}\n",
43 | "\n",
44 | " data_paths = [os.path.join(input_path,s) for s in ['VOC2012']]\n",
45 | "\n",
46 | "\n",
47 | " print('Parsing annotation files....')\n",
48 | "\n",
49 | " for data_path in data_paths:\n",
50 | "\n",
51 | " annot_path = os.path.join(data_path, 'Annotations')\n",
52 | " imgs_path = os.path.join(data_path, 'JPEGImages')\n",
53 | " imgsets_path_trainval = os.path.join(data_path, 'ImageSets','Main','trainval.txt')\n",
54 | " imgsets_path_test = os.path.join(data_path, 'ImageSets','Main','test.txt')\n",
55 | "\n",
56 | " trainval_files = []\n",
57 | " test_files = []\n",
58 | " try:\n",
59 | " with open(imgsets_path_trainval) as f:\n",
60 | " for line in f:\n",
61 | " trainval_files.append(line.strip() + '.jpg')\n",
62 | " except Exception as e:\n",
63 | " print(e)\n",
64 | "\n",
65 | " try:\n",
66 | " with open(imgsets_path_test) as f:\n",
67 | " for line in f:\n",
68 | " test_files.append(line.strip() + '.jpg')\n",
69 | " except Exception as e:\n",
70 | " if data_path[-7:] == 'VOC2012':\n",
71 |     "                # this is expected; most PASCAL VOC distributions don't have the test.txt file\n",
72 | " pass\n",
73 | " else:\n",
74 | " print(e)\n",
75 | "\n",
76 | " annots = [os.path.join(annot_path, s) for s in os.listdir(annot_path)]\n",
77 | " idx = 0\n",
78 | " for annot in annots:\n",
79 | " try:\n",
80 | " idx += 1\n",
81 | "\n",
82 | " et = ET.parse(annot)\n",
83 | " element = et.getroot()\n",
84 | "\n",
85 | " element_objs = element.findall('object')\n",
86 | " element_filename = element.find('filename').text\n",
87 | " element_width = int(element.find('size').find('width').text)\n",
88 | " element_height = int(element.find('size').find('height').text)\n",
89 | "\n",
90 | " if len(element_objs) > 0:\n",
91 | " annotation_data = {'filepath': os.path.join(imgs_path, element_filename), 'width': element_width,\n",
92 | " 'height': element_height, 'bboxes': []}\n",
93 | "\n",
94 | " if element_filename in trainval_files:\n",
95 | " annotation_data['imageset'] = 'trainval'\n",
96 | " elif element_filename in test_files:\n",
97 | " annotation_data['imageset'] = 'test'\n",
98 | " else:\n",
99 | " annotation_data['imageset'] = 'trainval'\n",
100 | "\n",
101 | " for element_obj in element_objs:\n",
102 | " class_name = element_obj.find('name').text\n",
103 | " if class_name not in classes_count:\n",
104 | " classes_count[class_name] = 1\n",
105 | " else:\n",
106 | " classes_count[class_name] += 1\n",
107 | "\n",
108 | " if class_name not in class_mapping:\n",
109 | " class_mapping[class_name] = len(class_mapping)\n",
110 | "\n",
111 | " obj_bbox = element_obj.find('bndbox')\n",
112 | " x1 = int(round(float(obj_bbox.find('xmin').text)))\n",
113 | " y1 = int(round(float(obj_bbox.find('ymin').text)))\n",
114 | " x2 = int(round(float(obj_bbox.find('xmax').text)))\n",
115 | " y2 = int(round(float(obj_bbox.find('ymax').text)))\n",
116 | " difficulty = int(element_obj.find('difficult').text) == 1\n",
117 | " annotation_data['bboxes'].append(\n",
118 | " {'class': class_name, 'x1': x1, 'x2': x2, 'y1': y1, 'y2': y2, 'difficult': difficulty})\n",
119 | " all_imgs.append(annotation_data)\n",
120 | "\n",
121 | " if visualise:\n",
122 | " img = cv2.imread(annotation_data['filepath'])\n",
123 | " for bbox in annotation_data['bboxes']:\n",
124 | " cv2.rectangle(img, (bbox['x1'], bbox['y1']), (bbox[\n",
125 | " 'x2'], bbox['y2']), (0, 0, 255))\n",
126 | " cv2.imshow('img', img)\n",
127 | " cv2.waitKey(0)\n",
128 | "\n",
129 | " except Exception as e:\n",
130 | " print(e)\n",
131 | " continue\n",
132 | " if 'bg' not in classes_count:\n",
133 | " classes_count['bg'] = 0\n",
134 | " class_mapping['bg'] = len(class_mapping)\n",
135 | " \n",
136 | " \n",
137 | " print(\"Parsing annotation files Finished without error!\")\n",
138 | " return all_imgs, classes_count, class_mapping\n"
139 | ]
140 | },
141 | {
142 | "cell_type": "code",
143 | "execution_count": null,
144 | "metadata": {},
145 | "outputs": [],
146 | "source": [
147 | "if __name__ == '__main__':\n",
148 | " pass\n",
149 | " #all_imgs, classes_count, class_mapping = get_data('/home/abanihi/Documents/deep-data/VOCdevkit/')\n",
150 | " #print(classes_count)\n",
151 | " #print(class_mapping)\n",
152 | " "
153 | ]
154 | },
155 | {
156 | "cell_type": "code",
157 | "execution_count": null,
158 | "metadata": {
159 | "collapsed": true
160 | },
161 | "outputs": [],
162 | "source": []
163 | }
164 | ],
165 | "metadata": {
166 | "kernelspec": {
167 | "display_name": "Python 3",
168 | "language": "python",
169 | "name": "python3"
170 | },
171 | "language_info": {
172 | "codemirror_mode": {
173 | "name": "ipython",
174 | "version": 3
175 | },
176 | "file_extension": ".py",
177 | "mimetype": "text/x-python",
178 | "name": "python",
179 | "nbconvert_exporter": "python",
180 | "pygments_lexer": "ipython3",
181 | "version": "3.6.3"
182 | },
183 | "toc": {
184 | "nav_menu": {},
185 | "number_sections": true,
186 | "sideBar": true,
187 | "skip_h1_title": false,
188 | "toc_cell": true,
189 | "toc_position": {},
190 | "toc_section_display": "block",
191 | "toc_window_display": false
192 | }
193 | },
194 | "nbformat": 4,
195 | "nbformat_minor": 2
196 | }
197 |
--------------------------------------------------------------------------------
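For reference, a usage sketch for `get_data`, assuming the function above is in scope and a VOCdevkit tree exists at the placeholder path; the commented entry structure is illustrative:

```python
# Hypothetical driver; the VOCdevkit path is a placeholder.
all_imgs, classes_count, class_mapping = get_data('/path/to/VOCdevkit/')

print(len(all_imgs))        # number of annotated images found
print(classes_count['bg'])  # 0 -- the background class is added with zero instances
print(class_mapping['bg'])  # background gets the last index in the mapping

# Each all_imgs entry is shaped like:
# {'filepath': '.../JPEGImages/xxx.jpg', 'width': 486, 'height': 500,
#  'imageset': 'trainval',
#  'bboxes': [{'class': 'person', 'x1': 174, 'y1': 101,
#              'x2': 349, 'y2': 351, 'difficult': False}]}
```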
/importing notebooks.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {
6 | "toc": "true"
7 | },
8 | "source": [
9 | " # Table of Contents\n",
10 | ""
11 | ]
12 | },
13 | {
14 | "cell_type": "markdown",
15 | "metadata": {},
16 | "source": [
17 | "# Importing Jupyter Notebooks as Modules"
18 | ]
19 | },
20 | {
21 | "cell_type": "markdown",
22 | "metadata": {},
23 | "source": [
24 | "http://nbviewer.jupyter.org/github/jupyter/notebook/blob/master/docs/source/examples/Notebook/Importing%20Notebooks.ipynb"
25 | ]
26 | },
27 | {
28 | "cell_type": "code",
29 | "execution_count": 1,
30 | "metadata": {
31 | "collapsed": true
32 | },
33 | "outputs": [],
34 | "source": [
35 | "import io, os, sys, types"
36 | ]
37 | },
38 | {
39 | "cell_type": "code",
40 | "execution_count": 2,
41 | "metadata": {
42 | "collapsed": true
43 | },
44 | "outputs": [],
45 | "source": [
46 | "from IPython import get_ipython\n",
47 | "from nbformat import read\n",
48 | "from IPython.core.interactiveshell import InteractiveShell"
49 | ]
50 | },
51 | {
52 | "cell_type": "markdown",
53 | "metadata": {},
54 | "source": [
55 | "Import hooks typically take the form of two objects:\n",
56 | "\n",
57 | "1. a Module **Loader**, which takes a module name (e.g. 'IPython.display'), and returns a Module\n",
58 | "2. a Module **Finder**, which figures out whether a module might exist, and tells Python what **Loader** to use\n",
59 | "\n"
60 | ]
61 | },
62 | {
63 | "cell_type": "code",
64 | "execution_count": 3,
65 | "metadata": {
66 | "collapsed": true
67 | },
68 | "outputs": [],
69 | "source": [
70 | "def find_notebook(fullname, path=None):\n",
71 | " \"\"\"find a notebook, given its fully qualified name and an optional path\n",
72 | " \n",
73 | " This turns \"foo.bar\" into \"foo/bar.ipynb\"\n",
74 | " and tries turning \"Foo_Bar\" into \"Foo Bar\" if Foo_Bar\n",
75 | " does not exist.\n",
76 | " \"\"\"\n",
77 | " name = fullname.rsplit('.', 1)[-1]\n",
78 | " if not path:\n",
79 | " path = ['']\n",
80 | " for d in path:\n",
81 | " nb_path = os.path.join(d, name + \".ipynb\")\n",
82 | " if os.path.isfile(nb_path):\n",
83 | " return nb_path\n",
84 | " # let import Notebook_Name find \"Notebook Name.ipynb\"\n",
85 | " nb_path = nb_path.replace(\"_\", \" \")\n",
86 | " if os.path.isfile(nb_path):\n",
87 | " return nb_path\n",
88 | " \n"
89 | ]
90 | },
91 | {
92 | "cell_type": "markdown",
93 | "metadata": {},
94 | "source": [
95 | "## Notebook Loader\n",
96 | "\n",
97 | "Here we have our Notebook Loader. It's actually quite simple - once we figure out the filename of the module, all it does is:\n",
98 | "\n",
99 | " 1. load the notebook document into memory\n",
100 | " 2. create an empty Module\n",
101 | " 3. execute every cell in the Module namespace\n",
102 | "\n",
103 | "Since IPython cells can have extended syntax, the IPython transform is applied to turn each of these cells into their pure-Python counterparts before executing them. If all of your notebook cells are pure-Python, this step is unnecessary.\n"
104 | ]
105 | },
106 | {
107 | "cell_type": "code",
108 | "execution_count": 4,
109 | "metadata": {
110 | "collapsed": true
111 | },
112 | "outputs": [],
113 | "source": [
114 | "class NotebookLoader(object):\n",
115 | " \"\"\"Module Loader for Jupyter Notebooks\"\"\"\n",
116 | " def __init__(self, path=None):\n",
117 | " self.shell = InteractiveShell.instance()\n",
118 | " self.path = path\n",
119 | " \n",
120 | " def load_module(self, fullname):\n",
121 | " \"\"\"import a notebook as a module\"\"\"\n",
122 | " path = find_notebook(fullname, self.path)\n",
123 | " \n",
124 | " print (\"importing Jupyter notebook from %s\" % path)\n",
125 | " \n",
126 | " # load the notebook object\n",
127 | " with io.open(path, 'r', encoding='utf-8') as f:\n",
128 | " nb = read(f, 4)\n",
129 | " \n",
130 | " \n",
131 | " # create the module and add it to sys.modules\n",
132 | " # if name in sys.modules:\n",
133 | " # return sys.modules[name]\n",
134 | " mod = types.ModuleType(fullname)\n",
135 | " mod.__file__ = path\n",
136 | " mod.__loader__ = self\n",
137 | " mod.__dict__['get_ipython'] = get_ipython\n",
138 | " sys.modules[fullname] = mod\n",
139 | " \n",
140 | " # extra work to ensure that magics that would affect the user_ns\n",
141 | " # actually affect the notebook module's ns\n",
142 | " save_user_ns = self.shell.user_ns\n",
143 | " self.shell.user_ns = mod.__dict__\n",
144 | " \n",
145 | " try:\n",
146 | " \n",
147 | " for cell in nb.cells:\n",
148 | " \n",
149 | " if cell.cell_type == 'code':\n",
150 | " # transform the input to executable Python\n",
151 | " code = self.shell.input_transformer_manager.transform_cell(cell.source)\n",
152 | " # run the code in themodule\n",
153 | " exec(code, mod.__dict__)\n",
154 | " finally:\n",
155 | " self.shell.user_ns = save_user_ns\n",
156 | " return mod\n"
157 | ]
158 | },
159 | {
160 | "cell_type": "markdown",
161 | "metadata": {},
162 | "source": [
163 | "## The Module Finder\n",
164 | "\n",
165 | "The finder is a simple object that tells you whether a name can be imported, and returns the appropriate loader. All this one does is check, when you do:\n",
166 | "\n",
167 | "```python \n",
168 | "import mynotebook\n",
169 | "```\n",
170 | "\n",
171 |    "whether ```mynotebook.ipynb``` exists. If it does, it returns a NotebookLoader.\n",
172 | "\n",
173 | "Any extra logic is just for resolving paths within packages.\n"
174 | ]
175 | },
176 | {
177 | "cell_type": "code",
178 | "execution_count": 5,
179 | "metadata": {
180 | "collapsed": true
181 | },
182 | "outputs": [],
183 | "source": [
184 | "class NotebookFinder(object):\n",
185 | " \"\"\"Module finder that locates Jupyter Notebooks\"\"\"\n",
186 | " def __init__(self):\n",
187 | " self.loaders = {}\n",
188 | " \n",
189 | " def find_module(self, fullname, path=None):\n",
190 | " nb_path = find_notebook(fullname, path)\n",
191 | " if not nb_path:\n",
192 | " return\n",
193 | " \n",
194 | " key = path\n",
195 | " if path:\n",
196 | " # lists aren't hashable\n",
197 | " key = os.path.sep.join(path)\n",
198 | " \n",
199 | " if key not in self.loaders:\n",
200 | " self.loaders[key] = NotebookLoader(path)\n",
201 | " return self.loaders[key]\n"
202 | ]
203 | },
204 | {
205 | "cell_type": "markdown",
206 | "metadata": {},
207 | "source": [
208 | "## Register the hook"
209 | ]
210 | },
211 | {
212 | "cell_type": "markdown",
213 | "metadata": {},
214 | "source": [
215 | "Now we register the NotebookFinder with ```sys.meta_path```"
216 | ]
217 | },
218 | {
219 | "cell_type": "code",
220 | "execution_count": 6,
221 | "metadata": {
222 | "collapsed": true
223 | },
224 | "outputs": [],
225 | "source": [
226 | "sys.meta_path.append(NotebookFinder())"
227 | ]
228 | },
229 | {
230 | "cell_type": "markdown",
231 | "metadata": {},
232 | "source": [
233 | "After this point, my notebooks should be importable.\n",
234 | "\n",
235 | "Let's look at what we have in the CWD:\n"
236 | ]
237 | },
238 | {
239 | "cell_type": "code",
240 | "execution_count": 7,
241 | "metadata": {},
242 | "outputs": [
243 | {
244 | "name": "stdout",
245 | "output_type": "stream",
246 | "text": [
247 | "__init__.py pascal_voc_parser.ipynb pascal_voc_parser-pyfile.py \u001b[0m\u001b[01;34m__pycache__\u001b[0m/\r\n"
248 | ]
249 | }
250 | ],
251 | "source": [
252 | "ls data/"
253 | ]
254 | },
255 | {
256 | "cell_type": "code",
257 | "execution_count": 8,
258 | "metadata": {},
259 | "outputs": [
260 | {
261 | "name": "stdout",
262 | "output_type": "stream",
263 | "text": [
264 | "importing Jupyter notebook from /home/abanihi/Documents/Github/keras-faster-rcnn/data/pascal_voc_parser.ipynb\n",
265 | "Parsing annotation files....\n",
266 | "Parsing annotation files Finished without error!\n"
267 | ]
268 | }
269 | ],
270 | "source": [
271 | "from data import pascal_voc_parser as p"
272 | ]
273 | },
274 | {
275 | "cell_type": "markdown",
276 | "metadata": {},
277 | "source": [
278 | "\n",
279 | "## Aside: displaying notebooks\n",
280 | "\n",
281 | "Here is some simple code to display the contents of a notebook with syntax highlighting, etc.\n"
282 | ]
283 | },
284 | {
285 | "cell_type": "code",
286 | "execution_count": 10,
287 | "metadata": {},
288 | "outputs": [
289 | {
290 | "data": {
291 | "text/html": [
292 | "\n",
293 | "\n"
363 | ],
364 | "text/plain": [
365 | ""
366 | ]
367 | },
368 | "metadata": {},
369 | "output_type": "display_data"
370 | }
371 | ],
372 | "source": [
373 | "\n",
374 | "\n",
375 | "from pygments import highlight\n",
376 | "from pygments.lexers import PythonLexer\n",
377 | "from pygments.formatters import HtmlFormatter\n",
378 | "\n",
379 | "from IPython.display import display, HTML\n",
380 | "\n",
381 | "formatter = HtmlFormatter()\n",
382 | "lexer = PythonLexer()\n",
383 | "\n",
384 | "# publish the CSS for pygments highlighting\n",
385 | "display(HTML(\"\"\"\n",
386 |    "<style type='text/css'>\n",
387 |    "%s\n",
388 |    "</style>\n",
389 | "\"\"\" % formatter.get_style_defs()\n",
390 | "))\n",
391 | "\n"
392 | ]
393 | },
394 | {
395 | "cell_type": "code",
396 | "execution_count": 11,
397 | "metadata": {
398 | "collapsed": true
399 | },
400 | "outputs": [],
401 | "source": [
402 | "def show_notebook(fname):\n",
403 | " \"\"\"display a short summary of the cells of a notebook\"\"\"\n",
404 | " with io.open(fname, 'r', encoding='utf-8') as f:\n",
405 | " nb = read(f, 4)\n",
406 | " html = []\n",
407 | " for cell in nb.cells:\n",
408 |     "        html.append(\"<h4>%s cell</h4>\" % cell.cell_type)\n",
409 | " if cell.cell_type == 'code':\n",
410 | " html.append(highlight(cell.source, lexer, formatter))\n",
411 | " else:\n",
412 |     "            html.append(\"<pre>%s</pre>\" % cell.source)\n",
413 | " display(HTML('\\n'.join(html)))"
414 | ]
415 | },
416 | {
417 | "cell_type": "code",
418 | "execution_count": 12,
419 | "metadata": {},
420 | "outputs": [
421 | {
422 | "data": {
423 | "text/html": [
424 |    "<h4>markdown cell</h4>\n",
425 |    "<pre> # Table of Contents\n",
426 |    "<div id=\"toc\"></div></pre>\n",
427 |    "<h4>code cell</h4>\n",
428 | "# %load ../pascal_voc_parser.py\n",
429 | "import os\n",
430 | "import cv2\n",
431 | "import xml.etree.ElementTree as ET\n",
432 | "import numpy as np\n",
433 | "def get_data(input_path, visualise=False):\n",
434 | " """Load data from an input file.\n",
435 | " https://github.com/yhenon/keras-frcnn/blob/master/keras_frcnn/pascal_voc_parser.py#L19\n",
436 | " \n",
437 | " """\n",
438 | " all_imgs = []\n",
439 | "\n",
440 | " classes_count = {}\n",
441 | "\n",
442 | " class_mapping = {}\n",
443 | "\n",
444 | " data_paths = [os.path.join(input_path,s) for s in ['VOC2012']]\n",
445 | "\n",
446 | "\n",
447 | " print('Parsing annotation files....')\n",
448 | "\n",
449 | " for data_path in data_paths:\n",
450 | "\n",
451 | " annot_path = os.path.join(data_path, 'Annotations')\n",
452 | " imgs_path = os.path.join(data_path, 'JPEGImages')\n",
453 | " imgsets_path_trainval = os.path.join(data_path, 'ImageSets','Main','trainval.txt')\n",
454 | " imgsets_path_test = os.path.join(data_path, 'ImageSets','Main','test.txt')\n",
455 | "\n",
456 | " trainval_files = []\n",
457 | " test_files = []\n",
458 | " try:\n",
459 | " with open(imgsets_path_trainval) as f:\n",
460 | " for line in f:\n",
461 | " trainval_files.append(line.strip() + '.jpg')\n",
462 | " except Exception as e:\n",
463 | " print(e)\n",
464 | "\n",
465 | " try:\n",
466 | " with open(imgsets_path_test) as f:\n",
467 | " for line in f:\n",
468 | " test_files.append(line.strip() + '.jpg')\n",
469 | " except Exception as e:\n",
470 | " if data_path[-7:] == 'VOC2012':\n",
471 |    "                # this is expected; most PASCAL VOC distributions don't have the test.txt file\n",
472 | " pass\n",
473 | " else:\n",
474 | " print(e)\n",
475 | "\n",
476 | " annots = [os.path.join(annot_path, s) for s in os.listdir(annot_path)]\n",
477 | " idx = 0\n",
478 | " for annot in annots:\n",
479 | " try:\n",
480 | " idx += 1\n",
481 | "\n",
482 | " et = ET.parse(annot)\n",
483 | " element = et.getroot()\n",
484 | "\n",
485 | " element_objs = element.findall('object')\n",
486 | " element_filename = element.find('filename').text\n",
487 | " element_width = int(element.find('size').find('width').text)\n",
488 | " element_height = int(element.find('size').find('height').text)\n",
489 | "\n",
490 | " if len(element_objs) > 0:\n",
491 | " annotation_data = {'filepath': os.path.join(imgs_path, element_filename), 'width': element_width,\n",
492 | " 'height': element_height, 'bboxes': []}\n",
493 | "\n",
494 | " if element_filename in trainval_files:\n",
495 | " annotation_data['imageset'] = 'trainval'\n",
496 | " elif element_filename in test_files:\n",
497 | " annotation_data['imageset'] = 'test'\n",
498 | " else:\n",
499 | " annotation_data['imageset'] = 'trainval'\n",
500 | "\n",
501 | " for element_obj in element_objs:\n",
502 | " class_name = element_obj.find('name').text\n",
503 | " if class_name not in classes_count:\n",
504 | " classes_count[class_name] = 1\n",
505 | " else:\n",
506 | " classes_count[class_name] += 1\n",
507 | "\n",
508 | " if class_name not in class_mapping:\n",
509 | " class_mapping[class_name] = len(class_mapping)\n",
510 | "\n",
511 | " obj_bbox = element_obj.find('bndbox')\n",
512 | " x1 = int(round(float(obj_bbox.find('xmin').text)))\n",
513 | " y1 = int(round(float(obj_bbox.find('ymin').text)))\n",
514 | " x2 = int(round(float(obj_bbox.find('xmax').text)))\n",
515 | " y2 = int(round(float(obj_bbox.find('ymax').text)))\n",
516 | " difficulty = int(element_obj.find('difficult').text) == 1\n",
517 | " annotation_data['bboxes'].append(\n",
518 | " {'class': class_name, 'x1': x1, 'x2': x2, 'y1': y1, 'y2': y2, 'difficult': difficulty})\n",
519 | " all_imgs.append(annotation_data)\n",
520 | "\n",
521 | " if visualise:\n",
522 | " img = cv2.imread(annotation_data['filepath'])\n",
523 | " for bbox in annotation_data['bboxes']:\n",
524 | " cv2.rectangle(img, (bbox['x1'], bbox['y1']), (bbox[\n",
525 | " 'x2'], bbox['y2']), (0, 0, 255))\n",
526 | " cv2.imshow('img', img)\n",
527 | " cv2.waitKey(0)\n",
528 | "\n",
529 | " except Exception as e:\n",
530 | " print(e)\n",
531 | " continue\n",
532 | " \n",
533 | " print("Parsing annotation files Finished without error!")\n",
534 | " return all_imgs, classes_count, class_mapping\n",
535 |    "</pre></div>\n",
536 | "\n",
537 |    "<h4>code cell</h4>\n",
538 | "all_imgs, classes_count, class_mapping = get_data('/home/abanihi/Documents/deep-data/VOCdevkit/')\n",
539 |    "</pre></div>\n",
540 | "\n",
541 |    "<h4>code cell</h4>\n",
542 | "classes_count\n",
543 |    "</pre></div>\n",
544 | "\n",
545 |    "<h4>code cell</h4>\n",
546 | "class_mapping\n",
547 |    "</pre></div>\n",
548 | "\n",
549 |    "<h4>code cell</h4>\n",
550 | "type(all_imgs)\n",
551 |    "</pre></div>\n",
552 | "\n",
553 |    "<h4>code cell</h4>\n",
554 | "\n",
556 | "\n",
557 |    "<h4>code cell</h4>\n",
558 | "if 'bg' not in classes_count:\n",
559 | " classes_count['bg'] = 0\n",
560 | " class_mapping['bg'] = len(class_mapping)\n",
561 |    "</pre></div>\n",
562 | "\n",
563 |    "<h4>code cell</h4>\n",
564 | "classes_count\n",
565 |    "</pre></div>\n",
566 | "\n",
567 |    "<h4>code cell</h4>\n",
568 | "\n"
570 | ],
571 | "text/plain": [
572 | ""
573 | ]
574 | },
575 | "metadata": {},
576 | "output_type": "display_data"
577 | }
578 | ],
579 | "source": [
580 | "show_notebook(\"data/pascal_voc_parser.ipynb\")"
581 | ]
582 | }
583 | ],
584 | "metadata": {
585 | "kernelspec": {
586 | "display_name": "Python 3",
587 | "language": "python",
588 | "name": "python3"
589 | },
590 | "language_info": {
591 | "codemirror_mode": {
592 | "name": "ipython",
593 | "version": 3
594 | },
595 | "file_extension": ".py",
596 | "mimetype": "text/x-python",
597 | "name": "python",
598 | "nbconvert_exporter": "python",
599 | "pygments_lexer": "ipython3",
600 | "version": "3.6.2"
601 | },
602 | "toc": {
603 | "nav_menu": {},
604 | "number_sections": true,
605 | "sideBar": true,
606 | "skip_h1_title": false,
607 | "toc_cell": true,
608 | "toc_position": {},
609 | "toc_section_display": "block",
610 | "toc_window_display": true
611 | }
612 | },
613 | "nbformat": 4,
614 | "nbformat_minor": 2
615 | }
616 |
--------------------------------------------------------------------------------
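Condensed, the import hook built in `importing notebooks.ipynb` comes down to three steps; a sketch, assuming the `NotebookFinder` class defined there is in scope:

```python
import sys

# 1. Register the finder once per interpreter session.
sys.meta_path.append(NotebookFinder())

# 2. Ordinary import statements now also consider .ipynb files:
#    'data.pascal_voc_parser' resolves to data/pascal_voc_parser.ipynb,
#    whose code cells are executed in a fresh module namespace.
from data import pascal_voc_parser as p

# 3. Top-level names defined in the notebook become module attributes.
print(p.get_data)  # <function get_data at 0x...>
```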
/keras_frcnn/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/andersy005/keras-faster-rcnn/fbceef68d390cca3ee1e77c26189b6b72968448e/keras_frcnn/__init__.py
--------------------------------------------------------------------------------
/keras_frcnn/fixed_batch_normalization.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 | "Reference: https://github.com/yhenon/keras-frcnn/blob/master/keras_frcnn/FixedBatchNormalization.py"
8 | ]
9 | },
10 | {
11 | "cell_type": "code",
12 | "execution_count": 1,
13 | "metadata": {},
14 | "outputs": [
15 | {
16 | "name": "stderr",
17 | "output_type": "stream",
18 | "text": [
19 | "Using TensorFlow backend.\n"
20 | ]
21 | }
22 | ],
23 | "source": [
24 | "from keras.engine import Layer, InputSpec\n",
25 | "from keras import initializers, regularizers\n",
26 | "from keras import backend as K"
27 | ]
28 | },
29 | {
30 | "cell_type": "code",
31 | "execution_count": 3,
32 | "metadata": {},
33 | "outputs": [],
34 | "source": [
35 | "class FixedBatchNormalization(Layer):\n",
36 | " \n",
37 | " def __init__(self, epsilon=1e-3, axis=-1,\n",
38 | " weights=None, beta_init='zero', gamma_init='one',\n",
39 | " gamma_regularizer=None, beta_regularizer=None, **kwargs):\n",
40 | " \n",
41 | " self.supports_masking = True\n",
42 | " self.beta_init = initializers.get(beta_init)\n",
43 | " self.gamma_init = initializers.get(gamma_init)\n",
44 | " self.epsilon = epsilon\n",
45 | " self.axis = axis\n",
46 | " self.gamma_regularizer = regularizers.get(gamma_regularizer)\n",
47 | " self.beta_regularizer = regularizers.get(beta_regularizer)\n",
48 | " self.initial_weights = weights\n",
49 | " super(FixedBatchNormalization, self).__init__(**kwargs)\n",
50 | " \n",
51 | " def build(self, input_shape):\n",
52 |     "        self.input_spec = [InputSpec(shape=input_shape)]\n",
53 | " shape = (input_shape[self.axis], )\n",
54 | " \n",
55 | " self.gamma = self.add_weight(shape,\n",
56 | " initializer = self.gamma_init,\n",
57 | " regularizer = self.gamma_regularizer,\n",
58 |     "                                     name = '{}_gamma'.format(self.name),\n",
59 | " trainable = False)\n",
60 | " self.beta = self.add_weight(shape,\n",
61 | " initializer = self.beta_init,\n",
62 | " regularizer = self.beta_regularizer,\n",
63 | " name = '{}_beta'.format(self.name),\n",
64 |     "                                    trainable = False)\n",
65 | " \n",
66 | " self.running_mean = self.add_weight(shape,\n",
67 | " initializer = 'zero',\n",
68 | " name = '{}_running_mean'.format(self.name),\n",
69 | " trainable = False)\n",
70 | " \n",
71 | " self.running_std = self.add_weight(shape,\n",
72 | " initializer = 'zero',\n",
73 | " name = '{}_running_std'.format(self.name),\n",
74 | " trainable = False)\n",
75 | " \n",
76 | " if self.initial_weights is not None:\n",
77 | " self.set_weights(self.initial_weights)\n",
78 | " del self.initial_weights\n",
79 | " \n",
80 | " self.built = True\n",
81 | " \n",
82 | " def call(self, x, mask=None):\n",
83 | " \n",
84 | " assert self.built, 'Layer must be built before being called'\n",
85 | " input_shape = K.int_shape(x)\n",
86 | " \n",
87 | " reduction_axes = list(range(len(input_shape)))\n",
88 | " del reduction_axes[self.axis]\n",
89 | " \n",
90 | " broadcast_shape = [1] * len(input_shape)\n",
91 | " broadcast_shape[self.axis] = input_shape[self.axis]\n",
92 | " \n",
93 |     "        if sorted(reduction_axes) == list(range(K.ndim(x)))[:-1]:\n",
94 | " x_normed = K.batch_normalization(\n",
95 | " x, self.running_mean, self.running_std,\n",
96 | " self.beta, self.gamma, epsilon=self.epsilon)\n",
97 | " \n",
98 | " else:\n",
99 | " # need broadcasting\n",
100 | " broadcast_running_mean = K.reshape(self.running_mean, broadcast_shape)\n",
101 | " broadcast_running_std = K.reshape(self.running_std, broadcast_shape)\n",
102 | " broadcast_beta = K.reshape(self.beta, broadcast_shape)\n",
103 | " broadcast_gamma = K.reshape(self.gamma, broadcast_shape)\n",
104 | " x_normed = K.batch_normalization(\n",
105 | " x, broadcast_running_mean, broadcast_running_std,\n",
106 | " broadcast_beta, broadcast_gamma, epsilon=self.epsilon)\n",
107 | " \n",
108 | " return x_normed\n",
109 | " \n",
110 | " def get_config(self):\n",
111 | " \n",
112 | " config = {'epsilon': self.epsilon,\n",
113 | " 'axis': self.axis,\n",
114 | " 'gamma_regularizer': self.gamma_regularizer.get_config() if self.gamma_regularizer else None,\n",
115 | " 'beta_regularizer': self.beta_regularizer.get_config() if self.beta_regularizer else None}\n",
116 | " \n",
117 | " base_config = super(FixedBatchNormalization, self).get_config()\n",
118 | " return dict(list(base_config.items()) + list(config.items()))"
119 | ]
120 | }
121 | ],
122 | "metadata": {
123 | "kernelspec": {
124 | "display_name": "Python 3",
125 | "language": "python",
126 | "name": "python3"
127 | },
128 | "language_info": {
129 | "codemirror_mode": {
130 | "name": "ipython",
131 | "version": 3
132 | },
133 | "file_extension": ".py",
134 | "mimetype": "text/x-python",
135 | "name": "python",
136 | "nbconvert_exporter": "python",
137 | "pygments_lexer": "ipython3",
138 | "version": "3.6.3"
139 | }
140 | },
141 | "nbformat": 4,
142 | "nbformat_minor": 2
143 | }
144 |
--------------------------------------------------------------------------------
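A small smoke test for `FixedBatchNormalization`, assuming the Keras 2.x environment the notebook targets. Because gamma, beta, and both running statistics are created with `trainable=False`, the layer contributes no trainable weights, which is the point of "fixed" batch normalization when fine-tuning a pretrained backbone:

```python
# Sketch under the notebook's Keras 2.x assumptions; FixedBatchNormalization
# is the class defined above.
from keras.layers import Input
from keras.models import Model

inp = Input(shape=(None, None, 3))
out = FixedBatchNormalization(axis=-1, name='fbn')(inp)
model = Model(inp, out)

print(len(model.trainable_weights))      # 0 -- all four weights are frozen
print(len(model.non_trainable_weights))  # 4 -- gamma, beta, running mean/std
```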
/keras_frcnn/resnet.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 | "# ResNet50 model for Keras.\n",
8 | " Reference:\n",
9 | "- [Deep Residual Learning for Image Recognition](https://arxiv.org/abs/1512.03385) Adapted from code contributed by BigMoyan\n",
10 | "- https://github.com/yhenon/keras-frcnn/blob/master/keras_frcnn/resnet.py"
11 | ]
12 | },
13 | {
14 | "cell_type": "code",
15 | "execution_count": 1,
16 | "metadata": {},
17 | "outputs": [
18 | {
19 | "name": "stderr",
20 | "output_type": "stream",
21 | "text": [
22 | "Using TensorFlow backend.\n"
23 | ]
24 | }
25 | ],
26 | "source": [
27 | "from __future__ import print_function\n",
28 | "from __future__ import absolute_import\n",
29 | "import sys\n",
30 | "import os\n",
31 | "from themachine.nbfinder import NotebookFinder\n",
32 | "sys.meta_path.append(NotebookFinder())\n",
33 | "from keras.layers import Input, Add, Dense, Activation, Flatten, Convolution2D, MaxPooling2D, ZeroPadding2D, \\\n",
34 | " AveragePooling2D, TimeDistributed\n",
35 | "\n",
36 | "from keras import backend as K\n",
37 | "\n",
38 | "#from keras_frcnn.RoiPoolingConv import RoiPoolingConv\n",
39 | "#from keras_frcnn.FixedBatchNormalization import FixedBatchNormalization\n"
40 | ]
41 | },
42 | {
43 | "cell_type": "code",
44 | "execution_count": 2,
45 | "metadata": {},
46 | "outputs": [
47 | {
48 | "name": "stdout",
49 | "output_type": "stream",
50 | "text": [
51 | "importing Jupyter notebook from roi_pooling_conv.ipynb\n",
52 | "importing Jupyter notebook from fixed_batch_normalization.ipynb\n"
53 | ]
54 | }
55 | ],
56 | "source": [
57 | "from roi_pooling_conv import RoiPoolingConv\n",
58 | "from fixed_batch_normalization import FixedBatchNormalization"
59 | ]
60 | },
61 | {
62 | "cell_type": "code",
63 | "execution_count": 8,
64 | "metadata": {},
65 | "outputs": [],
66 | "source": [
67 | "def get_weight_path():\n",
68 | " if K.image_dim_ordering() == 'th':\n",
69 | " return 'resnet50_weights_th_dim_ordering_th_kernels_notop.h5'\n",
70 | " else:\n",
71 | " return 'resnet50_weights_tf_dim_ordering_tf_kernels.h5'\n",
72 | " \n",
73 | "def get_img_output_length(width, height):\n",
74 | " def get_output_length(input_length):\n",
75 | " # zero_pad\n",
76 | " input_length += 6\n",
77 | " \n",
78 | " # apply 4 strided convolutions\n",
79 | " filter_sizes = [7, 3, 1, 1]\n",
80 | " \n",
81 | " stride = 2\n",
82 | " \n",
83 | " for filter_size in filter_sizes:\n",
84 |     "            input_length = (input_length - filter_size + stride) // stride\n",
85 | " \n",
86 | " return input_length\n",
87 | " \n",
88 | " return get_output_length(width), get_output_length(height)\n"
89 | ]
90 | },
91 | {
92 | "cell_type": "code",
93 | "execution_count": 7,
94 | "metadata": {},
95 | "outputs": [],
96 | "source": [
97 | "def identity_block(input_tensor, kernel_size, filters, stage, block, trainable=True):\n",
98 | "\n",
99 | " nb_filter1, nb_filter2, nb_filter3 = filters\n",
100 | " \n",
101 | " if K.image_dim_ordering() == 'tf':\n",
102 | " bn_axis = 3\n",
103 | " else:\n",
104 | " bn_axis = 1\n",
105 | "\n",
106 | " conv_name_base = 'res' + str(stage) + block + '_branch'\n",
107 | " bn_name_base = 'bn' + str(stage) + block + '_branch'\n",
108 | "\n",
109 | " x = Convolution2D(nb_filter1, (1, 1), name=conv_name_base + '2a', trainable=trainable)(input_tensor)\n",
110 | " x = FixedBatchNormalization(axis=bn_axis, name=bn_name_base + '2a')(x)\n",
111 | " x = Activation('relu')(x)\n",
112 | "\n",
113 | " x = Convolution2D(nb_filter2, (kernel_size, kernel_size), padding='same', name=conv_name_base + '2b', trainable=trainable)(x)\n",
114 | " x = FixedBatchNormalization(axis=bn_axis, name=bn_name_base + '2b')(x)\n",
115 | " x = Activation('relu')(x)\n",
116 | "\n",
117 | " x = Convolution2D(nb_filter3, (1, 1), name=conv_name_base + '2c', trainable=trainable)(x)\n",
118 | " x = FixedBatchNormalization(axis=bn_axis, name=bn_name_base + '2c')(x)\n",
119 | "\n",
120 | " x = Add()([x, input_tensor])\n",
121 | " x = Activation('relu')(x)\n",
122 | " return x\n",
123 | "\n",
124 | "\n",
125 | "def identity_block_td(input_tensor, kernel_size, filters, stage, block, trainable=True):\n",
126 | "\n",
127 | " # identity block time distributed\n",
128 | "\n",
129 | " nb_filter1, nb_filter2, nb_filter3 = filters\n",
130 | " if K.image_dim_ordering() == 'tf':\n",
131 | " bn_axis = 3\n",
132 | " else:\n",
133 | " bn_axis = 1\n",
134 | "\n",
135 | " conv_name_base = 'res' + str(stage) + block + '_branch'\n",
136 | " bn_name_base = 'bn' + str(stage) + block + '_branch'\n",
137 | "\n",
138 | " x = TimeDistributed(Convolution2D(nb_filter1, (1, 1), trainable=trainable, kernel_initializer='normal'), name=conv_name_base + '2a')(input_tensor)\n",
139 | " x = TimeDistributed(FixedBatchNormalization(axis=bn_axis), name=bn_name_base + '2a')(x)\n",
140 | " x = Activation('relu')(x)\n",
141 | "\n",
142 | " x = TimeDistributed(Convolution2D(nb_filter2, (kernel_size, kernel_size), trainable=trainable, kernel_initializer='normal',padding='same'), name=conv_name_base + '2b')(x)\n",
143 | " x = TimeDistributed(FixedBatchNormalization(axis=bn_axis), name=bn_name_base + '2b')(x)\n",
144 | " x = Activation('relu')(x)\n",
145 | "\n",
146 | " x = TimeDistributed(Convolution2D(nb_filter3, (1, 1), trainable=trainable, kernel_initializer='normal'), name=conv_name_base + '2c')(x)\n",
147 | " x = TimeDistributed(FixedBatchNormalization(axis=bn_axis), name=bn_name_base + '2c')(x)\n",
148 | "\n",
149 | " x = Add()([x, input_tensor])\n",
150 | " x = Activation('relu')(x)\n",
151 | "\n",
152 | " return x\n"
153 | ]
154 | },
155 | {
156 | "cell_type": "code",
157 | "execution_count": 10,
158 | "metadata": {},
159 | "outputs": [],
160 | "source": [
161 | "def conv_block(input_tensor, kernel_size, filters, stage, block, strides=(2, 2), trainable=True):\n",
162 | "\n",
163 | " nb_filter1, nb_filter2, nb_filter3 = filters\n",
164 | " if K.image_dim_ordering() == 'tf':\n",
165 | " bn_axis = 3\n",
166 | " else:\n",
167 | " bn_axis = 1\n",
168 | "\n",
169 | " conv_name_base = 'res' + str(stage) + block + '_branch'\n",
170 | " bn_name_base = 'bn' + str(stage) + block + '_branch'\n",
171 | "\n",
172 | " x = Convolution2D(nb_filter1, (1, 1), strides=strides, name=conv_name_base + '2a', trainable=trainable)(input_tensor)\n",
173 | " x = FixedBatchNormalization(axis=bn_axis, name=bn_name_base + '2a')(x)\n",
174 | " x = Activation('relu')(x)\n",
175 | "\n",
176 | " x = Convolution2D(nb_filter2, (kernel_size, kernel_size), padding='same', name=conv_name_base + '2b', trainable=trainable)(x)\n",
177 | " x = FixedBatchNormalization(axis=bn_axis, name=bn_name_base + '2b')(x)\n",
178 | " x = Activation('relu')(x)\n",
179 | "\n",
180 | " x = Convolution2D(nb_filter3, (1, 1), name=conv_name_base + '2c', trainable=trainable)(x)\n",
181 | " x = FixedBatchNormalization(axis=bn_axis, name=bn_name_base + '2c')(x)\n",
182 | "\n",
183 | " shortcut = Convolution2D(nb_filter3, (1, 1), strides=strides, name=conv_name_base + '1', trainable=trainable)(input_tensor)\n",
184 | " shortcut = FixedBatchNormalization(axis=bn_axis, name=bn_name_base + '1')(shortcut)\n",
185 | "\n",
186 | " x = Add()([x, shortcut])\n",
187 | " x = Activation('relu')(x)\n",
188 | " return x\n",
189 | "\n",
190 | "\n",
191 | "def conv_block_td(input_tensor, kernel_size, filters, stage, block, input_shape, strides=(2, 2), trainable=True):\n",
192 | "\n",
193 | " # conv block time distributed\n",
194 | "\n",
195 | " nb_filter1, nb_filter2, nb_filter3 = filters\n",
196 | " if K.image_dim_ordering() == 'tf':\n",
197 | " bn_axis = 3\n",
198 | " else:\n",
199 | " bn_axis = 1\n",
200 | "\n",
201 | " conv_name_base = 'res' + str(stage) + block + '_branch'\n",
202 | " bn_name_base = 'bn' + str(stage) + block + '_branch'\n",
203 | "\n",
204 | " x = TimeDistributed(Convolution2D(nb_filter1, (1, 1), strides=strides, trainable=trainable, kernel_initializer='normal'), input_shape=input_shape, name=conv_name_base + '2a')(input_tensor)\n",
205 | " x = TimeDistributed(FixedBatchNormalization(axis=bn_axis), name=bn_name_base + '2a')(x)\n",
206 | " x = Activation('relu')(x)\n",
207 | "\n",
208 | " x = TimeDistributed(Convolution2D(nb_filter2, (kernel_size, kernel_size), padding='same', trainable=trainable, kernel_initializer='normal'), name=conv_name_base + '2b')(x)\n",
209 | " x = TimeDistributed(FixedBatchNormalization(axis=bn_axis), name=bn_name_base + '2b')(x)\n",
210 | " x = Activation('relu')(x)\n",
211 | "\n",
212 | " x = TimeDistributed(Convolution2D(nb_filter3, (1, 1), kernel_initializer='normal'), name=conv_name_base + '2c', trainable=trainable)(x)\n",
213 | " x = TimeDistributed(FixedBatchNormalization(axis=bn_axis), name=bn_name_base + '2c')(x)\n",
214 | "\n",
215 | " shortcut = TimeDistributed(Convolution2D(nb_filter3, (1, 1), strides=strides, trainable=trainable, kernel_initializer='normal'), name=conv_name_base + '1')(input_tensor)\n",
216 | " shortcut = TimeDistributed(FixedBatchNormalization(axis=bn_axis), name=bn_name_base + '1')(shortcut)\n",
217 | "\n",
218 | " x = Add()([x, shortcut])\n",
219 | " x = Activation('relu')(x)\n",
220 | " return x"
221 | ]
222 | },
223 | {
224 | "cell_type": "code",
225 | "execution_count": 11,
226 | "metadata": {},
227 | "outputs": [],
228 | "source": [
229 | "def nn_base(input_tensor=None, trainable=False):\n",
230 | "\n",
231 | " # Determine proper input shape\n",
232 | " if K.image_dim_ordering() == 'th':\n",
233 | " input_shape = (3, None, None)\n",
234 | " else:\n",
235 | " input_shape = (None, None, 3)\n",
236 | "\n",
237 | " if input_tensor is None:\n",
238 | " img_input = Input(shape=input_shape)\n",
239 | " else:\n",
240 | " if not K.is_keras_tensor(input_tensor):\n",
241 | " img_input = Input(tensor=input_tensor, shape=input_shape)\n",
242 | " else:\n",
243 | " img_input = input_tensor\n",
244 | "\n",
245 | " if K.image_dim_ordering() == 'tf':\n",
246 | " bn_axis = 3\n",
247 | " else:\n",
248 | " bn_axis = 1\n",
249 | "\n",
250 | " x = ZeroPadding2D((3, 3))(img_input)\n",
251 | "\n",
252 | " x = Convolution2D(64, (7, 7), strides=(2, 2), name='conv1', trainable = trainable)(x)\n",
253 | " x = FixedBatchNormalization(axis=bn_axis, name='bn_conv1')(x)\n",
254 | " x = Activation('relu')(x)\n",
255 | " x = MaxPooling2D((3, 3), strides=(2, 2))(x)\n",
256 | "\n",
257 | " x = conv_block(x, 3, [64, 64, 256], stage=2, block='a', strides=(1, 1), trainable = trainable)\n",
258 | " x = identity_block(x, 3, [64, 64, 256], stage=2, block='b', trainable = trainable)\n",
259 | " x = identity_block(x, 3, [64, 64, 256], stage=2, block='c', trainable = trainable)\n",
260 | "\n",
261 | " x = conv_block(x, 3, [128, 128, 512], stage=3, block='a', trainable = trainable)\n",
262 | " x = identity_block(x, 3, [128, 128, 512], stage=3, block='b', trainable = trainable)\n",
263 | " x = identity_block(x, 3, [128, 128, 512], stage=3, block='c', trainable = trainable)\n",
264 | " x = identity_block(x, 3, [128, 128, 512], stage=3, block='d', trainable = trainable)\n",
265 | "\n",
266 | " x = conv_block(x, 3, [256, 256, 1024], stage=4, block='a', trainable = trainable)\n",
267 | " x = identity_block(x, 3, [256, 256, 1024], stage=4, block='b', trainable = trainable)\n",
268 | " x = identity_block(x, 3, [256, 256, 1024], stage=4, block='c', trainable = trainable)\n",
269 | " x = identity_block(x, 3, [256, 256, 1024], stage=4, block='d', trainable = trainable)\n",
270 | " x = identity_block(x, 3, [256, 256, 1024], stage=4, block='e', trainable = trainable)\n",
271 | " x = identity_block(x, 3, [256, 256, 1024], stage=4, block='f', trainable = trainable)\n",
272 | "\n",
273 | " return x\n",
274 | "\n"
275 | ]
276 | },
277 | {
278 | "cell_type": "code",
279 | "execution_count": 12,
280 | "metadata": {},
281 | "outputs": [],
282 | "source": [
283 | "def classifier_layers(x, input_shape, trainable=False):\n",
284 | "\n",
285 | " # compile times on theano tend to be very high, so we use smaller ROI pooling regions to workaround\n",
286 | " # (hence a smaller stride in the region that follows the ROI pool)\n",
287 | " if K.backend() == 'tensorflow':\n",
288 | " x = conv_block_td(x, 3, [512, 512, 2048], stage=5, block='a', input_shape=input_shape, strides=(2, 2), trainable=trainable)\n",
289 | " elif K.backend() == 'theano':\n",
290 | " x = conv_block_td(x, 3, [512, 512, 2048], stage=5, block='a', input_shape=input_shape, strides=(1, 1), trainable=trainable)\n",
291 | "\n",
292 | " x = identity_block_td(x, 3, [512, 512, 2048], stage=5, block='b', trainable=trainable)\n",
293 | " x = identity_block_td(x, 3, [512, 512, 2048], stage=5, block='c', trainable=trainable)\n",
294 | " x = TimeDistributed(AveragePooling2D((7, 7)), name='avg_pool')(x)\n",
295 | "\n",
296 | " return x"
297 | ]
298 | },
299 | {
300 | "cell_type": "code",
301 | "execution_count": 13,
302 | "metadata": {},
303 | "outputs": [],
304 | "source": [
305 | "def rpn(base_layers,num_anchors):\n",
306 | "\n",
307 | " x = Convolution2D(512, (3, 3), padding='same', activation='relu', kernel_initializer='normal', name='rpn_conv1')(base_layers)\n",
308 | "\n",
309 | " x_class = Convolution2D(num_anchors, (1, 1), activation='sigmoid', kernel_initializer='uniform', name='rpn_out_class')(x)\n",
310 | " x_regr = Convolution2D(num_anchors * 4, (1, 1), activation='linear', kernel_initializer='zero', name='rpn_out_regress')(x)\n",
311 | "\n",
312 | " return [x_class, x_regr, base_layers]\n",
313 | "\n",
314 | "def classifier(base_layers, input_rois, num_rois, nb_classes = 21, trainable=False):\n",
315 | "\n",
316 | " # compile times on theano tend to be very high, so we use smaller ROI pooling regions to workaround\n",
317 | "\n",
318 | " if K.backend() == 'tensorflow':\n",
319 | " pooling_regions = 14\n",
320 | " input_shape = (num_rois,14,14,1024)\n",
321 | " elif K.backend() == 'theano':\n",
322 | " pooling_regions = 7\n",
323 | " input_shape = (num_rois,1024,7,7)\n",
324 | "\n",
325 | " out_roi_pool = RoiPoolingConv(pooling_regions, num_rois)([base_layers, input_rois])\n",
326 | " out = classifier_layers(out_roi_pool, input_shape=input_shape, trainable=True)\n",
327 | "\n",
328 | " out = TimeDistributed(Flatten())(out)\n",
329 | "\n",
330 | " out_class = TimeDistributed(Dense(nb_classes, activation='softmax', kernel_initializer='zero'), name='dense_class_{}'.format(nb_classes))(out)\n",
331 | " # note: no regression target for bg class\n",
332 | " out_regr = TimeDistributed(Dense(4 * (nb_classes-1), activation='linear', kernel_initializer='zero'), name='dense_regress_{}'.format(nb_classes))(out)\n",
333 | " return [out_class, out_regr]"
334 | ]
335 | },
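  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "A wiring sketch for the two heads (illustrative, not part of the reference code), assuming the TensorFlow backend; num_anchors=9 (3 scales x 3 aspect ratios) and num_rois=4 are example values, not constants fixed by the code above:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from keras.layers import Input\n",
    "from keras.models import Model\n",
    "\n",
    "# hedged sketch: share nn_base between the RPN and the detector head\n",
    "img_input = Input(shape=(None, None, 3))\n",
    "roi_input = Input(shape=(None, 4))\n",
    "shared_layers = nn_base(img_input, trainable=True)\n",
    "\n",
    "rpn_class, rpn_regr, _ = rpn(shared_layers, num_anchors=9)\n",
    "det_class, det_regr = classifier(shared_layers, roi_input, num_rois=4, nb_classes=21)\n",
    "\n",
    "model_rpn = Model(img_input, [rpn_class, rpn_regr])\n",
    "model_classifier = Model([img_input, roi_input], [det_class, det_regr])"
   ]
  }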
336 | ],
337 | "metadata": {
338 | "kernelspec": {
339 | "display_name": "Python 3",
340 | "language": "python",
341 | "name": "python3"
342 | },
343 | "language_info": {
344 | "codemirror_mode": {
345 | "name": "ipython",
346 | "version": 3
347 | },
348 | "file_extension": ".py",
349 | "mimetype": "text/x-python",
350 | "name": "python",
351 | "nbconvert_exporter": "python",
352 | "pygments_lexer": "ipython3",
353 | "version": "3.6.3"
354 | }
355 | },
356 | "nbformat": 4,
357 | "nbformat_minor": 2
358 | }
359 |
--------------------------------------------------------------------------------
/keras_frcnn/roi_pooling_conv.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 | "Reference: https://github.com/yhenon/keras-frcnn/blob/master/keras_frcnn/RoiPoolingConv.py"
8 | ]
9 | },
10 | {
11 | "cell_type": "code",
12 | "execution_count": 1,
13 | "metadata": {},
14 | "outputs": [
15 | {
16 | "name": "stderr",
17 | "output_type": "stream",
18 | "text": [
19 | "Using TensorFlow backend.\n"
20 | ]
21 | }
22 | ],
23 | "source": [
24 | "from keras.engine.topology import Layer\n",
25 | "import keras.backend as K"
26 | ]
27 | },
28 | {
29 | "cell_type": "code",
30 | "execution_count": 2,
31 | "metadata": {},
32 | "outputs": [],
33 | "source": [
34 | "if K.backend() == 'tensorflow':\n",
35 | " import tensorflow as tf"
36 | ]
37 | },
38 | {
39 | "cell_type": "code",
40 | "execution_count": 5,
41 | "metadata": {},
42 | "outputs": [],
43 | "source": [
44 | "class RoiPoolingConv(Layer):\n",
45 | " \"\"\"ROI pooling layer for 2D inputs.\n",
46 | " See Spatial Pyramid pooling in Deep Convolutional Networks for Visual\n",
47 | " Recognition, K. He, X. Zhang, S. Ren, J. Sun\n",
48 | " \n",
49 | " # Arguments\n",
50 | " pool_size: int\n",
51 | " size of pooling region to use, pool_size = 7 will result in a 7x7 region.\n",
52 | " num_rois: number of regions of interest to be used.\n",
53 | " \n",
54 | " # Input shape\n",
55 | " list of two 4D tensors [X_img, X_roi] with shape:\n",
56 | " \n",
57 | " X_img:\n",
58 | " `(1, channels, rows, cols)` if dim_ordering='th'\n",
59 | " or 4D tensor with shape:\n",
60 | " `(1, rows, cols, channels)` if dim_ordering='tf'.\n",
61 | " X_roi:\n",
62 | " `(1,num_rois,4)` list of rois, with ordering (x,y,w,h)\n",
63 | " \n",
64 | " # Output shape\n",
65 | " 3D tensor with shape:\n",
66 | " `(1, num_rois, channels, pool_size, pool_size)`\n",
67 | " \"\"\"\n",
68 | " \n",
69 | " def __init__(self, pool_size, num_rois, **kwargs):\n",
70 | " \n",
71 | " self.dim_ordering = K.image_dim_ordering()\n",
72 | " assert self.dim_ordering in {'tf', 'th'}, 'dim_ordering must be in {tf, th}'\n",
73 | " \n",
74 | " self.pool_size = pool_size\n",
75 | " self.num_rois = num_rois\n",
76 | " \n",
77 | " super(RoiPoolingConv, self).__init__(**kwargs)\n",
78 | " \n",
79 | " \n",
80 | " def build(self, input_shape):\n",
81 | " if self.dim_ordering == 'th':\n",
82 | " self.nb_channels = input_shape[0][1]\n",
83 | " \n",
84 | " elif self.dim_ordering == 'tf':\n",
85 | " self.nb_channels = input_shape[0][3]\n",
86 | " \n",
87 | " def compute_output_shape(self, input_shape):\n",
88 | " if self.dim_ordering == 'th':\n",
89 | " return None, self.num_rois, self.nb_channels, self.pool_size, self.pool_size\n",
90 | " \n",
91 | " else:\n",
92 | " return None, self.num_rois, self.pool_size, self.pool_size, self.nb_channels\n",
93 | " \n",
94 | " \n",
95 | " def call(self, x, mask=None):\n",
96 | " assert(len(x) == 2)\n",
97 | " \n",
98 | " img = x[0]\n",
99 | " rois = x[1]\n",
100 | " \n",
101 | " input_shape = K.shape(img)\n",
102 | " \n",
103 | " outputs = []\n",
104 | " \n",
105 | " for roi_idx in range(self.num_rois):\n",
106 | " \n",
107 | " x = rois[0, roi_idx, 0]\n",
108 | " y = rois[0, roi_idx, 1]\n",
109 | " w = rois[0, roi_idx, 2]\n",
110 | " h = rois[0, roi_idx, 3]\n",
111 | " \n",
112 | " row_length = w / float(self.pool_size)\n",
113 | " col_length = h / float(self.pool_size)\n",
114 | " \n",
115 | " num_pool_regions = self.pool_size\n",
116 | " \n",
117 | " #NOTE: the RoiPooling implementation differs between theano and tensorflow due to the lack of a resize op\n",
118 | " # in theano. The theano implementation is much less efficient and leads to long compile times\n",
119 | "\n",
120 | " if self.dim_ordering == 'th':\n",
121 | " for jy in range(num_pool_regions):\n",
122 | " for ix in range(num_pool_regions):\n",
123 | " x1 = x + ix * row_length\n",
124 | " x2 = x1 + row_length\n",
125 | " y1 = y + jy * col_length\n",
126 | " y2 = y1 + col_length\n",
127 | " \n",
128 | " x1 = K.cast(x1, 'int32')\n",
129 | " x2 = K.cast(x2, 'int32')\n",
130 | " y1 = K.cast(y1, 'int32')\n",
131 | " y2 = K.cast(y2, 'int32')\n",
132 | " \n",
133 | " x2 = x1 + K.maximum(1, x2-x1)\n",
134 | " y2 = y1 + K.maximum(1, y2-y1)\n",
135 | " \n",
136 | " new_shape = [input_shape[0], input_shape[1],\n",
137 | " y2 - y1, x2 - x1]\n",
138 | " \n",
139 | " x_crop = img[:, :, y1:y2, x1:x2]\n",
140 | " xm = K.reshape(x_crop, new_shape)\n",
141 | " pooled_val = K.max(xm, axis=(2, 3))\n",
142 | " outputs.append(pooled_val)\n",
143 | " \n",
144 | " elif self.dim_ordering == 'tf':\n",
145 | " x = K.cast(x, 'int32')\n",
146 | " y = K.cast(y, 'int32')\n",
147 | " w = K.cast(w, 'int32')\n",
148 | " h = K.cast(h, 'int32')\n",
149 | " \n",
150 | " rs = tf.image.resize_images(img[:, y:y+h, x:x+w, :], (self.pool_size, self.pool_size))\n",
151 | " outputs.append(rs)\n",
152 | " \n",
153 | " final_output = K.concatenate(outputs, axis=0)\n",
154 | " final_output = K.reshape(final_output, (1, self.num_rois, self.pool_size, self.pool_size, self.nb_channels))\n",
155 | " \n",
156 | " if self.dim_ordering == 'th':\n",
157 | " final_output = K.permute_dimensions(final_output, (0, 1, 4, 2, 3))\n",
158 | " \n",
159 | " else:\n",
160 | " final_output = K.permute_dimensions(final_output, (0, 1, 2, 3, 4))\n",
161 | " \n",
162 | " return final_output\n",
163 | " \n",
164 | " def get_config(self):\n",
165 | " config = {'pool_size': self.pool_size,\n",
166 | " 'num_rois': self.num_rois}\n",
167 | " base_config = super(RoiPoolingConv, self).get_config()\n",
168 | " return dict(list(base_config.items()) + list(config.items()))"
169 | ]
170 | },
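  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "A small smoke test for the layer (illustrative, not part of the reference code), assuming the TensorFlow backend; the feature-map size and the (x, y, w, h) boxes below are arbitrary:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import numpy as np\n",
    "from keras.layers import Input\n",
    "from keras.models import Model\n",
    "\n",
    "# hedged sketch: pool 4 ROIs from a 32x32x512 feature map down to 7x7 each\n",
    "feature_map = Input(shape=(None, None, 512))\n",
    "rois = Input(shape=(4, 4))  # one (x, y, w, h) row per ROI\n",
    "pooled = RoiPoolingConv(pool_size=7, num_rois=4)([feature_map, rois])\n",
    "model = Model([feature_map, rois], pooled)\n",
    "\n",
    "img = np.random.rand(1, 32, 32, 512).astype('float32')\n",
    "boxes = np.array([[[0, 0, 16, 16], [8, 8, 16, 16], [0, 16, 16, 8], [4, 4, 12, 12]]], dtype='float32')\n",
    "print(model.predict([img, boxes]).shape)  # (1, 4, 7, 7, 512)"
   ]
  }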
171 | ],
172 | "metadata": {
173 | "kernelspec": {
174 | "display_name": "Python 3",
175 | "language": "python",
176 | "name": "python3"
177 | },
178 | "language_info": {
179 | "codemirror_mode": {
180 | "name": "ipython",
181 | "version": 3
182 | },
183 | "file_extension": ".py",
184 | "mimetype": "text/x-python",
185 | "name": "python",
186 | "nbconvert_exporter": "python",
187 | "pygments_lexer": "ipython3",
188 | "version": "3.6.3"
189 | }
190 | },
191 | "nbformat": 4,
192 | "nbformat_minor": 2
193 | }
194 |
--------------------------------------------------------------------------------
/keras_frcnn/vgg.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 | "# VGG16 model for Keras.\n",
8 | "\n",
9 | "Reference\n",
10 | "- [Very Deep Convolutional Networks for Large-Scale Image Recognition](https://arxiv.org/abs/1409.1556)"
11 | ]
12 | },
13 | {
14 | "cell_type": "code",
15 | "execution_count": 1,
16 | "metadata": {},
17 | "outputs": [
18 | {
19 | "name": "stderr",
20 | "output_type": "stream",
21 | "text": [
22 | "Using TensorFlow backend.\n"
23 | ]
24 | }
25 | ],
26 | "source": [
27 | "import warnings\n",
28 | "warnings.filterwarnings('ignore')\n",
29 | "from __future__ import print_function\n",
30 | "from __future__ import absolute_import\n",
31 | "import sys\n",
32 | "import os\n",
33 | "from themachine.nbfinder import NotebookFinder\n",
34 | "sys.meta_path.append(NotebookFinder())\n",
35 | "from keras.models import Model\n",
36 | "from keras.layers import Flatten, Dense, Input, Conv2D, MaxPooling2D, Dropout\n",
37 | "from keras.layers import GlobalAveragePooling2D, GlobalMaxPooling2D, TimeDistributed\n",
38 | "from keras.engine.topology import get_source_inputs\n",
39 | "from keras.utils import layer_utils\n",
40 | "from keras.utils.data_utils import get_file\n",
41 | "from keras import backend as K"
42 | ]
43 | },
44 | {
45 | "cell_type": "code",
46 | "execution_count": 2,
47 | "metadata": {},
48 | "outputs": [
49 | {
50 | "name": "stdout",
51 | "output_type": "stream",
52 | "text": [
53 | "importing Jupyter notebook from roi_pooling_conv.ipynb\n"
54 | ]
55 | }
56 | ],
57 | "source": [
58 | "from roi_pooling_conv import RoiPoolingConv"
59 | ]
60 | },
61 | {
62 | "cell_type": "code",
63 | "execution_count": 4,
64 | "metadata": {},
65 | "outputs": [],
66 | "source": [
67 | "def get_weight_path():\n",
68 | " if K.image_dim_ordering() == 'th':\n",
69 | " print('pretrained weights not available for VGG with theano backend')\n",
70 | " return\n",
71 | " else:\n",
72 | " return 'vgg16_weights_tf_dim_ordering_tf_kernels.h5'\n",
73 | "\n",
74 | "\n",
75 | "def get_img_output_length(width, height):\n",
76 | " def get_output_length(input_length):\n",
77 | " return input_length//16\n",
78 | "\n",
79 | " return get_output_length(width), get_output_length(height) "
80 | ]
81 | },
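  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "The four pooling layers of the truncated VGG16 base each halve the spatial dimensions, so each output length is simply the input length integer-divided by 16. For example:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# e.g. an 800x600 image maps to a 50x37 feature grid\n",
    "print(get_img_output_length(800, 600))  # (50, 37)"
   ]
  },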
82 | {
83 | "cell_type": "code",
84 | "execution_count": 5,
85 | "metadata": {},
86 | "outputs": [],
87 | "source": [
88 | "def nn_base(input_tensor=None, trainable=False):\n",
89 | "\n",
90 | "\n",
91 | " # Determine proper input shape\n",
92 | " if K.image_dim_ordering() == 'th':\n",
93 | " input_shape = (3, None, None)\n",
94 | " else:\n",
95 | " input_shape = (None, None, 3)\n",
96 | "\n",
97 | " if input_tensor is None:\n",
98 | " img_input = Input(shape=input_shape)\n",
99 | " else:\n",
100 | " if not K.is_keras_tensor(input_tensor):\n",
101 | " img_input = Input(tensor=input_tensor, shape=input_shape)\n",
102 | " else:\n",
103 | " img_input = input_tensor\n",
104 | "\n",
105 | " if K.image_dim_ordering() == 'tf':\n",
106 | " bn_axis = 3\n",
107 | " else:\n",
108 | " bn_axis = 1\n",
109 | "\n",
110 | " # Block 1\n",
111 | " x = Conv2D(64, (3, 3), activation='relu', padding='same', name='block1_conv1')(img_input)\n",
112 | " x = Conv2D(64, (3, 3), activation='relu', padding='same', name='block1_conv2')(x)\n",
113 | " x = MaxPooling2D((2, 2), strides=(2, 2), name='block1_pool')(x)\n",
114 | "\n",
115 | " # Block 2\n",
116 | " x = Conv2D(128, (3, 3), activation='relu', padding='same', name='block2_conv1')(x)\n",
117 | " x = Conv2D(128, (3, 3), activation='relu', padding='same', name='block2_conv2')(x)\n",
118 | " x = MaxPooling2D((2, 2), strides=(2, 2), name='block2_pool')(x)\n",
119 | "\n",
120 | " # Block 3\n",
121 | " x = Conv2D(256, (3, 3), activation='relu', padding='same', name='block3_conv1')(x)\n",
122 | " x = Conv2D(256, (3, 3), activation='relu', padding='same', name='block3_conv2')(x)\n",
123 | " x = Conv2D(256, (3, 3), activation='relu', padding='same', name='block3_conv3')(x)\n",
124 | " x = MaxPooling2D((2, 2), strides=(2, 2), name='block3_pool')(x)\n",
125 | "\n",
126 | " # Block 4\n",
127 | " x = Conv2D(512, (3, 3), activation='relu', padding='same', name='block4_conv1')(x)\n",
128 | " x = Conv2D(512, (3, 3), activation='relu', padding='same', name='block4_conv2')(x)\n",
129 | " x = Conv2D(512, (3, 3), activation='relu', padding='same', name='block4_conv3')(x)\n",
130 | " x = MaxPooling2D((2, 2), strides=(2, 2), name='block4_pool')(x)\n",
131 | "\n",
132 | " # Block 5\n",
133 | " x = Conv2D(512, (3, 3), activation='relu', padding='same', name='block5_conv1')(x)\n",
134 | " x = Conv2D(512, (3, 3), activation='relu', padding='same', name='block5_conv2')(x)\n",
135 | " x = Conv2D(512, (3, 3), activation='relu', padding='same', name='block5_conv3')(x)\n",
136 | " # x = MaxPooling2D((2, 2), strides=(2, 2), name='block5_pool')(x)\n",
137 | "\n",
138 | " return x\n"
139 | ]
140 | },
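  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "A quick shape check for the truncated backbone (illustrative, not part of the reference code), assuming the TensorFlow backend; with block5_pool commented out the output stride stays at 16. The 600x800 input is arbitrary."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import numpy as np\n",
    "\n",
    "# hedged sketch: dummy forward pass through the truncated VGG16 base\n",
    "img_input = Input(shape=(600, 800, 3))\n",
    "backbone = Model(img_input, nn_base(img_input))\n",
    "print(backbone.predict(np.zeros((1, 600, 800, 3))).shape)  # (1, 37, 50, 512)"
   ]
  },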
141 | {
142 | "cell_type": "code",
143 | "execution_count": 6,
144 | "metadata": {},
145 | "outputs": [],
146 | "source": [
147 | "def rpn(base_layers, num_anchors):\n",
148 | "\n",
149 | " x = Conv2D(512, (3, 3), padding='same', activation='relu', kernel_initializer='normal', name='rpn_conv1')(base_layers)\n",
150 | "\n",
151 | " x_class = Conv2D(num_anchors, (1, 1), activation='sigmoid', kernel_initializer='uniform', name='rpn_out_class')(x)\n",
152 | " x_regr = Conv2D(num_anchors * 4, (1, 1), activation='linear', kernel_initializer='zero', name='rpn_out_regress')(x)\n",
153 | "\n",
154 | " return [x_class, x_regr, base_layers]\n",
155 | "\n",
156 | "\n",
157 | "def classifier(base_layers, input_rois, num_rois, nb_classes = 21, trainable=False):\n",
158 | "\n",
159 | " # compile times on theano tend to be very high, so we use smaller ROI pooling regions to workaround\n",
160 | "\n",
161 | " if K.backend() == 'tensorflow':\n",
162 | " pooling_regions = 7\n",
163 | " input_shape = (num_rois,7,7,512)\n",
164 | " elif K.backend() == 'theano':\n",
165 | " pooling_regions = 7\n",
166 | " input_shape = (num_rois,512,7,7)\n",
167 | "\n",
168 | " out_roi_pool = RoiPoolingConv(pooling_regions, num_rois)([base_layers, input_rois])\n",
169 | "\n",
170 | " out = TimeDistributed(Flatten(name='flatten'))(out_roi_pool)\n",
171 | " out = TimeDistributed(Dense(4096, activation='relu', name='fc1'))(out)\n",
172 | " out = TimeDistributed(Dropout(0.5))(out)\n",
173 | " out = TimeDistributed(Dense(4096, activation='relu', name='fc2'))(out)\n",
174 | " out = TimeDistributed(Dropout(0.5))(out)\n",
175 | "\n",
176 | " out_class = TimeDistributed(Dense(nb_classes, activation='softmax', kernel_initializer='zero'), name='dense_class_{}'.format(nb_classes))(out)\n",
177 | " # note: no regression target for bg class\n",
178 | " out_regr = TimeDistributed(Dense(4 * (nb_classes-1), activation='linear', kernel_initializer='zero'), name='dense_regress_{}'.format(nb_classes))(out)\n",
179 | "\n",
180 | " return [out_class, out_regr]\n",
181 | "\n"
182 | ]
183 | },
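  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Per feature-map location the RPN head emits one objectness score per anchor and four box-regression offsets per anchor. A shape sketch (illustrative, not part of the reference code), assuming the TensorFlow backend and 9 anchors:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# hedged sketch: RPN output shapes for 9 anchors per location\n",
    "inp = Input(shape=(None, None, 3))\n",
    "x_class, x_regr, _ = rpn(nn_base(inp), num_anchors=9)\n",
    "print(K.int_shape(x_class))  # (None, None, None, 9)\n",
    "print(K.int_shape(x_regr))   # (None, None, None, 36)"
   ]
  }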
184 | ],
185 | "metadata": {
186 | "kernelspec": {
187 | "display_name": "Python 3",
188 | "language": "python",
189 | "name": "python3"
190 | },
191 | "language_info": {
192 | "codemirror_mode": {
193 | "name": "ipython",
194 | "version": 3
195 | },
196 | "file_extension": ".py",
197 | "mimetype": "text/x-python",
198 | "name": "python",
199 | "nbconvert_exporter": "python",
200 | "pygments_lexer": "ipython3",
201 | "version": "3.6.3"
202 | }
203 | },
204 | "nbformat": 4,
205 | "nbformat_minor": 2
206 | }
207 |
--------------------------------------------------------------------------------