├── .gitignore ├── LICENSE ├── README.md ├── __init__.py ├── data ├── __init__.py ├── data_augment.ipynb ├── data_generators.ipynb ├── pascal_voc_parser-pyfile.py └── pascal_voc_parser.ipynb ├── importing notebooks.ipynb └── keras_frcnn ├── __init__.py ├── fixed_batch_normalization.ipynb ├── resnet.ipynb ├── roi_pooling_conv.ipynb └── vgg.ipynb /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | env/ 12 | build/ 13 | develop-eggs/ 14 | dist/ 15 | downloads/ 16 | eggs/ 17 | .eggs/ 18 | lib/ 19 | lib64/ 20 | parts/ 21 | sdist/ 22 | var/ 23 | wheels/ 24 | *.egg-info/ 25 | .installed.cfg 26 | *.egg 27 | 28 | # PyInstaller 29 | # Usually these files are written by a python script from a template 30 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 31 | *.manifest 32 | *.spec 33 | 34 | # Installer logs 35 | pip-log.txt 36 | pip-delete-this-directory.txt 37 | 38 | # Unit test / coverage reports 39 | htmlcov/ 40 | .tox/ 41 | .coverage 42 | .coverage.* 43 | .cache 44 | nosetests.xml 45 | coverage.xml 46 | *.cover 47 | .hypothesis/ 48 | 49 | # Translations 50 | *.mo 51 | *.pot 52 | 53 | # Django stuff: 54 | *.log 55 | local_settings.py 56 | 57 | # Flask stuff: 58 | instance/ 59 | .webassets-cache 60 | 61 | # Scrapy stuff: 62 | .scrapy 63 | 64 | # Sphinx documentation 65 | docs/_build/ 66 | 67 | # PyBuilder 68 | target/ 69 | 70 | # Jupyter Notebook 71 | .ipynb_checkpoints 72 | 73 | # pyenv 74 | .python-version 75 | 76 | # celery beat schedule file 77 | celerybeat-schedule 78 | 79 | # SageMath parsed files 80 | *.sage.py 81 | 82 | # dotenv 83 | .env 84 | 85 | # virtualenv 86 | .venv 87 | venv/ 88 | ENV/ 89 | 90 | # Spyder project settings 91 | .spyderproject 92 | .spyproject 93 | 94 | # Rope project settings 95 | .ropeproject 96 | 97 | # mkdocs documentation 98 | /site 99 | 100 | # mypy 101 | .mypy_cache/ 102 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2017 Anderson Banihirwe 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Keras - Faster R-CNN: Towards Real-Time Object Detection with Region Proposal Networks 2 | 3 | Keras implementation of the paper: Shaoqing Ren et al. [Faster R-CNN: Towards Real-Time Object Detection with Region Proposal Networks](https://arxiv.org/abs/1506.01497). 4 | 5 | 6 | 7 | -------------------------------------------------------------------------------- /__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/andersy005/keras-faster-rcnn/fbceef68d390cca3ee1e77c26189b6b72968448e/__init__.py -------------------------------------------------------------------------------- /data/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/andersy005/keras-faster-rcnn/fbceef68d390cca3ee1e77c26189b6b72968448e/data/__init__.py -------------------------------------------------------------------------------- /data/data_augment.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "import cv2\n", 10 | "import numpy as np\n", 11 | "import copy" 12 | ] 13 | }, 14 | { 15 | "cell_type": "code", 16 | "execution_count": 2, 17 | "metadata": {}, 18 | "outputs": [], 19 | "source": [ 20 | "def augment(img_data, config, augment=True):\n", 21 | " assert 'filepath' in img_data\n", 22 | " assert 'bboxes' in img_data\n", 23 | " assert 'width' in img_data\n", 24 | " assert 'height' in img_data\n", 25 | " \n", 26 | " img_data_aug = copy.deepcopy(img_data)\n", 27 | " \n", 28 | " img = cv2.imread(img_data_aug['filepath'])\n", 29 | " \n", 30 | " if augment:\n", 31 | " rows, cols = img.shape[:2]\n", 32 | " \n", 33 | " if config.use_horizontal_flips and np.random.randint(0, 2) == 0:\n", 34 | " img = cv2.flip(img, 1)\n", 35 | " for bbox in img_data_aug['bboxes']:\n", 36 | " x1 = bbox['x1']\n", 37 | " x2 = bbox['x2']\n", 38 | " bbox['x2'] = cols - x1\n", 39 | " bbox['x1'] = cols - x2\n", 40 | " \n", 41 | " if config.use_vertical_flips and np.random.randint(0, 2) == 0:\n", 42 | " img = cv2.flip(img, 0)\n", 43 | " for bbox in img_data_aug['bboxes']:\n", 44 | " y1 = bbox['y1']\n", 45 | " y2 = bbox['y2']\n", 46 | " bbox['y2'] = rows - y1\n", 47 | " bbox['y1'] = rows - y2\n", 48 | " \n", 49 | " if config.rot_90:\n", 50 | " angle = np.random.choice([0, 90, 180, 270], 1)[0]\n", 51 | " if angle == 270:\n", 52 | " img = np.transpose(img, (1, 0, 2))\n", 53 | " img = cv2.flip(img, 0)\n", 54 | " elif angle == 180:\n", 55 | " img = cv2.flip(img, -1)\n", 56 | " elif angle == 90:\n", 57 | " img = np.transpose(img, (1, 0, 2))\n", 58 | " img = cv2.flip(img, 1)\n", 59 | " \n", 60 | " elif angle == 0:\n", 61 | " pass\n", 62 | " \n", 63 | " for bbox in img_data_aug['bboxes']:\n", 64 | " x1 = bbox['x1']\n", 65 | " x2 = bbox['x2']\n", 66 | " y1 = bbox['y1']\n", 67 | " y2 = bbox['y2']\n", 68 | " \n", 69 | " if angle == 270:\n", 70 | " bbox['x1'] = y1\n", 71 | " bbox['x2'] = y2\n", 72 | " bbox['y1'] = cols - x2\n", 73 | " bbox['y2'] = cols - x1\n", 74 | " \n", 75 | " elif angle == 180:\n", 76 | " bbox['x2'] = cols - x1\n", 77 | " bbox['x1'] = cols - x2\n", 78 | " bbox['y2'] = rows - y1\n", 79 | " bbox['y1'] = rows - y2\n", 80 | " \n", 81 | " elif angle == 90:\n", 82 | " bbox['x1'] = rows - y2\n", 83 | " bbox['x2'] = rows - y1\n", 84 | " bbox['y1'] = x1\n", 85 | " bbox['y2'] = x2 \n", 86 | " \n", 87 | " elif angle == 0:\n", 88 | " pass\n", 89 | " \n", 90 | " img_data_aug['width'] = img.shape[1]\n", 91 | " img_data_aug['height'] = img.shape[0]\n", 92 | " return img_data_aug, img\n", 93 | " " 94 | ] 95 | } 96 | ], 97 | "metadata": { 98 | "kernelspec": { 99 | "display_name": "Python 3", 100 | "language": "python", 101 | "name": "python3" 102 | }, 103 | "language_info": { 104 | "codemirror_mode": { 105 | "name": "ipython", 106 | "version": 3 107 | }, 108 | "file_extension": ".py", 109 | "mimetype": "text/x-python", 110 | "name": "python", 111 | "nbconvert_exporter": "python", 112 | "pygments_lexer": "ipython3", 113 | "version": "3.6.3" 114 | } 115 | }, 116 | "nbformat": 4, 117 | "nbformat_minor": 2 118 | } 119 | -------------------------------------------------------------------------------- /data/data_generators.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 5, 6 | "metadata": {}, 7 | "outputs": [ 8 | { 9 | "name": "stdout", 10 | "output_type": "stream", 11 | "text": [ 12 | "importing Jupyter notebook from data_augment.ipynb\n" 13 | ] 14 | } 15 | ], 16 | "source": [ 17 | "from __future__ import absolute_import\n", 18 | "import numpy as np\n", 19 | "import cv2\n", 20 | "import random\n", 21 | "import copy\n", 22 | "import sys\n", 23 | "import os\n", 24 | "from themachine.nbfinder import NotebookFinder\n", 25 | "sys.meta_path.append('.')\n", 26 | "sys.meta_path.append(NotebookFinder())\n", 27 | "import data_augment\n", 28 | "import threading\n", 29 | "import itertools\n" 30 | ] 31 | }, 32 | { 33 | "cell_type": "code", 34 | "execution_count": 6, 35 | "metadata": {}, 36 | "outputs": [], 37 | "source": [ 38 | "def union(au, bu, area_intersection):\n", 39 | " area_a = (au[2] - au[0]) * (au[3] - au[1])\n", 40 | " area_b = (bu[2] - bu[0]) * (bu[3] - bu[1])\n", 41 | " area_union = area_a + area_b - area_intersection\n", 42 | " return union" 43 | ] 44 | }, 45 | { 46 | "cell_type": "code", 47 | "execution_count": 7, 48 | "metadata": {}, 49 | "outputs": [], 50 | "source": [ 51 | "def intersection(ai, bi):\n", 52 | " x = max(ai[0], bi[0])\n", 53 | " y = max(ai[1], bi[1])\n", 54 | " w = min(ai[2], bi[2]) - x\n", 55 | " h = min(ai[3], bi[3]) - y\n", 56 | " if w < 0 or h < 0:\n", 57 | " return 0\n", 58 | " return w*h" 59 | ] 60 | }, 61 | { 62 | "cell_type": "code", 63 | "execution_count": 9, 64 | "metadata": {}, 65 | "outputs": [], 66 | "source": [ 67 | "def iou(a, b):\n", 68 | " # a and b should be (x1,y1,x2,y2)\n", 69 | " if a[0] >= a[2] or a[1] >= a[3] or b[0] >= b[2] or b[1] >= b[3]:\n", 70 | " return 0.0\n", 71 | " \n", 72 | " area_i = intersection(a, b)\n", 73 | " area_u = union(a, b, area_i)\n", 74 | "\n", 75 | " return float(area_i) / float(area_u + 1e-6)\n" 76 | ] 77 | }, 78 | { 79 | "cell_type": "code", 80 | "execution_count": 10, 81 | "metadata": {}, 82 | "outputs": [], 83 | "source": [ 84 | "def get_new_img_size(width, height, img_min_side=600):\n", 85 | " if width <= height:\n", 86 | " f = float(img_min_side) / width\n", 87 | " resized_height = int(f * height)\n", 88 | " resized_width = img_min_side\n", 89 | " \n", 90 | " else:\n", 91 | " f = float(img_min_side) / height\n", 92 | " resized_width = int(f * width)\n", 93 | " resized_height = img_min_side\n", 94 | " \n", 95 | " return resized_width, resized_height" 96 | ] 97 | }, 98 | { 99 | "cell_type": "code", 100 | "execution_count": null, 101 | "metadata": {}, 102 | "outputs": [], 103 | "source": [] 104 | } 105 | ], 106 | "metadata": { 107 | "kernelspec": { 108 | "display_name": "Python 3", 109 | "language": "python", 110 | "name": "python3" 111 | }, 112 | "language_info": { 113 | "codemirror_mode": { 114 | "name": "ipython", 115 | "version": 3 116 | }, 117 | "file_extension": ".py", 118 | "mimetype": "text/x-python", 119 | "name": "python", 120 | "nbconvert_exporter": "python", 121 | "pygments_lexer": "ipython3", 122 | "version": "3.6.3" 123 | } 124 | }, 125 | "nbformat": 4, 126 | "nbformat_minor": 2 127 | } 128 | -------------------------------------------------------------------------------- /data/pascal_voc_parser-pyfile.py: -------------------------------------------------------------------------------- 1 | # %load ../pascal_voc_parser.py 2 | import os 3 | import cv2 4 | import xml.etree.ElementTree as ET 5 | import numpy as np 6 | def get_data(input_path): 7 | all_imgs = [] 8 | 9 | classes_count = {} 10 | 11 | class_mapping = {} 12 | 13 | visualise = False 14 | 15 | data_paths = [os.path.join(input_path,s) for s in ['VOC2007', 'VOC2012']] 16 | 17 | 18 | print('Parsing annotation files') 19 | 20 | for data_path in data_paths: 21 | 22 | annot_path = os.path.join(data_path, 'Annotations') 23 | imgs_path = os.path.join(data_path, 'JPEGImages') 24 | imgsets_path_trainval = os.path.join(data_path, 'ImageSets','Main','trainval.txt') 25 | imgsets_path_test = os.path.join(data_path, 'ImageSets','Main','test.txt') 26 | 27 | trainval_files = [] 28 | test_files = [] 29 | try: 30 | with open(imgsets_path_trainval) as f: 31 | for line in f: 32 | trainval_files.append(line.strip() + '.jpg') 33 | except Exception as e: 34 | print(e) 35 | 36 | try: 37 | with open(imgsets_path_test) as f: 38 | for line in f: 39 | test_files.append(line.strip() + '.jpg') 40 | except Exception as e: 41 | if data_path[-7:] == 'VOC2012': 42 | # this is expected, most pascal voc distibutions dont have the test.txt file 43 | pass 44 | else: 45 | print(e) 46 | 47 | annots = [os.path.join(annot_path, s) for s in os.listdir(annot_path)] 48 | idx = 0 49 | for annot in annots: 50 | try: 51 | idx += 1 52 | 53 | et = ET.parse(annot) 54 | element = et.getroot() 55 | 56 | element_objs = element.findall('object') 57 | element_filename = element.find('filename').text 58 | element_width = int(element.find('size').find('width').text) 59 | element_height = int(element.find('size').find('height').text) 60 | 61 | if len(element_objs) > 0: 62 | annotation_data = {'filepath': os.path.join(imgs_path, element_filename), 'width': element_width, 63 | 'height': element_height, 'bboxes': []} 64 | 65 | if element_filename in trainval_files: 66 | annotation_data['imageset'] = 'trainval' 67 | elif element_filename in test_files: 68 | annotation_data['imageset'] = 'test' 69 | else: 70 | annotation_data['imageset'] = 'trainval' 71 | 72 | for element_obj in element_objs: 73 | class_name = element_obj.find('name').text 74 | if class_name not in classes_count: 75 | classes_count[class_name] = 1 76 | else: 77 | classes_count[class_name] += 1 78 | 79 | if class_name not in class_mapping: 80 | class_mapping[class_name] = len(class_mapping) 81 | 82 | obj_bbox = element_obj.find('bndbox') 83 | x1 = int(round(float(obj_bbox.find('xmin').text))) 84 | y1 = int(round(float(obj_bbox.find('ymin').text))) 85 | x2 = int(round(float(obj_bbox.find('xmax').text))) 86 | y2 = int(round(float(obj_bbox.find('ymax').text))) 87 | difficulty = int(element_obj.find('difficult').text) == 1 88 | annotation_data['bboxes'].append( 89 | {'class': class_name, 'x1': x1, 'x2': x2, 'y1': y1, 'y2': y2, 'difficult': difficulty}) 90 | all_imgs.append(annotation_data) 91 | 92 | if visualise: 93 | img = cv2.imread(annotation_data['filepath']) 94 | for bbox in annotation_data['bboxes']: 95 | cv2.rectangle(img, (bbox['x1'], bbox['y1']), (bbox[ 96 | 'x2'], bbox['y2']), (0, 0, 255)) 97 | cv2.imshow('img', img) 98 | cv2.waitKey(0) 99 | 100 | except Exception as e: 101 | print(e) 102 | continue 103 | return all_imgs, classes_count, class_mapping 104 | -------------------------------------------------------------------------------- /data/pascal_voc_parser.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": { 6 | "toc": "true" 7 | }, 8 | "source": [ 9 | " # Table of Contents\n", 10 | "
" 11 | ] 12 | }, 13 | { 14 | "cell_type": "code", 15 | "execution_count": 1, 16 | "metadata": { 17 | "collapsed": true 18 | }, 19 | "outputs": [], 20 | "source": [ 21 | "# %load ../pascal_voc_parser.py\n", 22 | "import os\n", 23 | "import cv2\n", 24 | "import xml.etree.ElementTree as ET\n", 25 | "import numpy as np\n", 26 | "def get_data(input_path, visualise=False):\n", 27 | " \"\"\"Load data from an input file.\n", 28 | " https://github.com/yhenon/keras-frcnn/blob/master/keras_frcnn/pascal_voc_parser.py#L19\n", 29 | " Args: \n", 30 | " input_path (string) : path for the input file\n", 31 | " visualise (bool) : show images with annotation if True\n", 32 | " \n", 33 | " Returns:\n", 34 | " all_imgs (list) : list of images\n", 35 | " classes_count (dict) : dictionary containg classes information\n", 36 | " class_mapping (dict) : dictionary containing class mapping\n", 37 | " \"\"\"\n", 38 | " all_imgs = []\n", 39 | "\n", 40 | " classes_count = {}\n", 41 | "\n", 42 | " class_mapping = {}\n", 43 | "\n", 44 | " data_paths = [os.path.join(input_path,s) for s in ['VOC2012']]\n", 45 | "\n", 46 | "\n", 47 | " print('Parsing annotation files....')\n", 48 | "\n", 49 | " for data_path in data_paths:\n", 50 | "\n", 51 | " annot_path = os.path.join(data_path, 'Annotations')\n", 52 | " imgs_path = os.path.join(data_path, 'JPEGImages')\n", 53 | " imgsets_path_trainval = os.path.join(data_path, 'ImageSets','Main','trainval.txt')\n", 54 | " imgsets_path_test = os.path.join(data_path, 'ImageSets','Main','test.txt')\n", 55 | "\n", 56 | " trainval_files = []\n", 57 | " test_files = []\n", 58 | " try:\n", 59 | " with open(imgsets_path_trainval) as f:\n", 60 | " for line in f:\n", 61 | " trainval_files.append(line.strip() + '.jpg')\n", 62 | " except Exception as e:\n", 63 | " print(e)\n", 64 | "\n", 65 | " try:\n", 66 | " with open(imgsets_path_test) as f:\n", 67 | " for line in f:\n", 68 | " test_files.append(line.strip() + '.jpg')\n", 69 | " except Exception as e:\n", 70 | " if data_path[-7:] == 'VOC2012':\n", 71 | " # this is expected, most pascal voc distibutions dont have the test.txt file\n", 72 | " pass\n", 73 | " else:\n", 74 | " print(e)\n", 75 | "\n", 76 | " annots = [os.path.join(annot_path, s) for s in os.listdir(annot_path)]\n", 77 | " idx = 0\n", 78 | " for annot in annots:\n", 79 | " try:\n", 80 | " idx += 1\n", 81 | "\n", 82 | " et = ET.parse(annot)\n", 83 | " element = et.getroot()\n", 84 | "\n", 85 | " element_objs = element.findall('object')\n", 86 | " element_filename = element.find('filename').text\n", 87 | " element_width = int(element.find('size').find('width').text)\n", 88 | " element_height = int(element.find('size').find('height').text)\n", 89 | "\n", 90 | " if len(element_objs) > 0:\n", 91 | " annotation_data = {'filepath': os.path.join(imgs_path, element_filename), 'width': element_width,\n", 92 | " 'height': element_height, 'bboxes': []}\n", 93 | "\n", 94 | " if element_filename in trainval_files:\n", 95 | " annotation_data['imageset'] = 'trainval'\n", 96 | " elif element_filename in test_files:\n", 97 | " annotation_data['imageset'] = 'test'\n", 98 | " else:\n", 99 | " annotation_data['imageset'] = 'trainval'\n", 100 | "\n", 101 | " for element_obj in element_objs:\n", 102 | " class_name = element_obj.find('name').text\n", 103 | " if class_name not in classes_count:\n", 104 | " classes_count[class_name] = 1\n", 105 | " else:\n", 106 | " classes_count[class_name] += 1\n", 107 | "\n", 108 | " if class_name not in class_mapping:\n", 109 | " class_mapping[class_name] = len(class_mapping)\n", 110 | "\n", 111 | " obj_bbox = element_obj.find('bndbox')\n", 112 | " x1 = int(round(float(obj_bbox.find('xmin').text)))\n", 113 | " y1 = int(round(float(obj_bbox.find('ymin').text)))\n", 114 | " x2 = int(round(float(obj_bbox.find('xmax').text)))\n", 115 | " y2 = int(round(float(obj_bbox.find('ymax').text)))\n", 116 | " difficulty = int(element_obj.find('difficult').text) == 1\n", 117 | " annotation_data['bboxes'].append(\n", 118 | " {'class': class_name, 'x1': x1, 'x2': x2, 'y1': y1, 'y2': y2, 'difficult': difficulty})\n", 119 | " all_imgs.append(annotation_data)\n", 120 | "\n", 121 | " if visualise:\n", 122 | " img = cv2.imread(annotation_data['filepath'])\n", 123 | " for bbox in annotation_data['bboxes']:\n", 124 | " cv2.rectangle(img, (bbox['x1'], bbox['y1']), (bbox[\n", 125 | " 'x2'], bbox['y2']), (0, 0, 255))\n", 126 | " cv2.imshow('img', img)\n", 127 | " cv2.waitKey(0)\n", 128 | "\n", 129 | " except Exception as e:\n", 130 | " print(e)\n", 131 | " continue\n", 132 | " if 'bg' not in classes_count:\n", 133 | " classes_count['bg'] = 0\n", 134 | " class_mapping['bg'] = len(class_mapping)\n", 135 | " \n", 136 | " \n", 137 | " print(\"Parsing annotation files Finished without error!\")\n", 138 | " return all_imgs, classes_count, class_mapping\n" 139 | ] 140 | }, 141 | { 142 | "cell_type": "code", 143 | "execution_count": null, 144 | "metadata": {}, 145 | "outputs": [], 146 | "source": [ 147 | "if __name__ == '__main__':\n", 148 | " pass\n", 149 | " #all_imgs, classes_count, class_mapping = get_data('/home/abanihi/Documents/deep-data/VOCdevkit/')\n", 150 | " #print(classes_count)\n", 151 | " #print(class_mapping)\n", 152 | " " 153 | ] 154 | }, 155 | { 156 | "cell_type": "code", 157 | "execution_count": null, 158 | "metadata": { 159 | "collapsed": true 160 | }, 161 | "outputs": [], 162 | "source": [] 163 | } 164 | ], 165 | "metadata": { 166 | "kernelspec": { 167 | "display_name": "Python 3", 168 | "language": "python", 169 | "name": "python3" 170 | }, 171 | "language_info": { 172 | "codemirror_mode": { 173 | "name": "ipython", 174 | "version": 3 175 | }, 176 | "file_extension": ".py", 177 | "mimetype": "text/x-python", 178 | "name": "python", 179 | "nbconvert_exporter": "python", 180 | "pygments_lexer": "ipython3", 181 | "version": "3.6.3" 182 | }, 183 | "toc": { 184 | "nav_menu": {}, 185 | "number_sections": true, 186 | "sideBar": true, 187 | "skip_h1_title": false, 188 | "toc_cell": true, 189 | "toc_position": {}, 190 | "toc_section_display": "block", 191 | "toc_window_display": false 192 | } 193 | }, 194 | "nbformat": 4, 195 | "nbformat_minor": 2 196 | } 197 | -------------------------------------------------------------------------------- /importing notebooks.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": { 6 | "toc": "true" 7 | }, 8 | "source": [ 9 | " # Table of Contents\n", 10 | "
" 11 | ] 12 | }, 13 | { 14 | "cell_type": "markdown", 15 | "metadata": {}, 16 | "source": [ 17 | "# Importing Jupyter Notebooks as Modules" 18 | ] 19 | }, 20 | { 21 | "cell_type": "markdown", 22 | "metadata": {}, 23 | "source": [ 24 | "http://nbviewer.jupyter.org/github/jupyter/notebook/blob/master/docs/source/examples/Notebook/Importing%20Notebooks.ipynb" 25 | ] 26 | }, 27 | { 28 | "cell_type": "code", 29 | "execution_count": 1, 30 | "metadata": { 31 | "collapsed": true 32 | }, 33 | "outputs": [], 34 | "source": [ 35 | "import io, os, sys, types" 36 | ] 37 | }, 38 | { 39 | "cell_type": "code", 40 | "execution_count": 2, 41 | "metadata": { 42 | "collapsed": true 43 | }, 44 | "outputs": [], 45 | "source": [ 46 | "from IPython import get_ipython\n", 47 | "from nbformat import read\n", 48 | "from IPython.core.interactiveshell import InteractiveShell" 49 | ] 50 | }, 51 | { 52 | "cell_type": "markdown", 53 | "metadata": {}, 54 | "source": [ 55 | "Import hooks typically take the form of two objects:\n", 56 | "\n", 57 | "1. a Module **Loader**, which takes a module name (e.g. 'IPython.display'), and returns a Module\n", 58 | "2. a Module **Finder**, which figures out whether a module might exist, and tells Python what **Loader** to use\n", 59 | "\n" 60 | ] 61 | }, 62 | { 63 | "cell_type": "code", 64 | "execution_count": 3, 65 | "metadata": { 66 | "collapsed": true 67 | }, 68 | "outputs": [], 69 | "source": [ 70 | "def find_notebook(fullname, path=None):\n", 71 | " \"\"\"find a notebook, given its fully qualified name and an optional path\n", 72 | " \n", 73 | " This turns \"foo.bar\" into \"foo/bar.ipynb\"\n", 74 | " and tries turning \"Foo_Bar\" into \"Foo Bar\" if Foo_Bar\n", 75 | " does not exist.\n", 76 | " \"\"\"\n", 77 | " name = fullname.rsplit('.', 1)[-1]\n", 78 | " if not path:\n", 79 | " path = ['']\n", 80 | " for d in path:\n", 81 | " nb_path = os.path.join(d, name + \".ipynb\")\n", 82 | " if os.path.isfile(nb_path):\n", 83 | " return nb_path\n", 84 | " # let import Notebook_Name find \"Notebook Name.ipynb\"\n", 85 | " nb_path = nb_path.replace(\"_\", \" \")\n", 86 | " if os.path.isfile(nb_path):\n", 87 | " return nb_path\n", 88 | " \n" 89 | ] 90 | }, 91 | { 92 | "cell_type": "markdown", 93 | "metadata": {}, 94 | "source": [ 95 | "## Notebook Loader\n", 96 | "\n", 97 | "Here we have our Notebook Loader. It's actually quite simple - once we figure out the filename of the module, all it does is:\n", 98 | "\n", 99 | " 1. load the notebook document into memory\n", 100 | " 2. create an empty Module\n", 101 | " 3. execute every cell in the Module namespace\n", 102 | "\n", 103 | "Since IPython cells can have extended syntax, the IPython transform is applied to turn each of these cells into their pure-Python counterparts before executing them. If all of your notebook cells are pure-Python, this step is unnecessary.\n" 104 | ] 105 | }, 106 | { 107 | "cell_type": "code", 108 | "execution_count": 4, 109 | "metadata": { 110 | "collapsed": true 111 | }, 112 | "outputs": [], 113 | "source": [ 114 | "class NotebookLoader(object):\n", 115 | " \"\"\"Module Loader for Jupyter Notebooks\"\"\"\n", 116 | " def __init__(self, path=None):\n", 117 | " self.shell = InteractiveShell.instance()\n", 118 | " self.path = path\n", 119 | " \n", 120 | " def load_module(self, fullname):\n", 121 | " \"\"\"import a notebook as a module\"\"\"\n", 122 | " path = find_notebook(fullname, self.path)\n", 123 | " \n", 124 | " print (\"importing Jupyter notebook from %s\" % path)\n", 125 | " \n", 126 | " # load the notebook object\n", 127 | " with io.open(path, 'r', encoding='utf-8') as f:\n", 128 | " nb = read(f, 4)\n", 129 | " \n", 130 | " \n", 131 | " # create the module and add it to sys.modules\n", 132 | " # if name in sys.modules:\n", 133 | " # return sys.modules[name]\n", 134 | " mod = types.ModuleType(fullname)\n", 135 | " mod.__file__ = path\n", 136 | " mod.__loader__ = self\n", 137 | " mod.__dict__['get_ipython'] = get_ipython\n", 138 | " sys.modules[fullname] = mod\n", 139 | " \n", 140 | " # extra work to ensure that magics that would affect the user_ns\n", 141 | " # actually affect the notebook module's ns\n", 142 | " save_user_ns = self.shell.user_ns\n", 143 | " self.shell.user_ns = mod.__dict__\n", 144 | " \n", 145 | " try:\n", 146 | " \n", 147 | " for cell in nb.cells:\n", 148 | " \n", 149 | " if cell.cell_type == 'code':\n", 150 | " # transform the input to executable Python\n", 151 | " code = self.shell.input_transformer_manager.transform_cell(cell.source)\n", 152 | " # run the code in themodule\n", 153 | " exec(code, mod.__dict__)\n", 154 | " finally:\n", 155 | " self.shell.user_ns = save_user_ns\n", 156 | " return mod\n" 157 | ] 158 | }, 159 | { 160 | "cell_type": "markdown", 161 | "metadata": {}, 162 | "source": [ 163 | "## The Module Finder\n", 164 | "\n", 165 | "The finder is a simple object that tells you whether a name can be imported, and returns the appropriate loader. All this one does is check, when you do:\n", 166 | "\n", 167 | "```python \n", 168 | "import mynotebook\n", 169 | "```\n", 170 | "\n", 171 | "it checks whether ```mynotebook.ipynb``` exists. If a notebook is found, then it returns a NotebookLoader.\n", 172 | "\n", 173 | "Any extra logic is just for resolving paths within packages.\n" 174 | ] 175 | }, 176 | { 177 | "cell_type": "code", 178 | "execution_count": 5, 179 | "metadata": { 180 | "collapsed": true 181 | }, 182 | "outputs": [], 183 | "source": [ 184 | "class NotebookFinder(object):\n", 185 | " \"\"\"Module finder that locates Jupyter Notebooks\"\"\"\n", 186 | " def __init__(self):\n", 187 | " self.loaders = {}\n", 188 | " \n", 189 | " def find_module(self, fullname, path=None):\n", 190 | " nb_path = find_notebook(fullname, path)\n", 191 | " if not nb_path:\n", 192 | " return\n", 193 | " \n", 194 | " key = path\n", 195 | " if path:\n", 196 | " # lists aren't hashable\n", 197 | " key = os.path.sep.join(path)\n", 198 | " \n", 199 | " if key not in self.loaders:\n", 200 | " self.loaders[key] = NotebookLoader(path)\n", 201 | " return self.loaders[key]\n" 202 | ] 203 | }, 204 | { 205 | "cell_type": "markdown", 206 | "metadata": {}, 207 | "source": [ 208 | "## Register the hook" 209 | ] 210 | }, 211 | { 212 | "cell_type": "markdown", 213 | "metadata": {}, 214 | "source": [ 215 | "Now we register the NotebookFinder with ```sys.meta_path```" 216 | ] 217 | }, 218 | { 219 | "cell_type": "code", 220 | "execution_count": 6, 221 | "metadata": { 222 | "collapsed": true 223 | }, 224 | "outputs": [], 225 | "source": [ 226 | "sys.meta_path.append(NotebookFinder())" 227 | ] 228 | }, 229 | { 230 | "cell_type": "markdown", 231 | "metadata": {}, 232 | "source": [ 233 | "After this point, my notebooks should be importable.\n", 234 | "\n", 235 | "Let's look at what we have in the CWD:\n" 236 | ] 237 | }, 238 | { 239 | "cell_type": "code", 240 | "execution_count": 7, 241 | "metadata": {}, 242 | "outputs": [ 243 | { 244 | "name": "stdout", 245 | "output_type": "stream", 246 | "text": [ 247 | "__init__.py pascal_voc_parser.ipynb pascal_voc_parser-pyfile.py \u001b[0m\u001b[01;34m__pycache__\u001b[0m/\r\n" 248 | ] 249 | } 250 | ], 251 | "source": [ 252 | "ls data/" 253 | ] 254 | }, 255 | { 256 | "cell_type": "code", 257 | "execution_count": 8, 258 | "metadata": {}, 259 | "outputs": [ 260 | { 261 | "name": "stdout", 262 | "output_type": "stream", 263 | "text": [ 264 | "importing Jupyter notebook from /home/abanihi/Documents/Github/keras-faster-rcnn/data/pascal_voc_parser.ipynb\n", 265 | "Parsing annotation files....\n", 266 | "Parsing annotation files Finished without error!\n" 267 | ] 268 | } 269 | ], 270 | "source": [ 271 | "from data import pascal_voc_parser as p" 272 | ] 273 | }, 274 | { 275 | "cell_type": "markdown", 276 | "metadata": {}, 277 | "source": [ 278 | "\n", 279 | "## Aside: displaying notebooks\n", 280 | "\n", 281 | "Here is some simple code to display the contents of a notebook with syntax highlighting, etc.\n" 282 | ] 283 | }, 284 | { 285 | "cell_type": "code", 286 | "execution_count": 10, 287 | "metadata": {}, 288 | "outputs": [ 289 | { 290 | "data": { 291 | "text/html": [ 292 | "\n", 293 | "\n" 363 | ], 364 | "text/plain": [ 365 | "" 366 | ] 367 | }, 368 | "metadata": {}, 369 | "output_type": "display_data" 370 | } 371 | ], 372 | "source": [ 373 | "\n", 374 | "\n", 375 | "from pygments import highlight\n", 376 | "from pygments.lexers import PythonLexer\n", 377 | "from pygments.formatters import HtmlFormatter\n", 378 | "\n", 379 | "from IPython.display import display, HTML\n", 380 | "\n", 381 | "formatter = HtmlFormatter()\n", 382 | "lexer = PythonLexer()\n", 383 | "\n", 384 | "# publish the CSS for pygments highlighting\n", 385 | "display(HTML(\"\"\"\n", 386 | "\n", 389 | "\"\"\" % formatter.get_style_defs()\n", 390 | "))\n", 391 | "\n" 392 | ] 393 | }, 394 | { 395 | "cell_type": "code", 396 | "execution_count": 11, 397 | "metadata": { 398 | "collapsed": true 399 | }, 400 | "outputs": [], 401 | "source": [ 402 | "def show_notebook(fname):\n", 403 | " \"\"\"display a short summary of the cells of a notebook\"\"\"\n", 404 | " with io.open(fname, 'r', encoding='utf-8') as f:\n", 405 | " nb = read(f, 4)\n", 406 | " html = []\n", 407 | " for cell in nb.cells:\n", 408 | " html.append(\"

%s cell

\" % cell.cell_type)\n", 409 | " if cell.cell_type == 'code':\n", 410 | " html.append(highlight(cell.source, lexer, formatter))\n", 411 | " else:\n", 412 | " html.append(\"
%s
\" % cell.source)\n", 413 | " display(HTML('\\n'.join(html)))" 414 | ] 415 | }, 416 | { 417 | "cell_type": "code", 418 | "execution_count": 12, 419 | "metadata": {}, 420 | "outputs": [ 421 | { 422 | "data": { 423 | "text/html": [ 424 | "

markdown cell

\n", 425 | "
 # Table of Contents\n",
426 |        "
    \n", 427 | "

    code cell

    \n", 428 | "
    # %load ../pascal_voc_parser.py\n",
    429 |        "import os\n",
    430 |        "import cv2\n",
    431 |        "import xml.etree.ElementTree as ET\n",
    432 |        "import numpy as np\n",
    433 |        "def get_data(input_path, visualise=False):\n",
    434 |        "    """Load data from an input file.\n",
    435 |        "      https://github.com/yhenon/keras-frcnn/blob/master/keras_frcnn/pascal_voc_parser.py#L19\n",
    436 |        "      \n",
    437 |        "    """\n",
    438 |        "    all_imgs = []\n",
    439 |        "\n",
    440 |        "    classes_count = {}\n",
    441 |        "\n",
    442 |        "    class_mapping = {}\n",
    443 |        "\n",
    444 |        "    data_paths = [os.path.join(input_path,s) for s in ['VOC2012']]\n",
    445 |        "\n",
    446 |        "\n",
    447 |        "    print('Parsing annotation files....')\n",
    448 |        "\n",
    449 |        "    for data_path in data_paths:\n",
    450 |        "\n",
    451 |        "        annot_path = os.path.join(data_path, 'Annotations')\n",
    452 |        "        imgs_path = os.path.join(data_path, 'JPEGImages')\n",
    453 |        "        imgsets_path_trainval = os.path.join(data_path, 'ImageSets','Main','trainval.txt')\n",
    454 |        "        imgsets_path_test = os.path.join(data_path, 'ImageSets','Main','test.txt')\n",
    455 |        "\n",
    456 |        "        trainval_files = []\n",
    457 |        "        test_files = []\n",
    458 |        "        try:\n",
    459 |        "            with open(imgsets_path_trainval) as f:\n",
    460 |        "                for line in f:\n",
    461 |        "                    trainval_files.append(line.strip() + '.jpg')\n",
    462 |        "        except Exception as e:\n",
    463 |        "            print(e)\n",
    464 |        "\n",
    465 |        "        try:\n",
    466 |        "            with open(imgsets_path_test) as f:\n",
    467 |        "                for line in f:\n",
    468 |        "                    test_files.append(line.strip() + '.jpg')\n",
    469 |        "        except Exception as e:\n",
    470 |        "            if data_path[-7:] == 'VOC2012':\n",
    471 |        "                # this is expected, most pascal voc distibutions dont have the test.txt file\n",
    472 |        "                pass\n",
    473 |        "            else:\n",
    474 |        "                print(e)\n",
    475 |        "\n",
    476 |        "        annots = [os.path.join(annot_path, s) for s in os.listdir(annot_path)]\n",
    477 |        "        idx = 0\n",
    478 |        "        for annot in annots:\n",
    479 |        "            try:\n",
    480 |        "                idx += 1\n",
    481 |        "\n",
    482 |        "                et = ET.parse(annot)\n",
    483 |        "                element = et.getroot()\n",
    484 |        "\n",
    485 |        "                element_objs = element.findall('object')\n",
    486 |        "                element_filename = element.find('filename').text\n",
    487 |        "                element_width = int(element.find('size').find('width').text)\n",
    488 |        "                element_height = int(element.find('size').find('height').text)\n",
    489 |        "\n",
    490 |        "                if len(element_objs) > 0:\n",
    491 |        "                    annotation_data = {'filepath': os.path.join(imgs_path, element_filename), 'width': element_width,\n",
    492 |        "                                       'height': element_height, 'bboxes': []}\n",
    493 |        "\n",
    494 |        "                    if element_filename in trainval_files:\n",
    495 |        "                        annotation_data['imageset'] = 'trainval'\n",
    496 |        "                    elif element_filename in test_files:\n",
    497 |        "                        annotation_data['imageset'] = 'test'\n",
    498 |        "                    else:\n",
    499 |        "                        annotation_data['imageset'] = 'trainval'\n",
    500 |        "\n",
    501 |        "                for element_obj in element_objs:\n",
    502 |        "                    class_name = element_obj.find('name').text\n",
    503 |        "                    if class_name not in classes_count:\n",
    504 |        "                        classes_count[class_name] = 1\n",
    505 |        "                    else:\n",
    506 |        "                        classes_count[class_name] += 1\n",
    507 |        "\n",
    508 |        "                    if class_name not in class_mapping:\n",
    509 |        "                        class_mapping[class_name] = len(class_mapping)\n",
    510 |        "\n",
    511 |        "                    obj_bbox = element_obj.find('bndbox')\n",
    512 |        "                    x1 = int(round(float(obj_bbox.find('xmin').text)))\n",
    513 |        "                    y1 = int(round(float(obj_bbox.find('ymin').text)))\n",
    514 |        "                    x2 = int(round(float(obj_bbox.find('xmax').text)))\n",
    515 |        "                    y2 = int(round(float(obj_bbox.find('ymax').text)))\n",
    516 |        "                    difficulty = int(element_obj.find('difficult').text) == 1\n",
    517 |        "                    annotation_data['bboxes'].append(\n",
    518 |        "                        {'class': class_name, 'x1': x1, 'x2': x2, 'y1': y1, 'y2': y2, 'difficult': difficulty})\n",
    519 |        "                all_imgs.append(annotation_data)\n",
    520 |        "\n",
    521 |        "                if visualise:\n",
    522 |        "                    img = cv2.imread(annotation_data['filepath'])\n",
    523 |        "                    for bbox in annotation_data['bboxes']:\n",
    524 |        "                        cv2.rectangle(img, (bbox['x1'], bbox['y1']), (bbox[\n",
    525 |        "                                        'x2'], bbox['y2']), (0, 0, 255))\n",
    526 |        "                    cv2.imshow('img', img)\n",
    527 |        "                    cv2.waitKey(0)\n",
    528 |        "\n",
    529 |        "            except Exception as e:\n",
    530 |        "                print(e)\n",
    531 |        "                continue\n",
    532 |        "                \n",
    533 |        "    print("Parsing annotation files Finished without error!")\n",
    534 |        "    return all_imgs, classes_count, class_mapping\n",
    535 |        "
    \n", 536 | "\n", 537 | "

    code cell

    \n", 538 | "
    all_imgs, classes_count, class_mapping = get_data('/home/abanihi/Documents/deep-data/VOCdevkit/')\n",
    539 |        "
    \n", 540 | "\n", 541 | "

    code cell

    \n", 542 | "
    classes_count\n",
    543 |        "
    \n", 544 | "\n", 545 | "

    code cell

    \n", 546 | "
    class_mapping\n",
    547 |        "
    \n", 548 | "\n", 549 | "

    code cell

    \n", 550 | "
    type(all_imgs)\n",
    551 |        "
    \n", 552 | "\n", 553 | "

    code cell

    \n", 554 | "
    all_imgs[0]\n",
    555 |        "
    \n", 556 | "\n", 557 | "

    code cell

    \n", 558 | "
    if 'bg' not in classes_count:\n",
    559 |        "    classes_count['bg'] = 0\n",
    560 |        "    class_mapping['bg'] = len(class_mapping)\n",
    561 |        "
    \n", 562 | "\n", 563 | "

    code cell

    \n", 564 | "
    classes_count\n",
    565 |        "
    \n", 566 | "\n", 567 | "

    code cell

    \n", 568 | "
    \n",
    569 |        "
    \n" 570 | ], 571 | "text/plain": [ 572 | "" 573 | ] 574 | }, 575 | "metadata": {}, 576 | "output_type": "display_data" 577 | } 578 | ], 579 | "source": [ 580 | "show_notebook(\"data/pascal_voc_parser.ipynb\")" 581 | ] 582 | } 583 | ], 584 | "metadata": { 585 | "kernelspec": { 586 | "display_name": "Python 3", 587 | "language": "python", 588 | "name": "python3" 589 | }, 590 | "language_info": { 591 | "codemirror_mode": { 592 | "name": "ipython", 593 | "version": 3 594 | }, 595 | "file_extension": ".py", 596 | "mimetype": "text/x-python", 597 | "name": "python", 598 | "nbconvert_exporter": "python", 599 | "pygments_lexer": "ipython3", 600 | "version": "3.6.2" 601 | }, 602 | "toc": { 603 | "nav_menu": {}, 604 | "number_sections": true, 605 | "sideBar": true, 606 | "skip_h1_title": false, 607 | "toc_cell": true, 608 | "toc_position": {}, 609 | "toc_section_display": "block", 610 | "toc_window_display": true 611 | } 612 | }, 613 | "nbformat": 4, 614 | "nbformat_minor": 2 615 | } 616 | -------------------------------------------------------------------------------- /keras_frcnn/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/andersy005/keras-faster-rcnn/fbceef68d390cca3ee1e77c26189b6b72968448e/keras_frcnn/__init__.py -------------------------------------------------------------------------------- /keras_frcnn/fixed_batch_normalization.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "Reference: https://github.com/yhenon/keras-frcnn/blob/master/keras_frcnn/FixedBatchNormalization.py" 8 | ] 9 | }, 10 | { 11 | "cell_type": "code", 12 | "execution_count": 1, 13 | "metadata": {}, 14 | "outputs": [ 15 | { 16 | "name": "stderr", 17 | "output_type": "stream", 18 | "text": [ 19 | "Using TensorFlow backend.\n" 20 | ] 21 | } 22 | ], 23 | "source": [ 24 | "from keras.engine import Layer, InputSpec\n", 25 | "from keras import initializers, regularizers\n", 26 | "from keras import backend as K" 27 | ] 28 | }, 29 | { 30 | "cell_type": "code", 31 | "execution_count": 3, 32 | "metadata": {}, 33 | "outputs": [], 34 | "source": [ 35 | "class FixedBatchNormalization(Layer):\n", 36 | " \n", 37 | " def __init__(self, epsilon=1e-3, axis=-1,\n", 38 | " weights=None, beta_init='zero', gamma_init='one',\n", 39 | " gamma_regularizer=None, beta_regularizer=None, **kwargs):\n", 40 | " \n", 41 | " self.supports_masking = True\n", 42 | " self.beta_init = initializers.get(beta_init)\n", 43 | " self.gamma_init = initializers.get(gamma_init)\n", 44 | " self.epsilon = epsilon\n", 45 | " self.axis = axis\n", 46 | " self.gamma_regularizer = regularizers.get(gamma_regularizer)\n", 47 | " self.beta_regularizer = regularizers.get(beta_regularizer)\n", 48 | " self.initial_weights = weights\n", 49 | " super(FixedBatchNormalization, self).__init__(**kwargs)\n", 50 | " \n", 51 | " def build(self, input_shape):\n", 52 | " self.input_shape = [InputSpec(shape=input_shape)]\n", 53 | " shape = (input_shape[self.axis], )\n", 54 | " \n", 55 | " self.gamma = self.add_weight(shape,\n", 56 | " initializer = self.gamma_init,\n", 57 | " regularizer = self.gamma_regularizer,\n", 58 | " name = '{}_gamma'.format(self.name),\\\n", 59 | " trainable = False)\n", 60 | " self.beta = self.add_weight(shape,\n", 61 | " initializer = self.beta_init,\n", 62 | " regularizer = self.beta_regularizer,\n", 63 | " name = '{}_beta'.format(self.name),\n", 64 | " trainable = False)\\\n", 65 | " \n", 66 | " self.running_mean = self.add_weight(shape,\n", 67 | " initializer = 'zero',\n", 68 | " name = '{}_running_mean'.format(self.name),\n", 69 | " trainable = False)\n", 70 | " \n", 71 | " self.running_std = self.add_weight(shape,\n", 72 | " initializer = 'zero',\n", 73 | " name = '{}_running_std'.format(self.name),\n", 74 | " trainable = False)\n", 75 | " \n", 76 | " if self.initial_weights is not None:\n", 77 | " self.set_weights(self.initial_weights)\n", 78 | " del self.initial_weights\n", 79 | " \n", 80 | " self.built = True\n", 81 | " \n", 82 | " def call(self, x, mask=None):\n", 83 | " \n", 84 | " assert self.built, 'Layer must be built before being called'\n", 85 | " input_shape = K.int_shape(x)\n", 86 | " \n", 87 | " reduction_axes = list(range(len(input_shape)))\n", 88 | " del reduction_axes[self.axis]\n", 89 | " \n", 90 | " broadcast_shape = [1] * len(input_shape)\n", 91 | " broadcast_shape[self.axis] = input_shape[self.axis]\n", 92 | " \n", 93 | " if sorted(reduction_axes) == range(K.ndim(x))[:-1]:\n", 94 | " x_normed = K.batch_normalization(\n", 95 | " x, self.running_mean, self.running_std,\n", 96 | " self.beta, self.gamma, epsilon=self.epsilon)\n", 97 | " \n", 98 | " else:\n", 99 | " # need broadcasting\n", 100 | " broadcast_running_mean = K.reshape(self.running_mean, broadcast_shape)\n", 101 | " broadcast_running_std = K.reshape(self.running_std, broadcast_shape)\n", 102 | " broadcast_beta = K.reshape(self.beta, broadcast_shape)\n", 103 | " broadcast_gamma = K.reshape(self.gamma, broadcast_shape)\n", 104 | " x_normed = K.batch_normalization(\n", 105 | " x, broadcast_running_mean, broadcast_running_std,\n", 106 | " broadcast_beta, broadcast_gamma, epsilon=self.epsilon)\n", 107 | " \n", 108 | " return x_normed\n", 109 | " \n", 110 | " def get_config(self):\n", 111 | " \n", 112 | " config = {'epsilon': self.epsilon,\n", 113 | " 'axis': self.axis,\n", 114 | " 'gamma_regularizer': self.gamma_regularizer.get_config() if self.gamma_regularizer else None,\n", 115 | " 'beta_regularizer': self.beta_regularizer.get_config() if self.beta_regularizer else None}\n", 116 | " \n", 117 | " base_config = super(FixedBatchNormalization, self).get_config()\n", 118 | " return dict(list(base_config.items()) + list(config.items()))" 119 | ] 120 | } 121 | ], 122 | "metadata": { 123 | "kernelspec": { 124 | "display_name": "Python 3", 125 | "language": "python", 126 | "name": "python3" 127 | }, 128 | "language_info": { 129 | "codemirror_mode": { 130 | "name": "ipython", 131 | "version": 3 132 | }, 133 | "file_extension": ".py", 134 | "mimetype": "text/x-python", 135 | "name": "python", 136 | "nbconvert_exporter": "python", 137 | "pygments_lexer": "ipython3", 138 | "version": "3.6.3" 139 | } 140 | }, 141 | "nbformat": 4, 142 | "nbformat_minor": 2 143 | } 144 | -------------------------------------------------------------------------------- /keras_frcnn/resnet.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# ResNet50 model for Keras.\n", 8 | " Reference:\n", 9 | "- [Deep Residual Learning for Image Recognition](https://arxiv.org/abs/1512.03385) Adapted from code contributed by BigMoyan\n", 10 | "- https://github.com/yhenon/keras-frcnn/blob/master/keras_frcnn/resnet.py" 11 | ] 12 | }, 13 | { 14 | "cell_type": "code", 15 | "execution_count": 1, 16 | "metadata": {}, 17 | "outputs": [ 18 | { 19 | "name": "stderr", 20 | "output_type": "stream", 21 | "text": [ 22 | "Using TensorFlow backend.\n" 23 | ] 24 | } 25 | ], 26 | "source": [ 27 | "from __future__ import print_function\n", 28 | "from __future__ import absolute_import\n", 29 | "import sys\n", 30 | "import os\n", 31 | "from themachine.nbfinder import NotebookFinder\n", 32 | "sys.meta_path.append(NotebookFinder())\n", 33 | "from keras.layers import Input, Add, Dense, Activation, Flatten, Convolution2D, MaxPooling2D, ZeroPadding2D, \\\n", 34 | " AveragePooling2D, TimeDistributed\n", 35 | "\n", 36 | "from keras import backend as K\n", 37 | "\n", 38 | "#from keras_frcnn.RoiPoolingConv import RoiPoolingConv\n", 39 | "#from keras_frcnn.FixedBatchNormalization import FixedBatchNormalization\n" 40 | ] 41 | }, 42 | { 43 | "cell_type": "code", 44 | "execution_count": 2, 45 | "metadata": {}, 46 | "outputs": [ 47 | { 48 | "name": "stdout", 49 | "output_type": "stream", 50 | "text": [ 51 | "importing Jupyter notebook from roi_pooling_conv.ipynb\n", 52 | "importing Jupyter notebook from fixed_batch_normalization.ipynb\n" 53 | ] 54 | } 55 | ], 56 | "source": [ 57 | "from roi_pooling_conv import RoiPoolingConv\n", 58 | "from fixed_batch_normalization import FixedBatchNormalization" 59 | ] 60 | }, 61 | { 62 | "cell_type": "code", 63 | "execution_count": 8, 64 | "metadata": {}, 65 | "outputs": [], 66 | "source": [ 67 | "def get_weight_path():\n", 68 | " if K.image_dim_ordering() == 'th':\n", 69 | " return 'resnet50_weights_th_dim_ordering_th_kernels_notop.h5'\n", 70 | " else:\n", 71 | " return 'resnet50_weights_tf_dim_ordering_tf_kernels.h5'\n", 72 | " \n", 73 | "def get_img_output_length(width, height):\n", 74 | " def get_output_length(input_length):\n", 75 | " # zero_pad\n", 76 | " input_length += 6\n", 77 | " \n", 78 | " # apply 4 strided convolutions\n", 79 | " filter_sizes = [7, 3, 1, 1]\n", 80 | " \n", 81 | " stride = 2\n", 82 | " \n", 83 | " for filter_size in filter_sizes:\n", 84 | " input_length = (input_length - filter_size + stride) / stride\n", 85 | " \n", 86 | " return input_length\n", 87 | " \n", 88 | " return get_output_length(width), get_output_length(height)\n" 89 | ] 90 | }, 91 | { 92 | "cell_type": "code", 93 | "execution_count": 7, 94 | "metadata": {}, 95 | "outputs": [], 96 | "source": [ 97 | "def identity_block(input_tensor, kernel_size, filters, stage, block, trainable=True):\n", 98 | "\n", 99 | " nb_filter1, nb_filter2, nb_filter3 = filters\n", 100 | " \n", 101 | " if K.image_dim_ordering() == 'tf':\n", 102 | " bn_axis = 3\n", 103 | " else:\n", 104 | " bn_axis = 1\n", 105 | "\n", 106 | " conv_name_base = 'res' + str(stage) + block + '_branch'\n", 107 | " bn_name_base = 'bn' + str(stage) + block + '_branch'\n", 108 | "\n", 109 | " x = Convolution2D(nb_filter1, (1, 1), name=conv_name_base + '2a', trainable=trainable)(input_tensor)\n", 110 | " x = FixedBatchNormalization(axis=bn_axis, name=bn_name_base + '2a')(x)\n", 111 | " x = Activation('relu')(x)\n", 112 | "\n", 113 | " x = Convolution2D(nb_filter2, (kernel_size, kernel_size), padding='same', name=conv_name_base + '2b', trainable=trainable)(x)\n", 114 | " x = FixedBatchNormalization(axis=bn_axis, name=bn_name_base + '2b')(x)\n", 115 | " x = Activation('relu')(x)\n", 116 | "\n", 117 | " x = Convolution2D(nb_filter3, (1, 1), name=conv_name_base + '2c', trainable=trainable)(x)\n", 118 | " x = FixedBatchNormalization(axis=bn_axis, name=bn_name_base + '2c')(x)\n", 119 | "\n", 120 | " x = Add()([x, input_tensor])\n", 121 | " x = Activation('relu')(x)\n", 122 | " return x\n", 123 | "\n", 124 | "\n", 125 | "def identity_block_td(input_tensor, kernel_size, filters, stage, block, trainable=True):\n", 126 | "\n", 127 | " # identity block time distributed\n", 128 | "\n", 129 | " nb_filter1, nb_filter2, nb_filter3 = filters\n", 130 | " if K.image_dim_ordering() == 'tf':\n", 131 | " bn_axis = 3\n", 132 | " else:\n", 133 | " bn_axis = 1\n", 134 | "\n", 135 | " conv_name_base = 'res' + str(stage) + block + '_branch'\n", 136 | " bn_name_base = 'bn' + str(stage) + block + '_branch'\n", 137 | "\n", 138 | " x = TimeDistributed(Convolution2D(nb_filter1, (1, 1), trainable=trainable, kernel_initializer='normal'), name=conv_name_base + '2a')(input_tensor)\n", 139 | " x = TimeDistributed(FixedBatchNormalization(axis=bn_axis), name=bn_name_base + '2a')(x)\n", 140 | " x = Activation('relu')(x)\n", 141 | "\n", 142 | " x = TimeDistributed(Convolution2D(nb_filter2, (kernel_size, kernel_size), trainable=trainable, kernel_initializer='normal',padding='same'), name=conv_name_base + '2b')(x)\n", 143 | " x = TimeDistributed(FixedBatchNormalization(axis=bn_axis), name=bn_name_base + '2b')(x)\n", 144 | " x = Activation('relu')(x)\n", 145 | "\n", 146 | " x = TimeDistributed(Convolution2D(nb_filter3, (1, 1), trainable=trainable, kernel_initializer='normal'), name=conv_name_base + '2c')(x)\n", 147 | " x = TimeDistributed(FixedBatchNormalization(axis=bn_axis), name=bn_name_base + '2c')(x)\n", 148 | "\n", 149 | " x = Add()([x, input_tensor])\n", 150 | " x = Activation('relu')(x)\n", 151 | "\n", 152 | " return x\n" 153 | ] 154 | }, 155 | { 156 | "cell_type": "code", 157 | "execution_count": 10, 158 | "metadata": {}, 159 | "outputs": [], 160 | "source": [ 161 | "def conv_block(input_tensor, kernel_size, filters, stage, block, strides=(2, 2), trainable=True):\n", 162 | "\n", 163 | " nb_filter1, nb_filter2, nb_filter3 = filters\n", 164 | " if K.image_dim_ordering() == 'tf':\n", 165 | " bn_axis = 3\n", 166 | " else:\n", 167 | " bn_axis = 1\n", 168 | "\n", 169 | " conv_name_base = 'res' + str(stage) + block + '_branch'\n", 170 | " bn_name_base = 'bn' + str(stage) + block + '_branch'\n", 171 | "\n", 172 | " x = Convolution2D(nb_filter1, (1, 1), strides=strides, name=conv_name_base + '2a', trainable=trainable)(input_tensor)\n", 173 | " x = FixedBatchNormalization(axis=bn_axis, name=bn_name_base + '2a')(x)\n", 174 | " x = Activation('relu')(x)\n", 175 | "\n", 176 | " x = Convolution2D(nb_filter2, (kernel_size, kernel_size), padding='same', name=conv_name_base + '2b', trainable=trainable)(x)\n", 177 | " x = FixedBatchNormalization(axis=bn_axis, name=bn_name_base + '2b')(x)\n", 178 | " x = Activation('relu')(x)\n", 179 | "\n", 180 | " x = Convolution2D(nb_filter3, (1, 1), name=conv_name_base + '2c', trainable=trainable)(x)\n", 181 | " x = FixedBatchNormalization(axis=bn_axis, name=bn_name_base + '2c')(x)\n", 182 | "\n", 183 | " shortcut = Convolution2D(nb_filter3, (1, 1), strides=strides, name=conv_name_base + '1', trainable=trainable)(input_tensor)\n", 184 | " shortcut = FixedBatchNormalization(axis=bn_axis, name=bn_name_base + '1')(shortcut)\n", 185 | "\n", 186 | " x = Add()([x, shortcut])\n", 187 | " x = Activation('relu')(x)\n", 188 | " return x\n", 189 | "\n", 190 | "\n", 191 | "def conv_block_td(input_tensor, kernel_size, filters, stage, block, input_shape, strides=(2, 2), trainable=True):\n", 192 | "\n", 193 | " # conv block time distributed\n", 194 | "\n", 195 | " nb_filter1, nb_filter2, nb_filter3 = filters\n", 196 | " if K.image_dim_ordering() == 'tf':\n", 197 | " bn_axis = 3\n", 198 | " else:\n", 199 | " bn_axis = 1\n", 200 | "\n", 201 | " conv_name_base = 'res' + str(stage) + block + '_branch'\n", 202 | " bn_name_base = 'bn' + str(stage) + block + '_branch'\n", 203 | "\n", 204 | " x = TimeDistributed(Convolution2D(nb_filter1, (1, 1), strides=strides, trainable=trainable, kernel_initializer='normal'), input_shape=input_shape, name=conv_name_base + '2a')(input_tensor)\n", 205 | " x = TimeDistributed(FixedBatchNormalization(axis=bn_axis), name=bn_name_base + '2a')(x)\n", 206 | " x = Activation('relu')(x)\n", 207 | "\n", 208 | " x = TimeDistributed(Convolution2D(nb_filter2, (kernel_size, kernel_size), padding='same', trainable=trainable, kernel_initializer='normal'), name=conv_name_base + '2b')(x)\n", 209 | " x = TimeDistributed(FixedBatchNormalization(axis=bn_axis), name=bn_name_base + '2b')(x)\n", 210 | " x = Activation('relu')(x)\n", 211 | "\n", 212 | " x = TimeDistributed(Convolution2D(nb_filter3, (1, 1), kernel_initializer='normal'), name=conv_name_base + '2c', trainable=trainable)(x)\n", 213 | " x = TimeDistributed(FixedBatchNormalization(axis=bn_axis), name=bn_name_base + '2c')(x)\n", 214 | "\n", 215 | " shortcut = TimeDistributed(Convolution2D(nb_filter3, (1, 1), strides=strides, trainable=trainable, kernel_initializer='normal'), name=conv_name_base + '1')(input_tensor)\n", 216 | " shortcut = TimeDistributed(FixedBatchNormalization(axis=bn_axis), name=bn_name_base + '1')(shortcut)\n", 217 | "\n", 218 | " x = Add()([x, shortcut])\n", 219 | " x = Activation('relu')(x)\n", 220 | " return x" 221 | ] 222 | }, 223 | { 224 | "cell_type": "code", 225 | "execution_count": 11, 226 | "metadata": {}, 227 | "outputs": [], 228 | "source": [ 229 | "def nn_base(input_tensor=None, trainable=False):\n", 230 | "\n", 231 | " # Determine proper input shape\n", 232 | " if K.image_dim_ordering() == 'th':\n", 233 | " input_shape = (3, None, None)\n", 234 | " else:\n", 235 | " input_shape = (None, None, 3)\n", 236 | "\n", 237 | " if input_tensor is None:\n", 238 | " img_input = Input(shape=input_shape)\n", 239 | " else:\n", 240 | " if not K.is_keras_tensor(input_tensor):\n", 241 | " img_input = Input(tensor=input_tensor, shape=input_shape)\n", 242 | " else:\n", 243 | " img_input = input_tensor\n", 244 | "\n", 245 | " if K.image_dim_ordering() == 'tf':\n", 246 | " bn_axis = 3\n", 247 | " else:\n", 248 | " bn_axis = 1\n", 249 | "\n", 250 | " x = ZeroPadding2D((3, 3))(img_input)\n", 251 | "\n", 252 | " x = Convolution2D(64, (7, 7), strides=(2, 2), name='conv1', trainable = trainable)(x)\n", 253 | " x = FixedBatchNormalization(axis=bn_axis, name='bn_conv1')(x)\n", 254 | " x = Activation('relu')(x)\n", 255 | " x = MaxPooling2D((3, 3), strides=(2, 2))(x)\n", 256 | "\n", 257 | " x = conv_block(x, 3, [64, 64, 256], stage=2, block='a', strides=(1, 1), trainable = trainable)\n", 258 | " x = identity_block(x, 3, [64, 64, 256], stage=2, block='b', trainable = trainable)\n", 259 | " x = identity_block(x, 3, [64, 64, 256], stage=2, block='c', trainable = trainable)\n", 260 | "\n", 261 | " x = conv_block(x, 3, [128, 128, 512], stage=3, block='a', trainable = trainable)\n", 262 | " x = identity_block(x, 3, [128, 128, 512], stage=3, block='b', trainable = trainable)\n", 263 | " x = identity_block(x, 3, [128, 128, 512], stage=3, block='c', trainable = trainable)\n", 264 | " x = identity_block(x, 3, [128, 128, 512], stage=3, block='d', trainable = trainable)\n", 265 | "\n", 266 | " x = conv_block(x, 3, [256, 256, 1024], stage=4, block='a', trainable = trainable)\n", 267 | " x = identity_block(x, 3, [256, 256, 1024], stage=4, block='b', trainable = trainable)\n", 268 | " x = identity_block(x, 3, [256, 256, 1024], stage=4, block='c', trainable = trainable)\n", 269 | " x = identity_block(x, 3, [256, 256, 1024], stage=4, block='d', trainable = trainable)\n", 270 | " x = identity_block(x, 3, [256, 256, 1024], stage=4, block='e', trainable = trainable)\n", 271 | " x = identity_block(x, 3, [256, 256, 1024], stage=4, block='f', trainable = trainable)\n", 272 | "\n", 273 | " return x\n", 274 | "\n" 275 | ] 276 | }, 277 | { 278 | "cell_type": "code", 279 | "execution_count": 12, 280 | "metadata": {}, 281 | "outputs": [], 282 | "source": [ 283 | "def classifier_layers(x, input_shape, trainable=False):\n", 284 | "\n", 285 | " # compile times on theano tend to be very high, so we use smaller ROI pooling regions to workaround\n", 286 | " # (hence a smaller stride in the region that follows the ROI pool)\n", 287 | " if K.backend() == 'tensorflow':\n", 288 | " x = conv_block_td(x, 3, [512, 512, 2048], stage=5, block='a', input_shape=input_shape, strides=(2, 2), trainable=trainable)\n", 289 | " elif K.backend() == 'theano':\n", 290 | " x = conv_block_td(x, 3, [512, 512, 2048], stage=5, block='a', input_shape=input_shape, strides=(1, 1), trainable=trainable)\n", 291 | "\n", 292 | " x = identity_block_td(x, 3, [512, 512, 2048], stage=5, block='b', trainable=trainable)\n", 293 | " x = identity_block_td(x, 3, [512, 512, 2048], stage=5, block='c', trainable=trainable)\n", 294 | " x = TimeDistributed(AveragePooling2D((7, 7)), name='avg_pool')(x)\n", 295 | "\n", 296 | " return x" 297 | ] 298 | }, 299 | { 300 | "cell_type": "code", 301 | "execution_count": 13, 302 | "metadata": {}, 303 | "outputs": [], 304 | "source": [ 305 | "def rpn(base_layers,num_anchors):\n", 306 | "\n", 307 | " x = Convolution2D(512, (3, 3), padding='same', activation='relu', kernel_initializer='normal', name='rpn_conv1')(base_layers)\n", 308 | "\n", 309 | " x_class = Convolution2D(num_anchors, (1, 1), activation='sigmoid', kernel_initializer='uniform', name='rpn_out_class')(x)\n", 310 | " x_regr = Convolution2D(num_anchors * 4, (1, 1), activation='linear', kernel_initializer='zero', name='rpn_out_regress')(x)\n", 311 | "\n", 312 | " return [x_class, x_regr, base_layers]\n", 313 | "\n", 314 | "def classifier(base_layers, input_rois, num_rois, nb_classes = 21, trainable=False):\n", 315 | "\n", 316 | " # compile times on theano tend to be very high, so we use smaller ROI pooling regions to workaround\n", 317 | "\n", 318 | " if K.backend() == 'tensorflow':\n", 319 | " pooling_regions = 14\n", 320 | " input_shape = (num_rois,14,14,1024)\n", 321 | " elif K.backend() == 'theano':\n", 322 | " pooling_regions = 7\n", 323 | " input_shape = (num_rois,1024,7,7)\n", 324 | "\n", 325 | " out_roi_pool = RoiPoolingConv(pooling_regions, num_rois)([base_layers, input_rois])\n", 326 | " out = classifier_layers(out_roi_pool, input_shape=input_shape, trainable=True)\n", 327 | "\n", 328 | " out = TimeDistributed(Flatten())(out)\n", 329 | "\n", 330 | " out_class = TimeDistributed(Dense(nb_classes, activation='softmax', kernel_initializer='zero'), name='dense_class_{}'.format(nb_classes))(out)\n", 331 | " # note: no regression target for bg class\n", 332 | " out_regr = TimeDistributed(Dense(4 * (nb_classes-1), activation='linear', kernel_initializer='zero'), name='dense_regress_{}'.format(nb_classes))(out)\n", 333 | " return [out_class, out_regr]" 334 | ] 335 | } 336 | ], 337 | "metadata": { 338 | "kernelspec": { 339 | "display_name": "Python 3", 340 | "language": "python", 341 | "name": "python3" 342 | }, 343 | "language_info": { 344 | "codemirror_mode": { 345 | "name": "ipython", 346 | "version": 3 347 | }, 348 | "file_extension": ".py", 349 | "mimetype": "text/x-python", 350 | "name": "python", 351 | "nbconvert_exporter": "python", 352 | "pygments_lexer": "ipython3", 353 | "version": "3.6.3" 354 | } 355 | }, 356 | "nbformat": 4, 357 | "nbformat_minor": 2 358 | } 359 | -------------------------------------------------------------------------------- /keras_frcnn/roi_pooling_conv.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "Reference: https://github.com/yhenon/keras-frcnn/blob/master/keras_frcnn/RoiPoolingConv.py" 8 | ] 9 | }, 10 | { 11 | "cell_type": "code", 12 | "execution_count": 1, 13 | "metadata": {}, 14 | "outputs": [ 15 | { 16 | "name": "stderr", 17 | "output_type": "stream", 18 | "text": [ 19 | "Using TensorFlow backend.\n" 20 | ] 21 | } 22 | ], 23 | "source": [ 24 | "from keras.engine.topology import Layer\n", 25 | "import keras.backend as K" 26 | ] 27 | }, 28 | { 29 | "cell_type": "code", 30 | "execution_count": 2, 31 | "metadata": {}, 32 | "outputs": [], 33 | "source": [ 34 | "if K.backend() == 'tensorflow':\n", 35 | " import tensorflow as tf" 36 | ] 37 | }, 38 | { 39 | "cell_type": "code", 40 | "execution_count": 5, 41 | "metadata": {}, 42 | "outputs": [], 43 | "source": [ 44 | "class RoiPoolingConv(Layer):\n", 45 | " \"\"\"ROI pooling layer for 2D inputs.\n", 46 | " See Spatial Pyramid pooling in Deep Convolutional Networks for Visual\n", 47 | " Recognition, K. He, X. Zhang, S. Ren, J. Sun\n", 48 | " \n", 49 | " # Arguments\n", 50 | " pool_size: int\n", 51 | " size of pooling region to use, pool_size = 7 will result in a 7x7 region.\n", 52 | " num_rois: number of regions of interest to be used.\n", 53 | " \n", 54 | " # Input shape\n", 55 | " list of two 4D tensors [X_img, X_roi] with shape:\n", 56 | " \n", 57 | " X_img:\n", 58 | " `(1, channels, rows, cols)` if dim_ordering='th'\n", 59 | " or 4D tensor with shape:\n", 60 | " `(1, rows, cols, channels)` if dim_ordering='tf'.\n", 61 | " X_roi:\n", 62 | " `(1,num_rois,4)` list of rois, with ordering (x,y,w,h)\n", 63 | " \n", 64 | " # Output shape\n", 65 | " 3D tensor with shape:\n", 66 | " `(1, num_rois, channels, pool_size, pool_size)`\n", 67 | " \"\"\"\n", 68 | " \n", 69 | " def __init__(self, pool_size, num_rois, **kwargs):\n", 70 | " \n", 71 | " self.dim_ordering = K.image_dim_ordering()\n", 72 | " assert self.dim_ordering in {'tf', 'th'}, 'dim_ordering must be in {tf, th}'\n", 73 | " \n", 74 | " self.pool_size = pool_size\n", 75 | " self.num_rois = num_rois\n", 76 | " \n", 77 | " super(RoiPoolingConv, self).__init__(**kwargs)\n", 78 | " \n", 79 | " \n", 80 | " def build(self, input_shape):\n", 81 | " if self.dim_ordering == 'th':\n", 82 | " self.nb_channels = input_shape[0][1]\n", 83 | " \n", 84 | " elif self.dim_ordering == 'tf':\n", 85 | " self.nb_channels = input_shape[0][3]\n", 86 | " \n", 87 | " def compute_output_shape(self, input_shape):\n", 88 | " if self.dim_ordering == 'th':\n", 89 | " return None, self.num_rois, self.nb_channels, self.pool_size, self.pool_size\n", 90 | " \n", 91 | " else:\n", 92 | " return None, self.num_rois, self.pool_size, self.pool_size, self.nb_channels\n", 93 | " \n", 94 | " \n", 95 | " def call(self, x, mask=None):\n", 96 | " assert(len(x) == 2)\n", 97 | " \n", 98 | " img = x[0]\n", 99 | " rois = x[1]\n", 100 | " \n", 101 | " input_shape = K.shape(img)\n", 102 | " \n", 103 | " outputs = []\n", 104 | " \n", 105 | " for roi_idx in range(self.num_rois):\n", 106 | " \n", 107 | " x = rois[0, roi_idx, 0]\n", 108 | " y = rois[0, roi_idx, 1]\n", 109 | " w = rois[0, roi_idx, 2]\n", 110 | " h = rois[0, roi_idx, 3]\n", 111 | " \n", 112 | " row_length = w / float(self.pool_size)\n", 113 | " col_length = h / float(self.pool_size)\n", 114 | " \n", 115 | " num_pool_regions = self.pool_size\n", 116 | " \n", 117 | " #NOTE: the RoiPooling implementation differs between theano and tensorflow due to the lack of a resize op\n", 118 | " # in theano. The theano implementation is much less efficient and leads to long compile times\n", 119 | "\n", 120 | " if self.dim_ordering == 'th':\n", 121 | " for jy in range(num_pool_regions):\n", 122 | " for ix in range(num_pool_regions):\n", 123 | " x1 = x + ix * row_length\n", 124 | " x2 = x1 + row_length\n", 125 | " y1 = y + jy * col_length\n", 126 | " y2 = y1 + col_length\n", 127 | " \n", 128 | " x1 = K.cast(x1, 'int32')\n", 129 | " x2 = K.cast(x2, 'int32')\n", 130 | " y1 = K.cast(y1, 'int32')\n", 131 | " y2 = K.cast(y2, 'int32')\n", 132 | " \n", 133 | " x2 = x1 + K.maximum(1, x2-x1)\n", 134 | " y2 = y1 + K.maximum(1, y2-y1)\n", 135 | " \n", 136 | " new_shape = [input_shape[0], input_shape[1],\n", 137 | " y2 - y1, x2 - x1]\n", 138 | " \n", 139 | " x_crop = img[:, :, y1:y2, x1:x2]\n", 140 | " xm = K.reshape(x_crop, new_shape)\n", 141 | " pooled_val = K.max(xm, axis=(2, 3))\n", 142 | " outputs.append(pooled_val)\n", 143 | " \n", 144 | " elif self.dim_ordering == 'tf':\n", 145 | " x = K.cast(x, 'int32')\n", 146 | " y = K.cast(y, 'int32')\n", 147 | " w = K.cast(w, 'int32')\n", 148 | " h = K.cast(h, 'int32')\n", 149 | " \n", 150 | " rs = tf.image.resize_images(img[:, y:y+h, x:x+w, :], (self.pool_size, self.pool_size))\n", 151 | " outputs.append(rs)\n", 152 | " \n", 153 | " final_output = K.concatenate(outputs, axis=0)\n", 154 | " final_output = K.reshape(final_output, (1, self.num_rois, self.pool_size, self.pool_size, self.nb_channels))\n", 155 | " \n", 156 | " if self.dim_ordering == 'th':\n", 157 | " final_output = K.permute_dimensions(final_output, (0, 1, 4, 2, 3))\n", 158 | " \n", 159 | " else:\n", 160 | " final_output = K.permute_dimensions(final_output, (0, 1, 2, 3, 4))\n", 161 | " \n", 162 | " return final_output\n", 163 | " \n", 164 | " def get_config(self):\n", 165 | " config = {'pool_size': self.pool_size,\n", 166 | " 'num_rois': self.num_rois}\n", 167 | " base_config = super(RoiPoolingConv, self).get_config()\n", 168 | " return dict(list(base_config.items()) + list(config.items()))" 169 | ] 170 | } 171 | ], 172 | "metadata": { 173 | "kernelspec": { 174 | "display_name": "Python 3", 175 | "language": "python", 176 | "name": "python3" 177 | }, 178 | "language_info": { 179 | "codemirror_mode": { 180 | "name": "ipython", 181 | "version": 3 182 | }, 183 | "file_extension": ".py", 184 | "mimetype": "text/x-python", 185 | "name": "python", 186 | "nbconvert_exporter": "python", 187 | "pygments_lexer": "ipython3", 188 | "version": "3.6.3" 189 | } 190 | }, 191 | "nbformat": 4, 192 | "nbformat_minor": 2 193 | } 194 | -------------------------------------------------------------------------------- /keras_frcnn/vgg.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# VGG16 model for Keras.\n", 8 | "\n", 9 | "Reference\n", 10 | "- [Very Deep Convolutional Networks for Large-Scale Image Recognition](https://arxiv.org/abs/1409.1556)" 11 | ] 12 | }, 13 | { 14 | "cell_type": "code", 15 | "execution_count": 1, 16 | "metadata": {}, 17 | "outputs": [ 18 | { 19 | "name": "stderr", 20 | "output_type": "stream", 21 | "text": [ 22 | "Using TensorFlow backend.\n" 23 | ] 24 | } 25 | ], 26 | "source": [ 27 | "import warnings\n", 28 | "warnings.filterwarnings('ignore')\n", 29 | "from __future__ import print_function\n", 30 | "from __future__ import absolute_import\n", 31 | "import sys\n", 32 | "import os\n", 33 | "from themachine.nbfinder import NotebookFinder\n", 34 | "sys.meta_path.append(NotebookFinder())\n", 35 | "from keras.models import Model\n", 36 | "from keras.layers import Flatten, Dense, Input, Conv2D, MaxPooling2D, Dropout\n", 37 | "from keras.layers import GlobalAveragePooling2D, GlobalMaxPooling2D, TimeDistributed\n", 38 | "from keras.engine.topology import get_source_inputs\n", 39 | "from keras.utils import layer_utils\n", 40 | "from keras.utils.data_utils import get_file\n", 41 | "from keras import backend as K" 42 | ] 43 | }, 44 | { 45 | "cell_type": "code", 46 | "execution_count": 2, 47 | "metadata": {}, 48 | "outputs": [ 49 | { 50 | "name": "stdout", 51 | "output_type": "stream", 52 | "text": [ 53 | "importing Jupyter notebook from roi_pooling_conv.ipynb\n" 54 | ] 55 | } 56 | ], 57 | "source": [ 58 | "from roi_pooling_conv import RoiPoolingConv" 59 | ] 60 | }, 61 | { 62 | "cell_type": "code", 63 | "execution_count": 4, 64 | "metadata": {}, 65 | "outputs": [], 66 | "source": [ 67 | "def get_weight_path():\n", 68 | " if K.image_dim_ordering() == 'th':\n", 69 | " print('pretrained weights not available for VGG with theano backend')\n", 70 | " return\n", 71 | " else:\n", 72 | " return 'vgg16_weights_tf_dim_ordering_tf_kernels.h5'\n", 73 | "\n", 74 | "\n", 75 | "def get_img_output_length(width, height):\n", 76 | " def get_output_length(input_length):\n", 77 | " return input_length//16\n", 78 | "\n", 79 | " return get_output_length(width), get_output_length(height) " 80 | ] 81 | }, 82 | { 83 | "cell_type": "code", 84 | "execution_count": 5, 85 | "metadata": {}, 86 | "outputs": [], 87 | "source": [ 88 | "def nn_base(input_tensor=None, trainable=False):\n", 89 | "\n", 90 | "\n", 91 | " # Determine proper input shape\n", 92 | " if K.image_dim_ordering() == 'th':\n", 93 | " input_shape = (3, None, None)\n", 94 | " else:\n", 95 | " input_shape = (None, None, 3)\n", 96 | "\n", 97 | " if input_tensor is None:\n", 98 | " img_input = Input(shape=input_shape)\n", 99 | " else:\n", 100 | " if not K.is_keras_tensor(input_tensor):\n", 101 | " img_input = Input(tensor=input_tensor, shape=input_shape)\n", 102 | " else:\n", 103 | " img_input = input_tensor\n", 104 | "\n", 105 | " if K.image_dim_ordering() == 'tf':\n", 106 | " bn_axis = 3\n", 107 | " else:\n", 108 | " bn_axis = 1\n", 109 | "\n", 110 | " # Block 1\n", 111 | " x = Conv2D(64, (3, 3), activation='relu', padding='same', name='block1_conv1')(img_input)\n", 112 | " x = Conv2D(64, (3, 3), activation='relu', padding='same', name='block1_conv2')(x)\n", 113 | " x = MaxPooling2D((2, 2), strides=(2, 2), name='block1_pool')(x)\n", 114 | "\n", 115 | " # Block 2\n", 116 | " x = Conv2D(128, (3, 3), activation='relu', padding='same', name='block2_conv1')(x)\n", 117 | " x = Conv2D(128, (3, 3), activation='relu', padding='same', name='block2_conv2')(x)\n", 118 | " x = MaxPooling2D((2, 2), strides=(2, 2), name='block2_pool')(x)\n", 119 | "\n", 120 | " # Block 3\n", 121 | " x = Conv2D(256, (3, 3), activation='relu', padding='same', name='block3_conv1')(x)\n", 122 | " x = Conv2D(256, (3, 3), activation='relu', padding='same', name='block3_conv2')(x)\n", 123 | " x = Conv2D(256, (3, 3), activation='relu', padding='same', name='block3_conv3')(x)\n", 124 | " x = MaxPooling2D((2, 2), strides=(2, 2), name='block3_pool')(x)\n", 125 | "\n", 126 | " # Block 4\n", 127 | " x = Conv2D(512, (3, 3), activation='relu', padding='same', name='block4_conv1')(x)\n", 128 | " x = Conv2D(512, (3, 3), activation='relu', padding='same', name='block4_conv2')(x)\n", 129 | " x = Conv2D(512, (3, 3), activation='relu', padding='same', name='block4_conv3')(x)\n", 130 | " x = MaxPooling2D((2, 2), strides=(2, 2), name='block4_pool')(x)\n", 131 | "\n", 132 | " # Block 5\n", 133 | " x = Conv2D(512, (3, 3), activation='relu', padding='same', name='block5_conv1')(x)\n", 134 | " x = Conv2D(512, (3, 3), activation='relu', padding='same', name='block5_conv2')(x)\n", 135 | " x = Conv2D(512, (3, 3), activation='relu', padding='same', name='block5_conv3')(x)\n", 136 | " # x = MaxPooling2D((2, 2), strides=(2, 2), name='block5_pool')(x)\n", 137 | "\n", 138 | " return x\n" 139 | ] 140 | }, 141 | { 142 | "cell_type": "code", 143 | "execution_count": 6, 144 | "metadata": {}, 145 | "outputs": [], 146 | "source": [ 147 | "def rpn(base_layers, num_anchors):\n", 148 | "\n", 149 | " x = Conv2D(512, (3, 3), padding='same', activation='relu', kernel_initializer='normal', name='rpn_conv1')(base_layers)\n", 150 | "\n", 151 | " x_class = Conv2D(num_anchors, (1, 1), activation='sigmoid', kernel_initializer='uniform', name='rpn_out_class')(x)\n", 152 | " x_regr = Conv2D(num_anchors * 4, (1, 1), activation='linear', kernel_initializer='zero', name='rpn_out_regress')(x)\n", 153 | "\n", 154 | " return [x_class, x_regr, base_layers]\n", 155 | "\n", 156 | "\n", 157 | "def classifier(base_layers, input_rois, num_rois, nb_classes = 21, trainable=False):\n", 158 | "\n", 159 | " # compile times on theano tend to be very high, so we use smaller ROI pooling regions to workaround\n", 160 | "\n", 161 | " if K.backend() == 'tensorflow':\n", 162 | " pooling_regions = 7\n", 163 | " input_shape = (num_rois,7,7,512)\n", 164 | " elif K.backend() == 'theano':\n", 165 | " pooling_regions = 7\n", 166 | " input_shape = (num_rois,512,7,7)\n", 167 | "\n", 168 | " out_roi_pool = RoiPoolingConv(pooling_regions, num_rois)([base_layers, input_rois])\n", 169 | "\n", 170 | " out = TimeDistributed(Flatten(name='flatten'))(out_roi_pool)\n", 171 | " out = TimeDistributed(Dense(4096, activation='relu', name='fc1'))(out)\n", 172 | " out = TimeDistributed(Dropout(0.5))(out)\n", 173 | " out = TimeDistributed(Dense(4096, activation='relu', name='fc2'))(out)\n", 174 | " out = TimeDistributed(Dropout(0.5))(out)\n", 175 | "\n", 176 | " out_class = TimeDistributed(Dense(nb_classes, activation='softmax', kernel_initializer='zero'), name='dense_class_{}'.format(nb_classes))(out)\n", 177 | " # note: no regression target for bg class\n", 178 | " out_regr = TimeDistributed(Dense(4 * (nb_classes-1), activation='linear', kernel_initializer='zero'), name='dense_regress_{}'.format(nb_classes))(out)\n", 179 | "\n", 180 | " return [out_class, out_regr]\n", 181 | "\n" 182 | ] 183 | } 184 | ], 185 | "metadata": { 186 | "kernelspec": { 187 | "display_name": "Python 3", 188 | "language": "python", 189 | "name": "python3" 190 | }, 191 | "language_info": { 192 | "codemirror_mode": { 193 | "name": "ipython", 194 | "version": 3 195 | }, 196 | "file_extension": ".py", 197 | "mimetype": "text/x-python", 198 | "name": "python", 199 | "nbconvert_exporter": "python", 200 | "pygments_lexer": "ipython3", 201 | "version": "3.6.3" 202 | } 203 | }, 204 | "nbformat": 4, 205 | "nbformat_minor": 2 206 | } 207 | --------------------------------------------------------------------------------