├── .editorconfig ├── .readthedocs.yml ├── .spyproject ├── codestyle.ini ├── encoding.ini ├── vcs.ini └── workspace.ini ├── .travis.yml ├── LICENSE ├── MANIFEST.in ├── README.md ├── Written_Back_Results.jpg ├── __pycache__ └── normalize.cpython-36.pyc ├── camulote.py ├── docs ├── Makefile ├── api │ ├── datasets.rst │ └── models.rst ├── cli │ ├── checkpoint.rst │ ├── cloud.rst │ ├── dataset.rst │ ├── eval.rst │ ├── predict.rst │ ├── server.rst │ └── train.rst ├── conf.py ├── images │ └── luminoth-logo.png ├── index.rst ├── requirements.txt ├── tutorial │ ├── 01-first-steps.rst │ ├── 02-building-custom-traffic-dataset.rst │ ├── 03-training-the-model.rst │ ├── 04-visualizing-the-training-process.rst │ ├── 05-evaluating-models.rst │ ├── 06-creating-own-checkpoints.rst │ ├── 07-using-luminoth-from-python.rst │ ├── images │ │ ├── 01-first-steps │ │ │ ├── luminoth-predictions.jpg │ │ │ └── luminoth-web-server.jpg │ │ ├── 04-visualizing-the-training-process │ │ │ ├── losses-smoothing.png │ │ │ └── losses.png │ │ └── 05-evaluating-models │ │ │ ├── lumi-server-web-result.jpg │ │ │ └── validation-metrics-tensorboard.png │ └── index.rst └── usage │ ├── checkpoints.rst │ ├── cloud.rst │ ├── dataset.rst │ ├── evaluation.rst │ ├── installation.rst │ ├── quickstart.rst │ └── training.rst ├── examples └── sample_config.yml ├── imgs ├── Architecture.png ├── output1.png └── output2.png ├── luminoth ├── __init__.py ├── __pycache__ │ ├── __init__.cpython-36.pyc │ ├── cli.cpython-36.pyc │ ├── eval.cpython-36.pyc │ ├── io.cpython-36.pyc │ ├── predict.cpython-36.pyc │ ├── tasks.cpython-36.pyc │ ├── train.cpython-36.pyc │ └── vis.cpython-36.pyc ├── cli.py ├── datasets │ ├── __init__.py │ ├── __pycache__ │ │ ├── __init__.cpython-36.pyc │ │ ├── base_dataset.cpython-36.pyc │ │ ├── datasets.cpython-36.pyc │ │ ├── exceptions.cpython-36.pyc │ │ └── object_detection_dataset.cpython-36.pyc │ ├── base_dataset.py │ ├── datasets.py │ ├── exceptions.py │ ├── object_detection_dataset.py │ └── object_detection_dataset_test.py ├── eval.py ├── io.py ├── models │ ├── __init__.py │ ├── __pycache__ │ │ ├── __init__.cpython-36.pyc │ │ └── models.cpython-36.pyc │ ├── base │ │ ├── __init__.py │ │ ├── __pycache__ │ │ │ ├── __init__.cpython-36.pyc │ │ │ ├── base_network.cpython-36.pyc │ │ │ ├── truncated_base_network.cpython-36.pyc │ │ │ └── truncated_vgg.cpython-36.pyc │ │ ├── base_network.py │ │ ├── base_network_test.py │ │ ├── truncated_base_network.py │ │ ├── truncated_base_network_test.py │ │ └── truncated_vgg.py │ ├── fasterrcnn │ │ ├── __init__.py │ │ ├── __pycache__ │ │ │ ├── __init__.cpython-36.pyc │ │ │ ├── fasterrcnn.cpython-36.pyc │ │ │ ├── rcnn.cpython-36.pyc │ │ │ ├── rcnn_proposal.cpython-36.pyc │ │ │ ├── rcnn_target.cpython-36.pyc │ │ │ ├── roi_pool.cpython-36.pyc │ │ │ ├── rpn.cpython-36.pyc │ │ │ ├── rpn_proposal.cpython-36.pyc │ │ │ └── rpn_target.cpython-36.pyc │ │ ├── base_config.yml │ │ ├── fasterrcnn.py │ │ ├── fasterrcnn_test.py │ │ ├── rcnn.py │ │ ├── rcnn_proposal.py │ │ ├── rcnn_proposal_test.py │ │ ├── rcnn_target.py │ │ ├── rcnn_target_test.py │ │ ├── rcnn_test.py │ │ ├── roi_pool.py │ │ ├── roi_pool_test.py │ │ ├── rpn.py │ │ ├── rpn_proposal.py │ │ ├── rpn_proposal_test.py │ │ ├── rpn_target.py │ │ ├── rpn_target_test.py │ │ └── rpn_test.py │ ├── models.py │ └── ssd │ │ ├── __init__.py │ │ ├── __pycache__ │ │ ├── __init__.cpython-36.pyc │ │ ├── feature_extractor.cpython-36.pyc │ │ ├── proposal.cpython-36.pyc │ │ ├── ssd.cpython-36.pyc │ │ ├── target.cpython-36.pyc │ │ └── utils.cpython-36.pyc │ │ ├── 
base_config.yml │ │ ├── feature_extractor.py │ │ ├── proposal.py │ │ ├── ssd.py │ │ ├── target.py │ │ └── utils.py ├── predict.py ├── tasks.py ├── tools │ ├── __init__.py │ ├── __pycache__ │ │ └── __init__.cpython-36.pyc │ ├── checkpoint │ │ ├── __init__.py │ │ └── __pycache__ │ │ │ └── __init__.cpython-36.pyc │ ├── cloud │ │ ├── __init__.py │ │ ├── __pycache__ │ │ │ ├── __init__.cpython-36.pyc │ │ │ ├── cli.cpython-36.pyc │ │ │ └── gcloud.cpython-36.pyc │ │ ├── cli.py │ │ └── gcloud.py │ ├── dataset │ │ ├── __init__.py │ │ ├── __pycache__ │ │ │ ├── __init__.cpython-36.pyc │ │ │ ├── cli.cpython-36.pyc │ │ │ ├── merge.cpython-36.pyc │ │ │ └── transform.cpython-36.pyc │ │ ├── cli.py │ │ ├── merge.py │ │ ├── readers │ │ │ ├── __init__.py │ │ │ ├── __pycache__ │ │ │ │ ├── __init__.cpython-36.pyc │ │ │ │ └── base_reader.cpython-36.pyc │ │ │ ├── base_reader.py │ │ │ └── object_detection │ │ │ │ ├── __init__.py │ │ │ │ ├── __pycache__ │ │ │ │ ├── __init__.cpython-36.pyc │ │ │ │ ├── coco.cpython-36.pyc │ │ │ │ ├── csv_reader.cpython-36.pyc │ │ │ │ ├── flat_reader.cpython-36.pyc │ │ │ │ ├── imagenet.cpython-36.pyc │ │ │ │ ├── object_detection_reader.cpython-36.pyc │ │ │ │ ├── openimages.cpython-36.pyc │ │ │ │ ├── pascalvoc.cpython-36.pyc │ │ │ │ └── taggerine.cpython-36.pyc │ │ │ │ ├── coco.py │ │ │ │ ├── csv_reader.py │ │ │ │ ├── data │ │ │ │ └── imagenet_wnids.json │ │ │ │ ├── flat_reader.py │ │ │ │ ├── imagenet.py │ │ │ │ ├── object_detection_reader.py │ │ │ │ ├── openimages.py │ │ │ │ ├── pascalvoc.py │ │ │ │ └── taggerine.py │ │ ├── transform.py │ │ └── writers │ │ │ ├── __init__.py │ │ │ ├── __pycache__ │ │ │ ├── __init__.cpython-36.pyc │ │ │ ├── base_writer.cpython-36.pyc │ │ │ └── object_detection_writer.cpython-36.pyc │ │ │ ├── base_writer.py │ │ │ └── object_detection_writer.py │ └── server │ │ ├── __init__.py │ │ ├── __pycache__ │ │ ├── __init__.cpython-36.pyc │ │ ├── cli.cpython-36.pyc │ │ └── web.cpython-36.pyc │ │ ├── cli.py │ │ ├── static │ │ ├── favicon.ico │ │ ├── js │ │ │ ├── main.js │ │ │ └── vendor │ │ │ │ ├── hextorgba.js │ │ │ │ └── palette.js │ │ ├── luminoth-logo.svg │ │ ├── style.css │ │ └── tryolabs-logo.svg │ │ ├── templates │ │ └── index.html │ │ ├── web.py │ │ └── web_test.py ├── train.py ├── train_test.py ├── utils │ ├── __init__.py │ ├── __pycache__ │ │ ├── __init__.cpython-36.pyc │ │ ├── anchors.cpython-36.pyc │ │ ├── bbox_overlap.cpython-36.pyc │ │ ├── bbox_transform.cpython-36.pyc │ │ ├── bbox_transform_tf.cpython-36.pyc │ │ ├── checkpoint_downloader.cpython-36.pyc │ │ ├── config.cpython-36.pyc │ │ ├── dataset.cpython-36.pyc │ │ ├── experiments.cpython-36.pyc │ │ ├── homedir.cpython-36.pyc │ │ ├── image.cpython-36.pyc │ │ ├── image_vis.cpython-36.pyc │ │ ├── losses.cpython-36.pyc │ │ ├── predicting.cpython-36.pyc │ │ ├── training.cpython-36.pyc │ │ └── vars.cpython-36.pyc │ ├── anchors.py │ ├── anchors_test.py │ ├── bbox_overlap.py │ ├── bbox_overlap_test.py │ ├── bbox_transform.py │ ├── bbox_transform_test.py │ ├── bbox_transform_tf.py │ ├── checkpoint_downloader.py │ ├── config.py │ ├── dataset.py │ ├── debug.py │ ├── experiments.py │ ├── homedir.py │ ├── hooks │ │ ├── __init__.py │ │ ├── __pycache__ │ │ │ ├── __init__.cpython-36.pyc │ │ │ ├── image_vis_hook.cpython-36.pyc │ │ │ └── var_vis_hook.cpython-36.pyc │ │ ├── image_vis_hook.py │ │ └── var_vis_hook.py │ ├── image.py │ ├── image_test.py │ ├── image_vis.py │ ├── losses.py │ ├── predicting.py │ ├── test │ │ ├── __init__.py │ │ ├── anchors.py │ │ └── gt_boxes.py │ ├── training.py │ └── vars.py └── vis.py ├── 
normalize.py ├── pdfcreation.py ├── run.py ├── setup.cfg ├── setup.py ├── tox.ini ├── untitled5.py └── untitled6.py /.editorconfig: -------------------------------------------------------------------------------- 1 | # http://editorconfig.org 2 | 3 | root = true 4 | 5 | [*] 6 | indent_style = space 7 | indent_size = 4 8 | insert_final_newline = true 9 | trim_trailing_whitespace = true 10 | end_of_line = lf 11 | charset = utf-8 12 | 13 | [*.py] 14 | max_line_length = 79 15 | 16 | [*.html] 17 | indent_size = 2 18 | 19 | [*.yaml, *.yml] 20 | indent_size = 2 21 | 22 | # The JSON files contain newlines inconsistently 23 | [*.json] 24 | indent_size = 2 25 | insert_final_newline = ignore 26 | -------------------------------------------------------------------------------- /.readthedocs.yml: -------------------------------------------------------------------------------- 1 | python: 2 | pip_install: true 3 | -------------------------------------------------------------------------------- /.spyproject/codestyle.ini: -------------------------------------------------------------------------------- 1 | [codestyle] 2 | indentation = True 3 | 4 | [main] 5 | version = 0.1.0 6 | 7 | -------------------------------------------------------------------------------- /.spyproject/encoding.ini: -------------------------------------------------------------------------------- 1 | [encoding] 2 | text_encoding = utf-8 3 | 4 | [main] 5 | version = 0.1.0 6 | 7 | -------------------------------------------------------------------------------- /.spyproject/vcs.ini: -------------------------------------------------------------------------------- 1 | [vcs] 2 | use_version_control = False 3 | version_control_system = 4 | 5 | [main] 6 | version = 0.1.0 7 | 8 | -------------------------------------------------------------------------------- /.spyproject/workspace.ini: -------------------------------------------------------------------------------- 1 | [workspace] 2 | restore_data_on_startup = True 3 | save_data_on_exit = True 4 | save_history = True 5 | save_non_project_files = False 6 | 7 | [main] 8 | version = 0.1.0 9 | recent_files = ['D:\\Sargunan\\luminoth-master\\luminoth-master\\luminoth\\tools\\server\\web.py', 'D:\\Sargunan\\luminoth-master\\luminoth-master\\luminoth\\utils\\image_vis.py', 'D:\\Sargunan\\luminoth-master\\luminoth-master\\luminoth\\utils\\predicting.py', 'D:\\Sargunan\\luminoth-master\\luminoth-master\\luminoth\\utils\\image_test.py', 'D:\\Sargunan\\luminoth-master\\luminoth-master\\luminoth\\utils\\bbox_overlap_test.py', 'D:\\Sargunan\\luminoth-master\\luminoth-master\\luminoth\\utils\\bbox_transform.py', 'D:\\Sargunan\\luminoth-master\\luminoth-master\\luminoth\\predict.py', 'D:\\Sargunan\\luminoth-master\\luminoth-master\\luminoth\\vis.py', 'C:\\Users\\Murali\\Anaconda3_2\\lib\\site-packages\\pytesseract\\pytesseract.py', 'D:\\Sargunan\\luminoth-master\\luminoth-master\\luminoth\\tools\\server\\templates\\index.html', 'D:\\Sargunan\\luminoth-master\\luminoth-master\\luminoth\\tools\\server\\static\\js\\main.js', 'D:\\Sargunan\\luminoth-master\\untitled1.py'] 10 | 11 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: python 2 | sudo: false 3 | python: 4 | - '2.7' 5 | - '3.4' 6 | - '3.5' 7 | - '3.6' 8 | install: pip install tox-travis 9 | script: tox 10 | notifications: 11 | slack: 12 | rooms: 13 | secure: 
piy/NEf36gPZqw4nTAhs6dZ7Af2ozOt47RdnTSaed3tkwMfXJ+M3nccuCP4UfwmBP7fRRq2yfhLWCKIZagsFGAxhNZ/mSYgRdXfNUUofOWIJdR4+tQuqhxwgQkTfJjAWlYA+gA+GxrOnSFQ3ACRnfd/dUfthokqq7CWOzQtYYHGBSA1rAxuJU1x2qWbl+1tXa7BUJXCsR8ONz7vWDna4Znk+wixG3enOdd2pdrP2dqp9g0eUztySIT2zdjA2CB4iFIIqQLJ5fz3ab8Vpk4H9+JECB7lE8+SJVS9ZbBu8vuD9yZARBJGCPyD8HcwHTNrvlTu/V9p9W+6OeNCUXamZFuCkMHIwLgDxUntG/IyCptJqtG1n/7dOxikY1kfAIIcwMsmTQDej3xXHny1wyGMztD84w23KhNEPMJP7HsLVHmt26TIiT/yFeIOzGnh2ZPenMwtn7ti8uiweEpAH2G3tmH6bShv8myXEp++RcblSNeowHpyFhRsoevuqpT+dd+C+as2Y1DWBSFqvrfCro7AKloErJHtS3GXUiAf1Fi+Vys/sj+PLQFBh88GR5OY0lDC1cTfA4iy3Vy0IICRIylkQ4W0jfbgKF6B40syA6rM0mlu1v15P9oJvI+P5N9/G8jhUA54Ku3Xxmd6qpzlFvx2ivxmPBfHNc7g7MHJZFlzlzvk= 14 | on_success: change 15 | 16 | deploy: 17 | # Test PyPI in every change to master 18 | - provider: pypi 19 | server: https://test.pypi.org/legacy/ 20 | distributions: sdist bdist_wheel 21 | user: $PYPI_TEST_USERNAME 22 | password: $PYPI_TEST_PASSWORD 23 | skip_existing: true 24 | on: 25 | branch: master 26 | tags: false 27 | python: 3.6 28 | 29 | # Real PyPI in tags (ie. GitHub releases) 30 | - provider: pypi 31 | distributions: sdist bdist_wheel 32 | user: $PYPI_USERNAME 33 | password: $PYPI_PASSWORD 34 | on: 35 | branch: master 36 | tags: true 37 | python: 3.6 38 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | BSD 3-Clause License 2 | 3 | Copyright (c) 2018, Tryolabs 4 | All rights reserved. 5 | 6 | Redistribution and use in source and binary forms, with or without 7 | modification, are permitted provided that the following conditions are met: 8 | 9 | * Redistributions of source code must retain the above copyright notice, this 10 | list of conditions and the following disclaimer. 11 | 12 | * Redistributions in binary form must reproduce the above copyright notice, 13 | this list of conditions and the following disclaimer in the documentation 14 | and/or other materials provided with the distribution. 15 | 16 | * Neither the name of the copyright holder nor the names of its 17 | contributors may be used to endorse or promote products derived from 18 | this software without specific prior written permission. 19 | 20 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 21 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 22 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 23 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 24 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 25 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 26 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 27 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 28 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 29 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
30 | -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | include *.md *.txt tox.ini LICENSE *.yml 2 | graft luminoth 3 | recursive-include docs * 4 | recursive-include examples * 5 | 6 | global-exclude __pycache__ 7 | global-exclude *.py[co] -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Deep Learning based Table Detection (LUMINOTH) 2 | A deep learning based system for detecting tables in document images and extracting their contents, built on Luminoth. 3 | 4 | This project focuses on detecting tables in PDF documents and extracting their contents, using Keras and the [TensorFlow Object Detection API](https://github.com/tensorflow/models/tree/master/research/object_detection). 5 | 6 | The system works in two steps: 7 | 8 | Step 1: Accept document input and read tables. The system provides an input mechanism that accepts document images (TIFF, JPEG); a document may contain one or more tables. 9 | 10 | 11 | Step 2: As output, the system returns the table contents in an Excel format, matching the sample datasets. 12 | 13 | # More details can be found in the PPT 14 | 15 | ## Dataset used: UNLV dataset 16 | 17 | 18 | ## Quick Demo 19 | 20 | https://youtu.be/cwIQlJRHuA4 21 | 22 | 23 | 24 | 25 | 26 | 27 | ***DEVELOPMENT IS IN PROGRESS! THE REPO WILL BE UPDATED SOON!*** 28 | 29 | 30 | -------------- 31 | 32 | 33 | 34 | ## Architecture 35 | 36 |

37 | ![Architecture](imgs/Architecture.png) 48 | ## Result 1 50 | ![Result 1](imgs/output1.png)

51 | 52 | ------------------------------------------------------------------------------------------- 53 | 54 | ## Result 2 55 |

56 | ![Result 2](imgs/output2.png) 58 |

59 | 60 | ------------------------------------------------------------------------------------------- 61 | 62 | # Installation 63 | 64 | Luminoth currently supports Python 2.7 and 3.4–3.6. 65 | 66 | ## Pre-requisites 67 | 68 | To use Luminoth, [TensorFlow](https://www.tensorflow.org/install/) must be installed beforehand. If you want **GPU support**, you should install the GPU version of TensorFlow with `pip install tensorflow-gpu`, or else you can use the CPU version using `pip install tensorflow`. 69 | 70 | ## Installing Luminoth 71 | 72 | Just install from PyPI: 73 | 74 | ```bash 75 | pip install luminoth 76 | ``` 77 | 78 | Optionally, Luminoth can also install TensorFlow for you if you install it with `pip install luminoth[tf]` or `pip install luminoth[tf-gpu]`, depending on the version of TensorFlow you wish to use. 79 | 80 | ### Google Cloud 81 | 82 | If you wish to train using **Google Cloud ML Engine**, the optional dependencies must be installed: 83 | 84 | ```bash 85 | pip install luminoth[gcloud] 86 | ``` 87 | 88 | ## Installing from source 89 | 90 | First, clone the repo on your machine and then install with `pip`: 91 | 92 | ```bash 93 | git clone https://github.com/tryolabs/luminoth.git 94 | cd luminoth 95 | pip install -e . 96 | ``` 97 | 98 | 99 | # Run "run.py" 100 | 101 | 102 | ## License 103 | This system is available under the MIT license. See the LICENSE file for more info. 104 | -------------------------------------------------------------------------------- /Written_Back_Results.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Sargunan/Table-Detection-using-Deep-learning/6471be4b0ad9abb3c2d439f6e33a27ccf6db8408/Written_Back_Results.jpg -------------------------------------------------------------------------------- /__pycache__/normalize.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Sargunan/Table-Detection-using-Deep-learning/6471be4b0ad9abb3c2d439f6e33a27ccf6db8408/__pycache__/normalize.cpython-36.pyc -------------------------------------------------------------------------------- /camulote.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Created on Wed Nov 21 15:51:00 2018 4 | 5 | @author: Murali 6 | """ 7 | 8 | from pdf2jpg import pdf2jpg 9 | 10 | result = pdf2jpg.convert_pdf2jpg('D:\\Sargunan\\Table\\001.pdf', 'c:\\temp\\p') 11 | print(result) 12 | -------------------------------------------------------------------------------- /docs/Makefile: -------------------------------------------------------------------------------- 1 | # Minimal makefile for Sphinx documentation 2 | # 3 | 4 | # You can set these variables from the command line. 5 | SPHINXOPTS = 6 | SPHINXBUILD = sphinx-build 7 | SPHINXPROJ = Luminoth 8 | SOURCEDIR = . 9 | BUILDDIR = _build 10 | 11 | # Put it first so that "make" without argument is like "make help". 12 | help: 13 | @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 14 | 15 | .PHONY: help Makefile 16 | 17 | # Catch-all target: route all unknown targets to Sphinx using the new 18 | # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). 
19 | %: Makefile 20 | @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) -------------------------------------------------------------------------------- /docs/api/datasets.rst: -------------------------------------------------------------------------------- 1 | .. _api/datasets: 2 | 3 | Datasets 4 | ======== 5 | -------------------------------------------------------------------------------- /docs/api/models.rst: -------------------------------------------------------------------------------- 1 | .. _api/models: 2 | 3 | Models 4 | ====== 5 | 6 | .. autoclass:: luminoth.models.base.BaseNetwork 7 | :members: 8 | :private-members: 9 | 10 | .. autoclass:: luminoth.models.base.TruncatedBaseNetwork 11 | :members: 12 | :private-members: 13 | 14 | .. autoclass:: luminoth.models.fasterrcnn.FasterRCNN 15 | :members: 16 | :private-members: 17 | 18 | .. autoclass:: luminoth.models.ssd.SSD 19 | :members: 20 | :private-members: 21 | -------------------------------------------------------------------------------- /docs/cli/checkpoint.rst: -------------------------------------------------------------------------------- 1 | .. _cli/checkpoint: 2 | 3 | Checkpoint management 4 | ===================== 5 | -------------------------------------------------------------------------------- /docs/cli/cloud.rst: -------------------------------------------------------------------------------- 1 | .. _cli/cloud: 2 | 3 | Cloud management 4 | ================ 5 | -------------------------------------------------------------------------------- /docs/cli/dataset.rst: -------------------------------------------------------------------------------- 1 | .. _cli/dataset: 2 | 3 | Dataset management 4 | ================== 5 | -------------------------------------------------------------------------------- /docs/cli/eval.rst: -------------------------------------------------------------------------------- 1 | .. _cli/eval: 2 | 3 | Evaluating a model 4 | ================== 5 | -------------------------------------------------------------------------------- /docs/cli/predict.rst: -------------------------------------------------------------------------------- 1 | .. _cli/predict: 2 | 3 | Predict with a model 4 | ==================== 5 | -------------------------------------------------------------------------------- /docs/cli/server.rst: -------------------------------------------------------------------------------- 1 | .. _cli/server: 2 | 3 | Web server 4 | ========== 5 | -------------------------------------------------------------------------------- /docs/cli/train.rst: -------------------------------------------------------------------------------- 1 | .. _cli/train: 2 | 3 | Training a model 4 | ================ 5 | -------------------------------------------------------------------------------- /docs/conf.py: -------------------------------------------------------------------------------- 1 | # 2 | # Configuration file for the Sphinx documentation builder. 3 | # 4 | import pkg_resources 5 | import sys 6 | 7 | 8 | # -- Project information ----------------------------------------------------- 9 | 10 | project = 'Luminoth' 11 | copyright = '2018, Tryolabs' 12 | author = 'Tryolabs' 13 | 14 | try: 15 | # The full version, including alpha/beta/rc tags. 
16 | release = pkg_resources.get_distribution('luminoth').version 17 | except pkg_resources.DistributionNotFound: 18 | print('Luminoth must be installed to build the documentation.') 19 | sys.exit(1) 20 | 21 | if 'dev' in release: 22 | # Trim everything after `dev`, if present. 23 | release = ''.join(release.partition('dev')[:2]) 24 | 25 | # The short X.Y version. 26 | version = '.'.join(release.split('.')[:2]) 27 | 28 | 29 | # -- General configuration --------------------------------------------------- 30 | 31 | # If your documentation needs a minimal Sphinx version, state it here. 32 | # 33 | # needs_sphinx = '1.0' 34 | 35 | # Add any Sphinx extension module names here, as strings. They can be 36 | # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom 37 | # ones. 38 | extensions = [ 39 | 'sphinx.ext.autodoc', 40 | 'sphinx.ext.mathjax', 41 | 'sphinx.ext.napoleon', 42 | 'sphinx.ext.viewcode', 43 | ] 44 | 45 | # Add any paths that contain templates here, relative to this directory. 46 | templates_path = ['_templates'] 47 | 48 | # The suffix(es) of source filenames. 49 | # You can specify multiple suffix as a list of string: 50 | # 51 | # source_suffix = ['.rst', '.md'] 52 | source_suffix = '.rst' 53 | 54 | # The master toctree document. 55 | master_doc = 'index' 56 | 57 | # The language for content autogenerated by Sphinx. Refer to documentation 58 | # for a list of supported languages. 59 | # 60 | # This is also used if you do content translation via gettext catalogs. 61 | # Usually you set "language" from the command line for these cases. 62 | language = None 63 | 64 | # List of patterns, relative to source directory, that match files and 65 | # directories to ignore when looking for source files. 66 | # This pattern also affects html_static_path and html_extra_path . 67 | exclude_patterns = ['_build', 'Thumbs.db', '.DS_Store'] 68 | 69 | # The name of the Pygments (syntax highlighting) style to use. 70 | pygments_style = 'sphinx' 71 | 72 | 73 | # -- Options for HTML output ------------------------------------------------- 74 | 75 | # The theme to use for HTML and HTML Help pages. See the documentation for 76 | # a list of builtin themes. 77 | # 78 | html_theme = 'alabaster' 79 | 80 | # Theme options are theme-specific and customize the look and feel of a theme 81 | # further. For a list of options available for each theme, see the 82 | # documentation. 83 | # 84 | # html_theme_options = {} 85 | 86 | # Add any paths that contain custom static files (such as style sheets) here, 87 | # relative to this directory. They are copied after the builtin static files, 88 | # so a file named "default.css" will overwrite the builtin "default.css". 89 | html_static_path = ['_static'] 90 | 91 | # Custom sidebar templates, must be a dictionary that maps document names 92 | # to template names. 93 | # 94 | # The default sidebars (for documents that don't match any pattern) are 95 | # defined by theme itself. Builtin themes are using these templates by 96 | # default: ``['localtoc.html', 'relations.html', 'sourcelink.html', 97 | # 'searchbox.html']``. 98 | # 99 | html_sidebars = { 100 | '**': [ 101 | 'globaltoc.html', 'relations.html', 'searchbox.html' 102 | ], 103 | } 104 | 105 | 106 | # -- Options for HTMLHelp output --------------------------------------------- 107 | 108 | # Output file base name for HTML help builder. 
109 | htmlhelp_basename = 'Luminothdoc' 110 | 111 | 112 | # -- Options for LaTeX output ------------------------------------------------ 113 | 114 | latex_elements = { 115 | # The paper size ('letterpaper' or 'a4paper'). 116 | # 117 | # 'papersize': 'letterpaper', 118 | 119 | # The font size ('10pt', '11pt' or '12pt'). 120 | # 121 | # 'pointsize': '10pt', 122 | 123 | # Additional stuff for the LaTeX preamble. 124 | # 125 | # 'preamble': '', 126 | 127 | # Latex figure (float) alignment 128 | # 129 | # 'figure_align': 'htbp', 130 | } 131 | 132 | # Grouping the document tree into LaTeX files. List of tuples 133 | # (source start file, target name, title, 134 | # author, documentclass [howto, manual, or own class]). 135 | latex_documents = [ 136 | (master_doc, 'Luminoth.tex', 'Luminoth Documentation', 137 | 'Tryolabs', 'manual'), 138 | ] 139 | 140 | 141 | # -- Options for manual page output ------------------------------------------ 142 | 143 | # One entry per manual page. List of tuples 144 | # (source start file, name, description, authors, manual section). 145 | man_pages = [ 146 | (master_doc, 'luminoth', 'Luminoth Documentation', 147 | [author], 1) 148 | ] 149 | 150 | 151 | # -- Options for Texinfo output ---------------------------------------------- 152 | 153 | # Grouping the document tree into Texinfo files. List of tuples 154 | # (source start file, target name, title, author, 155 | # dir menu entry, description, category) 156 | texinfo_documents = [ 157 | (master_doc, 'Luminoth', 'Luminoth Documentation', 158 | author, 'Luminoth', 'Computer vision toolkit.', 159 | 'Miscellaneous'), 160 | ] 161 | 162 | 163 | # -- Extension configuration ------------------------------------------------- 164 | -------------------------------------------------------------------------------- /docs/images/luminoth-logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Sargunan/Table-Detection-using-Deep-learning/6471be4b0ad9abb3c2d439f6e33a27ccf6db8408/docs/images/luminoth-logo.png -------------------------------------------------------------------------------- /docs/index.rst: -------------------------------------------------------------------------------- 1 | .. title:: Welcome to Luminoth 2 | .. _index: 3 | 4 | Welcome to 5 | ========== 6 | 7 | .. image:: images/luminoth-logo.png 8 | 9 | `Luminoth `_ is an open source toolkit for computer vision. 10 | Currently, we support object detection, but we are aiming for much more. It's built in 11 | Python, using `TensorFlow `_. 12 | 13 | The code is open source and `available on GitHub `_. 14 | 15 | Documentation 16 | ------------- 17 | 18 | .. toctree:: 19 | :maxdepth: 2 20 | :caption: Usage: 21 | 22 | usage/installation 23 | usage/quickstart 24 | Tutorial 25 | usage/dataset 26 | usage/training 27 | usage/evaluation 28 | usage/cloud 29 | usage/checkpoints 30 | 31 | .. toctree:: 32 | :maxdepth: 2 33 | :caption: API reference: 34 | 35 | api/datasets 36 | api/models 37 | 38 | .. 
toctree:: 39 | :maxdepth: 2 40 | :caption: CLI reference: 41 | 42 | cli/checkpoint 43 | cli/cloud 44 | cli/dataset 45 | cli/eval 46 | cli/predict 47 | cli/server 48 | cli/train 49 | -------------------------------------------------------------------------------- /docs/requirements.txt: -------------------------------------------------------------------------------- 1 | tensorflow==1.5.0 2 | -------------------------------------------------------------------------------- /docs/tutorial/02-building-custom-traffic-dataset.rst: -------------------------------------------------------------------------------- 1 | .. _tutorial/02-building-custom-traffic-dataset: 2 | 3 | Building custom traffic dataset 4 | =============================== 5 | 6 | Even though pre-trained checkpoints are really useful, most of the time you will want to 7 | train an object detector using your own dataset. For this, you need a source of images and 8 | their corresponding bounding box coordinates and labels, in some format that Luminoth can 9 | understand. In this case, we are interested in street traffic related objects, so we will 10 | need to source images relevant to our niche. 11 | 12 | How Luminoth handles datasets 13 | ----------------------------- 14 | 15 | Luminoth reads datasets natively only in TensorFlow's `TFRecords 16 | format `_. This is a 17 | binary format that will let Luminoth consume the data very efficiently. 18 | 19 | In order to use a custom dataset, you must first transform whatever format your data is 20 | in, to TFRecords files (one for each split — train, val, test). Fortunately, Luminoth 21 | provides several CLI tools (see :ref:`usage/dataset`) for transforming popular dataset 22 | format (such as Pascal VOC, ImageNet, COCO, CSV, etc.) into TFRecords. In what follows, we 23 | will leverage this. 24 | 25 | Building a traffic dataset using OpenImages 26 | ------------------------------------------- 27 | 28 | `OpenImages V4 `__ is the 29 | largest existing dataset with object location annotations. It contains 15.4M 30 | bounding-boxes for 600 categories on 1.9M images, making it a very good choice for getting 31 | example images of a variety of (not niche-domain) classes (persons, cars, dolphin, 32 | blender, etc). 33 | 34 | Preparing the data 35 | ^^^^^^^^^^^^^^^^^^ 36 | 37 | We should start by downloading `the annotation 38 | files `__ 39 | (`this `__ 40 | and `this `__, 41 | for train) and the `class description `_ 42 | file. Note that the files with the annotations themselves are pretty large, totalling over 43 | 1.5 GB (and this CSV files only, without downloading a single image!). 44 | 45 | After we get the ``class-descriptions-boxable.csv`` file, we can go over all the classes 46 | available in the OpenImages dataset and see which ones are related to **traffic**. The 47 | following were hand-picked after examining the full file: 48 | 49 | .. code-block:: text 50 | 51 | /m/015qff,Traffic light 52 | /m/0199g,Bicycle 53 | /m/01bjv,Bus 54 | /m/01g317,Person 55 | /m/04_sv,Motorcycle 56 | /m/07r04,Truck 57 | /m/0h2r6,Van 58 | /m/0k4j,Car 59 | 60 | Using the Luminoth dataset reader 61 | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ 62 | 63 | Luminoth includes a **dataset reader** that can take OpenImages format. As the dataset is 64 | so large, this will never download every single image, but fetch only those we want to use 65 | and store them directly in the TFRecords file. 66 | 67 | Note that the dataset reader expects a particular directory layout so it knows where the 68 | files are located. 
In this case, files corresponding to the examples must be in a folder 69 | named like their split (`train`, `test`, ...). So, you should have the following: 70 | 71 | .. code-block:: text 72 | 73 | . 74 | ├── class-descriptions-boxable.csv 75 | └── train 76 | ├── train-annotations-bbox.csv 77 | └── train-annotations-human-imagelabels-boxable.csv 78 | 79 | Next, run the following command: 80 | 81 | .. code-block:: bash 82 | 83 | lumi dataset transform \ 84 | --type openimages \ 85 | --data-dir . \ 86 | --output-dir ./out \ 87 | --split train \ 88 | --class-examples 100 \ 89 | --only-classes=/m/015qff,/m/0199g,/m/01bjv,/m/01g317,/m/04_sv,/m/07r04,/m/0h2r6,/m/0k4j 90 | 91 | This will generate TFRecord file for the ``train`` split. You should get something like 92 | this in your terminal after the command finishes: 93 | 94 | .. code-block:: text 95 | 96 | INFO:tensorflow:Saved 360 records to "./out/train.tfrecords" 97 | INFO:tensorflow:Composition per class (train): 98 | INFO:tensorflow: Person (/m/01g317): 380 99 | INFO:tensorflow: Car (/m/0k4j): 255 100 | INFO:tensorflow: Bicycle (/m/0199g): 126 101 | INFO:tensorflow: Bus (/m/01bjv): 106 102 | INFO:tensorflow: Traffic light (/m/015qff): 105 103 | INFO:tensorflow: Truck (/m/07r04): 101 104 | INFO:tensorflow: Van (/m/0h2r6): 100 105 | INFO:tensorflow: Motorcycle (/m/04_sv): 100 106 | 107 | 108 | Apart from the TFRecord file, you will also get a ``classes.json`` file that lists the names 109 | of the classes in your dataset. 110 | 111 | Note that: 112 | 113 | * As we are using ``--only-classes``, so we filter to only use the classes we care about. 114 | * We are using ``--max-per-class`` of 100. This setting will make it stop when every class 115 | has at least 100 examples. However, some classes may end up with many more; for example 116 | here it needed to get 380 instances of persons to get 100 motorcycles, considering the 117 | first 360 images. 118 | * We could also have used ``--limit-examples`` option so we know the number of records in 119 | our final dataset beforehand. 120 | 121 | Of course, this dataset is **way too small** for any meaningful training to go on, but we 122 | are just showcasing. In real life, you would use a much larger value for 123 | ``--max-per-class`` (ie. 15000) or ``--limit-examples``. 124 | 125 | ---- 126 | 127 | Next: :ref:`tutorial/03-training-the-model` 128 | -------------------------------------------------------------------------------- /docs/tutorial/03-training-the-model.rst: -------------------------------------------------------------------------------- 1 | .. _tutorial/03-training-the-model: 2 | 3 | Training the model 4 | ================== 5 | 6 | Now that we have created our (toy) dataset, we can proceed to train our model. 7 | 8 | The configuration file 9 | ---------------------- 10 | 11 | Training orchestration, including the model to be used, the dataset location and training 12 | schedule, is specified in a YAML config file. This file will be consumed by Luminoth and 13 | merged to the default configuration, to start the training session. 14 | 15 | You can see a minimal config file example in 16 | `sample_config.yml `_. 17 | This file illustrates the entries you'll most probably need to modify, which are: 18 | 19 | * ``train.run_name``: the run name for the training session, used to identify it. 20 | * ``train.job_dir``: directory in which both model checkpoints and summaries (for 21 | TensorBoard consumption) will be saved. The actual files will be stored under 22 | ``/``. 
23 | * ``dataset.dir``: directory from which to read the TFRecord files. 24 | * ``model.type``: model to use for object detection (``fasterrcnn``, or ``ssd``). 25 | * ``network.num_classes``: number of classes to predict (depends on your dataset). 26 | 27 | For looking at all the possible configuration options, mostly related to the model 28 | itself, you can check the 29 | `base_config.yml `_ 30 | file. 31 | 32 | Building the config file for your dataset 33 | ----------------------------------------- 34 | 35 | Probably the most important setting for training is the **learning rate**. You will most 36 | likely want to tune this depending on your dataset, and you can do it via the 37 | ``train.learning_rate`` setting in the configuration. For example, this would be a good 38 | setting for training on the full COCO dataset: 39 | 40 | .. code-block:: yaml 41 | 42 | learning_rate: 43 | decay_method: piecewise_constant 44 | boundaries: [250000, 450000, 600000] 45 | values: [0.0003, 0.0001, 0.00003, 0.00001] 46 | 47 | To get to this, you will need to run some experiments and see what works best. 48 | 49 | .. code-block:: yaml 50 | 51 | train: 52 | # Run name for the training session. 53 | run_name: traffic 54 | job_dir: 55 | learning_rate: 56 | decay_method: piecewise_constant 57 | # Custom dataset for Luminoth Tutorial 58 | boundaries: [90000, 160000, 250000] 59 | values: [0.0003, 0.0001, 0.00003, 0.00001] 60 | dataset: 61 | type: object_detection 62 | dir: 63 | model: 64 | type: fasterrcnn 65 | network: 66 | num_classes: 8 67 | anchors: 68 | # Add one more scale to be better at detecting small objects 69 | scales: [0.125, 0.25, 0.5, 1, 2] 70 | 71 | Running the training 72 | -------------------- 73 | 74 | Assuming you already have both your dataset (TFRecords) and the config file ready, you can 75 | start your training session by running the command as follows: 76 | 77 | .. code-block:: bash 78 | 79 | lumi train -c config.yml 80 | 81 | You can use the ``-o`` option to override any configuration option using dot notation (e.g. 82 | ``-o model.rpn.proposals.nms_threshold=0.8``). 83 | 84 | If you are using a CUDA-based GPU, you can select the GPU to use by setting the 85 | ``CUDA_VISIBLE_DEVICES`` environment variable (see 86 | `here `_ 87 | for more info). 88 | 89 | When the training is running, you should see Luminoth print out for each step, the 90 | minibatch (single image), and the training loss related to that minibatch. 91 | 92 | Image to image, the training loss will jump around, and this is expected. However, the 93 | trend will be that the loss will gradually start to decrease. For this, it is interesting 94 | to look at it using tools like TensorBoard. 95 | 96 | Storing partial weights (checkpoints) 97 | ------------------------------------- 98 | 99 | As the training progresses, Luminoth will periodically save a checkpoint with the current 100 | weights of the model. These weights let you resume training from where you left off! 101 | 102 | The files will be output in your ``/`` folder. By default, they will be 103 | saved every 600 seconds of training, but you can configure this with the 104 | ``train.save_checkpoint_secs`` setting in your config file. 105 | 106 | The default is to only store the latest checkpoint (that is, when a checkpoint is 107 | generated, the previous checkpoint gets deleted) in order to conserve storage. You might 108 | find the ``train.checkpoints_max_keep`` option in your train YML configuration useful if 109 | you want to keep more checkpoints around. 
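As a quick sketch, the relevant fragment of your ``config.yml`` could look like the
following (the values here are purely illustrative, not recommendations):

.. code-block:: yaml

    train:
      run_name: traffic
      # Save a checkpoint every 5 minutes instead of the default 600 seconds.
      save_checkpoint_secs: 300
      # Keep the 5 most recent checkpoints instead of only the latest one.
      checkpoints_max_keep: 5

Keeping a few extra checkpoints costs some disk space, but makes it easier to roll back
to an earlier point of the training run.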
110 | 111 | ---- 112 | 113 | Next: :ref:`tutorial/04-visualizing-the-training-process` 114 | -------------------------------------------------------------------------------- /docs/tutorial/04-visualizing-the-training-process.rst: -------------------------------------------------------------------------------- 1 | .. _tutorial/04-visualizing-the-training-process: 2 | 3 | Using TensorBoard to visualize the training process 4 | =================================================== 5 | 6 | Now that the training is running, you should pay special attention to how it is 7 | progressing, to make sure that your model is actually learning something. 8 | 9 | `TensorBoard `_ is a very good 10 | tool for this, allowing you to see plenty of plots with the training related metrics. By 11 | default, Luminoth writes TensorBoard summaries during training, so you can leverage this 12 | tool without any effort! 13 | 14 | To run it, you can use: 15 | 16 | .. code-block:: bash 17 | 18 | tensorboard --logdir / 19 | 20 | If you are running from an external VM, make sure to use ``--host 0.0.0.0`` and ``--port`` 21 | if you need other one than the default 6006. 22 | 23 | What to look for 24 | ---------------- 25 | 26 | First, go to the "Scalars" tab. You are going to see several *tags*, but in this case, 27 | you only should care about some of the metrics behind ``losses``. 28 | 29 | The loss is your objective function, which you want to minimize. In the case of Faster 30 | R-CNN, we have a model with a multi-objective loss, ie. the model is trying to minimize 31 | several things at the same time. This is why you will see several plots here. 32 | 33 | .. image:: images/04-visualizing-the-training-process/losses.png 34 | :alt: Some of the values printed under "losses" 35 | 36 | You should mostly be interested in the one called ``no_reg_loss``. This is the total loss 37 | function, without including the regularization loss (which will always decrease). 38 | Therefore, it will give you a nice summary of how the training is progressing. 39 | 40 | Your job is to make sure this ``no_reg_loss`` value is going decreasing during training. 41 | 42 | As we said before, the loss will jump around a lot, because each point corresponds to a 43 | minibatch, which in this case is a single image. A good prediction in a particular image 44 | will yield a low loss, however, if the model performed poorly another particular image, 45 | the loss will be very high. 46 | 47 | To help you notice the trend, you can set **Smoothing** to a higher value. For example, 48 | setting it so 0.95 the plots now look like this: 49 | 50 | .. image:: images/04-visualizing-the-training-process/losses-smoothing.png 51 | :alt: Same plots but with smoothing set ot 0.95. 52 | 53 | Now it's much more clear: at first, there is a sharp drop in the loss, but then it is not 54 | as noticeable. 55 | 56 | Tips: how to do training and tune hyperparameters 57 | ------------------------------------------------- 58 | 59 | To get the best result for your dataset, you are going to have to run several training 60 | runs until you nail it. Here is what we have done in the past: 61 | 62 | 63 | #. Start with a **fixed learning rate**. The "magical" value of 0.0003 has worked for us 64 | for a variety of problems. 65 | #. Run the training until the loss sort of **stabilizes** for some time (many thousands of 66 | steps). 67 | #. After the loss is roughly stable, **decrease** the learning rate. For example, you 68 | might choose a different value like 0.001. 
In the case of Faster R-CNN model, we 69 | (approximately) divide the learning rate by 3 (0.0003, 0.0001, 0.00003, ...). 70 | #. You should see the loss leave this plateau and become even smaller. If so, good job! 71 | Notice the approximate step number in which you would consider that the loss 72 | stabilized. 73 | 74 | This information will let you build a training configuration that is good for your 75 | dataset. For example, you can better tune your configuration for learning rate: 76 | 77 | .. code-block:: yaml 78 | 79 | train: 80 | learning_rate: 81 | decay_method: piecewise_constant 82 | boundaries: [steps_1, steps_2, ..., steps_n] 83 | values: [value_0, value_1, value_2, ..., value_n] 84 | 85 | Manually inspecting how model performs with lumi server web 86 | ----------------------------------------------------------- 87 | 88 | You can also use ``lumi server web`` command that we have seen before and try your 89 | partially trained model in a bunch of novel images. 90 | 91 | For this, you can launch it with a config file like: 92 | 93 | .. code-block:: bash 94 | 95 | lumi server web -c config.yml 96 | 97 | Remember that here you can also use ``--host`` and ``--port`` options should you happen to 98 | need those. 99 | 100 | ---- 101 | 102 | Next: :ref:`tutorial/05-evaluating-models` 103 | -------------------------------------------------------------------------------- /docs/tutorial/06-creating-own-checkpoints.rst: -------------------------------------------------------------------------------- 1 | .. _tutorial/06-creating-own-checkpoints: 2 | 3 | Creating and sharing your own checkpoints 4 | ========================================= 5 | 6 | After the model is trained to your satisfaction, it is very useful to actually create a 7 | **checkpoint** that makes it straightforward to use your model. 8 | 9 | Creating a checkpoint 10 | --------------------- 11 | 12 | We can create checkpoints and set some metadata like name, alias, etc. This time, we are 13 | going to create the checkpoint for our traffic model: 14 | 15 | .. code-block:: bash 16 | 17 | lumi checkpoint create \ 18 | config.yml \ 19 | -e name="OpenImages Traffic" \ 20 | -e alias=traffic 21 | 22 | After running this, you should get an output similar to this: 23 | 24 | .. code-block:: text 25 | 26 | Creating checkpoint for given configuration... 27 | Checkpoint cb0e5d92a854 created successfully. 28 | 29 | 30 | You can verify that you do indeed have the checkpoint when running ``lumi checkpoint 31 | list``, which should get you an output similar to this: 32 | 33 | .. code-block:: text 34 | 35 | ================================================================================ 36 | | id | name | alias | source | status | 37 | ================================================================================ 38 | | e1c2565b51e9 | Faster R-CNN w/COCO | accurate | remote | DOWNLOADED | 39 | | aad6912e94d9 | SSD w/Pascal VOC | fast | remote | DOWNLOADED | 40 | | cb0e5d92a854 | OpenImages Traffic | traffic | local | LOCAL | 41 | ================================================================================ 42 | 43 | 44 | Moreover, if you inspect the ``~/.luminoth/checkpoints/`` folder, you will see that now you 45 | have a folder that corresponds to your newly created checkpoint. Inside this folder are 46 | the actual weights of the model, plus some metadata and the configuration file that was 47 | used during training. 
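If you want to double-check what was stored, you can inspect the new checkpoint through
its alias (see :ref:`usage/checkpoints` for the full set of checkpoint commands). This is
just a quick sanity check, and the exact fields shown may vary:

.. code-block:: bash

    lumi checkpoint info traffic

It should display the name, alias and dataset information you passed when creating the
checkpoint.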
48 | 49 | Sharing checkpoints 50 | ------------------- 51 | 52 | Exporting a checkpoint as a single file 53 | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ 54 | 55 | Simply run ``lumi checkpoint export cb0e5d92a854``. You will get a file named 56 | ``cb0e5d92a854.tar`` in your current directory, which you can easily share to somebody else. 57 | 58 | Importing a checkpoint file 59 | ^^^^^^^^^^^^^^^^^^^^^^^^^^^ 60 | 61 | By running ``lumi checkpoint import cb0e5d92a854.tar``, the checkpoint will be listed 62 | locally. Note that this will fail if the checkpoint already exists, as expected (you can 63 | use ``lumi checkpoint delete`` if you want to try this anyway). 64 | 65 | You can now use it very easily, for example we can reference our checkpoint using its 66 | alias by running ``lumi server web --checkpoint traffic``. Neat! 67 | 68 | ---- 69 | 70 | Next: :ref:`tutorial/07-using-luminoth-from-python` 71 | -------------------------------------------------------------------------------- /docs/tutorial/07-using-luminoth-from-python.rst: -------------------------------------------------------------------------------- 1 | .. _tutorial/07-using-luminoth-from-python: 2 | 3 | Using Luminoth from Python 4 | ========================== 5 | 6 | Calling Luminoth from your Python app is very straightforward. You can even make use of 7 | helper functions to visualize the bounding boxes. 8 | 9 | .. code-block:: python 10 | 11 | from luminoth import Detector, read_image, vis_objects 12 | 13 | image = read_image('traffic-image.png') 14 | 15 | # If no checkpoint specified, will assume `accurate` by default. In this case, 16 | # we want to use our traffic checkpoint. The Detector can also take a config 17 | # object. 18 | detector = Detector(checkpoint='traffic') 19 | 20 | # Returns a dictionary with the detections. 21 | objects = detector.predict(image) 22 | 23 | print(objects) 24 | 25 | vis_objects(image, objects).save('traffic-out.png') 26 | 27 | ---- 28 | 29 | This was the end of the tutorial! 
Hope you enjoyed :) 30 | -------------------------------------------------------------------------------- /docs/tutorial/images/01-first-steps/luminoth-predictions.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Sargunan/Table-Detection-using-Deep-learning/6471be4b0ad9abb3c2d439f6e33a27ccf6db8408/docs/tutorial/images/01-first-steps/luminoth-predictions.jpg -------------------------------------------------------------------------------- /docs/tutorial/images/01-first-steps/luminoth-web-server.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Sargunan/Table-Detection-using-Deep-learning/6471be4b0ad9abb3c2d439f6e33a27ccf6db8408/docs/tutorial/images/01-first-steps/luminoth-web-server.jpg -------------------------------------------------------------------------------- /docs/tutorial/images/04-visualizing-the-training-process/losses-smoothing.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Sargunan/Table-Detection-using-Deep-learning/6471be4b0ad9abb3c2d439f6e33a27ccf6db8408/docs/tutorial/images/04-visualizing-the-training-process/losses-smoothing.png -------------------------------------------------------------------------------- /docs/tutorial/images/04-visualizing-the-training-process/losses.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Sargunan/Table-Detection-using-Deep-learning/6471be4b0ad9abb3c2d439f6e33a27ccf6db8408/docs/tutorial/images/04-visualizing-the-training-process/losses.png -------------------------------------------------------------------------------- /docs/tutorial/images/05-evaluating-models/lumi-server-web-result.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Sargunan/Table-Detection-using-Deep-learning/6471be4b0ad9abb3c2d439f6e33a27ccf6db8408/docs/tutorial/images/05-evaluating-models/lumi-server-web-result.jpg -------------------------------------------------------------------------------- /docs/tutorial/images/05-evaluating-models/validation-metrics-tensorboard.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Sargunan/Table-Detection-using-Deep-learning/6471be4b0ad9abb3c2d439f6e33a27ccf6db8408/docs/tutorial/images/05-evaluating-models/validation-metrics-tensorboard.png -------------------------------------------------------------------------------- /docs/tutorial/index.rst: -------------------------------------------------------------------------------- 1 | .. _tutorial/index: 2 | 3 | Tutorial: real world object detection with Luminoth 4 | =================================================== 5 | 6 | In this tutorial, we will learn the workings of *Luminoth* by using it in practice to 7 | solve a real world object detection problem. 8 | 9 | As our case study, we will be building a model able to recognize cars, pedestrians, and 10 | other objects which a self-driving car would need to detect in order to properly function. 11 | We will have our model ready for that and see it how to apply it to images and video. We 12 | will not, however, add any tracking capabilities. 
13 | 14 | To follow along easier and not invest many hours each time we want to run the training 15 | process, we will build a small toy dataset and show how things go from there, giving tips 16 | on the things you need to look at when training a model with a larger dataset. 17 | 18 | First, check the :ref:`usage/installation` section and make sure you have a working 19 | install. 20 | 21 | .. toctree:: 22 | :maxdepth: 2 23 | 24 | 01-first-steps 25 | 02-building-custom-traffic-dataset 26 | 03-training-the-model 27 | 04-visualizing-the-training-process 28 | 05-evaluating-models 29 | 06-creating-own-checkpoints 30 | 07-using-luminoth-from-python 31 | -------------------------------------------------------------------------------- /docs/usage/checkpoints.rst: -------------------------------------------------------------------------------- 1 | .. _usage/checkpoints: 2 | 3 | Working with checkpoints 4 | ======================== 5 | 6 | TODO: Explain the rationale behind checkpoints, and expand each section. 7 | 8 | List the checkpoints available on the system:: 9 | 10 | $ lumi checkpoint list 11 | ================================================================================ 12 | | id | name | alias | source | status | 13 | ================================================================================ 14 | | 48ed2350f5b2 | Faster R-CNN w/COCO | accurate | remote | NOT_DOWNLOADED | 15 | | e3256ffb7e29 | SSD w/Pascal VOC | fast | remote | NOT_DOWNLOADED | 16 | ================================================================================ 17 | 18 | Inspect a checkpoint:: 19 | 20 | $ lumi checkpoint info accurate 21 | Faster R-CNN w/COCO (48ed2350f5b2, accurate) 22 | Base Faster R-CNN model trained with the full COCO dataset. 23 | 24 | Model used: fasterrcnn 25 | Dataset information 26 | Name: COCO 27 | Number of classes: 80 28 | 29 | Creation date: 2018-03-21T20:04:59.785711 30 | Luminoth version: v0.1.0 31 | 32 | Source: remote (NOT_DOWNLOADED) 33 | URL: https://github.com/tryolabs/luminoth/releases/download/v0.0.3/48ed2350f5b2.tar 34 | 35 | Refresh the remote checkpoint index:: 36 | 37 | $ lumi checkpoint refresh 38 | Retrieving remote index... done. 39 | 2 new remote checkpoints added. 40 | 41 | Download a remote checkpoint:: 42 | 43 | $ lumi checkpoint download accurate 44 | Downloading checkpoint... [####################################] 100% 45 | Importing checkpoint... done. 46 | Checkpoint imported successfully. 47 | 48 | Create a checkpoint:: 49 | 50 | $ lumi checkpoint create config.yml -e name='Faster R-CNN with cars' -e alias=cars 51 | Creating checkpoint for given configuration... 52 | Checkpoint b5c140450f48 created successfully. 53 | 54 | Edit a checkpoint:: 55 | 56 | $ lumi checkpoint edit b5c140450f48 -e 'description=Model trained with COCO cars.' 57 | 58 | Delete a checkpoint:: 59 | 60 | $ lumi checkpoint delete b5c140450f48 61 | Checkpoint b5c140450f48 deleted successfully. 62 | 63 | Export a checkpoint into a tar file, for easy sharing:: 64 | 65 | $ lumi checkpoint export 48ed2350f5b2 66 | Checkpoint 48ed2350f5b2 exported successfully. 67 | 68 | Import a previously-exported checkpoint:: 69 | 70 | $ lumi checkpoint import 48ed2350f5b2.tar 71 | -------------------------------------------------------------------------------- /docs/usage/evaluation.rst: -------------------------------------------------------------------------------- 1 | .. 
_usage/evaluation: 2 | 3 | Evaluating a model 4 | ================== 5 | -------------------------------------------------------------------------------- /docs/usage/installation.rst: -------------------------------------------------------------------------------- 1 | .. _usage/installation: 2 | 3 | Installation 4 | ============ 5 | 6 | Before you start 7 | ---------------- 8 | 9 | TensorFlow 10 | ^^^^^^^^^^ 11 | 12 | To use Luminoth, `TensorFlow `_ must be installed beforehand. 13 | 14 | If you want **GPU support**, you should install the GPU version of TensorFlow with 15 | ``pip install tensorflow-gpu``, or else you can use the CPU version using 16 | ``pip install tensorflow``. 17 | 18 | You can see more details of how to install TensorFlow manually `here 19 | `__, including how to use CUDA and cuDNN. 20 | 21 | FFmpeg 22 | ^^^^^^ 23 | 24 | Luminoth leverages `FFmpeg `_ in order to support 25 | running predictions on videos. If you plan to use Luminoth with this end, 26 | FFmpeg should be installed as a system dependency. 27 | 28 | 29 | Installing from PyPI 30 | -------------------- 31 | 32 | Use ``pip`` to install Luminoth, by running the following command:: 33 | 34 | pip install luminoth 35 | 36 | Google Cloud 37 | ^^^^^^^^^^^^ 38 | 39 | If you wish to train using **Google Cloud ML Engine**, the optional dependencies 40 | must be installed:: 41 | 42 | $ pip install luminoth[gcloud] 43 | 44 | 45 | Installing from source 46 | ---------------------- 47 | 48 | Start by cloning the Luminoth repository:: 49 | 50 | git clone https://github.com/tryolabs/luminoth.git 51 | 52 | Then install the library by running:: 53 | 54 | cd luminoth 55 | pip install -e . 56 | -------------------------------------------------------------------------------- /docs/usage/quickstart.rst: -------------------------------------------------------------------------------- 1 | .. _usage/quickstart: 2 | 3 | Getting started 4 | =============== 5 | 6 | After going through the installation process (see :ref:`usage/installation`), 7 | the ``lumi`` CLI tool should be at your disposal. This tool is the main way to 8 | interact with Luminoth, allowing you to train new models, evaluate them, use 9 | them for predictions, manage your checkpoints and more. Running it will provide 10 | additional information:: 11 | 12 | Usage: lumi [OPTIONS] COMMAND [ARGS]... 13 | 14 | Options: 15 | -h, --help Show this message and exit. 16 | 17 | Commands: 18 | checkpoint Groups of commands to manage checkpoints 19 | cloud Groups of commands to train models in the... 20 | dataset Groups of commands to manage datasets 21 | eval Evaluate trained (or training) models 22 | predict Obtain a model's predictions. 23 | server Groups of commands to serve models 24 | train Train models 25 | 26 | We'll start by downloading a checkpoint. Luminoth provides already-trained 27 | models so you can run predictions and get reasonable results in no time (and 28 | eventually be able to use them for fine-tuning). In order to access these 29 | checkpoints, we first need to download the remote index with the available 30 | models. 31 | 32 | Checkpoint management is handled by the ``lumi checkpoint`` subcommand. Run the 33 | following to both retrieve and list the existing checkpoints:: 34 | 35 | $ lumi checkpoint refresh 36 | Retrieving remote index... done. 37 | 2 new remote checkpoints added. 
38 | $ lumi checkpoint list 39 | ================================================================================ 40 | | id | name | alias | source | status | 41 | ================================================================================ 42 | | 48ed2350f5b2 | Faster R-CNN w/COCO | accurate | remote | NOT_DOWNLOADED | 43 | | e3256ffb7e29 | SSD w/Pascal VOC | fast | remote | NOT_DOWNLOADED | 44 | ================================================================================ 45 | 46 | Two checkpoints are present: 47 | 48 | - **Faster R-CNN w/COCO** (48ed2350f5b2): object detection model based on the 49 | Faster R-CNN model, trained using the COCO dataset. Aliased as ``accurate``, as it's 50 | the slower but more accurate detection model. 51 | 52 | - **SSD w/Pascal VOC** (e3256ffb7e29): object detection model based on the 53 | Single Shot Multibox Detector (SSD) model, trained using the Pascal VOC dataset. Aliased 54 | as ``fast``, as it's the faster but less accurate detection model. 55 | 56 | Additional commands are available for managing checkpoints, including inspection 57 | and modification of checkpoints (see :ref:`cli/checkpoint`). For now, we'll 58 | download a checkpoint and use it:: 59 | 60 | $ lumi checkpoint download 48ed2350f5b2 61 | Downloading checkpoint... [####################################] 100% 62 | Importing checkpoint... done. 63 | Checkpoint imported successfully. 64 | 65 | Once the checkpoint is downloaded, it can be used for predictions. There are 66 | currently two ways to do this: 67 | 68 | - Using the CLI tool and passing it either images or videos. This will output a 69 | JSON with the results and optionally draw the bounding boxes of the 70 | detections in the image. 71 | - Using the web app provided for testing purposes. This will start a web server 72 | that, when connected, allows you to upload the image. Also useful to run on 73 | a remote GPU. (Note, however, that using Luminoth through the web interface is 74 | **not** production-ready and will not scale.) 75 | 76 | Let's start with the first, by running it on an image aptly named 77 | ``image.jpg``:: 78 | 79 | $ lumi predict image.jpg 80 | Found 1 files to predict. 81 | Neither checkpoint nor config specified, assuming `accurate`. 82 | Predicting image.jpg... done. 83 | { 84 | "file": "image.jpg", 85 | "objects": [ 86 | {"bbox": [294, 231, 468, 536], "label": "person", "prob": 0.9997}, 87 | {"bbox": [494, 289, 578, 439], "label": "person", "prob": 0.9971}, 88 | {"bbox": [727, 303, 800, 465], "label": "person", "prob": 0.997}, 89 | {"bbox": [555, 315, 652, 560], "label": "person", "prob": 0.9965}, 90 | {"bbox": [569, 425, 636, 600], "label": "bicycle", "prob": 0.9934}, 91 | {"bbox": [326, 410, 426, 582], "label": "bicycle", "prob": 0.9933}, 92 | {"bbox": [744, 380, 784, 482], "label": "bicycle", "prob": 0.9334}, 93 | {"bbox": [506, 360, 565, 480], "label": "bicycle", "prob": 0.8724}, 94 | {"bbox": [848, 319, 858, 342], "label": "person", "prob": 0.8142}, 95 | {"bbox": [534, 298, 633, 473], "label": "person", "prob": 0.4089} 96 | ] 97 | } 98 | 99 | You can further specify the checkpoint to use (by using the ``--checkpoint`` 100 | option), as well as indicate the minimum score to allow for bounding boxes 101 | (too low will detect noise, too high won't detect anything), the number of 102 | detections, and so on. 103 | 104 | The second variant is even easier to use: just run the following command and go 105 | to ``http://127.0.0.1:5000/``:: 106 | 107 | $ lumi server web 108 | Neither checkpoint nor config specified, assuming `accurate`.
109 | * Running on http://127.0.0.1:5000/ (Press CTRL+C to quit) 110 | 111 | In there, you'll be able to upload an image and see the results. 112 | 113 | And that's it for the basics! Next steps would be: 114 | 115 | - Prepare your own dataset to be consumed by Luminoth (see :ref:`usage/dataset`). 116 | - Train a custom model with your own data, either locally or in Google Cloud 117 | (see :ref:`usage/training`). 118 | - Turn your custom model into a checkpoint for easier sharing and usage (see 119 | :ref:`usage/checkpoints`). 120 | - Use the Python API to call Luminoth models within Python. 121 | -------------------------------------------------------------------------------- /docs/usage/training.rst: -------------------------------------------------------------------------------- 1 | .. _usage/training: 2 | 3 | Training your own model 4 | ======================= 5 | 6 | In order to train your own model, two things are required: 7 | 8 | * A dataset ready to be consumed by Luminoth (see :ref:`usage/dataset`). 9 | * A configuration file for the run. 10 | 11 | We'll start by covering the configuration file, then proceed to the training 12 | itself, both locally and in the cloud. 13 | 14 | Configuration 15 | ------------- 16 | 17 | Training orchestration, including the model to be used, the dataset location 18 | and training schedule, is specified in a YAML config file. This file will be 19 | consumed by Luminoth and merged to the default configuration to start the 20 | training session. 21 | 22 | You can see a minimal config file example in `sample_config.yml 23 | `_. 24 | This file illustrates the entries you'll most probably need to modify, which 25 | are: 26 | 27 | * ``train.run_name``: The run name for the training session, used to identify 28 | it. 29 | * ``train.job_dir``: Directory in which both model checkpoints and summaries 30 | (for Tensorboard consumption) will be saved. The actual files will be stored 31 | under ``{job_dir}/{run_name}``, so serving ``{job_dir}`` with Tensorboard will 32 | allow you to see all your runs at once. 33 | * ``dataset.dir``: Directory from which to read the TFrecords files. 34 | * ``model.type``: Model to use for object detection (e.g. ``fasterrcnn``, 35 | ``ssd``). 36 | * ``network.num_classes``: Number of classes to predict. 37 | 38 | There are a great deal of configuration options, mostly related to the model 39 | itself. You can, for instance, see the full range of options for the Faster 40 | R-CNN model, along with a brief description of each, in its `base_config.yml 41 | `_ 42 | file. 43 | 44 | Training 45 | -------- 46 | 47 | The model training itself can either be run locally (on the CPU or GPU 48 | available) or in Google Cloud's Cloud ML Engine. 49 | 50 | Locally 51 | ^^^^^^^ 52 | 53 | Assuming you already have both your dataset and the config file ready, you can 54 | start your training session by running the command as follows:: 55 | 56 | $ lumi train -c my_config.yml 57 | 58 | The ``lumi train`` CLI tool provides the following options related to training. 59 | 60 | * ``--config``/``-c``: Config file to use. If the flag is repeated, all config 61 | files will be merged in left-to-right order so that every file overwrites the 62 | configuration of keys defined previously. 63 | 64 | * ``--override``/``-o``: Override any configuration setting using dot notation 65 | (e.g.: ``-o model.rpn.proposals.nms_threshold=0.8``). 
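For instance, a training session that layers a second config file on top of the base one and then overrides a single value from the command line could look like this (the file names here are illustrative only; the override key is the one shown above)::

    $ lumi train -c my_config.yml -c gpu_config.yml \
        -o model.rpn.proposals.nms_threshold=0.8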
66 | 67 | If you're using a CUDA-based GPU, you can select the GPU to use by setting the 68 | ``CUDA_VISIBLE_DEVICES`` environment variable. (See the `NVIDIA site 69 | `_ 70 | for more information.) 71 | 72 | You can run `Tensorboard 73 | `_ on 74 | the ``job_dir`` to visualize training, including the loss, evaluation metrics, 75 | training speed, and even partial images. 76 | 77 | Google Cloud 78 | ^^^^^^^^^^^^ 79 | Luminoth can easily run in `Google Cloud ML Engine `_ 80 | with a single command. 81 | 82 | For more information, see :ref:`usage/cloud`. 83 | -------------------------------------------------------------------------------- /examples/sample_config.yml: -------------------------------------------------------------------------------- 1 | train: 2 | # Run name for the training session. 3 | run_name: my-run 4 | # Directory in which model checkpoints & summaries (for Tensorboard) will be saved. 5 | job_dir: jobs/ 6 | 7 | dataset: 8 | type: object_detection 9 | # From which directory to read the dataset. 10 | dir: datasets/voc/tf 11 | 12 | model: 13 | type: fasterrcnn 14 | network: 15 | # Total number of classes to predict. 16 | num_classes: 20 17 | -------------------------------------------------------------------------------- /imgs/Architecture.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Sargunan/Table-Detection-using-Deep-learning/6471be4b0ad9abb3c2d439f6e33a27ccf6db8408/imgs/Architecture.png -------------------------------------------------------------------------------- /imgs/output1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Sargunan/Table-Detection-using-Deep-learning/6471be4b0ad9abb3c2d439f6e33a27ccf6db8408/imgs/output1.png -------------------------------------------------------------------------------- /imgs/output2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Sargunan/Table-Detection-using-Deep-learning/6471be4b0ad9abb3c2d439f6e33a27ccf6db8408/imgs/output2.png -------------------------------------------------------------------------------- /luminoth/__init__.py: -------------------------------------------------------------------------------- 1 | __version__ = '0.2.4dev0' 2 | 3 | __title__ = 'Luminoth' 4 | __description__ = 'Computer vision toolkit based on TensorFlow' 5 | __uri__ = 'https://luminoth.ai' 6 | __doc__ = __description__ + ' <' + __uri__ + '>' 7 | 8 | __author__ = 'Tryolabs' 9 | __email__ = 'luminoth@tryolabs.com' 10 | 11 | __license__ = 'BSD 3-Clause License' 12 | __copyright__ = 'Copyright (c) 2018 Tryolabs S.A.' 13 | 14 | __min_tf_version__ = '1.5' 15 | 16 | 17 | import sys 18 | 19 | # Check for a current TensorFlow installation. 20 | try: 21 | import tensorflow # noqa: F401 22 | except ImportError: 23 | sys.exit("""Luminoth requires a TensorFlow >= {} installation. 24 | 25 | Depending on your use case, you should install either `tensorflow` or 26 | `tensorflow-gpu` packages manually or via PyPI.""".format(__min_tf_version__)) 27 | 28 | 29 | # Import functions that are part of Luminoth's public interface. 
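# `read_image` loads an image file into a NumPy array, `Detector` runs
# predictions from a checkpoint, and `vis_objects` draws predicted boxes on an
# image.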
30 | from luminoth.cli import cli # noqa 31 | from luminoth.io import read_image # noqa 32 | from luminoth.tasks import Detector # noqa 33 | from luminoth.vis import vis_objects # noqa 34 | -------------------------------------------------------------------------------- /luminoth/__pycache__/__init__.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Sargunan/Table-Detection-using-Deep-learning/6471be4b0ad9abb3c2d439f6e33a27ccf6db8408/luminoth/__pycache__/__init__.cpython-36.pyc -------------------------------------------------------------------------------- /luminoth/__pycache__/cli.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Sargunan/Table-Detection-using-Deep-learning/6471be4b0ad9abb3c2d439f6e33a27ccf6db8408/luminoth/__pycache__/cli.cpython-36.pyc -------------------------------------------------------------------------------- /luminoth/__pycache__/eval.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Sargunan/Table-Detection-using-Deep-learning/6471be4b0ad9abb3c2d439f6e33a27ccf6db8408/luminoth/__pycache__/eval.cpython-36.pyc -------------------------------------------------------------------------------- /luminoth/__pycache__/io.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Sargunan/Table-Detection-using-Deep-learning/6471be4b0ad9abb3c2d439f6e33a27ccf6db8408/luminoth/__pycache__/io.cpython-36.pyc -------------------------------------------------------------------------------- /luminoth/__pycache__/predict.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Sargunan/Table-Detection-using-Deep-learning/6471be4b0ad9abb3c2d439f6e33a27ccf6db8408/luminoth/__pycache__/predict.cpython-36.pyc -------------------------------------------------------------------------------- /luminoth/__pycache__/tasks.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Sargunan/Table-Detection-using-Deep-learning/6471be4b0ad9abb3c2d439f6e33a27ccf6db8408/luminoth/__pycache__/tasks.cpython-36.pyc -------------------------------------------------------------------------------- /luminoth/__pycache__/train.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Sargunan/Table-Detection-using-Deep-learning/6471be4b0ad9abb3c2d439f6e33a27ccf6db8408/luminoth/__pycache__/train.cpython-36.pyc -------------------------------------------------------------------------------- /luminoth/__pycache__/vis.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Sargunan/Table-Detection-using-Deep-learning/6471be4b0ad9abb3c2d439f6e33a27ccf6db8408/luminoth/__pycache__/vis.cpython-36.pyc -------------------------------------------------------------------------------- /luminoth/cli.py: -------------------------------------------------------------------------------- 1 | """Simple command line utility called `lumi`. 2 | 3 | The cli is composed of subcommands that are able to handle different tasks 4 | needed for training and using deep learning models. 
5 | 6 | It's base subcommands are: 7 | train: For training locally. 8 | cloud: For traning and monitoring in the cloud. 9 | dataset: For modifying and transforming datasets. 10 | """ 11 | 12 | import click 13 | 14 | from luminoth.eval import eval 15 | from luminoth.predict import predict 16 | from luminoth.tools import checkpoint, cloud, dataset, server 17 | from luminoth.train import train 18 | 19 | 20 | CONTEXT_SETTINGS = dict(help_option_names=['-h', '--help']) 21 | 22 | 23 | @click.group(context_settings=CONTEXT_SETTINGS) 24 | def cli(): 25 | pass 26 | 27 | 28 | cli.add_command(checkpoint) 29 | cli.add_command(cloud) 30 | cli.add_command(dataset) 31 | cli.add_command(eval) 32 | cli.add_command(predict) 33 | cli.add_command(server) 34 | cli.add_command(train) 35 | -------------------------------------------------------------------------------- /luminoth/datasets/__init__.py: -------------------------------------------------------------------------------- 1 | from .datasets import get_dataset # noqa 2 | from .object_detection_dataset import ObjectDetectionDataset # noqa 3 | -------------------------------------------------------------------------------- /luminoth/datasets/__pycache__/__init__.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Sargunan/Table-Detection-using-Deep-learning/6471be4b0ad9abb3c2d439f6e33a27ccf6db8408/luminoth/datasets/__pycache__/__init__.cpython-36.pyc -------------------------------------------------------------------------------- /luminoth/datasets/__pycache__/base_dataset.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Sargunan/Table-Detection-using-Deep-learning/6471be4b0ad9abb3c2d439f6e33a27ccf6db8408/luminoth/datasets/__pycache__/base_dataset.cpython-36.pyc -------------------------------------------------------------------------------- /luminoth/datasets/__pycache__/datasets.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Sargunan/Table-Detection-using-Deep-learning/6471be4b0ad9abb3c2d439f6e33a27ccf6db8408/luminoth/datasets/__pycache__/datasets.cpython-36.pyc -------------------------------------------------------------------------------- /luminoth/datasets/__pycache__/exceptions.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Sargunan/Table-Detection-using-Deep-learning/6471be4b0ad9abb3c2d439f6e33a27ccf6db8408/luminoth/datasets/__pycache__/exceptions.cpython-36.pyc -------------------------------------------------------------------------------- /luminoth/datasets/__pycache__/object_detection_dataset.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Sargunan/Table-Detection-using-Deep-learning/6471be4b0ad9abb3c2d439f6e33a27ccf6db8408/luminoth/datasets/__pycache__/object_detection_dataset.cpython-36.pyc -------------------------------------------------------------------------------- /luminoth/datasets/base_dataset.py: -------------------------------------------------------------------------------- 1 | import os 2 | import tensorflow as tf 3 | import sonnet as snt 4 | 5 | from luminoth.datasets.exceptions import InvalidDataDirectory 6 | 7 | 8 | class BaseDataset(snt.AbstractModule): 9 | def __init__(self, config, **kwargs): 10 | super(BaseDataset, 
self).__init__(**kwargs) 11 | self._dataset_dir = config.dataset.dir 12 | self._num_epochs = config.train.num_epochs 13 | self._batch_size = config.train.batch_size 14 | self._split = config.dataset.split 15 | self._random_shuffle = config.train.random_shuffle 16 | self._seed = config.train.seed 17 | 18 | self._fixed_resize = ( 19 | 'fixed_height' in config.dataset.image_preprocessing and 20 | 'fixed_width' in config.dataset.image_preprocessing 21 | ) 22 | if self._fixed_resize: 23 | self._image_fixed_height = ( 24 | config.dataset.image_preprocessing.fixed_height 25 | ) 26 | self._image_fixed_width = ( 27 | config.dataset.image_preprocessing.fixed_width 28 | ) 29 | 30 | self._total_queue_ops = 20 31 | 32 | def _build(self): 33 | # Find split file from which we are going to read. 34 | split_path = os.path.join( 35 | self._dataset_dir, '{}.tfrecords'.format(self._split) 36 | ) 37 | if not tf.gfile.Exists(split_path): 38 | raise InvalidDataDirectory( 39 | '"{}" does not exist.'.format(split_path) 40 | ) 41 | # String input producer allows for a variable number of files to read 42 | # from. We just know we have a single file. 43 | filename_queue = tf.train.string_input_producer( 44 | [split_path], num_epochs=self._num_epochs, seed=self._seed 45 | ) 46 | 47 | # Define reader to parse records. 48 | reader = tf.TFRecordReader() 49 | _, raw_record = reader.read(filename_queue) 50 | 51 | values, dtypes, names = self.read_record(raw_record) 52 | 53 | if self._random_shuffle: 54 | queue = tf.RandomShuffleQueue( 55 | capacity=100, 56 | min_after_dequeue=0, 57 | dtypes=dtypes, 58 | names=names, 59 | name='tfrecord_random_queue', 60 | seed=self._seed 61 | ) 62 | else: 63 | queue = tf.FIFOQueue( 64 | capacity=100, 65 | dtypes=dtypes, 66 | names=names, 67 | name='tfrecord_fifo_queue' 68 | ) 69 | 70 | # Generate queueing ops for QueueRunner. 71 | enqueue_ops = [queue.enqueue(values)] * self._total_queue_ops 72 | self.queue_runner = tf.train.QueueRunner(queue, enqueue_ops) 73 | 74 | tf.train.add_queue_runner(self.queue_runner) 75 | 76 | return queue.dequeue() 77 | -------------------------------------------------------------------------------- /luminoth/datasets/datasets.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | 3 | from luminoth.datasets.object_detection_dataset import ObjectDetectionDataset 4 | 5 | DATASETS = { 6 | 'tfrecord': ObjectDetectionDataset, 7 | 'object_detection': ObjectDetectionDataset, 8 | } 9 | 10 | 11 | def get_dataset(dataset_type): 12 | dataset_type = dataset_type.lower() 13 | if dataset_type not in DATASETS: 14 | raise ValueError('"{}" is not a valid dataset_type' 15 | .format(dataset_type)) 16 | 17 | if dataset_type == 'tfrecord': 18 | tf.logging.warning( 19 | 'Dataset `tfrecord` is deprecated. Use `object_detection` instead.' 20 | ) 21 | 22 | return DATASETS[dataset_type] 23 | -------------------------------------------------------------------------------- /luminoth/datasets/exceptions.py: -------------------------------------------------------------------------------- 1 | 2 | class InvalidDataDirectory(Exception): 3 | """ 4 | Error raised when the chosen intput directory for the dataset is not valid. 
5 | """ 6 | -------------------------------------------------------------------------------- /luminoth/datasets/object_detection_dataset_test.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | import numpy as np 3 | 4 | from easydict import EasyDict 5 | 6 | from luminoth.datasets.object_detection_dataset import ObjectDetectionDataset 7 | 8 | 9 | class ObjectDetectionDatasetTest(tf.test.TestCase): 10 | def setUp(self): 11 | self.base_config = EasyDict({ 12 | 'dataset': { 13 | 'dir': '', 14 | 'split': 'train', 15 | 'image_preprocessing': { 16 | 'min_size': 600, 17 | 'max_size': 1024, 18 | }, 19 | 'data_augmentation': {}, 20 | }, 21 | 'train': { 22 | 'num_epochs': 1, 23 | 'batch_size': 1, 24 | 'random_shuffle': False, 25 | 'seed': None, 26 | } 27 | }) 28 | tf.reset_default_graph() 29 | 30 | def _run_augment(self, augment_config, image, bboxes): 31 | self.base_config['dataset']['data_augmentation'] = augment_config 32 | 33 | bboxes_tf = tf.placeholder(tf.int32, shape=bboxes.shape) 34 | image_tf = tf.placeholder(tf.int32, shape=image.shape) 35 | 36 | model = ObjectDetectionDataset(self.base_config) 37 | image_aug, bboxes_aug, applied_data_augmentation = model._augment( 38 | image_tf, bboxes_tf) 39 | 40 | with self.test_session() as sess: 41 | image_aug, bboxes_aug, applied_data_augmentation = sess.run( 42 | [image_aug, bboxes_aug, applied_data_augmentation], feed_dict={ 43 | bboxes_tf: bboxes, 44 | image_tf: image, 45 | }) 46 | return image_aug, bboxes_aug, applied_data_augmentation 47 | 48 | def testSortedAugmentation(self): 49 | """ 50 | Tests that the augmentation is applied in order 51 | """ 52 | image = np.random.randint(low=0, high=255, size=(600, 800, 3)) 53 | bboxes = np.array([ 54 | [10, 10, 26, 28, 1], 55 | [10, 10, 20, 22, 1], 56 | [10, 11, 20, 21, 1], 57 | [19, 30, 31, 33, 1], 58 | ]) 59 | config = [{'flip': {'prob': 0}}, {'flip': {'prob': 1}}] 60 | 61 | image_aug, bboxes_aug, aug = self._run_augment(config, image, bboxes) 62 | self.assertEqual(aug[0], {'flip': False}) 63 | self.assertEqual(aug[1], {'flip': True}) 64 | 65 | config = [{'flip': {'prob': 1}}, {'flip': {'prob': 0}}] 66 | 67 | image_aug, bboxes_aug, aug = self._run_augment(config, image, bboxes) 68 | self.assertEqual(aug[0], {'flip': True}) 69 | self.assertEqual(aug[1], {'flip': False}) 70 | 71 | def testIdentityAugmentation(self): 72 | """ 73 | Tests that to apply flip twice to an image and bboxes returns the same 74 | image and bboxes 75 | """ 76 | image = np.random.randint(low=0, high=255, size=(600, 800, 3)) 77 | bboxes = np.array([ 78 | [10, 10, 26, 28, 1], 79 | [19, 30, 31, 33, 1], 80 | ]) 81 | config = [{'flip': {'prob': 1}}, {'flip': {'prob': 1}}] 82 | 83 | image_aug, bboxes_aug, aug = self._run_augment(config, image, bboxes) 84 | self.assertEqual(aug[0], {'flip': True}) 85 | self.assertEqual(aug[1], {'flip': True}) 86 | 87 | self.assertAllEqual(image, image_aug) 88 | self.assertAllEqual(bboxes, bboxes_aug) 89 | 90 | 91 | if __name__ == '__main__': 92 | tf.test.main() 93 | -------------------------------------------------------------------------------- /luminoth/io.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import os 3 | 4 | from PIL import Image 5 | 6 | 7 | def read_image(path): 8 | """Reads an image located at `path` into an array. 9 | 10 | Arguments: 11 | path (str): Path to a valid image file in the filesystem. 
12 | 13 | Returns: 14 | `numpy.ndarray` of size `(height, width, channels)`. 15 | """ 16 | full_path = os.path.expanduser(path) 17 | return np.array(Image.open(full_path).convert('RGB')) 18 | -------------------------------------------------------------------------------- /luminoth/models/__init__.py: -------------------------------------------------------------------------------- 1 | from .models import get_model # noqa 2 | -------------------------------------------------------------------------------- /luminoth/models/__pycache__/__init__.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Sargunan/Table-Detection-using-Deep-learning/6471be4b0ad9abb3c2d439f6e33a27ccf6db8408/luminoth/models/__pycache__/__init__.cpython-36.pyc -------------------------------------------------------------------------------- /luminoth/models/__pycache__/models.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Sargunan/Table-Detection-using-Deep-learning/6471be4b0ad9abb3c2d439f6e33a27ccf6db8408/luminoth/models/__pycache__/models.cpython-36.pyc -------------------------------------------------------------------------------- /luminoth/models/base/__init__.py: -------------------------------------------------------------------------------- 1 | from .base_network import BaseNetwork # noqa 2 | from .truncated_base_network import TruncatedBaseNetwork # noqa 3 | -------------------------------------------------------------------------------- /luminoth/models/base/__pycache__/__init__.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Sargunan/Table-Detection-using-Deep-learning/6471be4b0ad9abb3c2d439f6e33a27ccf6db8408/luminoth/models/base/__pycache__/__init__.cpython-36.pyc -------------------------------------------------------------------------------- /luminoth/models/base/__pycache__/base_network.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Sargunan/Table-Detection-using-Deep-learning/6471be4b0ad9abb3c2d439f6e33a27ccf6db8408/luminoth/models/base/__pycache__/base_network.cpython-36.pyc -------------------------------------------------------------------------------- /luminoth/models/base/__pycache__/truncated_base_network.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Sargunan/Table-Detection-using-Deep-learning/6471be4b0ad9abb3c2d439f6e33a27ccf6db8408/luminoth/models/base/__pycache__/truncated_base_network.cpython-36.pyc -------------------------------------------------------------------------------- /luminoth/models/base/__pycache__/truncated_vgg.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Sargunan/Table-Detection-using-Deep-learning/6471be4b0ad9abb3c2d439f6e33a27ccf6db8408/luminoth/models/base/__pycache__/truncated_vgg.cpython-36.pyc -------------------------------------------------------------------------------- /luminoth/models/base/base_network_test.py: -------------------------------------------------------------------------------- 1 | import gc 2 | import easydict 3 | import numpy as np 4 | import tensorflow as tf 5 | 6 | from luminoth.models.base.base_network import ( 7 | BaseNetwork, _R_MEAN, _G_MEAN, _B_MEAN, 
VALID_ARCHITECTURES 8 | ) 9 | 10 | 11 | class BaseNetworkTest(tf.test.TestCase): 12 | 13 | def setUp(self): 14 | self.config = easydict.EasyDict({ 15 | 'architecture': 'vgg_16', 16 | }) 17 | tf.reset_default_graph() 18 | 19 | def testDefaultImageSize(self): 20 | m = BaseNetwork(easydict.EasyDict({'architecture': 'vgg_16'})) 21 | self.assertEqual(m.default_image_size, 224) 22 | 23 | m = BaseNetwork(easydict.EasyDict({'architecture': 'resnet_v1_50'})) 24 | self.assertEqual(m.default_image_size, 224) 25 | 26 | def testSubtractChannels(self): 27 | m = BaseNetwork(self.config) 28 | inputs = tf.placeholder(tf.float32, [1, 2, 2, 3]) 29 | subtracted_inputs = m._subtract_channels(inputs) 30 | # White image 31 | r = 255. - _R_MEAN 32 | g = 255. - _G_MEAN 33 | b = 255. - _B_MEAN 34 | with self.test_session() as sess: 35 | res = sess.run(subtracted_inputs, feed_dict={ 36 | inputs: np.ones([1, 2, 2, 3]) * 255 37 | }) 38 | # Assert close and not equals because of floating point 39 | # differences between TF and numpy 40 | self.assertAllClose( 41 | res, 42 | # numpy broadcast multiplication 43 | np.ones([1, 2, 2, 3]) * [r, g, b] 44 | ) 45 | 46 | def testAllArchitectures(self): 47 | for architecture in VALID_ARCHITECTURES: 48 | self.config.architecture = architecture 49 | m = BaseNetwork(self.config) 50 | inputs = tf.placeholder(tf.float32, [1, None, None, 3]) 51 | # Should not fail. 52 | m(inputs) 53 | # Free up memory for Travis 54 | tf.reset_default_graph() 55 | gc.collect(generation=2) 56 | 57 | def testTrainableVariables(self): 58 | inputs = tf.placeholder(tf.float32, [1, 224, 224, 3]) 59 | 60 | model = BaseNetwork(easydict.EasyDict({'architecture': 'vgg_16'})) 61 | model(inputs) 62 | # Variables in VGG16: 63 | # 0 conv1/conv1_1/weights:0 64 | # 1 conv1/conv1_1/biases:0 65 | # (...) 
66 | # 30 fc8/weights:0 67 | # 31 fc8/biases:0 68 | 69 | self.assertEqual(len(model.get_trainable_vars()), 32) 70 | 71 | model = BaseNetwork( 72 | easydict.EasyDict( 73 | {'architecture': 'vgg_16', 'fine_tune_from': 'conv5/conv5_3'} 74 | ) 75 | ) 76 | model(inputs) 77 | # Variables from `conv5/conv5_3` to the end: 78 | # conv5/conv5_3/weights:0 79 | # conv5/conv5_3/biases:0 80 | # fc6/weights:0 81 | # fc6/biases:0 82 | # fc7/weights:0 83 | # fc7/biases:0 84 | # fc8/weights:0 85 | # fc8/biases:0 86 | self.assertEqual(len(model.get_trainable_vars()), 8) 87 | 88 | # 89 | # Check invalid fine_tune_from raises proper exception 90 | # 91 | model = BaseNetwork( 92 | easydict.EasyDict( 93 | {'architecture': 'vgg_16', 'fine_tune_from': 'conv5/conv99'} 94 | ) 95 | ) 96 | model(inputs) 97 | with self.assertRaises(ValueError): 98 | model.get_trainable_vars() 99 | 100 | 101 | if __name__ == '__main__': 102 | tf.test.main() 103 | -------------------------------------------------------------------------------- /luminoth/models/base/truncated_base_network_test.py: -------------------------------------------------------------------------------- 1 | import easydict 2 | import tensorflow as tf 3 | import gc 4 | 5 | from luminoth.models.base.truncated_base_network import ( 6 | TruncatedBaseNetwork, DEFAULT_ENDPOINTS 7 | ) 8 | 9 | 10 | class TruncatedBaseNetworkTest(tf.test.TestCase): 11 | 12 | def setUp(self): 13 | self.config = easydict.EasyDict({ 14 | 'architecture': None, 15 | 'endpoint': None, 16 | 'freeze_tail': False, 17 | 'use_tail': True, 18 | 'output_stride': 16, 19 | }) 20 | tf.reset_default_graph() 21 | 22 | def testAllArchitectures(self): 23 | for architecture, endpoint in DEFAULT_ENDPOINTS.items(): 24 | self.config.architecture = architecture 25 | self.config.endpoint = endpoint 26 | model = TruncatedBaseNetwork(self.config) 27 | image = tf.placeholder(tf.float32, [1, 320, 320, 3]) 28 | # This should not fail. 29 | out = model(image) 30 | self.assertEqual(out.get_shape()[:3], (1, 20, 20)) 31 | 32 | # Free up memory for travis 33 | tf.reset_default_graph() 34 | gc.collect(generation=2) 35 | 36 | # TODO: This test fails in Travis because of OOM error. 37 | # def testVGG16Output(self): 38 | # self.config.architecture = 'vgg_16' 39 | # self.config.endpoint = None 40 | # model = TruncatedBaseNetwork(self.config) 41 | 42 | # batch_image_placeholder = tf.placeholder( 43 | # tf.float32, shape=[1, None, None, 3]) 44 | # feature_map_tensor = model(batch_image_placeholder) 45 | 46 | # with self.test_session() as sess: 47 | # # As in the case of a real session we need to initialize 48 | # # variables. 
49 | # sess.run(tf.global_variables_initializer()) 50 | # width = 192 51 | # height = 192 52 | # feature_map = sess.run(feature_map_tensor, feed_dict={ 53 | # batch_image_placeholder: np.random.rand(1, width, height, 3) 54 | # }) 55 | # # with width and height between 200 and 200 we should have this 56 | # # output 57 | # self.assertEqual( 58 | # feature_map.shape, (1, width / 16, height / 16, 512) 59 | # ) 60 | 61 | def testTrainableVariables(self): 62 | inputs = tf.placeholder(tf.float32, [1, 224, 224, 3]) 63 | 64 | model = TruncatedBaseNetwork( 65 | easydict.EasyDict({ 66 | 'architecture': 'resnet_v1_50', 67 | 'endpoint': 'block4/unit_3/bottleneck_v1/conv2', 68 | 'freeze_tail': False, 69 | 'use_tail': True, 70 | }) 71 | ) 72 | model(inputs) 73 | # Variables in ResNet-50: 74 | # (the order of beta and gamma depends on the TensorFlow's version) 75 | # 0 conv1/weights:0 76 | # 1 conv1/BatchNorm/(beta|gamma):0 77 | # 2 conv1/BatchNorm/(beta|gamma):0 78 | # 3 block1/unit_1/bottleneck_v1/shortcut/weights:0 79 | # (...) 80 | # 153 block4/unit_3/bottleneck_v1/conv2/weights:0 81 | # 154 block4/unit_3/bottleneck_v1/conv2/BatchNorm/(beta|gamma):0 82 | # 155 block4/unit_3/bottleneck_v1/conv2/BatchNorm/(beta|gamma):0 83 | # --- endpoint --- 84 | # 156 block4/unit_3/bottleneck_v1/conv3/weights:0 85 | # 157 block4/unit_3/bottleneck_v1/conv3/BatchNorm/(beta|gamma):0 86 | # 158 block4/unit_3/bottleneck_v1/conv3/BatchNorm/(beta|gamma):0 87 | # 159 logits/weights:0 88 | # 160 logits/biases:0 89 | trainable_vars = model.get_trainable_vars() 90 | self.assertEqual(len(trainable_vars), 156) 91 | self.assertEqual( 92 | trainable_vars[-3].name, 93 | 'truncated_base_network/resnet_v1_50/' + 94 | 'block4/unit_3/bottleneck_v1/conv2/weights:0' 95 | ) 96 | 97 | model = TruncatedBaseNetwork( 98 | easydict.EasyDict({ 99 | 'architecture': 'resnet_v1_50', 100 | 'endpoint': 'block4/unit_2/bottleneck_v1/conv2', 101 | 'fine_tune_from': 'block4/unit_2/bottleneck_v1/conv1', 102 | 'freeze_tail': False, 103 | 'use_tail': True, 104 | }) 105 | ) 106 | model(inputs) 107 | trainable_vars = model.get_trainable_vars() 108 | # Now there should be only 6 trainable vars: 109 | # 141 block4/unit_2/bottleneck_v1/conv1/weights:0 110 | # 142 block4/unit_2/bottleneck_v1/conv1/BatchNorm/beta:0 111 | # 143 block4/unit_2/bottleneck_v1/conv1/BatchNorm/gamma:0 112 | # 144 block4/unit_2/bottleneck_v1/conv2/weights:0 113 | # 145 block4/unit_2/bottleneck_v1/conv2/BatchNorm/beta:0 114 | # 146 block4/unit_2/bottleneck_v1/conv2/BatchNorm/gamma:0 115 | self.assertEqual(len(trainable_vars), 6) 116 | 117 | # 118 | # Check that we return no vars if fine_tune_from is after the chosen 119 | # endpoint (there is nothing to fine-tune!) and tail is frozen. 120 | # 121 | model = TruncatedBaseNetwork( 122 | easydict.EasyDict( 123 | { 124 | 'architecture': 'resnet_v1_50', 125 | 'endpoint': 'block4/unit_2/bottleneck_v1/conv1', 126 | 'fine_tune_from': 'block4/unit_2/bottleneck_v1/conv2', 127 | 'freeze_tail': True, 128 | 'use_tail': True, 129 | } 130 | ) 131 | ) 132 | model(inputs) 133 | self.assertEqual(len(model.get_trainable_vars()), 0) 134 | 135 | 136 | if __name__ == '__main__': 137 | tf.test.main() 138 | -------------------------------------------------------------------------------- /luminoth/models/base/truncated_vgg.py: -------------------------------------------------------------------------------- 1 | # Copyright 2016 The TensorFlow Authors. All Rights Reserved. 
2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | 16 | # ============================================================================== 17 | # Taken from tensorboard repository: 18 | # https://github.com/tensorflow/tensorflow/blob/master/tensorflow/contrib/slim/python/slim/nets/vgg.py 19 | # Modified to remove the fully connected layers from vgg16. 20 | # ============================================================================== 21 | """Contains model definitions for versions of the Oxford VGG network. 22 | 23 | These model definitions were introduced in the following technical report: 24 | 25 | Very Deep Convolutional Networks For Large-Scale Image Recognition 26 | Karen Simonyan and Andrew Zisserman 27 | arXiv technical report, 2015 28 | PDF: http://arxiv.org/pdf/1409.1556.pdf 29 | ILSVRC 2014 Slides: http://www.robots.ox.ac.uk/~karen/pdf/ILSVRC_2014.pdf 30 | CC-BY-4.0 31 | 32 | More information can be obtained from the VGG website: 33 | www.robots.ox.ac.uk/~vgg/research/very_deep/ 34 | 35 | Usage: 36 | with slim.arg_scope(vgg.vgg_arg_scope()): 37 | outputs, end_points = vgg.vgg_a(inputs) 38 | 39 | with slim.arg_scope(vgg.vgg_arg_scope()): 40 | outputs, end_points = vgg.vgg_16(inputs) 41 | 42 | @@vgg_a 43 | @@vgg_16 44 | """ 45 | 46 | from __future__ import absolute_import 47 | from __future__ import division 48 | from __future__ import print_function 49 | 50 | from tensorflow.contrib import layers 51 | from tensorflow.contrib.framework.python.ops import arg_scope 52 | from tensorflow.contrib.layers.python.layers import layers as layers_lib 53 | from tensorflow.contrib.layers.python.layers import regularizers 54 | from tensorflow.contrib.layers.python.layers import utils 55 | from tensorflow.python.ops import init_ops 56 | from tensorflow.python.ops import nn_ops 57 | from tensorflow.python.ops import variable_scope 58 | 59 | 60 | def vgg_arg_scope(weight_decay=0.0005): 61 | """Defines the VGG arg scope. 62 | 63 | Args: 64 | weight_decay: The l2 regularization coefficient. 65 | 66 | Returns: 67 | An arg_scope. 68 | """ 69 | with arg_scope( 70 | [layers.conv2d, layers_lib.fully_connected], 71 | activation_fn=nn_ops.relu, 72 | weights_regularizer=regularizers.l2_regularizer(weight_decay), 73 | biases_initializer=init_ops.zeros_initializer() 74 | ): 75 | with arg_scope([layers.conv2d], padding='SAME') as arg_sc: 76 | return arg_sc 77 | 78 | 79 | def truncated_vgg_16(inputs, is_training=True, scope='vgg_16'): 80 | """Oxford Net VGG 16-Layers version D Example. 81 | 82 | For use in SSD object detection network, which has this particular 83 | truncated version of VGG16 detailed in its paper. 84 | 85 | Args: 86 | inputs: a tensor of size [batch_size, height, width, channels]. 87 | scope: Optional scope for the variables. 88 | 89 | Returns: 90 | the last op containing the conv5 tensor and end_points dict. 
91 | """ 92 | with variable_scope.variable_scope(scope, 'vgg_16', [inputs]) as sc: 93 | end_points_collection = sc.original_name_scope + '_end_points' 94 | # Collect outputs for conv2d, fully_connected and max_pool2d. 95 | with arg_scope( 96 | [layers.conv2d, layers_lib.fully_connected, layers_lib.max_pool2d], 97 | outputs_collections=end_points_collection 98 | ): 99 | net = layers_lib.repeat( 100 | inputs, 2, layers.conv2d, 64, [3, 3], scope='conv1') 101 | net = layers_lib.max_pool2d(net, [2, 2], scope='pool1') 102 | net = layers_lib.repeat( 103 | net, 2, layers.conv2d, 128, [3, 3], scope='conv2' 104 | ) 105 | net = layers_lib.max_pool2d(net, [2, 2], scope='pool2') 106 | net = layers_lib.repeat( 107 | net, 3, layers.conv2d, 256, [3, 3], scope='conv3' 108 | ) 109 | net = layers_lib.max_pool2d(net, [2, 2], scope='pool3') 110 | net = layers_lib.repeat( 111 | net, 3, layers.conv2d, 512, [3, 3], scope='conv4' 112 | ) 113 | net = layers_lib.max_pool2d(net, [2, 2], scope='pool4') 114 | net = layers_lib.repeat( 115 | net, 3, layers.conv2d, 512, [3, 3], scope='conv5' 116 | ) 117 | # Convert end_points_collection into a end_point dict. 118 | end_points = utils.convert_collection_to_dict( 119 | end_points_collection 120 | ) 121 | return net, end_points 122 | 123 | 124 | truncated_vgg_16.default_image_size = 224 125 | -------------------------------------------------------------------------------- /luminoth/models/fasterrcnn/__init__.py: -------------------------------------------------------------------------------- 1 | from .fasterrcnn import FasterRCNN # noqa 2 | -------------------------------------------------------------------------------- /luminoth/models/fasterrcnn/__pycache__/__init__.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Sargunan/Table-Detection-using-Deep-learning/6471be4b0ad9abb3c2d439f6e33a27ccf6db8408/luminoth/models/fasterrcnn/__pycache__/__init__.cpython-36.pyc -------------------------------------------------------------------------------- /luminoth/models/fasterrcnn/__pycache__/fasterrcnn.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Sargunan/Table-Detection-using-Deep-learning/6471be4b0ad9abb3c2d439f6e33a27ccf6db8408/luminoth/models/fasterrcnn/__pycache__/fasterrcnn.cpython-36.pyc -------------------------------------------------------------------------------- /luminoth/models/fasterrcnn/__pycache__/rcnn.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Sargunan/Table-Detection-using-Deep-learning/6471be4b0ad9abb3c2d439f6e33a27ccf6db8408/luminoth/models/fasterrcnn/__pycache__/rcnn.cpython-36.pyc -------------------------------------------------------------------------------- /luminoth/models/fasterrcnn/__pycache__/rcnn_proposal.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Sargunan/Table-Detection-using-Deep-learning/6471be4b0ad9abb3c2d439f6e33a27ccf6db8408/luminoth/models/fasterrcnn/__pycache__/rcnn_proposal.cpython-36.pyc -------------------------------------------------------------------------------- /luminoth/models/fasterrcnn/__pycache__/rcnn_target.cpython-36.pyc: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/Sargunan/Table-Detection-using-Deep-learning/6471be4b0ad9abb3c2d439f6e33a27ccf6db8408/luminoth/models/fasterrcnn/__pycache__/rcnn_target.cpython-36.pyc -------------------------------------------------------------------------------- /luminoth/models/fasterrcnn/__pycache__/roi_pool.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Sargunan/Table-Detection-using-Deep-learning/6471be4b0ad9abb3c2d439f6e33a27ccf6db8408/luminoth/models/fasterrcnn/__pycache__/roi_pool.cpython-36.pyc -------------------------------------------------------------------------------- /luminoth/models/fasterrcnn/__pycache__/rpn.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Sargunan/Table-Detection-using-Deep-learning/6471be4b0ad9abb3c2d439f6e33a27ccf6db8408/luminoth/models/fasterrcnn/__pycache__/rpn.cpython-36.pyc -------------------------------------------------------------------------------- /luminoth/models/fasterrcnn/__pycache__/rpn_proposal.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Sargunan/Table-Detection-using-Deep-learning/6471be4b0ad9abb3c2d439f6e33a27ccf6db8408/luminoth/models/fasterrcnn/__pycache__/rpn_proposal.cpython-36.pyc -------------------------------------------------------------------------------- /luminoth/models/fasterrcnn/__pycache__/rpn_target.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Sargunan/Table-Detection-using-Deep-learning/6471be4b0ad9abb3c2d439f6e33a27ccf6db8408/luminoth/models/fasterrcnn/__pycache__/rpn_target.cpython-36.pyc -------------------------------------------------------------------------------- /luminoth/models/fasterrcnn/roi_pool.py: -------------------------------------------------------------------------------- 1 | import sonnet as snt 2 | import tensorflow as tf 3 | 4 | # Types of RoI "pooling" 5 | CROP = 'crop' 6 | ROI_POOLING = 'roi_pooling' 7 | 8 | 9 | class ROIPoolingLayer(snt.AbstractModule): 10 | """ROIPoolingLayer applies ROI Pooling (or tf.crop_and_resize). 11 | 12 | RoI pooling or RoI extraction is used to extract fixed size features from a 13 | variable sized feature map using variable sized bounding boxes. Since we 14 | have proposals of different shapes and sizes, we need a way to transform 15 | them into a fixed size Tensor for using FC layers. 16 | 17 | There are two basic ways to do this. The original one, from the Faster R-CNN 18 | paper, is RoI Pooling, which, as the name suggests, max-pools directly from 19 | the region of interest, or proposal, into a fixed size Tensor. 20 | 21 | The alternative way uses TensorFlow's image utility operation 22 | `crop_and_resize`, which first crops a Tensor using a normalized proposal, 23 | and then applies extrapolation to resize it to the desired size, 24 | generating a fixed size Tensor. 25 | 26 | Since there isn't a standard, supported implementation of RoI Pooling, we apply the 27 | easier but still proven alternative way.
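    A minimal, self-contained sketch of the same trick, outside this class (the
    sizes and placeholder names below are illustrative only, not part of the
    actual API)::

        import tensorflow as tf

        feature_map = tf.placeholder(tf.float32, [1, None, None, 256])
        boxes = tf.placeholder(tf.float32, [None, 4])  # normalized (y1, x1, y2, x2)
        # All proposals come from the single image in the batch.
        box_ind = tf.zeros_like(boxes[:, 0], dtype=tf.int32)
        # Crop to twice the desired 7x7 output...
        crops = tf.image.crop_and_resize(
            feature_map, boxes, box_ind, crop_size=[14, 14])
        # ...then max-pool with a 2x2 kernel to get the final 7x7 features.
        pooled = tf.nn.max_pool(
            crops, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='VALID')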
28 | """ 29 | def __init__(self, config, debug=False, name='roi_pooling'): 30 | super(ROIPoolingLayer, self).__init__(name=name) 31 | self._pooling_mode = config.pooling_mode.lower() 32 | self._pooled_width = config.pooled_width 33 | self._pooled_height = config.pooled_height 34 | self._pooled_padding = config.padding 35 | self._debug = debug 36 | 37 | def _get_bboxes(self, roi_proposals, im_shape): 38 | """ 39 | Gets normalized coordinates for RoIs (between 0 and 1 for cropping) 40 | in TensorFlow's order (y1, x1, y2, x2). 41 | 42 | Args: 43 | roi_proposals: A Tensor with the bounding boxes of shape 44 | (total_proposals, 5), where the values for each proposal are 45 | (x_min, y_min, x_max, y_max). 46 | im_shape: A Tensor with the shape of the image (height, width). 47 | 48 | Returns: 49 | bboxes: A Tensor with normalized bounding boxes in TensorFlow's 50 | format order. Its should is (total_proposals, 4). 51 | """ 52 | with tf.name_scope('get_bboxes'): 53 | im_shape = tf.cast(im_shape, tf.float32) 54 | 55 | x1, y1, x2, y2 = tf.unstack( 56 | roi_proposals, axis=1 57 | ) 58 | 59 | x1 = x1 / im_shape[1] 60 | y1 = y1 / im_shape[0] 61 | x2 = x2 / im_shape[1] 62 | y2 = y2 / im_shape[0] 63 | 64 | bboxes = tf.stack([y1, x1, y2, x2], axis=1) 65 | 66 | return bboxes 67 | 68 | def _roi_crop(self, roi_proposals, conv_feature_map, im_shape): 69 | # Get normalized bounding boxes. 70 | bboxes = self._get_bboxes(roi_proposals, im_shape) 71 | # Generate fake batch ids 72 | bboxes_shape = tf.shape(bboxes) 73 | batch_ids = tf.zeros((bboxes_shape[0], ), dtype=tf.int32) 74 | # Apply crop and resize with extracting a crop double the desired size. 75 | crops = tf.image.crop_and_resize( 76 | conv_feature_map, bboxes, batch_ids, 77 | [self._pooled_width * 2, self._pooled_height * 2], name="crops" 78 | ) 79 | 80 | # Applies max pool with [2,2] kernel to reduce the crops to half the 81 | # size, and thus having the desired output. 
82 | prediction_dict = { 83 | 'roi_pool': tf.nn.max_pool( 84 | crops, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], 85 | padding=self._pooled_padding 86 | ), 87 | } 88 | 89 | if self._debug: 90 | prediction_dict['bboxes'] = bboxes 91 | prediction_dict['crops'] = crops 92 | prediction_dict['batch_ids'] = batch_ids 93 | prediction_dict['conv_feature_map'] = conv_feature_map 94 | 95 | return prediction_dict 96 | 97 | def _roi_pooling(self, roi_proposals, conv_feature_map, im_shape): 98 | raise NotImplementedError() 99 | 100 | def _build(self, roi_proposals, conv_feature_map, im_shape): 101 | if self._pooling_mode == CROP: 102 | return self._roi_crop(roi_proposals, conv_feature_map, im_shape) 103 | elif self._pooling_mode == ROI_POOLING: 104 | return self._roi_pooling(roi_proposals, conv_feature_map, im_shape) 105 | else: 106 | raise NotImplementedError( 107 | 'Pooling mode {} does not exist.'.format(self._pooling_mode)) 108 | -------------------------------------------------------------------------------- /luminoth/models/models.py: -------------------------------------------------------------------------------- 1 | from luminoth.models.fasterrcnn import FasterRCNN 2 | from luminoth.models.ssd import SSD 3 | 4 | 5 | # TODO: More models :) 6 | MODELS = { 7 | 'fasterrcnn': FasterRCNN, 8 | 'ssd': SSD 9 | } 10 | 11 | 12 | def get_model(model_type): 13 | model_type = model_type.lower() 14 | if model_type not in MODELS: 15 | raise ValueError('"{}" is not a valid model_type'.format(model_type)) 16 | 17 | return MODELS[model_type] 18 | -------------------------------------------------------------------------------- /luminoth/models/ssd/__init__.py: -------------------------------------------------------------------------------- 1 | from .ssd import SSD # noqa 2 | -------------------------------------------------------------------------------- /luminoth/models/ssd/__pycache__/__init__.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Sargunan/Table-Detection-using-Deep-learning/6471be4b0ad9abb3c2d439f6e33a27ccf6db8408/luminoth/models/ssd/__pycache__/__init__.cpython-36.pyc -------------------------------------------------------------------------------- /luminoth/models/ssd/__pycache__/feature_extractor.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Sargunan/Table-Detection-using-Deep-learning/6471be4b0ad9abb3c2d439f6e33a27ccf6db8408/luminoth/models/ssd/__pycache__/feature_extractor.cpython-36.pyc -------------------------------------------------------------------------------- /luminoth/models/ssd/__pycache__/proposal.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Sargunan/Table-Detection-using-Deep-learning/6471be4b0ad9abb3c2d439f6e33a27ccf6db8408/luminoth/models/ssd/__pycache__/proposal.cpython-36.pyc -------------------------------------------------------------------------------- /luminoth/models/ssd/__pycache__/ssd.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Sargunan/Table-Detection-using-Deep-learning/6471be4b0ad9abb3c2d439f6e33a27ccf6db8408/luminoth/models/ssd/__pycache__/ssd.cpython-36.pyc -------------------------------------------------------------------------------- /luminoth/models/ssd/__pycache__/target.cpython-36.pyc: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/Sargunan/Table-Detection-using-Deep-learning/6471be4b0ad9abb3c2d439f6e33a27ccf6db8408/luminoth/models/ssd/__pycache__/target.cpython-36.pyc -------------------------------------------------------------------------------- /luminoth/models/ssd/__pycache__/utils.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Sargunan/Table-Detection-using-Deep-learning/6471be4b0ad9abb3c2d439f6e33a27ccf6db8408/luminoth/models/ssd/__pycache__/utils.cpython-36.pyc -------------------------------------------------------------------------------- /luminoth/tools/__init__.py: -------------------------------------------------------------------------------- 1 | from .checkpoint import checkpoint # noqa 2 | from .cloud import cloud # noqa 3 | from .dataset import dataset # noqa 4 | from .server import server # noqa 5 | -------------------------------------------------------------------------------- /luminoth/tools/__pycache__/__init__.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Sargunan/Table-Detection-using-Deep-learning/6471be4b0ad9abb3c2d439f6e33a27ccf6db8408/luminoth/tools/__pycache__/__init__.cpython-36.pyc -------------------------------------------------------------------------------- /luminoth/tools/checkpoint/__pycache__/__init__.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Sargunan/Table-Detection-using-Deep-learning/6471be4b0ad9abb3c2d439f6e33a27ccf6db8408/luminoth/tools/checkpoint/__pycache__/__init__.cpython-36.pyc -------------------------------------------------------------------------------- /luminoth/tools/cloud/__init__.py: -------------------------------------------------------------------------------- 1 | from .cli import cloud # noqa 2 | -------------------------------------------------------------------------------- /luminoth/tools/cloud/__pycache__/__init__.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Sargunan/Table-Detection-using-Deep-learning/6471be4b0ad9abb3c2d439f6e33a27ccf6db8408/luminoth/tools/cloud/__pycache__/__init__.cpython-36.pyc -------------------------------------------------------------------------------- /luminoth/tools/cloud/__pycache__/cli.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Sargunan/Table-Detection-using-Deep-learning/6471be4b0ad9abb3c2d439f6e33a27ccf6db8408/luminoth/tools/cloud/__pycache__/cli.cpython-36.pyc -------------------------------------------------------------------------------- /luminoth/tools/cloud/__pycache__/gcloud.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Sargunan/Table-Detection-using-Deep-learning/6471be4b0ad9abb3c2d439f6e33a27ccf6db8408/luminoth/tools/cloud/__pycache__/gcloud.cpython-36.pyc -------------------------------------------------------------------------------- /luminoth/tools/cloud/cli.py: -------------------------------------------------------------------------------- 1 | import click 2 | 3 | from .gcloud import gc 4 | 5 | 6 | @click.group(help='Groups of commands to train models in the cloud') 7 | def cloud(): 8 | 
pass 9 | 10 | 11 | cloud.add_command(gc) 12 | -------------------------------------------------------------------------------- /luminoth/tools/dataset/__init__.py: -------------------------------------------------------------------------------- 1 | from .cli import dataset # noqa 2 | from .readers import InvalidDataDirectory # noqa 3 | -------------------------------------------------------------------------------- /luminoth/tools/dataset/__pycache__/__init__.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Sargunan/Table-Detection-using-Deep-learning/6471be4b0ad9abb3c2d439f6e33a27ccf6db8408/luminoth/tools/dataset/__pycache__/__init__.cpython-36.pyc -------------------------------------------------------------------------------- /luminoth/tools/dataset/__pycache__/cli.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Sargunan/Table-Detection-using-Deep-learning/6471be4b0ad9abb3c2d439f6e33a27ccf6db8408/luminoth/tools/dataset/__pycache__/cli.cpython-36.pyc -------------------------------------------------------------------------------- /luminoth/tools/dataset/__pycache__/merge.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Sargunan/Table-Detection-using-Deep-learning/6471be4b0ad9abb3c2d439f6e33a27ccf6db8408/luminoth/tools/dataset/__pycache__/merge.cpython-36.pyc -------------------------------------------------------------------------------- /luminoth/tools/dataset/__pycache__/transform.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Sargunan/Table-Detection-using-Deep-learning/6471be4b0ad9abb3c2d439f6e33a27ccf6db8408/luminoth/tools/dataset/__pycache__/transform.cpython-36.pyc -------------------------------------------------------------------------------- /luminoth/tools/dataset/cli.py: -------------------------------------------------------------------------------- 1 | import click 2 | 3 | from .merge import merge 4 | from .transform import transform 5 | 6 | 7 | @click.group(help='Groups of commands to manage datasets') 8 | def dataset(): 9 | pass 10 | 11 | 12 | dataset.add_command(merge) 13 | dataset.add_command(transform) 14 | -------------------------------------------------------------------------------- /luminoth/tools/dataset/merge.py: -------------------------------------------------------------------------------- 1 | import click 2 | import tensorflow as tf 3 | 4 | 5 | @click.command() 6 | @click.argument('src', nargs=-1) 7 | @click.argument('dst', nargs=1) 8 | @click.option('--debug', is_flag=True, help='Set level logging to DEBUG.') 9 | def merge(src, dst, debug): 10 | """ 11 | Merges existing datasets into a single one. 
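    For example, to merge two split files into a single one (the file names
    are illustrative only)::

        $ lumi dataset merge train-part1.tfrecords train-part2.tfrecords train.tfrecords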
12 | """ 13 | 14 | if debug: 15 | tf.logging.set_verbosity(tf.logging.DEBUG) 16 | else: 17 | tf.logging.set_verbosity(tf.logging.INFO) 18 | 19 | tf.logging.info('Saving records to "{}"'.format(dst)) 20 | writer = tf.python_io.TFRecordWriter(dst) 21 | 22 | total_records = 0 23 | 24 | for src_file in src: 25 | total_src_records = 0 26 | for record in tf.python_io.tf_record_iterator(src_file): 27 | writer.write(record) 28 | total_src_records += 1 29 | total_records += 1 30 | 31 | tf.logging.info('Saved {} records from "{}"'.format( 32 | total_src_records, src_file)) 33 | 34 | tf.logging.info('Saved {} to "{}"'.format(total_records, dst)) 35 | 36 | writer.close() 37 | -------------------------------------------------------------------------------- /luminoth/tools/dataset/readers/__init__.py: -------------------------------------------------------------------------------- 1 | from .base_reader import BaseReader, InvalidDataDirectory # noqa 2 | from .object_detection import ObjectDetectionReader # noqa 3 | from .object_detection import ( 4 | COCOReader, CSVReader, FlatReader, ImageNetReader, OpenImagesReader, 5 | PascalVOCReader, TaggerineReader 6 | ) 7 | 8 | READERS = { 9 | 'coco': COCOReader, 10 | 'csv': CSVReader, 11 | 'flat': FlatReader, 12 | 'imagenet': ImageNetReader, 13 | 'openimages': OpenImagesReader, 14 | 'pascal': PascalVOCReader, 15 | 'taggerine': TaggerineReader, 16 | } 17 | 18 | 19 | def get_reader(reader): 20 | reader = reader.lower() 21 | if reader not in READERS: 22 | raise ValueError('"{}" is not a valid reader'.format(reader)) 23 | 24 | return READERS[reader] 25 | -------------------------------------------------------------------------------- /luminoth/tools/dataset/readers/__pycache__/__init__.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Sargunan/Table-Detection-using-Deep-learning/6471be4b0ad9abb3c2d439f6e33a27ccf6db8408/luminoth/tools/dataset/readers/__pycache__/__init__.cpython-36.pyc -------------------------------------------------------------------------------- /luminoth/tools/dataset/readers/__pycache__/base_reader.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Sargunan/Table-Detection-using-Deep-learning/6471be4b0ad9abb3c2d439f6e33a27ccf6db8408/luminoth/tools/dataset/readers/__pycache__/base_reader.cpython-36.pyc -------------------------------------------------------------------------------- /luminoth/tools/dataset/readers/base_reader.py: -------------------------------------------------------------------------------- 1 | import abc 2 | 3 | 4 | class InvalidDataDirectory(Exception): 5 | """ 6 | Error raised when the chosen intput directory for the dataset is not valid. 7 | """ 8 | 9 | 10 | class BaseReader(object): 11 | """Base reader for reading different types of data 12 | """ 13 | def __init__(self, **kwargs): 14 | super(BaseReader, self).__init__() 15 | 16 | @property 17 | @abc.abstractproperty 18 | def total(self): 19 | """Returns the total amount of records in the dataset. 20 | """ 21 | 22 | @abc.abstractmethod 23 | def iterate(self): 24 | """Iterates over the records in the dataset. 
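        Concrete readers yield plain dicts (e.g. `image_raw`, `gt_boxes`, `width`,
        `height`, `filename`, `depth`) which the dataset writers then serialize.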
25 | """ 26 | -------------------------------------------------------------------------------- /luminoth/tools/dataset/readers/object_detection/__init__.py: -------------------------------------------------------------------------------- 1 | from .object_detection_reader import ObjectDetectionReader # noqa 2 | 3 | from .coco import COCOReader # noqa 4 | from .csv_reader import CSVReader # noqa 5 | from .flat_reader import FlatReader # noqa 6 | from .imagenet import ImageNetReader # noqa 7 | from .openimages import OpenImagesReader # noqa 8 | from .pascalvoc import PascalVOCReader # noqa 9 | from .taggerine import TaggerineReader # noqa 10 | -------------------------------------------------------------------------------- /luminoth/tools/dataset/readers/object_detection/__pycache__/__init__.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Sargunan/Table-Detection-using-Deep-learning/6471be4b0ad9abb3c2d439f6e33a27ccf6db8408/luminoth/tools/dataset/readers/object_detection/__pycache__/__init__.cpython-36.pyc -------------------------------------------------------------------------------- /luminoth/tools/dataset/readers/object_detection/__pycache__/coco.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Sargunan/Table-Detection-using-Deep-learning/6471be4b0ad9abb3c2d439f6e33a27ccf6db8408/luminoth/tools/dataset/readers/object_detection/__pycache__/coco.cpython-36.pyc -------------------------------------------------------------------------------- /luminoth/tools/dataset/readers/object_detection/__pycache__/csv_reader.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Sargunan/Table-Detection-using-Deep-learning/6471be4b0ad9abb3c2d439f6e33a27ccf6db8408/luminoth/tools/dataset/readers/object_detection/__pycache__/csv_reader.cpython-36.pyc -------------------------------------------------------------------------------- /luminoth/tools/dataset/readers/object_detection/__pycache__/flat_reader.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Sargunan/Table-Detection-using-Deep-learning/6471be4b0ad9abb3c2d439f6e33a27ccf6db8408/luminoth/tools/dataset/readers/object_detection/__pycache__/flat_reader.cpython-36.pyc -------------------------------------------------------------------------------- /luminoth/tools/dataset/readers/object_detection/__pycache__/imagenet.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Sargunan/Table-Detection-using-Deep-learning/6471be4b0ad9abb3c2d439f6e33a27ccf6db8408/luminoth/tools/dataset/readers/object_detection/__pycache__/imagenet.cpython-36.pyc -------------------------------------------------------------------------------- /luminoth/tools/dataset/readers/object_detection/__pycache__/object_detection_reader.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Sargunan/Table-Detection-using-Deep-learning/6471be4b0ad9abb3c2d439f6e33a27ccf6db8408/luminoth/tools/dataset/readers/object_detection/__pycache__/object_detection_reader.cpython-36.pyc -------------------------------------------------------------------------------- 
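A minimal sketch of how the reader registry from `readers/__init__.py` above is typically driven from Python. The dataset path and split are illustrative, and the `classes` attribute is the one `transform.py` reads from the instantiated reader:

    from luminoth.tools.dataset.readers import get_reader

    ReaderClass = get_reader('pascal')                  # resolves to PascalVOCReader
    reader = ReaderClass('datasets/VOC2007', 'train')   # illustrative data_dir / split
    print(len(reader.classes), 'classes found')
    for record in reader.iterate():
        ...  # dicts with image_raw, gt_boxes, width, height, filename, depth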
/luminoth/tools/dataset/readers/object_detection/__pycache__/openimages.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Sargunan/Table-Detection-using-Deep-learning/6471be4b0ad9abb3c2d439f6e33a27ccf6db8408/luminoth/tools/dataset/readers/object_detection/__pycache__/openimages.cpython-36.pyc -------------------------------------------------------------------------------- /luminoth/tools/dataset/readers/object_detection/__pycache__/pascalvoc.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Sargunan/Table-Detection-using-Deep-learning/6471be4b0ad9abb3c2d439f6e33a27ccf6db8408/luminoth/tools/dataset/readers/object_detection/__pycache__/pascalvoc.cpython-36.pyc -------------------------------------------------------------------------------- /luminoth/tools/dataset/readers/object_detection/__pycache__/taggerine.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Sargunan/Table-Detection-using-Deep-learning/6471be4b0ad9abb3c2d439f6e33a27ccf6db8408/luminoth/tools/dataset/readers/object_detection/__pycache__/taggerine.cpython-36.pyc -------------------------------------------------------------------------------- /luminoth/tools/dataset/readers/object_detection/coco.py: -------------------------------------------------------------------------------- 1 | import json 2 | import os 3 | import tensorflow as tf 4 | 5 | from luminoth.tools.dataset.readers import InvalidDataDirectory 6 | from luminoth.tools.dataset.readers.object_detection import ( 7 | ObjectDetectionReader 8 | ) 9 | from luminoth.utils.dataset import read_image 10 | 11 | 12 | DEFAULT_YEAR = '2017' 13 | 14 | 15 | class COCOReader(ObjectDetectionReader): 16 | def __init__(self, data_dir, split, year=DEFAULT_YEAR, 17 | use_supercategory=False, **kwargs): 18 | super(COCOReader, self).__init__(**kwargs) 19 | self._data_dir = data_dir 20 | self._split = split 21 | self._year = year 22 | 23 | try: 24 | if self._split == 'train': 25 | tf.logging.debug('Loading annotation json (may take a while).') 26 | 27 | annotations_json = json.load( 28 | tf.gfile.Open(self._get_annotations_path()) 29 | ) 30 | except tf.errors.NotFoundError: 31 | raise InvalidDataDirectory( 32 | 'Could not find COCO annotations in path' 33 | ) 34 | 35 | self._total_records = len(annotations_json['images']) 36 | 37 | category_to_name = { 38 | c['id']: (c['supercategory'] if use_supercategory else c['name']) 39 | for c in annotations_json['categories'] 40 | } 41 | 42 | self._total_classes = sorted(set(category_to_name.values())) 43 | 44 | self._image_to_bboxes = {} 45 | for annotation in annotations_json['annotations']: 46 | image_id = annotation['image_id'] 47 | x, y, width, height = annotation['bbox'] 48 | 49 | # If the class is not in `classes`, it was filtered. 
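            # classes.index() then raises ValueError and the box is simply skipped.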
50 | try: 51 | annotation_class = self.classes.index( 52 | category_to_name[annotation['category_id']] 53 | ) 54 | except ValueError: 55 | continue 56 | 57 | self._image_to_bboxes.setdefault(image_id, []).append({ 58 | 'xmin': x, 59 | 'ymin': y, 60 | 'xmax': x + width, 61 | 'ymax': y + height, 62 | 'label': annotation_class, 63 | }) 64 | 65 | self._image_to_details = {} 66 | for image in annotations_json['images']: 67 | self._image_to_details[image['id']] = { 68 | 'file_name': image['file_name'], 69 | 'width': image['width'], 70 | 'height': image['height'], 71 | } 72 | 73 | del annotations_json 74 | 75 | self.yielded_records = 0 76 | self.errors = 0 77 | 78 | def get_total(self): 79 | return self._total_records 80 | 81 | def get_classes(self): 82 | return self._total_classes 83 | 84 | def iterate(self): 85 | for image_id, image_details in self._image_to_details.items(): 86 | 87 | if self._stop_iteration(): 88 | return 89 | 90 | filename = image_details['file_name'] 91 | width = image_details['width'] 92 | height = image_details['height'] 93 | 94 | gt_boxes = self._image_to_bboxes.get(image_id, []) 95 | if len(gt_boxes) == 0: 96 | continue 97 | 98 | if self._should_skip(image_id): 99 | continue 100 | 101 | # Read the image *after* checking whether any ground truth box is 102 | # present. 103 | try: 104 | image_path = self._get_image_path(filename) 105 | image = read_image(image_path) 106 | except tf.errors.NotFoundError: 107 | tf.logging.debug( 108 | 'Error reading image or annotation for "{}".'.format( 109 | image_id)) 110 | self.errors += 1 111 | continue 112 | 113 | record = { 114 | 'width': width, 115 | 'height': height, 116 | 'depth': 3, 117 | 'filename': filename, 118 | 'image_raw': image, 119 | 'gt_boxes': gt_boxes, 120 | } 121 | self._will_add_record(record) 122 | self.yielded_records += 1 123 | 124 | yield record 125 | 126 | def _get_annotations_path(self): 127 | filename = 'instances_{}{}.json'.format(self._split, self._year) 128 | base_dir = os.path.join(self._data_dir, filename) 129 | if tf.gfile.Exists(base_dir): 130 | return base_dir 131 | 132 | return os.path.join(self._data_dir, 'annotations', filename) 133 | 134 | def _get_image_path(self, image): 135 | return os.path.join( 136 | self._data_dir, 137 | '{}{}'.format(self._split, self._year), 138 | image 139 | ) 140 | -------------------------------------------------------------------------------- /luminoth/tools/dataset/readers/object_detection/pascalvoc.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | import tensorflow as tf 4 | 5 | from luminoth.tools.dataset.readers import InvalidDataDirectory 6 | from luminoth.tools.dataset.readers.object_detection import ( 7 | ObjectDetectionReader 8 | ) 9 | from luminoth.utils.dataset import read_xml, read_image 10 | 11 | 12 | class PascalVOCReader(ObjectDetectionReader): 13 | def __init__(self, data_dir, split, **kwargs): 14 | super(PascalVOCReader, self).__init__(**kwargs) 15 | self._data_dir = data_dir 16 | self._split = split 17 | self._labels_path = os.path.join(self._data_dir, 'ImageSets', 'Main') 18 | self._images_path = os.path.join(self._data_dir, 'JPEGImages') 19 | self._annots_path = os.path.join(self._data_dir, 'Annotations') 20 | 21 | self.yielded_records = 0 22 | self.errors = 0 23 | 24 | # Validate PascalVoc structure in `data_dir`. 
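        # i.e. the standard Pascal VOC layout: ImageSets/Main/, JPEGImages/ and Annotations/.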
25 | self._validate_structure() 26 | 27 | def get_total(self): 28 | return sum(1 for _ in self._get_record_names()) 29 | 30 | def get_classes(self): 31 | classes_set = set() 32 | for entry in tf.gfile.ListDirectory(self._labels_path): 33 | if "_" not in entry: 34 | continue 35 | class_name, _ = entry.split('_') 36 | classes_set.add(class_name) 37 | all_classes = list(sorted(classes_set)) 38 | return all_classes 39 | 40 | def _validate_structure(self): 41 | if not tf.gfile.Exists(self._data_dir): 42 | raise InvalidDataDirectory( 43 | '"{}" does not exist.'.format(self._data_dir) 44 | ) 45 | 46 | if not tf.gfile.Exists(self._labels_path): 47 | raise InvalidDataDirectory('Labels path is missing') 48 | 49 | if not tf.gfile.Exists(self._images_path): 50 | raise InvalidDataDirectory('Images path is missing') 51 | 52 | if not tf.gfile.Exists(self._annots_path): 53 | raise InvalidDataDirectory('Annotations path is missing') 54 | 55 | def _get_split_path(self): 56 | return os.path.join(self._labels_path, '{}.txt'.format(self._split)) 57 | 58 | def _get_record_names(self): 59 | split_path = self._get_split_path() 60 | 61 | if not tf.gfile.Exists(split_path): 62 | raise ValueError('"{}" not found.'.format(split_path)) 63 | 64 | with tf.gfile.GFile(split_path) as f: 65 | for line in f: 66 | yield line.strip() 67 | 68 | def _get_image_annotation(self, image_id): 69 | return os.path.join(self._annots_path, '{}.xml'.format(image_id)) 70 | 71 | def _get_image_path(self, image_id): 72 | return os.path.join(self._images_path, '{}.jpg'.format(image_id)) 73 | 74 | def iterate(self): 75 | for image_id in self._get_record_names(): 76 | if self._stop_iteration(): 77 | # Finish iteration. 78 | return 79 | 80 | if self._should_skip(image_id): 81 | continue 82 | 83 | try: 84 | annotation_path = self._get_image_annotation(image_id) 85 | image_path = self._get_image_path(image_id) 86 | 87 | # Read both the image and the annotation into memory. 
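                # Missing files surface as tf.errors.NotFoundError and are counted below.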
88 | annotation = read_xml(annotation_path) 89 | image = read_image(image_path) 90 | except tf.errors.NotFoundError: 91 | tf.logging.debug( 92 | 'Error reading image or annotation for "{}".'.format( 93 | image_id)) 94 | self.errors += 1 95 | continue 96 | 97 | gt_boxes = [] 98 | 99 | for b in annotation['object']: 100 | try: 101 | label_id = self.classes.index(b['name']) 102 | except ValueError: 103 | continue 104 | 105 | gt_boxes.append({ 106 | 'label': label_id, 107 | 'xmin': b['bndbox']['xmin'], 108 | 'ymin': b['bndbox']['ymin'], 109 | 'xmax': b['bndbox']['xmax'], 110 | 'ymax': b['bndbox']['ymax'], 111 | }) 112 | 113 | if len(gt_boxes) == 0: 114 | continue 115 | 116 | record = { 117 | 'width': annotation['size']['width'], 118 | 'height': annotation['size']['height'], 119 | 'depth': annotation['size']['depth'], 120 | 'filename': annotation['filename'], 121 | 'image_raw': image, 122 | 'gt_boxes': gt_boxes, 123 | } 124 | self._will_add_record(record) 125 | self.yielded_records += 1 126 | 127 | yield record 128 | -------------------------------------------------------------------------------- /luminoth/tools/dataset/transform.py: -------------------------------------------------------------------------------- 1 | import click 2 | import tensorflow as tf 3 | 4 | from luminoth.datasets.exceptions import InvalidDataDirectory 5 | from luminoth.utils.config import parse_override 6 | from .readers import get_reader, READERS 7 | from .writers import ObjectDetectionWriter 8 | 9 | 10 | @click.command() 11 | @click.option('dataset_reader', '--type', type=click.Choice(READERS.keys()), required=True) # noqa 12 | @click.option('--data-dir', required=True, help='Where to locate the original data.') # noqa 13 | @click.option('--output-dir', required=True, help='Where to save the transformed data.') # noqa 14 | @click.option('splits', '--split', required=True, multiple=True, help='The splits to transform (ie. train, test, val).') # noqa 15 | @click.option('--only-classes', help='Keep only examples of these classes. Comma separated list.') # noqa 16 | @click.option('--only-images', help='Create dataset with specific examples. Useful to test model if your model has the ability to overfit.') # noqa 17 | @click.option('--limit-examples', type=int, help='Limit the dataset to the first `N` examples.') # noqa 18 | @click.option('--class-examples', type=int, help='Finish when every class has at least `N` number of samples. This will be the attempted lower bound; more examples might be added or a class might finish with fewer samples depending on the dataset.') # noqa 19 | @click.option('overrides', '--override', '-o', multiple=True, help='Custom parameters for readers.') # noqa 20 | @click.option('--debug', is_flag=True, help='Set level logging to DEBUG.') 21 | def transform(dataset_reader, data_dir, output_dir, splits, only_classes, 22 | only_images, limit_examples, class_examples, overrides, debug): 23 | """ 24 | Prepares dataset for ingestion. 25 | 26 | Converts the dataset into different (one per split) TFRecords files. 27 | """ 28 | tf.logging.set_verbosity(tf.logging.INFO) 29 | if debug: 30 | tf.logging.set_verbosity(tf.logging.DEBUG) 31 | 32 | try: 33 | reader = get_reader(dataset_reader) 34 | except ValueError as e: 35 | tf.logging.error('Error getting reader: {}'.format(e)) 36 | return 37 | 38 | # All splits must have a consistent set of classes. 39 | classes = None 40 | 41 | reader_kwargs = parse_override(overrides) 42 | 43 | try: 44 | for split in splits: 45 | # Create instance of reader. 
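            # e.g. COCOReader(data_dir, 'train', ...) when the command is run with --type coco.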
46 | split_reader = reader( 47 | data_dir, split, 48 | only_classes=only_classes, only_images=only_images, 49 | limit_examples=limit_examples, class_examples=class_examples, 50 | **reader_kwargs 51 | ) 52 | 53 | if classes is None: 54 | # "Save" classes from the first split reader 55 | classes = split_reader.classes 56 | else: 57 | # Overwrite classes after first split for consistency. 58 | split_reader.classes = classes 59 | 60 | # We assume we are saving object detection objects, but it should 61 | # be easy to modify once we have different types of objects. 62 | writer = ObjectDetectionWriter(split_reader, output_dir, split) 63 | writer.save() 64 | 65 | tf.logging.info('Composition per class ({}):'.format(split)) 66 | for label, count in split_reader._per_class_counter.most_common(): 67 | tf.logging.info( 68 | '\t%s: %d', split_reader.pretty_name(label), count 69 | ) 70 | 71 | except InvalidDataDirectory as e: 72 | tf.logging.error('Error reading dataset: {}'.format(e)) 73 | -------------------------------------------------------------------------------- /luminoth/tools/dataset/writers/__init__.py: -------------------------------------------------------------------------------- 1 | from .base_writer import BaseWriter # noqa 2 | from .object_detection_writer import ObjectDetectionWriter # noqa -------------------------------------------------------------------------------- /luminoth/tools/dataset/writers/__pycache__/__init__.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Sargunan/Table-Detection-using-Deep-learning/6471be4b0ad9abb3c2d439f6e33a27ccf6db8408/luminoth/tools/dataset/writers/__pycache__/__init__.cpython-36.pyc -------------------------------------------------------------------------------- /luminoth/tools/dataset/writers/__pycache__/base_writer.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Sargunan/Table-Detection-using-Deep-learning/6471be4b0ad9abb3c2d439f6e33a27ccf6db8408/luminoth/tools/dataset/writers/__pycache__/base_writer.cpython-36.pyc -------------------------------------------------------------------------------- /luminoth/tools/dataset/writers/__pycache__/object_detection_writer.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Sargunan/Table-Detection-using-Deep-learning/6471be4b0ad9abb3c2d439f6e33a27ccf6db8408/luminoth/tools/dataset/writers/__pycache__/object_detection_writer.cpython-36.pyc -------------------------------------------------------------------------------- /luminoth/tools/dataset/writers/base_writer.py: -------------------------------------------------------------------------------- 1 | 2 | class BaseWriter(object): 3 | """BaseWriter for saving tfrecords. 
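    Concrete writers such as ObjectDetectionWriter (used by the transform command)
    implement the actual save() logic.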
4 | """ 5 | def __init__(self): 6 | super(BaseWriter, self).__init__() 7 | -------------------------------------------------------------------------------- /luminoth/tools/server/__init__.py: -------------------------------------------------------------------------------- 1 | from .cli import server # noqa 2 | -------------------------------------------------------------------------------- /luminoth/tools/server/__pycache__/__init__.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Sargunan/Table-Detection-using-Deep-learning/6471be4b0ad9abb3c2d439f6e33a27ccf6db8408/luminoth/tools/server/__pycache__/__init__.cpython-36.pyc -------------------------------------------------------------------------------- /luminoth/tools/server/__pycache__/cli.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Sargunan/Table-Detection-using-Deep-learning/6471be4b0ad9abb3c2d439f6e33a27ccf6db8408/luminoth/tools/server/__pycache__/cli.cpython-36.pyc -------------------------------------------------------------------------------- /luminoth/tools/server/__pycache__/web.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Sargunan/Table-Detection-using-Deep-learning/6471be4b0ad9abb3c2d439f6e33a27ccf6db8408/luminoth/tools/server/__pycache__/web.cpython-36.pyc -------------------------------------------------------------------------------- /luminoth/tools/server/cli.py: -------------------------------------------------------------------------------- 1 | import click 2 | 3 | from .web import web 4 | 5 | 6 | @click.group(help='Groups of commands to serve models') 7 | def server(): 8 | pass 9 | 10 | 11 | server.add_command(web) 12 | -------------------------------------------------------------------------------- /luminoth/tools/server/static/favicon.ico: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Sargunan/Table-Detection-using-Deep-learning/6471be4b0ad9abb3c2d439f6e33a27ccf6db8408/luminoth/tools/server/static/favicon.ico -------------------------------------------------------------------------------- /luminoth/tools/server/static/js/vendor/hextorgba.js: -------------------------------------------------------------------------------- 1 | // 2 | // From: https://raw.githubusercontent.com/misund/hex-to-rgba 3 | // 4 | ;(function() { 5 | const removeHash = hex => (hex.charAt(0) === '#' ? hex.slice(1) : hex) 6 | 7 | const parseHex = nakedHex => { 8 | const isShort = 3 === nakedHex.length || 4 === nakedHex.length 9 | 10 | const twoDigitHexR = isShort 11 | ? `${nakedHex.slice(0, 1)}${nakedHex.slice(0, 1)}` 12 | : nakedHex.slice(0, 2) 13 | const twoDigitHexG = isShort 14 | ? `${nakedHex.slice(1, 2)}${nakedHex.slice(1, 2)}` 15 | : nakedHex.slice(2, 4) 16 | const twoDigitHexB = isShort 17 | ? `${nakedHex.slice(2, 3)}${nakedHex.slice(2, 3)}` 18 | : nakedHex.slice(4, 6) 19 | const twoDigitHexA = 20 | (isShort 21 | ? 
`${nakedHex.slice(3, 4)}${nakedHex.slice(3, 4)}` 22 | : nakedHex.slice(6, 8)) || 'ff' 23 | 24 | // const numericA = +((parseInt(a, 16) / 255).toFixed(2)); 25 | 26 | return { 27 | r: twoDigitHexR, 28 | g: twoDigitHexG, 29 | b: twoDigitHexB, 30 | a: twoDigitHexA 31 | } 32 | } 33 | 34 | const hexToDecimal = hex => parseInt(hex, 16) 35 | 36 | const hexesToDecimals = ({ r, g, b, a }) => ({ 37 | r: hexToDecimal(r), 38 | g: hexToDecimal(g), 39 | b: hexToDecimal(b), 40 | a: +(hexToDecimal(a) / 255).toFixed(2) 41 | }) 42 | 43 | const isNumeric = n => !isNaN(parseFloat(n)) && isFinite(n) 44 | 45 | const formatRgb = (decimalObject, parameterA) => { 46 | const { r, g, b, a: parsedA } = decimalObject 47 | const a = isNumeric(parameterA) ? parameterA : parsedA 48 | 49 | return `rgba(${r}, ${g}, ${b}, ${a})` 50 | } 51 | 52 | /** 53 | * Turns an old-fashioned css hex color value into a rgb color value. 54 | * 55 | * If you specify an alpha value, you'll get a rgba() value instead. 56 | * 57 | * @param The hex value to convert. ('123456'. '#123456', ''123', '#123') 58 | * @param An alpha value to apply. (optional) ('0.5', '0.25') 59 | * @return An rgb or rgba value. ('rgb(11, 22, 33)'. 'rgba(11, 22, 33, 0.5)') 60 | */ 61 | const hexToRgba = (hex, a) => { 62 | const hashlessHex = removeHash(hex) 63 | const hexObject = parseHex(hashlessHex) 64 | const decimalObject = hexesToDecimals(hexObject) 65 | 66 | return formatRgb(decimalObject, a) 67 | } 68 | window.hexToRgba = hexToRgba 69 | })() 70 | -------------------------------------------------------------------------------- /luminoth/tools/server/static/luminoth-logo.svg: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /luminoth/tools/server/static/tryolabs-logo.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 7 | 8 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | -------------------------------------------------------------------------------- /luminoth/tools/server/templates/index.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | Luminoth 4 | 5 | 6 | 7 | 8 | 9 | 10 |
[index.html markup lost during extraction; the template's visible text was the "Deep Learning Based Table Detection" heading, a "Raw API response:" panel and a "Loading..." placeholder.]
64 | 65 | 66 | 67 | 68 | 69 | -------------------------------------------------------------------------------- /luminoth/tools/server/web_test.py: -------------------------------------------------------------------------------- 1 | # import numpy as np 2 | import tensorflow as tf 3 | 4 | from easydict import EasyDict 5 | from luminoth.models import get_model 6 | from luminoth.utils.config import get_base_config 7 | 8 | 9 | class WebTest(tf.test.TestCase): 10 | # TODO When the image size has big dimensions like (1024, 1024, 3), 11 | # Travis fails during this test, probably ran out of memory. Using an build 12 | # environment with more memory all works fine. 13 | def setUp(self): 14 | tf.reset_default_graph() 15 | model_class = get_model('fasterrcnn') 16 | base_config = get_base_config(model_class) 17 | image_resize = base_config.dataset.image_preprocessing 18 | self.config = EasyDict({ 19 | 'image_resize_min': image_resize.min_size, 20 | 'image_resize_max': image_resize.max_size 21 | }) 22 | 23 | # # This test fails with Travis' build environment 24 | # def testWithoutResize(self): 25 | # """ 26 | # Tests the FasterRCNN's predict without resize an image 27 | # """ 28 | # # Does a prediction without resizing the image 29 | # image = Image.fromarray( 30 | # np.random.randint( 31 | # low=0, high=255, 32 | # size=(self.config.image_resize_min, 33 | # self.config.image_resize_max, 3) 34 | # ).astype(np.uint8) 35 | # ) 36 | 37 | # results = get_prediction('fasterrcnn', image) 38 | 39 | # # Check that scale_factor and inference_time are corrects values 40 | # self.assertEqual(results['scale_factor'], 1.0) 41 | # self.assertGreaterEqual(results['inference_time'], 0) 42 | 43 | # # Check that objects, labels and probs aren't None 44 | # self.assertIsNotNone(results['objects']) 45 | # self.assertIsNotNone(results['objects_labels']) 46 | # self.assertIsNotNone(results['objects_labels_prob']) 47 | 48 | # This test fails with Travis' build environment 49 | # def testWithResize(self): 50 | # """ 51 | # Tests the FasterRCNN's predict without resize an image 52 | # """ 53 | # # Does a prediction resizing the image 54 | # image = Image.fromarray( 55 | # np.random.randint( 56 | # low=0, high=255, 57 | # size=(self.config.image_resize_min, 58 | # self.config.image_resize_max + 1, 3) 59 | # ).astype(np.uint8) 60 | # ) 61 | # 62 | # results = get_prediction('fasterrcnn', image) 63 | # 64 | # # Check that scale_factor and inference_time are corrects values 65 | # self.assertNotEqual(1.0, results['scale_factor']) 66 | # self.assertGreaterEqual(results['inference_time'], 0) 67 | # 68 | # # Check that objects, labels and probs aren't None 69 | # self.assertIsNotNone(results['objects']) 70 | # self.assertIsNotNone(results['objects_labels']) 71 | # self.assertIsNotNone(results['objects_labels_prob']) 72 | 73 | 74 | if __name__ == '__main__': 75 | tf.test.main() 76 | -------------------------------------------------------------------------------- /luminoth/train_test.py: -------------------------------------------------------------------------------- 1 | import sonnet as snt 2 | import tempfile 3 | import tensorflow as tf 4 | 5 | from easydict import EasyDict 6 | from luminoth.train import run 7 | from luminoth.models import get_model 8 | from luminoth.utils.config import ( 9 | get_model_config, load_config_files, get_base_config 10 | ) 11 | 12 | 13 | class MockFasterRCNN(snt.AbstractModule): 14 | """ 15 | Mocks Faster RCNN Network 16 | """ 17 | def __init__(self, config, name='mockfasterrcnn'): 18 | 
super(MockFasterRCNN, self).__init__(name=name) 19 | self._config = config 20 | 21 | def _build(self, image, gt_boxes=None, is_training=False): 22 | w = tf.get_variable('w', initializer=[2.5, 3.0], trainable=True) 23 | return {'w': w} 24 | 25 | def loss(self, pred_dict, return_all=False): 26 | return tf.reduce_sum(pred_dict['w'], 0) 27 | 28 | def get_trainable_vars(self): 29 | return snt.get_variables_in_module(self) 30 | 31 | def get_base_network_checkpoint_vars(self): 32 | return None 33 | 34 | def get_checkpoint_file(self): 35 | return None 36 | 37 | @property 38 | def summary(self): 39 | return tf.summary.scalar('dummy', 1, collections=['rcnn']) 40 | 41 | 42 | class TrainTest(tf.test.TestCase): 43 | """ 44 | Basic test to train module 45 | """ 46 | def setUp(self): 47 | self.total_epochs = 2 48 | self.config = EasyDict({ 49 | 'model_type': 'fasterrcnn', 50 | 'dataset_type': '', 51 | 'config_files': (), 52 | 'override_params': [], 53 | 'base_network': { 54 | 'download': False 55 | } 56 | }) 57 | tf.reset_default_graph() 58 | 59 | def get_dataset(self, dataset_type): 60 | """ 61 | Mocks luminoth.datasets.datasets.get_dataset 62 | """ 63 | def dataset_class(arg2): 64 | def build(): 65 | queue_dtypes = [tf.float32, tf.int32, tf.string] 66 | queue_names = ['image', 'bboxes', 'filename'] 67 | 68 | queue = tf.FIFOQueue( 69 | capacity=3, 70 | dtypes=queue_dtypes, 71 | names=queue_names, 72 | name='fifo_queue' 73 | ) 74 | filename = tf.cast('filename_test', tf.string) 75 | filename = tf.train.limit_epochs([filename], num_epochs=2) 76 | 77 | data = { 78 | 'image': tf.random_uniform([600, 800, 3], maxval=255), 79 | 'bboxes': tf.constant([[0, 0, 30, 30, 0]]), 80 | 'filename': filename 81 | } 82 | enqueue_ops = [queue.enqueue(data)] * 2 83 | tf.train.add_queue_runner( 84 | tf.train.QueueRunner(queue, enqueue_ops)) 85 | 86 | return queue.dequeue() 87 | return build 88 | return dataset_class 89 | 90 | def get_model(self, model_type): 91 | """ 92 | Mocks from luminoth.models.get_model 93 | """ 94 | return MockFasterRCNN 95 | 96 | def get_config(self, model_type, override_params=None): 97 | custom_config = load_config_files(self.config.config_files) 98 | model_class = get_model('fasterrcnn') 99 | model_base_config = get_base_config(model_class) 100 | config = get_model_config( 101 | model_base_config, custom_config, override_params 102 | ) 103 | 104 | config.model.type = model_type 105 | 106 | return config 107 | 108 | def testTrain(self): 109 | model_type = 'mockfasterrcnn' 110 | 111 | override_params = [ 112 | 'train.num_epochs={}'.format(self.total_epochs), 113 | 'train.job_dir=', 114 | ] 115 | 116 | config = self.get_config(model_type, override_params=override_params) 117 | 118 | # This should not fail 119 | run( 120 | config, get_dataset_fn=self.get_dataset, 121 | get_model_fn=self.get_model 122 | ) 123 | 124 | def testTrainSave(self): 125 | model_type = 'mockfasterrcnn' 126 | 127 | # Save checkpoints to a temp directory. 
128 | tmp_job_dir = tempfile.mkdtemp() 129 | override_params = [ 130 | 'train.num_epochs={}'.format(self.total_epochs), 131 | 'train.job_dir={}'.format(tmp_job_dir), 132 | 'train.run_name=test_runname', 133 | ] 134 | 135 | config = self.get_config(model_type, override_params=override_params) 136 | 137 | run(config, get_dataset_fn=self.get_dataset, 138 | get_model_fn=self.get_model) 139 | 140 | # Create new graph which will load previously saved checkpoint 141 | tf.reset_default_graph() 142 | new_session = tf.Session() 143 | new_saver = tf.train.import_meta_graph( 144 | tmp_job_dir + '/test_runname/model.ckpt-3.meta' 145 | ) 146 | new_saver.restore( 147 | new_session, tmp_job_dir + '/test_runname/model.ckpt-3' 148 | ) 149 | 150 | # Get tensor from graph and run it in session 151 | w_tensor = tf.get_default_graph().get_tensor_by_name( 152 | "mockfasterrcnn/w:0" 153 | ) 154 | w_numpy = new_session.run(w_tensor) 155 | 156 | # Assert we correctly loaded the weight 157 | self.assertArrayNear(w_numpy, [2.5, 3.0], err=0.01) 158 | 159 | 160 | if __name__ == '__main__': 161 | tf.test.main() 162 | -------------------------------------------------------------------------------- /luminoth/utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Sargunan/Table-Detection-using-Deep-learning/6471be4b0ad9abb3c2d439f6e33a27ccf6db8408/luminoth/utils/__init__.py -------------------------------------------------------------------------------- /luminoth/utils/__pycache__/__init__.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Sargunan/Table-Detection-using-Deep-learning/6471be4b0ad9abb3c2d439f6e33a27ccf6db8408/luminoth/utils/__pycache__/__init__.cpython-36.pyc -------------------------------------------------------------------------------- /luminoth/utils/__pycache__/anchors.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Sargunan/Table-Detection-using-Deep-learning/6471be4b0ad9abb3c2d439f6e33a27ccf6db8408/luminoth/utils/__pycache__/anchors.cpython-36.pyc -------------------------------------------------------------------------------- /luminoth/utils/__pycache__/bbox_overlap.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Sargunan/Table-Detection-using-Deep-learning/6471be4b0ad9abb3c2d439f6e33a27ccf6db8408/luminoth/utils/__pycache__/bbox_overlap.cpython-36.pyc -------------------------------------------------------------------------------- /luminoth/utils/__pycache__/bbox_transform.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Sargunan/Table-Detection-using-Deep-learning/6471be4b0ad9abb3c2d439f6e33a27ccf6db8408/luminoth/utils/__pycache__/bbox_transform.cpython-36.pyc -------------------------------------------------------------------------------- /luminoth/utils/__pycache__/bbox_transform_tf.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Sargunan/Table-Detection-using-Deep-learning/6471be4b0ad9abb3c2d439f6e33a27ccf6db8408/luminoth/utils/__pycache__/bbox_transform_tf.cpython-36.pyc -------------------------------------------------------------------------------- /luminoth/utils/__pycache__/checkpoint_downloader.cpython-36.pyc: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/Sargunan/Table-Detection-using-Deep-learning/6471be4b0ad9abb3c2d439f6e33a27ccf6db8408/luminoth/utils/__pycache__/checkpoint_downloader.cpython-36.pyc -------------------------------------------------------------------------------- /luminoth/utils/__pycache__/config.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Sargunan/Table-Detection-using-Deep-learning/6471be4b0ad9abb3c2d439f6e33a27ccf6db8408/luminoth/utils/__pycache__/config.cpython-36.pyc -------------------------------------------------------------------------------- /luminoth/utils/__pycache__/dataset.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Sargunan/Table-Detection-using-Deep-learning/6471be4b0ad9abb3c2d439f6e33a27ccf6db8408/luminoth/utils/__pycache__/dataset.cpython-36.pyc -------------------------------------------------------------------------------- /luminoth/utils/__pycache__/experiments.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Sargunan/Table-Detection-using-Deep-learning/6471be4b0ad9abb3c2d439f6e33a27ccf6db8408/luminoth/utils/__pycache__/experiments.cpython-36.pyc -------------------------------------------------------------------------------- /luminoth/utils/__pycache__/homedir.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Sargunan/Table-Detection-using-Deep-learning/6471be4b0ad9abb3c2d439f6e33a27ccf6db8408/luminoth/utils/__pycache__/homedir.cpython-36.pyc -------------------------------------------------------------------------------- /luminoth/utils/__pycache__/image.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Sargunan/Table-Detection-using-Deep-learning/6471be4b0ad9abb3c2d439f6e33a27ccf6db8408/luminoth/utils/__pycache__/image.cpython-36.pyc -------------------------------------------------------------------------------- /luminoth/utils/__pycache__/image_vis.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Sargunan/Table-Detection-using-Deep-learning/6471be4b0ad9abb3c2d439f6e33a27ccf6db8408/luminoth/utils/__pycache__/image_vis.cpython-36.pyc -------------------------------------------------------------------------------- /luminoth/utils/__pycache__/losses.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Sargunan/Table-Detection-using-Deep-learning/6471be4b0ad9abb3c2d439f6e33a27ccf6db8408/luminoth/utils/__pycache__/losses.cpython-36.pyc -------------------------------------------------------------------------------- /luminoth/utils/__pycache__/predicting.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Sargunan/Table-Detection-using-Deep-learning/6471be4b0ad9abb3c2d439f6e33a27ccf6db8408/luminoth/utils/__pycache__/predicting.cpython-36.pyc -------------------------------------------------------------------------------- /luminoth/utils/__pycache__/training.cpython-36.pyc: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/Sargunan/Table-Detection-using-Deep-learning/6471be4b0ad9abb3c2d439f6e33a27ccf6db8408/luminoth/utils/__pycache__/training.cpython-36.pyc -------------------------------------------------------------------------------- /luminoth/utils/__pycache__/vars.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Sargunan/Table-Detection-using-Deep-learning/6471be4b0ad9abb3c2d439f6e33a27ccf6db8408/luminoth/utils/__pycache__/vars.cpython-36.pyc -------------------------------------------------------------------------------- /luminoth/utils/anchors.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | 4 | def generate_anchors_reference(base_size, aspect_ratios, scales): 5 | """Generate base anchor to be used as reference of generating all anchors. 6 | 7 | Anchors vary only in width and height. Using the base_size and the 8 | different ratios we can calculate the wanted widths and heights. 9 | 10 | Scales apply to area of object. 11 | 12 | Args: 13 | base_size (int): Base size of the base anchor (square). 14 | aspect_ratios: Ratios to use to generate different anchors. The ratio 15 | is the value of height / width. 16 | scales: Scaling ratios applied to area. 17 | 18 | Returns: 19 | anchors: Numpy array with shape (total_aspect_ratios * total_scales, 4) 20 | with the corner points of the reference base anchors using the 21 | convention (x_min, y_min, x_max, y_max). 22 | """ 23 | scales_grid, aspect_ratios_grid = np.meshgrid(scales, aspect_ratios) 24 | base_scales = scales_grid.reshape(-1) 25 | base_aspect_ratios = aspect_ratios_grid.reshape(-1) 26 | 27 | aspect_ratio_sqrts = np.sqrt(base_aspect_ratios) 28 | heights = base_scales * aspect_ratio_sqrts * base_size 29 | widths = base_scales / aspect_ratio_sqrts * base_size 30 | 31 | # Center point has the same X, Y value. 32 | center_xy = 0 33 | 34 | # Create anchor reference. 35 | anchors = np.column_stack([ 36 | center_xy - (widths - 1) / 2, 37 | center_xy - (heights - 1) / 2, 38 | center_xy + (widths - 1) / 2, 39 | center_xy + (heights - 1) / 2, 40 | ]) 41 | 42 | real_heights = (anchors[:, 3] - anchors[:, 1]).astype(np.int) 43 | real_widths = (anchors[:, 2] - anchors[:, 0]).astype(np.int) 44 | 45 | if (real_widths == 0).any() or (real_heights == 0).any(): 46 | raise ValueError( 47 | 'base_size {} is too small for aspect_ratios and scales.'.format( 48 | base_size 49 | ) 50 | ) 51 | 52 | return anchors 53 | -------------------------------------------------------------------------------- /luminoth/utils/anchors_test.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import tensorflow as tf 3 | 4 | from luminoth.utils.anchors import generate_anchors_reference 5 | 6 | 7 | class AnchorsTest(tf.test.TestCase): 8 | def tearDown(self): 9 | tf.reset_default_graph() 10 | 11 | def _get_widths_heights(self, anchor_reference): 12 | return np.column_stack(( 13 | (anchor_reference[:, 2] - anchor_reference[:, 0] + 1), 14 | (anchor_reference[:, 3] - anchor_reference[:, 1] + 1) 15 | )) 16 | 17 | def testAnchorReference(self): 18 | # Test simple case with one aspect ratio and one scale. 19 | base_size = 256 20 | aspect_ratios = [1.] 21 | scales = [1.] 
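        # Reference anchors come back as (x_min, y_min, x_max, y_max) corners centred on the origin.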
22 | anchor_reference = generate_anchors_reference( 23 | base_size=base_size, 24 | aspect_ratios=aspect_ratios, 25 | scales=scales 26 | ) 27 | 28 | # Should return a single anchor. 29 | self.assertEqual(anchor_reference.shape, (1, 4)) 30 | self.assertAllEqual( 31 | anchor_reference[0], 32 | [ 33 | -(base_size - 1) / 2.0, -(base_size - 1) / 2.0, 34 | (base_size - 1) / 2.0, (base_size - 1) / 2.0 35 | ] 36 | ) 37 | 38 | # Test with fixed ratio and different scales. 39 | scales = np.array([0.5, 1., 2., 4.]) 40 | anchor_reference = generate_anchors_reference( 41 | base_size=base_size, 42 | aspect_ratios=aspect_ratios, 43 | scales=scales 44 | ) 45 | 46 | # Check that we have the correct number of anchors. 47 | self.assertEqual(anchor_reference.shape, (4, 4)) 48 | width_heights = self._get_widths_heights(anchor_reference) 49 | # Check that anchors are squares (aspect_ratio = [1.0]). 50 | self.assertTrue((width_heights[:, 0] == width_heights[:, 1]).all()) 51 | # Check that widths are consistent with scales times base_size. 52 | self.assertAllEqual(width_heights[:, 0], base_size * scales) 53 | # Check exact values. 54 | self.assertAllEqual( 55 | anchor_reference, 56 | np.array([ 57 | [-63.5, -63.5, 63.5, 63.5], 58 | [-127.5, -127.5, 127.5, 127.5], 59 | [-255.5, -255.5, 255.5, 255.5], 60 | [-511.5, -511.5, 511.5, 511.5] 61 | ]) 62 | ) 63 | 64 | # Test with different ratios and scales. 65 | scales = np.array([0.5, 1., 2.]) 66 | aspect_ratios = np.array([0.5, 1., 2.]) 67 | anchor_reference = generate_anchors_reference( 68 | base_size=base_size, 69 | aspect_ratios=aspect_ratios, 70 | scales=scales 71 | ) 72 | 73 | # Check we have the correct number of anchors. 74 | self.assertEqual( 75 | anchor_reference.shape, (len(scales) * len(aspect_ratios), 4) 76 | ) 77 | 78 | width_heights = self._get_widths_heights(anchor_reference) 79 | 80 | # Check ratios of height / widths 81 | anchor_ratios = width_heights[:, 1] / width_heights[:, 0] 82 | # Check scales (applied to ) 83 | anchor_scales = np.sqrt( 84 | (width_heights[:, 1] * width_heights[:, 0]) / (base_size ** 2) 85 | ) 86 | 87 | # Test that all ratios are used in the correct order. 88 | self.assertAllClose( 89 | anchor_ratios, [0.5, 0.5, 0.5, 1., 1., 1., 2., 2., 2.] 90 | ) 91 | # Test that all scales are used in the correct order. 92 | self.assertAllClose( 93 | anchor_scales, [0.5, 1., 2., 0.5, 1., 2., 0.5, 1., 2.] 94 | ) 95 | 96 | def testInvalidValues(self): 97 | # Should fail because base_size is too small to for that scale and 98 | # ratio. 99 | base_size = 1 100 | aspect_ratios = [0.5] 101 | scales = [0.5] 102 | try: 103 | generate_anchors_reference( 104 | base_size=base_size, 105 | aspect_ratios=aspect_ratios, 106 | scales=scales 107 | ) 108 | except ValueError: 109 | return 110 | 111 | self.fail('Should have thrown an exception.') 112 | 113 | 114 | if __name__ == '__main__': 115 | tf.test.main() 116 | -------------------------------------------------------------------------------- /luminoth/utils/bbox_overlap.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | import numpy as np 4 | import tensorflow as tf 5 | 6 | 7 | def bbox_overlap_tf(bboxes1, bboxes2): 8 | """Calculate Intersection over Union (IoU) between two sets of bounding 9 | boxes. 10 | 11 | Args: 12 | bboxes1: shape (total_bboxes1, 4) 13 | with x1, y1, x2, y2 point order. 14 | bboxes2: shape (total_bboxes2, 4) 15 | with x1, y1, x2, y2 point order. 
16 | 17 | p1 *----- 18 | | | 19 | |_____* p2 20 | 21 | Returns: 22 | Tensor with shape (total_bboxes1, total_bboxes2) 23 | with the IoU (intersection over union) of bboxes1[i] and bboxes2[j] 24 | in [i, j]. 25 | """ 26 | with tf.name_scope('bbox_overlap'): 27 | x11, y11, x12, y12 = tf.split(bboxes1, 4, axis=1) 28 | x21, y21, x22, y22 = tf.split(bboxes2, 4, axis=1) 29 | 30 | xI1 = tf.maximum(x11, tf.transpose(x21)) 31 | yI1 = tf.maximum(y11, tf.transpose(y21)) 32 | 33 | xI2 = tf.minimum(x12, tf.transpose(x22)) 34 | yI2 = tf.minimum(y12, tf.transpose(y22)) 35 | 36 | intersection = ( 37 | tf.maximum(xI2 - xI1 + 1., 0.) * 38 | tf.maximum(yI2 - yI1 + 1., 0.) 39 | ) 40 | 41 | bboxes1_area = (x12 - x11 + 1) * (y12 - y11 + 1) 42 | bboxes2_area = (x22 - x21 + 1) * (y22 - y21 + 1) 43 | 44 | union = (bboxes1_area + tf.transpose(bboxes2_area)) - intersection 45 | 46 | iou = tf.maximum(intersection / union, 0) 47 | 48 | return iou 49 | 50 | 51 | def bbox_overlap(bboxes1, bboxes2): 52 | """Calculate Intersection of Union between two sets of bounding boxes. 53 | 54 | Intersection over Union (IoU) of two bounding boxes A and B is calculated 55 | doing: (A ∩ B) / (A ∪ B). 56 | 57 | Args: 58 | bboxes1: numpy array of shape (total_bboxes1, 4). 59 | bboxes2: numpy array of shape (total_bboxes2, 4). 60 | 61 | Returns: 62 | iou: numpy array of shape (total_bboxes1, total_bboxes1) a matrix with 63 | the intersection over union of bboxes1[i] and bboxes2[j] in 64 | iou[i][j]. 65 | """ 66 | xI1 = np.maximum(bboxes1[:, [0]], bboxes2[:, [0]].T) 67 | yI1 = np.maximum(bboxes1[:, [1]], bboxes2[:, [1]].T) 68 | 69 | xI2 = np.minimum(bboxes1[:, [2]], bboxes2[:, [2]].T) 70 | yI2 = np.minimum(bboxes1[:, [3]], bboxes2[:, [3]].T) 71 | 72 | intersection = ( 73 | np.maximum(xI2 - xI1 + 1, 0.) * 74 | np.maximum(yI2 - yI1 + 1, 0.) 75 | ) 76 | 77 | bboxes1_area = ( 78 | (bboxes1[:, [2]] - bboxes1[:, [0]] + 1) * 79 | (bboxes1[:, [3]] - bboxes1[:, [1]] + 1) 80 | ) 81 | bboxes2_area = ( 82 | (bboxes2[:, [2]] - bboxes2[:, [0]] + 1) * 83 | (bboxes2[:, [3]] - bboxes2[:, [1]] + 1) 84 | ) 85 | 86 | # Calculate the union as the sum of areas minus intersection 87 | union = (bboxes1_area + bboxes2_area.T) - intersection 88 | 89 | # We start we an empty array of zeros. 90 | iou = np.zeros((bboxes1.shape[0], bboxes2.shape[0])) 91 | 92 | # Only divide where the intersection is > 0 93 | np.divide(intersection, union, out=iou, where=intersection > 0.) 94 | return iou 95 | -------------------------------------------------------------------------------- /luminoth/utils/bbox_overlap_test.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import tensorflow as tf 3 | 4 | from luminoth.utils.bbox_overlap import bbox_overlap_tf, bbox_overlap 5 | 6 | 7 | class BBoxOverlapTest(tf.test.TestCase): 8 | """Tests for bbox_overlap 9 | bbox_overlap has a TensorFlow and a Numpy implementation. 10 | 11 | We test both at the same time by getting both values and making sure they 12 | are both equal before doing any assertions. 13 | """ 14 | def tearDown(self): 15 | tf.reset_default_graph() 16 | 17 | def _get_iou(self, bbox1_val, bbox2_val): 18 | """Get IoU for two sets of bounding boxes. 19 | 20 | It also checks that both implementations return the same before 21 | returning. 22 | 23 | Args: 24 | bbox1_val: Array of shape (total_bbox1, 4). 25 | bbox2_val: Array of shape (total_bbox2, 4). 
26 | 27 | Returns: 28 | iou: Array of shape (total_bbox1, total_bbox2) 29 | """ 30 | bbox1 = tf.placeholder(tf.float32, (None, 4)) 31 | bbox2 = tf.placeholder(tf.float32, (None, 4)) 32 | iou = bbox_overlap_tf(bbox1, bbox2) 33 | 34 | with self.test_session() as sess: 35 | iou_val_tf = sess.run(iou, feed_dict={ 36 | bbox1: np.array(bbox1_val), 37 | bbox2: np.array(bbox2_val), 38 | }) 39 | 40 | iou_val_np = bbox_overlap(np.array(bbox1_val), np.array(bbox2_val)) 41 | self.assertAllClose(iou_val_np, iou_val_tf) 42 | return iou_val_tf 43 | 44 | def testNoOverlap(self): 45 | # Single box test 46 | iou = self._get_iou([[0, 0, 10, 10]], [[11, 11, 20, 20]]) 47 | self.assertAllEqual(iou, [[0.]]) 48 | 49 | # Multiple boxes. 50 | iou = self._get_iou( 51 | [[0, 0, 10, 10], [5, 5, 10, 10]], 52 | [[11, 11, 20, 20], [15, 15, 20, 20]] 53 | ) 54 | self.assertAllEqual(iou, [[0., 0.], [0., 0.]]) 55 | 56 | def testAllOverlap(self): 57 | # Equal boxes 58 | iou = self._get_iou([[0, 0, 10, 10]], [[0, 0, 10, 10]]) 59 | self.assertAllEqual(iou, [[1.]]) 60 | 61 | # Crossed equal boxes. 62 | iou = self._get_iou( 63 | [[0, 0, 10, 10], [11, 11, 20, 20]], 64 | [[0, 0, 10, 10], [11, 11, 20, 20]] 65 | ) 66 | # We should get an identity matrix. 67 | self.assertAllEqual(iou, [[1., 0.], [0., 1.]]) 68 | 69 | def testInvalidBoxes(self): 70 | # Zero area, bbox1 has x_min == x_max 71 | iou = self._get_iou([[10, 0, 10, 10]], [[0, 0, 10, 10]]) 72 | # self.assertAllEqual(iou, [[0.]]) TODO: Fails 73 | 74 | # Negative area, bbox1 has x_min > x_max (only by one) 75 | iou = self._get_iou([[10, 0, 9, 10]], [[0, 0, 10, 10]]) 76 | self.assertAllEqual(iou, [[0.]]) 77 | 78 | # Negative area, bbox1 has x_min > x_max 79 | iou = self._get_iou([[10, 0, 7, 10]], [[0, 0, 10, 10]]) 80 | self.assertAllEqual(iou, [[0.]]) 81 | 82 | # Negative area in both cases, both boxes equal but negative 83 | iou = self._get_iou([[10, 0, 7, 10]], [[10, 0, 7, 10]]) 84 | self.assertAllEqual(iou, [[0.]]) 85 | 86 | 87 | if __name__ == '__main__': 88 | tf.test.main() 89 | -------------------------------------------------------------------------------- /luminoth/utils/bbox_transform.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | 4 | def get_bbox_properties(bboxes): 5 | """Get bounding boxes width, height and center point. 6 | 7 | Args: 8 | bboxes: Numpy array with bounding boxes of shape (total_boxes, 4). 9 | 10 | Returns: 11 | widths: Numpy array with the width of each bbox. 12 | heights: Numpy array with the height of each bbox. 13 | center_x: X-coordinate for center point of each bbox. 14 | center_y: Y-coordinate for center point of each bbox. 15 | """ 16 | bboxes_widths = bboxes[:, 2] - bboxes[:, 0] + 1.0 17 | bboxes_heights = bboxes[:, 3] - bboxes[:, 1] + 1.0 18 | bboxes_center_x = bboxes[:, 0] + 0.5 * bboxes_widths 19 | bboxes_center_y = bboxes[:, 1] + 0.5 * bboxes_heights 20 | return bboxes_widths, bboxes_heights, bboxes_center_x, bboxes_center_y 21 | 22 | 23 | def encode(proposals, gt_boxes): 24 | """Encode the different adjustments needed to transform it to its 25 | corresponding ground truth box. 26 | 27 | Args: 28 | proposals: Numpy array of shape (total_proposals, 4). Having the 29 | bbox encoding in the (x_min, y_min, x_max, y_max) order. 30 | gt_boxes: Numpy array of shape (total_proposals, 4). With the same 31 | bbox encoding. 32 | 33 | Returns: 34 | targets: Numpy array of shape (total_proposals, 4) with the different 35 | deltas needed to transform the proposal to the gt_boxes. 
These 36 | deltas are with regards to the center, width and height of the 37 | two boxes. 38 | """ 39 | 40 | (proposal_widths, proposal_heights, 41 | proposal_center_x, proposal_center_y) = get_bbox_properties(proposals) 42 | (gt_widths, gt_heights, 43 | gt_center_x, gt_center_y) = get_bbox_properties(gt_boxes) 44 | 45 | # We need to apply targets as specified by the paper parametrization 46 | # Faster RCNN 3.1.2 47 | targets_x = (gt_center_x - proposal_center_x) / proposal_widths 48 | targets_y = (gt_center_y - proposal_center_y) / proposal_heights 49 | targets_w = np.log(gt_widths / proposal_widths) 50 | targets_h = np.log(gt_heights / proposal_heights) 51 | 52 | targets = np.column_stack((targets_x, targets_y, targets_w, targets_h)) 53 | 54 | return targets 55 | 56 | 57 | def decode(bboxes, deltas): 58 | """ 59 | Args: 60 | boxes: numpy array of bounding boxes of shape: (num_boxes, 4) following 61 | the encoding (x_min, y_min, x_max, y_max). 62 | deltas: numpy array of bounding box deltas, one for each bounding box. 63 | Its shape is (num_boxes, 4), where the deltas are encoded as 64 | (dx, dy, dw, dh). 65 | 66 | Returns: 67 | bboxes: bounding boxes transformed to (x1, y1, x2, y2) coordinates. It 68 | has the same shape as bboxes. 69 | """ 70 | widths, heights, ctr_x, ctr_y = get_bbox_properties(bboxes) 71 | 72 | # The dx, dy deltas are relative while the dw, dh deltas are "log relative" 73 | # d[:, x::y] is used for having a `(num_boxes, 1)` shape instead of 74 | # `(num_boxes,)` 75 | 76 | # Split deltas columns into flat array 77 | dx = deltas[:, 0] 78 | dy = deltas[:, 1] 79 | dw = deltas[:, 2] 80 | dh = deltas[:, 3] 81 | 82 | # We get the center of the real box as center anchor + relative width 83 | pred_ctr_x = dx * widths + ctr_x 84 | pred_ctr_y = dy * heights + ctr_y 85 | 86 | # New width and height using exp 87 | pred_w = np.exp(dw) * widths 88 | pred_h = np.exp(dh) * heights 89 | 90 | # Calculate (x_min, y_min, x_max, y_max) and pack them together. 91 | pred_boxes = np.column_stack(( 92 | pred_ctr_x - 0.5 * pred_w, 93 | pred_ctr_y - 0.5 * pred_h, 94 | pred_ctr_x + 0.5 * pred_w - 1.0, 95 | pred_ctr_y + 0.5 * pred_h - 1.0, 96 | )) 97 | 98 | return pred_boxes 99 | 100 | 101 | def clip_points(points, max_val, min_val): 102 | return np.maximum(np.minimum(points, max_val), min_val) 103 | 104 | 105 | def clip_boxes(boxes, image_shape): 106 | """Clip boxes to image boundaries. 107 | 108 | Args: 109 | boxes: A numpy array of bounding boxes. 110 | image_shape: Image shape (height, width). 
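    Returns:
        The same boxes array, with every coordinate clipped in place to the
        valid image range.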
111 | """ 112 | max_width = image_shape[1] - 1 113 | max_height = image_shape[0] - 1 114 | min_width = 0 115 | min_height = 0 116 | 117 | boxes[:, 0] = clip_points(boxes[:, 0], max_width, min_width) 118 | boxes[:, 1] = clip_points(boxes[:, 1], max_height, min_height) 119 | boxes[:, 2] = clip_points(boxes[:, 2], max_width, min_width) 120 | boxes[:, 3] = clip_points(boxes[:, 3], max_height, min_height) 121 | 122 | return boxes 123 | 124 | 125 | def unmap(data, count, inds, fill=0): 126 | """Unmap a subset of item (data) back to the original set of items (of size 127 | count) 128 | """ 129 | if len(data.shape) == 1: 130 | ret = np.empty((count, ), dtype=np.float32) 131 | ret.fill(fill) 132 | ret[inds] = data 133 | else: 134 | ret = np.empty((count, ) + data.shape[1:], dtype=np.float32) 135 | ret.fill(fill) 136 | ret[inds, :] = data 137 | return ret 138 | -------------------------------------------------------------------------------- /luminoth/utils/bbox_transform_tf.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | 3 | 4 | def get_width_upright(bboxes): 5 | with tf.name_scope('BoundingBoxTransform/get_width_upright'): 6 | bboxes = tf.cast(bboxes, tf.float32) 7 | x1, y1, x2, y2 = tf.split(bboxes, 4, axis=1) 8 | width = x2 - x1 + 1. 9 | height = y2 - y1 + 1. 10 | 11 | # Calculate up right point of bbox (urx = up right x) 12 | urx = x1 + .5 * width 13 | ury = y1 + .5 * height 14 | 15 | return width, height, urx, ury 16 | 17 | 18 | def encode(bboxes, gt_boxes, variances=None): 19 | with tf.name_scope('BoundingBoxTransform/encode'): 20 | (bboxes_width, bboxes_height, 21 | bboxes_urx, bboxes_ury) = get_width_upright(bboxes) 22 | 23 | (gt_boxes_width, gt_boxes_height, 24 | gt_boxes_urx, gt_boxes_ury) = get_width_upright(gt_boxes) 25 | 26 | if variances is None: 27 | variances = [1., 1.] 28 | 29 | targets_dx = (gt_boxes_urx - bboxes_urx)/(bboxes_width * variances[0]) 30 | targets_dy = (gt_boxes_ury - bboxes_ury)/(bboxes_height * variances[0]) 31 | 32 | targets_dw = tf.log(gt_boxes_width / bboxes_width) / variances[1] 33 | targets_dh = tf.log(gt_boxes_height / bboxes_height) / variances[1] 34 | 35 | targets = tf.concat( 36 | [targets_dx, targets_dy, targets_dw, targets_dh], axis=1) 37 | 38 | return targets 39 | 40 | 41 | def decode(roi, deltas, variances=None): 42 | with tf.name_scope('BoundingBoxTransform/decode'): 43 | (roi_width, roi_height, 44 | roi_urx, roi_ury) = get_width_upright(roi) 45 | 46 | dx, dy, dw, dh = tf.split(deltas, 4, axis=1) 47 | 48 | if variances is None: 49 | variances = [1., 1.] 50 | 51 | pred_ur_x = dx * roi_width * variances[0] + roi_urx 52 | pred_ur_y = dy * roi_height * variances[0] + roi_ury 53 | pred_w = tf.exp(dw * variances[1]) * roi_width 54 | pred_h = tf.exp(dh * variances[1]) * roi_height 55 | 56 | bbox_x1 = pred_ur_x - 0.5 * pred_w 57 | bbox_y1 = pred_ur_y - 0.5 * pred_h 58 | 59 | # This -1. extra is different from reference implementation. 60 | bbox_x2 = pred_ur_x + 0.5 * pred_w - 1. 61 | bbox_y2 = pred_ur_y + 0.5 * pred_h - 1. 62 | 63 | bboxes = tf.concat( 64 | [bbox_x1, bbox_y1, bbox_x2, bbox_y2], axis=1) 65 | 66 | return bboxes 67 | 68 | 69 | def clip_boxes(bboxes, imshape): 70 | """ 71 | Clips bounding boxes to image boundaries based on image shape. 72 | 73 | Args: 74 | bboxes: Tensor with shape (num_bboxes, 4) 75 | where point order is x1, y1, x2, y2. 76 | 77 | imshape: Tensor with shape (2, ) 78 | where the first value is height and the next is width. 
79 | 80 | Returns: 81 | Tensor with same shape as bboxes but making sure that none 82 | of the bboxes are outside the image. 83 | """ 84 | with tf.name_scope('BoundingBoxTransform/clip_bboxes'): 85 | bboxes = tf.cast(bboxes, dtype=tf.float32) 86 | imshape = tf.cast(imshape, dtype=tf.float32) 87 | 88 | x1, y1, x2, y2 = tf.split(bboxes, 4, axis=1) 89 | width = imshape[1] 90 | height = imshape[0] 91 | x1 = tf.maximum(tf.minimum(x1, width - 1.0), 0.0) 92 | x2 = tf.maximum(tf.minimum(x2, width - 1.0), 0.0) 93 | 94 | y1 = tf.maximum(tf.minimum(y1, height - 1.0), 0.0) 95 | y2 = tf.maximum(tf.minimum(y2, height - 1.0), 0.0) 96 | 97 | bboxes = tf.concat([x1, y1, x2, y2], axis=1) 98 | 99 | return bboxes 100 | 101 | 102 | def change_order(bboxes): 103 | """Change bounding box encoding order. 104 | 105 | TensorFlow works with the (y_min, x_min, y_max, x_max) order while we work 106 | with the (x_min, y_min, x_max, y_max). 107 | 108 | While both encoding options have their advantages and disadvantages, we 109 | decided to use the (x_min, y_min, x_max, y_max), forcing us to switch to 110 | TensorFlow's every time we want to use a standard function that handles 111 | bounding boxes. 112 | 113 | Args: 114 | bboxes: A Tensor of shape (total_bboxes, 4) 115 | 116 | Returns: 117 | bboxes: A Tensor of shape (total_bboxes, 4) with the order swapped. 118 | """ 119 | with tf.name_scope('BoundingBoxTransform/change_order'): 120 | first_min, second_min, first_max, second_max = tf.unstack( 121 | bboxes, axis=1 122 | ) 123 | bboxes = tf.stack( 124 | [second_min, first_min, second_max, first_max], axis=1 125 | ) 126 | return bboxes 127 | 128 | 129 | if __name__ == '__main__': 130 | import numpy as np 131 | 132 | bboxes = tf.placeholder(tf.float32) 133 | bboxes_val = [[10, 10, 20, 22]] 134 | 135 | gt_boxes = tf.placeholder(tf.float32) 136 | gt_boxes_val = [[11, 13, 34, 31]] 137 | 138 | imshape = tf.placeholder(tf.int32) 139 | imshape_val = (100, 100) 140 | 141 | deltas = encode(bboxes, gt_boxes) 142 | decoded_bboxes = decode(bboxes, deltas) 143 | final_decoded_bboxes = clip_boxes(decoded_bboxes, imshape) 144 | 145 | with tf.Session() as sess: 146 | final_decoded_bboxes = sess.run(final_decoded_bboxes, feed_dict={ 147 | bboxes: bboxes_val, 148 | gt_boxes: gt_boxes_val, 149 | imshape: imshape_val, 150 | }) 151 | 152 | assert np.all(gt_boxes_val == final_decoded_bboxes) 153 | -------------------------------------------------------------------------------- /luminoth/utils/checkpoint_downloader.py: -------------------------------------------------------------------------------- 1 | import click 2 | import json 3 | import os 4 | import requests 5 | import tarfile 6 | import tensorflow as tf 7 | 8 | from luminoth.utils.homedir import get_luminoth_home 9 | 10 | 11 | TENSORFLOW_OFFICIAL_ENDPOINT = 'http://download.tensorflow.org/models/' 12 | 13 | BASE_NETWORK_FILENAMES = { 14 | 'inception_v3': 'inception_v3_2016_08_28.tar.gz', 15 | 'resnet_v1_50': 'resnet_v1_50_2016_08_28.tar.gz', 16 | 'resnet_v1_101': 'resnet_v1_101_2016_08_28.tar.gz', 17 | 'resnet_v1_152': 'resnet_v1_152_2016_08_28.tar.gz', 18 | 'resnet_v2_50': 'resnet_v2_50_2017_04_14.tar.gz', 19 | 'resnet_v2_101': 'resnet_v2_101_2017_04_14.tar.gz', 20 | 'resnet_v2_152': 'resnet_v2_152_2017_04_14.tar.gz', 21 | 'vgg_16': 'vgg_16_2016_08_28.tar.gz', 22 | 'truncated_vgg_16': 'vgg_16_2016_08_28.tar.gz', 23 | } 24 | 25 | 26 | def get_default_path(): 27 | if 'TF_CONFIG' in os.environ: 28 | tf_config = json.loads(os.environ['TF_CONFIG']) 29 | job_dir = tf_config.get('job', 
{}).get('job_dir') 30 | if job_dir: 31 | # Instead of using the job_dir we create a folder inside. 32 | job_dir = os.path.join(job_dir, 'pretrained_checkpoints/') 33 | return job_dir 34 | 35 | return get_luminoth_home() 36 | 37 | 38 | DEFAULT_PATH = get_default_path() 39 | 40 | 41 | def get_checkpoint_path(path=DEFAULT_PATH): 42 | # Expand user if path is relative to user home. 43 | path = os.path.expanduser(path) 44 | 45 | if not path.startswith('gs://'): 46 | # We don't need to create Google cloud storage "folders" 47 | path = os.path.abspath(path) 48 | 49 | if not tf.gfile.Exists(path): 50 | tf.logging.info( 51 | 'Creating folder "{}" to save checkpoints.'.format(path)) 52 | tf.gfile.MakeDirs(path) 53 | 54 | return path 55 | 56 | 57 | def download_checkpoint(network, network_filename, checkpoint_path, 58 | checkpoint_filename): 59 | tarball_filename = BASE_NETWORK_FILENAMES[network] 60 | url = TENSORFLOW_OFFICIAL_ENDPOINT + tarball_filename 61 | response = requests.get(url, stream=True) 62 | total_size = int(response.headers.get('Content-Length')) 63 | tarball_path = os.path.join(checkpoint_path, tarball_filename) 64 | tmp_tarball = tf.gfile.Open(tarball_path, 'wb') 65 | tf.logging.info('Downloading {} checkpoint.'.format(network_filename)) 66 | with click.progressbar(length=total_size) as bar: 67 | for data in response.iter_content(chunk_size=4096): 68 | tmp_tarball.write(data) 69 | bar.update(len(data)) 70 | tmp_tarball.flush() 71 | 72 | tf.logging.info('Saving checkpoint to {}'.format(checkpoint_path)) 73 | # Open saved tarball as readable binary 74 | tmp_tarball = tf.gfile.Open(tarball_path, 'rb') 75 | # Open tarfile object 76 | tar_obj = tarfile.open(fileobj=tmp_tarball) 77 | # Get checkpoint file name 78 | checkpoint_file_name = tar_obj.getnames()[0] 79 | # Create buffer with extracted network checkpoint 80 | checkpoint_fp = tar_obj.extractfile(checkpoint_file_name) 81 | # Define where to save. 82 | checkpoint_file = tf.gfile.Open(checkpoint_filename, 'wb') 83 | # Write extracted checkpoint to file 84 | checkpoint_file.write(checkpoint_fp.read()) 85 | checkpoint_file.flush() 86 | checkpoint_file.close() 87 | tmp_tarball.close() 88 | # Remove temp tarball 89 | tf.gfile.Remove(tarball_path) 90 | 91 | 92 | def get_checkpoint_file(network, checkpoint_path=DEFAULT_PATH): 93 | if checkpoint_path is None: 94 | checkpoint_path = DEFAULT_PATH 95 | checkpoint_path = get_checkpoint_path(path=checkpoint_path) 96 | files = tf.gfile.ListDirectory(checkpoint_path) 97 | network_filename = '{}.ckpt'.format(network) 98 | checkpoint_file = os.path.join(checkpoint_path, network_filename) 99 | if network_filename not in files: 100 | download_checkpoint( 101 | network, network_filename, checkpoint_path, checkpoint_file 102 | ) 103 | 104 | return checkpoint_file 105 | -------------------------------------------------------------------------------- /luminoth/utils/dataset.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | 3 | from lxml import etree 4 | 5 | 6 | def node2dict(root): 7 | if root.getchildren(): 8 | val = {} 9 | for node in root.getchildren(): 10 | chkey, chval = node2dict(node) 11 | val[chkey] = chval 12 | else: 13 | val = root.text 14 | 15 | return root.tag, val 16 | 17 | 18 | def read_xml(path): 19 | with tf.gfile.GFile(path) as f: 20 | root = etree.fromstring(f.read()) 21 | 22 | annotations = {} 23 | for node in root.getchildren(): 24 | key, val = node2dict(node) 25 | # If `key` is object, it's actually a list. 
26 | if key == 'object': 27 | annotations.setdefault(key, []).append(val) 28 | else: 29 | annotations[key] = val 30 | 31 | return annotations 32 | 33 | 34 | def read_image(path): 35 | with tf.gfile.GFile(path, 'rb') as f: 36 | image = f.read() 37 | return image 38 | 39 | 40 | def to_int64(value): 41 | value = [int(value)] if not isinstance(value, list) else value 42 | return tf.train.Feature( 43 | int64_list=tf.train.Int64List(value=value) 44 | ) 45 | 46 | 47 | def to_bytes(value): 48 | value = [value] if not isinstance(value, list) else value 49 | return tf.train.Feature( 50 | bytes_list=tf.train.BytesList(value=value) 51 | ) 52 | 53 | 54 | def to_string(value): 55 | value = [value] if not isinstance(value, list) else value 56 | value = [v.encode('utf-8') for v in value] 57 | return tf.train.Feature( 58 | bytes_list=tf.train.BytesList(value=value) 59 | ) 60 | -------------------------------------------------------------------------------- /luminoth/utils/debug.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | 3 | # flake8: noqa 4 | 5 | 6 | def debug(*args, **kwargs): 7 | def call_ipdb(*args, **kwargs): 8 | print(args) 9 | print(kwargs) 10 | import ipdb; ipdb.set_trace() 11 | return 0 12 | 13 | return tf.py_func(call_ipdb, 14 | [list(args) + list(kwargs.values())], 15 | tf.int32 16 | ) -------------------------------------------------------------------------------- /luminoth/utils/experiments.py: -------------------------------------------------------------------------------- 1 | import datetime 2 | import json 3 | import os.path 4 | import subprocess 5 | import tensorflow as tf 6 | 7 | from luminoth.utils.homedir import get_luminoth_home 8 | 9 | 10 | DEFAULT_FILENAME = 'runs.json' 11 | CURRENT_DIR = os.path.dirname(os.path.realpath(__file__)) 12 | 13 | 14 | def get_diff(): 15 | try: 16 | return subprocess.check_output( 17 | ['git', 'diff'], cwd=CURRENT_DIR 18 | ).strip().decode('utf-8') 19 | except: # noqa 20 | # Never fail, we don't care about the error. 21 | return None 22 | 23 | 24 | def get_luminoth_version(): 25 | try: 26 | return subprocess.check_output( 27 | ['git', 'rev-parse', 'HEAD'], cwd=CURRENT_DIR 28 | ).strip().decode('utf-8') 29 | except: # noqa 30 | # Never fail, we don't care about the error. 31 | pass 32 | 33 | try: 34 | from luminoth import __version__ as lumi_version 35 | return lumi_version 36 | except ImportError: 37 | pass 38 | 39 | 40 | def get_tensorflow_version(): 41 | try: 42 | from tensorflow import __version__ as tf_version 43 | return tf_version 44 | except ImportError: 45 | pass 46 | 47 | 48 | def save_run(config, environment=None, comment=None, extra_config=None, 49 | filename=DEFAULT_FILENAME): 50 | if environment == 'cloud': 51 | # We don't write runs inside Google Cloud, we run it before. 
52 | return 53 | 54 | diff = get_diff() 55 | lumi_version = get_luminoth_version() 56 | tf_version = get_tensorflow_version() 57 | 58 | experiment = { 59 | 'environment': environment, 60 | 'datetime': str(datetime.datetime.utcnow()) + 'Z', 61 | 'diff': diff, 62 | 'luminoth_version': lumi_version, 63 | 'tensorflow_version': tf_version, 64 | 'config': config, 65 | 'extra_config': extra_config, 66 | } 67 | 68 | path = get_luminoth_home() 69 | file_path = os.path.join(path, filename) 70 | tf.gfile.MakeDirs(path) 71 | 72 | with tf.gfile.Open(file_path, 'a') as log: 73 | log.write(json.dumps(experiment) + '\n') 74 | -------------------------------------------------------------------------------- /luminoth/utils/homedir.py: -------------------------------------------------------------------------------- 1 | """Luminoth home (~/.luminoth) management utilities.""" 2 | import os 3 | import tensorflow as tf 4 | 5 | 6 | DEFAULT_LUMINOTH_HOME = os.path.expanduser('~/.luminoth') 7 | 8 | 9 | def get_luminoth_home(create_if_missing=True): 10 | """Returns Luminoth's homedir.""" 11 | # Get Luminoth's home directory (the default one or the overridden). 12 | path = os.path.abspath( 13 | os.environ.get('LUMI_HOME', DEFAULT_LUMINOTH_HOME) 14 | ) 15 | 16 | # Create the directory if it doesn't exist. 17 | if create_if_missing and not os.path.exists(path): 18 | tf.gfile.MakeDirs(path) 19 | 20 | return path 21 | -------------------------------------------------------------------------------- /luminoth/utils/hooks/__init__.py: -------------------------------------------------------------------------------- 1 | from .image_vis_hook import ImageVisHook # noqa 2 | from .var_vis_hook import VarVisHook # noqa 3 | -------------------------------------------------------------------------------- /luminoth/utils/hooks/__pycache__/__init__.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Sargunan/Table-Detection-using-Deep-learning/6471be4b0ad9abb3c2d439f6e33a27ccf6db8408/luminoth/utils/hooks/__pycache__/__init__.cpython-36.pyc -------------------------------------------------------------------------------- /luminoth/utils/hooks/__pycache__/image_vis_hook.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Sargunan/Table-Detection-using-Deep-learning/6471be4b0ad9abb3c2d439f6e33a27ccf6db8408/luminoth/utils/hooks/__pycache__/image_vis_hook.cpython-36.pyc -------------------------------------------------------------------------------- /luminoth/utils/hooks/__pycache__/var_vis_hook.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Sargunan/Table-Detection-using-Deep-learning/6471be4b0ad9abb3c2d439f6e33a27ccf6db8408/luminoth/utils/hooks/__pycache__/var_vis_hook.cpython-36.pyc -------------------------------------------------------------------------------- /luminoth/utils/hooks/image_vis_hook.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | 3 | from tensorflow.python.training.summary_io import SummaryWriterCache 4 | from luminoth.utils.image_vis import image_vis_summaries 5 | 6 | 7 | class ImageVisHook(tf.train.SessionRunHook): 8 | def __init__(self, prediction_dict, image, config=None, gt_bboxes=None, 9 | every_n_steps=None, every_n_secs=None, output_dir=None, 10 | summary_writer=None, image_visualization_mode=None): 11 | 
super(ImageVisHook, self).__init__() 12 | if (every_n_secs is None) == (every_n_steps is None): 13 | raise ValueError( 14 | 'Only one of "every_n_secs" and "every_n_steps" must be ' 15 | 'provided.') 16 | if output_dir is None and summary_writer is None: 17 | tf.logging.warning( 18 | 'ImageVisHook is not saving summaries. One of "output_dir" ' 19 | 'and "summary_writer" must be provided') 20 | self._timer = tf.train.SecondOrStepTimer( 21 | every_steps=every_n_steps, every_secs=every_n_secs) 22 | 23 | self._prediction_dict = prediction_dict 24 | self._config = config 25 | self._output_dir = output_dir 26 | self._summary_writer = summary_writer 27 | self._image_visualization_mode = image_visualization_mode 28 | self._image = image 29 | self._gt_bboxes = gt_bboxes 30 | 31 | tf.logging.info('ImageVisHook was created with mode = "{}"'.format( 32 | image_visualization_mode 33 | )) 34 | 35 | def begin(self): 36 | if self._summary_writer is None and self._output_dir: 37 | self._summary_writer = SummaryWriterCache.get(self._output_dir) 38 | self._next_step = None 39 | self._global_step = tf.train.get_global_step() 40 | if self._global_step is None: 41 | raise RuntimeError('Global step must be created for ImageVisHook.') 42 | 43 | def before_run(self, run_context): 44 | 45 | fetches = {'global_step': self._global_step} 46 | self._draw_images = ( 47 | self._next_step is None or 48 | self._timer.should_trigger_for_step(self._next_step) 49 | ) 50 | 51 | if self._draw_images: 52 | fetches['prediction_dict'] = self._prediction_dict 53 | fetches['image'] = self._image 54 | if self._gt_bboxes is not None: 55 | fetches['gt_bboxes'] = self._gt_bboxes 56 | 57 | return tf.train.SessionRunArgs(fetches) 58 | 59 | def after_run(self, run_context, run_values): 60 | results = run_values.results 61 | global_step = results.get('global_step') 62 | 63 | if self._draw_images: 64 | self._timer.update_last_triggered_step(global_step) 65 | prediction_dict = results.get('prediction_dict') 66 | if prediction_dict is not None: 67 | summaries = image_vis_summaries( 68 | prediction_dict, config=self._config, 69 | image_visualization_mode=self._image_visualization_mode, 70 | image=results.get('image'), 71 | gt_bboxes=results.get('gt_bboxes') 72 | ) 73 | if self._summary_writer is not None: 74 | for summary in summaries: 75 | self._summary_writer.add_summary(summary, global_step) 76 | 77 | self._next_step = global_step + 1 78 | 79 | def end(self, session=None): 80 | if self._summary_writer: 81 | self._summary_writer.flush() 82 | -------------------------------------------------------------------------------- /luminoth/utils/hooks/var_vis_hook.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | 3 | from tensorflow.python.training.summary_io import SummaryWriterCache 4 | 5 | 6 | class VarVisHook(tf.train.SessionRunHook): 7 | 8 | def __init__(self, every_n_steps=None, every_n_secs=None, mode=None, 9 | output_dir=None, vars_summary=None): 10 | super(VarVisHook, self).__init__() 11 | 12 | if (every_n_secs is None) == (every_n_steps is None): 13 | raise ValueError( 14 | 'Only one of "every_n_secs" and "every_n_steps" must be ' 15 | 'provided.' 16 | ) 17 | 18 | if output_dir is None: 19 | tf.logging.warning( 20 | '`output_dir` not provided, VarVisHook is not saving ' 21 | 'summaries.' 
22 | ) 23 | 24 | self._timer = tf.train.SecondOrStepTimer( 25 | every_steps=every_n_steps, 26 | every_secs=every_n_secs 27 | ) 28 | 29 | self._mode = mode 30 | self._output_dir = output_dir 31 | self._summary_writer = None 32 | self._vars_summary = vars_summary 33 | 34 | tf.logging.info('VarVisHook was created with mode = "{}"'.format(mode)) 35 | 36 | def begin(self): 37 | if self._output_dir: 38 | self._summary_writer = SummaryWriterCache.get(self._output_dir) 39 | 40 | self._next_step = None 41 | self._global_step = tf.train.get_global_step() 42 | if self._global_step is None: 43 | raise RuntimeError('Global step must be created for VarVisHook.') 44 | 45 | def before_run(self, run_context): 46 | fetches = { 47 | 'global_step': self._global_step, 48 | } 49 | 50 | self._write_summaries = ( 51 | self._next_step is None or 52 | self._timer.should_trigger_for_step(self._next_step) 53 | ) 54 | 55 | if self._write_summaries: 56 | fetches['summary'] = self._vars_summary[self._mode] 57 | 58 | return tf.train.SessionRunArgs(fetches) 59 | 60 | def after_run(self, run_context, run_values): 61 | results = run_values.results 62 | global_step = results.get('global_step') 63 | 64 | if self._write_summaries: 65 | self._timer.update_last_triggered_step(global_step) 66 | summary = results.get('summary') 67 | if summary is not None: 68 | if self._summary_writer is not None: 69 | self._summary_writer.add_summary(summary, global_step) 70 | 71 | self._next_step = global_step + 1 72 | 73 | def end(self, session=None): 74 | if self._summary_writer: 75 | self._summary_writer.flush() 76 | -------------------------------------------------------------------------------- /luminoth/utils/losses.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | 3 | 4 | def smooth_l1_loss(bbox_prediction, bbox_target, sigma=3.0): 5 | """ 6 | Return Smooth L1 Loss for bounding box prediction. 
7 | 8 | Args: 9 | bbox_prediction: shape (1, H, W, num_anchors * 4) 10 | bbox_target: shape (1, H, W, num_anchors * 4) 11 | 12 | 13 | Smooth L1 loss is defined as: 14 | 15 | 0.5 * sigma^2 * x^2 if |x| < d 16 | abs(x) - 0.5 / sigma^2 if |x| >= d 17 | 18 | Where d = 1 / sigma^2 and x = prediction - target 19 | 20 | """ 21 | sigma2 = sigma ** 2 22 | diff = bbox_prediction - bbox_target 23 | abs_diff = tf.abs(diff) 24 | abs_diff_lt_sigma2 = tf.less(abs_diff, 1.0 / sigma2) 25 | bbox_loss = tf.reduce_sum( 26 | tf.where( 27 | abs_diff_lt_sigma2, 28 | 0.5 * sigma2 * tf.square(abs_diff), 29 | abs_diff - 0.5 / sigma2 30 | ), [1] 31 | ) 32 | return bbox_loss 33 | 34 | 35 | if __name__ == '__main__': 36 | bbox_prediction_tf = tf.placeholder(tf.float32) 37 | bbox_target_tf = tf.placeholder(tf.float32) 38 | loss_tf = smooth_l1_loss(bbox_prediction_tf, bbox_target_tf) 39 | with tf.Session() as sess: 40 | loss = sess.run( 41 | loss_tf, 42 | feed_dict={ 43 | bbox_prediction_tf: [ 44 | [0.47450006, -0.80413032, -0.26595005, 0.17124325] 45 | ], 46 | bbox_target_tf: [ 47 | [0.10058594, 0.07910156, 0.10555581, -0.1224325] 48 | ], 49 | }) 50 | -------------------------------------------------------------------------------- /luminoth/utils/test/__init__.py: -------------------------------------------------------------------------------- 1 | from .gt_boxes import generate_gt_boxes # noqa 2 | from .anchors import generate_anchors # noqa 3 | -------------------------------------------------------------------------------- /luminoth/utils/test/anchors.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | 4 | def generate_anchors(anchors_reference, anchor_stride, feature_map_size): 5 | """ 6 | Generate anchors from an anchor_reference using the anchor_stride for an 7 | image with a feature map size of `feature_map_size`. 8 | 9 | This code is based on the TensorFlow code for generating the same thing 10 | on the computation graph. 11 | 12 | Args: 13 | anchors_reference (np.array): with shape (total_anchors, 4), the 14 | relative distance between the center and the top left X,Y and 15 | bottom right X, Y of the anchor. 16 | anchor_stride (int): stride for generation of anchors. 17 | feature_map_size (np.array): with shape (2,) 18 | 19 | Returns: 20 | anchors (np.array): array with anchors. 21 | with shape (height_feature * width_feature * total_anchors, 4) 22 | 23 | TODO: We should create a test for comparing this function vs the one 24 | actually used in the computation graph. 
25 | """ 26 | 27 | grid_width = feature_map_size[1] 28 | grid_height = feature_map_size[0] 29 | 30 | shift_x = np.arange(grid_width) * anchor_stride 31 | shift_y = np.arange(grid_height) * anchor_stride 32 | 33 | shift_x, shift_y = np.meshgrid(shift_x, shift_y) 34 | 35 | shift_x = shift_x.reshape([-1]) 36 | shift_y = shift_y.reshape([-1]) 37 | 38 | shifts = np.stack( 39 | [shift_x, shift_y, shift_x, shift_y], 40 | axis=0 41 | ) 42 | 43 | shifts = shifts.T 44 | 45 | num_anchors = anchors_reference.shape[0] 46 | num_anchor_points = shifts.shape[0] 47 | 48 | all_anchors = ( 49 | anchors_reference.reshape((1, num_anchors, 4)) + 50 | np.transpose( 51 | shifts.reshape((1, num_anchor_points, 4)), 52 | axes=(1, 0, 2) 53 | ) 54 | ) 55 | 56 | all_anchors = np.reshape( 57 | all_anchors, (num_anchors * num_anchor_points, 4) 58 | ) 59 | 60 | return all_anchors 61 | 62 | 63 | if __name__ == '__main__': 64 | from luminoth.utils.anchors import generate_anchors_reference 65 | 66 | ref = generate_anchors_reference( 67 | base_size=16, ratios=[0.5, 1, 2], scales=2**np.arange(3, 6) 68 | ) 69 | -------------------------------------------------------------------------------- /luminoth/utils/test/gt_boxes.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | 4 | def generate_gt_boxes(total_boxes, image_size, min_size=10, 5 | total_classes=None): 6 | """ 7 | Generate `total_boxes` fake (but consistent) ground-truth boxes for an 8 | image of size `image_size` (height, width). 9 | 10 | Args: 11 | total_boxes (int): The total number of boxes. 12 | image_size (tuple): Size of the fake image. 13 | 14 | Returns: 15 | gt_boxes (np.array): With shape [total_boxes, 4], or [total_boxes, 5] with a class column when `total_classes` is given. 16 | """ 17 | 18 | image_size = np.array(image_size) 19 | 20 | assert (image_size > min_size).all(), \ 21 | 'Can\'t generate gt_boxes that small for that image size' 22 | 23 | # Generate random sizes for each box. 24 | max_size = np.min(image_size) - min_size 25 | random_sizes = np.random.randint( 26 | low=min_size, high=max_size, 27 | size=(total_boxes, 2) 28 | ) 29 | 30 | # Generate random starting points for bounding boxes (left top point) 31 | random_leftop = np.random.randint( 32 | low=0, high=max_size, size=(total_boxes, 2) 33 | ) 34 | 35 | rightbottom = np.minimum( 36 | random_sizes + random_leftop, 37 | np.array(image_size) - 1 38 | ) 39 | 40 | gt_boxes = np.column_stack((random_leftop, rightbottom)) 41 | 42 | # TODO: Remove asserts after writing tests for this function. 
43 | assert (gt_boxes[:, 0] < gt_boxes[:, 2]).all(), \ 44 | 'Gt boxes without consistent Xs' 45 | assert (gt_boxes[:, 1] < gt_boxes[:, 3]).all(), \ 46 | 'Gt boxes without consistent Ys' 47 | 48 | if total_classes: 49 | random_classes = np.random.randint( 50 | low=0, high=total_classes - 1, size=(total_boxes, 1)) 51 | gt_boxes = np.column_stack((gt_boxes, random_classes)) 52 | 53 | assert (gt_boxes[:, 1] < total_classes).all(), \ 54 | 'Gt boxes without consistent classes' 55 | 56 | return gt_boxes 57 | -------------------------------------------------------------------------------- /luminoth/utils/training.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | 3 | from luminoth.utils.vars import variable_summaries 4 | 5 | 6 | OPTIMIZERS = { 7 | 'adam': tf.train.AdamOptimizer, 8 | 'momentum': tf.train.MomentumOptimizer, 9 | 'gradient_descent': tf.train.GradientDescentOptimizer, 10 | 'rmsprop': tf.train.RMSPropOptimizer, 11 | } 12 | 13 | LEARNING_RATE_DECAY_METHODS = { 14 | 'polynomial_decay': tf.train.polynomial_decay, 15 | 'piecewise_constant': tf.train.piecewise_constant, 16 | 'exponential_decay': tf.train.exponential_decay, 17 | } 18 | 19 | 20 | def get_learning_rate(train_config, global_step=None): 21 | """ 22 | Get learning rate from train config. 23 | 24 | TODO: Better config usage. 25 | 26 | Returns: 27 | learning_rate: TensorFlow variable. 28 | 29 | Raises: 30 | ValueError: When the method used is not available. 31 | """ 32 | lr_config = train_config.learning_rate.copy() 33 | decay_method = lr_config.pop('decay_method', None) 34 | 35 | if not decay_method or decay_method == 'none': 36 | return lr_config.get('value') or lr_config.get('learning_rate') 37 | 38 | if decay_method not in LEARNING_RATE_DECAY_METHODS: 39 | raise ValueError('Invalid learning_rate method "{}"'.format( 40 | decay_method 41 | )) 42 | 43 | if decay_method == 'piecewise_constant': 44 | lr_config['x'] = global_step 45 | else: 46 | lr_config['global_step'] = global_step 47 | 48 | # boundaries, when used, must be the same type as global_step (int64). 49 | if 'boundaries' in lr_config: 50 | lr_config['boundaries'] = [ 51 | tf.cast(b, tf.int64) for b in lr_config['boundaries'] 52 | ] 53 | 54 | decay_function = LEARNING_RATE_DECAY_METHODS[decay_method] 55 | learning_rate = decay_function( 56 | **lr_config 57 | ) 58 | 59 | tf.summary.scalar('losses/learning_rate', learning_rate) 60 | 61 | return learning_rate 62 | 63 | 64 | def get_optimizer(train_config, global_step=None): 65 | """ 66 | Get optimizer from train config. 67 | 68 | Raises: 69 | ValueError: When the optimizer type or learning_rate method are not 70 | valid. 71 | """ 72 | learning_rate = get_learning_rate(train_config, global_step) 73 | optimizer_config = train_config.optimizer.copy() 74 | optimizer_type = optimizer_config.pop('type') 75 | if optimizer_type not in OPTIMIZERS: 76 | raise ValueError( 77 | 'Invalid optimizer type "{}"'.format(optimizer_type) 78 | ) 79 | 80 | optimizer_cls = OPTIMIZERS[optimizer_type] 81 | return optimizer_cls(learning_rate, **optimizer_config) 82 | 83 | 84 | def clip_gradients_by_norm(grads_and_vars, add_to_summary=False): 85 | if add_to_summary: 86 | for grad, var in grads_and_vars: 87 | if grad is not None: 88 | variable_summaries( 89 | grad, 'grad/{}'.format(var.name[:-2]), 'full' 90 | ) 91 | variable_summaries( 92 | tf.abs(grad), 'grad/abs/{}'.format(var.name[:-2]), 'full' 93 | ) 94 | 95 | # Clip by norm. Grad can be null when not training some modules. 
96 | with tf.name_scope('clip_gradients_by_norm'): 97 | grads_and_vars = [ 98 | ( 99 | tf.check_numerics( 100 | tf.clip_by_norm(gv[0], 10.), 101 | 'Invalid gradient' 102 | ), gv[1] 103 | ) 104 | if gv[0] is not None else gv 105 | for gv in grads_and_vars 106 | ] 107 | 108 | if add_to_summary: 109 | for grad, var in grads_and_vars: 110 | if grad is not None: 111 | variable_summaries( 112 | grad, 'clipped_grad/{}'.format(var.name[:-2]), 'full' 113 | ) 114 | variable_summaries( 115 | tf.abs(grad), 116 | 'clipped_grad/{}'.format(var.name[:-2]), 117 | 'full' 118 | ) 119 | 120 | return grads_and_vars 121 | -------------------------------------------------------------------------------- /luminoth/utils/vars.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | 3 | 4 | VALID_INITIALIZERS = { 5 | 'truncated_normal_initializer': tf.truncated_normal_initializer, 6 | 'variance_scaling_initializer': ( 7 | tf.contrib.layers.variance_scaling_initializer 8 | ), 9 | 'random_normal_initializer': tf.random_normal_initializer, 10 | 'xavier_initializer': tf.contrib.layers.xavier_initializer, 11 | } 12 | 13 | 14 | VAR_LOG_LEVELS = { 15 | 'full': ['variable_summaries_full'], 16 | 'reduced': ['variable_summaries_reduced', 'variable_summaries_full'], 17 | } 18 | 19 | 20 | def variable_summaries(var, name, collection_key): 21 | """Attach a lot of summaries to a Tensor (for TensorBoard visualization). 22 | 23 | Args: 24 | - var: Tensor for variable from which we want to log. 25 | - name: Variable name. 26 | - collection_key: Collection to save the summary to, can be any key of 27 | `VAR_LOG_LEVELS`. 28 | """ 29 | if collection_key not in VAR_LOG_LEVELS.keys(): 30 | raise ValueError('"{}" not in `VAR_LOG_LEVELS`'.format(collection_key)) 31 | collections = VAR_LOG_LEVELS[collection_key] 32 | 33 | with tf.name_scope(name): 34 | mean = tf.reduce_mean(var) 35 | tf.summary.scalar('mean', mean, collections) 36 | num_params = tf.reduce_prod(tf.shape(var)) 37 | tf.summary.scalar('num_params', num_params, collections) 38 | with tf.name_scope('stddev'): 39 | stddev = tf.sqrt(tf.reduce_mean(tf.square(var - mean))) 40 | tf.summary.scalar('stddev', stddev, collections) 41 | tf.summary.scalar('max', tf.reduce_max(var), collections) 42 | tf.summary.scalar('min', tf.reduce_min(var), collections) 43 | tf.summary.histogram('histogram', var, collections) 44 | tf.summary.scalar('sparsity', tf.nn.zero_fraction(var), collections) 45 | 46 | 47 | def layer_summaries(layer, collection_key): 48 | layer_name = layer.module_name 49 | if hasattr(layer, '_w'): 50 | variable_summaries(layer._w, '{}/W'.format(layer_name), collection_key) 51 | 52 | if hasattr(layer, '_b'): 53 | variable_summaries(layer._b, '{}/b'.format(layer_name), collection_key) 54 | 55 | 56 | def get_initializer(initializer_config, seed=None): 57 | """Get variable initializer. 58 | 59 | Args: 60 | - initializer_config: Configuration for initializer. 61 | 62 | Returns: 63 | initializer: Instantiated variable initializer. 
64 | """ 65 | 66 | if 'type' not in initializer_config: 67 | raise ValueError('Initializer missing type.') 68 | 69 | if initializer_config.type not in VALID_INITIALIZERS: 70 | raise ValueError('Initializer "{}" is not valid.'.format( 71 | initializer_config.type)) 72 | 73 | config = initializer_config.copy() 74 | initializer = VALID_INITIALIZERS[config.pop('type')] 75 | config['seed'] = seed 76 | 77 | return initializer(**config) 78 | 79 | 80 | def get_activation_function(activation_function): 81 | if not activation_function: 82 | return lambda a: a 83 | 84 | try: 85 | return getattr(tf.nn, activation_function) 86 | except AttributeError: 87 | raise ValueError( 88 | 'Invalid activation function "{}"'.format(activation_function)) 89 | -------------------------------------------------------------------------------- /pdfcreation.py: -------------------------------------------------------------------------------- 1 | from fpdf import FPDF 2 | from PIL import Image 3 | import glob 4 | import os 5 | ''' 6 | image_directory = '/Users/myuser/pics/' 7 | extensions = ('*.jpg','*.png','*.gif') 8 | pdf = FPDF() 9 | imagelist=['c:\\temp\\0.7483.png'] 10 | 11 | for imageFile in imagelist: 12 | cover = Image.open(imageFile) 13 | width, height = cover.size 14 | pdf.add_page() 15 | # 1 px = 0.264583 mm (FPDF default is mm) 16 | pdf.image(imageFile, 0, 0, float(width * 0.264583), float(height * 0.264583)) 17 | pdf.output('c:\\temp\\' + "file.pdf", "F") 18 | 19 | ''' 20 | from tabula import read_pdf 21 | 22 | df = read_pdf("D:\\Sargunan\\Table\\001.pdf", area = (165.0,72.0,635.794,524.944), ) 23 | #df = read_pdf("c:\\temp\\data.pdf") 24 | print (df) -------------------------------------------------------------------------------- /run.py: -------------------------------------------------------------------------------- 1 | # -*- encoding: utf-8 -*- 2 | 3 | from webapp import app 4 | #from cocoapp.cocomodel import * # clunky for now - needs to be this path for unpickling model 5 | 6 | if __name__ == "__main__": 7 | app.run(host="0.0.0.0", debug=app.config['DEBUG'], port=app.config['PORT'], use_reloader=False) 8 | -------------------------------------------------------------------------------- /setup.cfg: -------------------------------------------------------------------------------- 1 | [bdist_wheel] 2 | universal = 1 3 | 4 | [check-manifest] 5 | ignore = 6 | .travis.yml 7 | .editorconfig 8 | 9 | [metadata] 10 | license_file = LICENSE 11 | 12 | [doc8] 13 | max-line-length=99 14 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | import codecs 2 | import os 3 | import re 4 | import sys 5 | 6 | from setuptools import find_packages, setup 7 | 8 | 9 | # ------------------------------------------------------------- 10 | 11 | NAME = 'luminoth' 12 | PACKAGES = find_packages() 13 | META_PATH = os.path.join('luminoth', '__init__.py') 14 | KEYWORDS = [ 15 | 'tensorflow', 'computer vision', 'object detection', 'toolkit', 'deep learning', 16 | 'faster rcnn' 17 | ] 18 | CLASSIFIERS = [ 19 | 'Development Status :: 2 - Pre-Alpha', 20 | 'Intended Audience :: Developers', 21 | 'Intended Audience :: Education', 22 | 'Intended Audience :: Science/Research', 23 | 'Natural Language :: English', 24 | 'License :: OSI Approved :: BSD License', 25 | 'Topic :: Scientific/Engineering :: Mathematics', 26 | 'Topic :: Software Development :: Libraries :: Python Modules', 27 | 'Topic :: Software Development :: 
Libraries', 28 | 'Programming Language :: Python', 29 | 'Programming Language :: Python :: 2', 30 | 'Programming Language :: Python :: 2.7', 31 | 'Programming Language :: Python :: 3', 32 | 'Programming Language :: Python :: 3.4', 33 | 'Programming Language :: Python :: 3.5', 34 | 'Programming Language :: Python :: 3.6', 35 | ] 36 | 37 | INSTALL_REQUIRES = [ 38 | 'Pillow', 39 | 'lxml', 40 | 'numpy', 41 | 'requests', 42 | 'scikit-video', 43 | 'Flask>=0.12', 44 | 'PyYAML>=3.12,<4', 45 | 'click>=6.7,<7', 46 | # Sonnet 1.25+ requires tensorflow_probability which we do not need here. 47 | 'dm-sonnet>=1.12,<=1.23', 48 | # Can remove easydict <=1.8 pin after 49 | # https://github.com/makinacorpus/easydict/pull/14 is merged. 50 | 'easydict>=1.7,<=1.8', 51 | 'six>=1.11', 52 | ] 53 | TEST_REQUIRES = [] 54 | 55 | # ------------------------------------------------------------- 56 | 57 | HERE = os.path.abspath(os.path.dirname(__file__)) 58 | 59 | 60 | def read(*parts): 61 | """ 62 | Build an absolute path from *parts* and return the contents of the 63 | resulting file. Assume UTF-8 encoding. 64 | """ 65 | with codecs.open(os.path.join(HERE, *parts), 'rb', 'utf-8') as f: 66 | return f.read() 67 | 68 | 69 | META_FILE = read(META_PATH) 70 | 71 | 72 | def find_meta(meta): 73 | """ 74 | Extract __*meta*__ from META_FILE. 75 | """ 76 | meta_match = re.search( 77 | r"^__{meta}__ = ['\"]([^'\"]*)['\"]".format(meta=meta), 78 | META_FILE, re.M 79 | ) 80 | if meta_match: 81 | return meta_match.group(1) 82 | raise RuntimeError('Unable to find __{meta}__ string.'.format(meta=meta)) 83 | 84 | 85 | MIN_TF_VERSION = find_meta('min_tf_version') 86 | 87 | 88 | setup( 89 | name=NAME, 90 | version=find_meta('version'), 91 | description=find_meta('description'), 92 | long_description=read('README.md'), 93 | license=find_meta('license'), 94 | author=find_meta('author'), 95 | author_email=find_meta('email'), 96 | maintainer=find_meta('author'), 97 | maintainer_email=find_meta('email'), 98 | url=find_meta('uri'), 99 | packages=PACKAGES, 100 | classifiers=CLASSIFIERS, 101 | include_package_data=True, 102 | setup_requires=[ 103 | ], 104 | install_requires=INSTALL_REQUIRES, 105 | test_require=TEST_REQUIRES, 106 | extras_require={ 107 | 'tf': ['tensorflow>={}'.format(MIN_TF_VERSION)], 108 | 'tf-gpu': ['tensorflow-gpu>='.format(MIN_TF_VERSION)], 109 | 'gcloud': [ 110 | 'google-api-python-client>=1.6.2,<2', 111 | 'google-cloud-storage>=1.2.0', 112 | 'oauth2client>=4.1.2', 113 | # See https://github.com/tryolabs/luminoth/issues/147 114 | 'pyasn1>=0.4.2', 115 | ] 116 | }, 117 | entry_points=""" 118 | [console_scripts] 119 | lumi=luminoth:cli 120 | """, 121 | python_requires='>=2.7, !=3.0.*, !=3.1.*, !=3.2.*', 122 | ) 123 | -------------------------------------------------------------------------------- /tox.ini: -------------------------------------------------------------------------------- 1 | [tox] 2 | envlist = py{27,34,35,36} 3 | skipsdist = true 4 | 5 | [testenv] 6 | passenv = TOXENV CI TRAVIS TRAVIS_* 7 | deps= 8 | flake8 9 | codecov 10 | check-manifest 11 | tensorflow 12 | commands = 13 | check-manifest 14 | flake8 luminoth 15 | pip install -e . 
16 | coverage run -m unittest discover -s luminoth -p "*_test.py" 17 | codecov -e TOXENV 18 | -------------------------------------------------------------------------------- /untitled5.py: -------------------------------------------------------------------------------- 1 | """ 2 | @file morph_lines_detection.py 3 | @brief Use morphology transformations for extracting horizontal and vertical lines sample code 4 | """ 5 | import numpy as np 6 | import sys 7 | import cv2 as cv 8 | def show_wait_destroy(winname, img): 9 | cv.imshow(winname, img) 10 | cv.moveWindow(winname, 500, 0) 11 | cv.waitKey(0) 12 | cv.destroyWindow(winname) 13 | def main(argv): 14 | # [load_image] 15 | # Check number of arguments 16 | 17 | argv = "c:\\temp\\0.7483.png" 18 | # Load the image 19 | src = cv.imread(argv, cv.IMREAD_COLOR) 20 | # Check if image is loaded fine 21 | if src is None: 22 | print ('Error opening image: ' + argv) 23 | return -1 24 | # Show source image 25 | cv.imshow("src", src) 26 | # [load_image] 27 | # [gray] 28 | # Transform source image to gray if it is not already 29 | if len(src.shape) != 2: 30 | gray = cv.cvtColor(src, cv.COLOR_BGR2GRAY) 31 | else: 32 | gray = src 33 | # Show gray image 34 | show_wait_destroy("gray", gray) 35 | # [gray] 36 | # [bin] 37 | # Apply adaptiveThreshold at the bitwise_not of gray, notice the ~ symbol 38 | gray = cv.bitwise_not(gray) 39 | bw = cv.adaptiveThreshold(gray, 255, cv.ADAPTIVE_THRESH_MEAN_C, \ 40 | cv.THRESH_BINARY, 15, -2) 41 | # Show binary image 42 | show_wait_destroy("binary", bw) 43 | # [bin] 44 | # [init] 45 | # Create the images that will use to extract the horizontal and vertical lines 46 | horizontal = np.copy(bw) 47 | vertical = np.copy(bw) 48 | # [init] 49 | # [horiz] 50 | # Specify size on horizontal axis 51 | cols = horizontal.shape[1] 52 | horizontal_size = cols / 30 53 | horizontal_size = int(horizontal_size) 54 | # Create structure element for extracting horizontal lines through morphology operations 55 | horizontalStructure = cv.getStructuringElement(cv.MORPH_RECT, (horizontal_size, 1)) 56 | # Apply morphology operations 57 | horizontal = cv.erode(horizontal, horizontalStructure) 58 | horizontal = cv.dilate(horizontal, horizontalStructure) 59 | # Show extracted horizontal lines 60 | show_wait_destroy("horizontal", horizontal) 61 | # [horiz] 62 | # [vert] 63 | # Specify size on vertical axis 64 | rows = vertical.shape[0] 65 | verticalsize = rows / 30 66 | verticalsize = int (verticalsize) 67 | # Create structure element for extracting vertical lines through morphology operations 68 | verticalStructure = cv.getStructuringElement(cv.MORPH_RECT, (1, verticalsize)) 69 | # Apply morphology operations 70 | vertical = cv.erode(vertical, verticalStructure) 71 | vertical = cv.dilate(vertical, verticalStructure) 72 | # Show extracted vertical lines 73 | show_wait_destroy("vertical", vertical) 74 | # [vert] 75 | # [smooth] 76 | # Inverse vertical image 77 | vertical = cv.bitwise_not(vertical) 78 | show_wait_destroy("vertical_bit", vertical) 79 | ''' 80 | Extract edges and smooth image according to the logic 81 | 1. extract edges 82 | 2. dilate(edges) 83 | 3. src.copyTo(smooth) 84 | 4. blur smooth img 85 | 5. 
smooth.copyTo(src, edges) 86 | ''' 87 | # Step 1 88 | edges = cv.adaptiveThreshold(vertical, 255, cv.ADAPTIVE_THRESH_MEAN_C, \ 89 | cv.THRESH_BINARY, 3, -2) 90 | show_wait_destroy("edges", edges) 91 | # Step 2 92 | kernel = np.ones((2, 2), np.uint8) 93 | edges = cv.dilate(edges, kernel) 94 | show_wait_destroy("dilate", edges) 95 | # Step 3 96 | smooth = np.copy(vertical) 97 | # Step 4 98 | smooth = cv.blur(smooth, (2, 2)) 99 | # Step 5 100 | (rows, cols) = np.where(edges != 0) 101 | vertical[rows, cols] = smooth[rows, cols] 102 | # Show final result 103 | show_wait_destroy("smooth - final", vertical) 104 | # [smooth] 105 | return 0 106 | if __name__ == "__main__": 107 | main(sys.argv[1:]) -------------------------------------------------------------------------------- /untitled6.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Created on Wed Nov 21 19:55:37 2018 4 | 5 | @author: Murali 6 | """ 7 | 8 | import cv2 9 | import numpy as np 10 | img = cv2.imread("c:\\temp\\0.7483.png",0) 11 | laplacian = cv2.Laplacian(img,cv2.CV_8UC1) # Laplacian Edge Detection 12 | minLineLength = 900 13 | maxLineGap = 100 14 | lines = cv2.HoughLinesP(laplacian,1,np.pi/180,100,minLineLength,maxLineGap) 15 | for line in lines: 16 | for x1,y1,x2,y2 in line: 17 | cv2.line(img,(x1,y1),(x2,y2),(255,255,255),1) 18 | cv2.imwrite('Written_Back_Results.jpg',img) --------------------------------------------------------------------------------