├── .dockerignore ├── .gitignore ├── LICENSE ├── bin └── .gitkeep ├── deepfigures-local.env ├── deepfigures ├── __init__.py ├── data_generation │ ├── arxiv_pipeline.py │ └── pubmed_pipeline.py ├── extraction │ ├── __init__.py │ ├── datamodels.py │ ├── detection.py │ ├── exceptions.py │ ├── figure_utils.py │ ├── pdffigures_wrapper.py │ ├── pipeline.py │ ├── renderers.py │ ├── tensorbox_fourchannel.py │ └── tests │ │ ├── test_pipeline.py │ │ └── test_renderers.py ├── settings.py └── utils │ ├── __init__.py │ ├── config.py │ ├── file_util.py │ ├── image_util.py │ ├── misc.py │ ├── settings_utils.py │ ├── stringmatch │ ├── __init__.py │ ├── stringmatch.cpp │ ├── stringmatch_builder.py │ └── test_stringmatch.py │ ├── test.py │ ├── tests │ ├── data │ │ ├── bigfile.txt │ │ └── chunks.txt │ └── test_misc.py │ └── traits.py ├── dockerfiles ├── cpu │ └── Dockerfile └── gpu │ └── Dockerfile ├── manage.py ├── models └── tensorbox_overfeat.json ├── readme.md ├── requirements.txt ├── scripts ├── __init__.py ├── build.py ├── detectfigures.py ├── generatearxiv.py ├── generatepubmed.py ├── rundetection.py ├── runtests.py └── testunits.py ├── setup.py ├── tests └── data │ ├── endtoend │ ├── _work_tests_data_endtoend_paper.pdf-result.json │ └── paper.pdf │ └── pdfrenderer │ ├── ghostscript-renderings │ ├── paper.pdf-dpi100-page0001.jpg │ ├── paper.pdf-dpi100-page0001.png │ ├── paper.pdf-dpi100-page0002.jpg │ ├── paper.pdf-dpi100-page0002.png │ ├── paper.pdf-dpi100-page0003.jpg │ ├── paper.pdf-dpi100-page0003.png │ ├── paper.pdf-dpi100-page0004.jpg │ ├── paper.pdf-dpi100-page0004.png │ ├── paper.pdf-dpi100-page0005.jpg │ ├── paper.pdf-dpi100-page0005.png │ ├── paper.pdf-dpi100-page0006.jpg │ └── paper.pdf-dpi100-page0006.png │ ├── paper.pdf │ └── pdfbox-renderings │ ├── paper.pdf-dpi100-page0001.jpg │ ├── paper.pdf-dpi100-page0001.png │ ├── paper.pdf-dpi100-page0002.jpg │ ├── paper.pdf-dpi100-page0002.png │ ├── paper.pdf-dpi100-page0003.jpg │ ├── paper.pdf-dpi100-page0003.png │ ├── paper.pdf-dpi100-page0004.jpg │ ├── paper.pdf-dpi100-page0004.png │ ├── paper.pdf-dpi100-page0005.jpg │ ├── paper.pdf-dpi100-page0005.png │ ├── paper.pdf-dpi100-page0006.jpg │ └── paper.pdf-dpi100-page0006.png └── vendor └── tensorboxresnet ├── .gitignore ├── .travis.yml ├── README.md ├── download_data.sh ├── license ├── TENSORBOX_LICENSE.txt └── TENSORFLOW_LICENSE.txt ├── requirements.in ├── setup.py └── tensorboxresnet ├── __init__.py ├── train.py └── utils ├── Makefile ├── __init__.py ├── annolist ├── AnnoList_pb2.py ├── AnnotationLib.py ├── LICENSE_FOR_THIS_FOLDER ├── MatPlotter.py ├── PalLib.py ├── __init__.py ├── doRPC.py ├── ma_utils.py └── plotSimple.py ├── data_utils.py ├── googlenet_load.py ├── hungarian ├── hungarian.cc ├── hungarian.cpp └── hungarian.hpp ├── rect.py ├── slim_nets ├── __init__.py ├── inception_v1.py ├── resnet_utils.py └── resnet_v1.py ├── stitch_rects.cpp ├── stitch_rects.hpp ├── stitch_wrapper.py ├── stitch_wrapper.pyx └── train_utils.py /.dockerignore: -------------------------------------------------------------------------------- 1 | ** 2 | !deepfigures 3 | !manage.py 4 | !readme.md 5 | !requirements.txt 6 | !bin 7 | !scripts 8 | !settings.py 9 | !setup.py 10 | !tests 11 | !vendor 12 | !weights -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # PDFFigures2 2 | pdffigures2/ 3 | 4 | # python files 5 | *__pycache__ 6 | *pyc 7 | *pyo 8 | 9 | # editor files 10 | *~ 11 | \#*\# 
12 | 13 | # build files 14 | *.so 15 | *.o -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 
61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 
122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 
179 |  180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 |  189 | Copyright [yyyy] [name of copyright owner] 190 |  191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 |  195 | http://www.apache.org/licenses/LICENSE-2.0 196 |  197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. -------------------------------------------------------------------------------- /bin/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/deepfigures-open/a22287846a919fb3e42ec4e6571021c7d4420208/bin/.gitkeep -------------------------------------------------------------------------------- /deepfigures-local.env: -------------------------------------------------------------------------------- 1 | # Environment Variables for running DeepFigures locally 2 |  3 | AWS_ACCESS_KEY_ID 4 | AWS_SECRET_KEY 5 | AWS_SECRET_ACCESS_KEY 6 | AWS_DEFAULT_REGION -------------------------------------------------------------------------------- /deepfigures/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/deepfigures-open/a22287846a919fb3e42ec4e6571021c7d4420208/deepfigures/__init__.py -------------------------------------------------------------------------------- /deepfigures/extraction/__init__.py: -------------------------------------------------------------------------------- 1 | """Code for extracting figures from PDFs. 2 |  3 | This subpackage implements the main functionality for deepfigures: 4 | running deep models to detect figures, along with supporting code to 5 | render PDF pages. 6 | """ 7 | -------------------------------------------------------------------------------- /deepfigures/extraction/datamodels.py: -------------------------------------------------------------------------------- 1 | """Data models for deepfigures. 2 |  3 | This module contains models for the various kinds of data handled by 4 | the deepfigures package.
5 | """ 6 |  7 | from typing import List, Optional, Tuple, Union 8 |  9 | from matplotlib import patches 10 | import numpy as np 11 |  12 | from deepfigures.utils import traits 13 | from deepfigures.utils.config import JsonSerializable 14 |  15 | from deepfigures.settings import (DEFAULT_INFERENCE_DPI, BACKGROUND_COLOR) 16 |  17 | # A box of the form (x1, y1, x2, y2) in pixel coordinates 18 | IntBox = Tuple[int, int, int, int] 19 | ImageSize = Union[Tuple[float, float], Tuple[float, float, float] 20 | ] # Page sizes may have a third color channel 21 |  22 |  23 | class BoxClass(JsonSerializable): 24 | x1 = traits.Float(allow_none=False) 25 | y1 = traits.Float(allow_none=False) 26 | x2 = traits.Float(allow_none=False) 27 | y2 = traits.Float(allow_none=False) 28 |  29 | @staticmethod 30 | def from_tuple(t: Tuple[float, float, float, float]) -> 'BoxClass': 31 | return BoxClass(x1=t[0], y1=t[1], x2=t[2], y2=t[3]) 32 |  33 | @staticmethod 34 | def from_tensorbox_rect(r) -> 'BoxClass': 35 | return BoxClass( 36 | x1=r.cx - .5 * r.width, 37 | x2=r.cx + .5 * r.width, 38 | y1=r.cy - .5 * r.height, 39 | y2=r.cy + .5 * r.height 40 | ) 41 |  42 | @staticmethod 43 | def from_xml(word, target_dpi=DEFAULT_INFERENCE_DPI) -> 'BoxClass': 44 | scale_factor = target_dpi / 72 # pdftotext XML coordinates are at 72 DPI 45 | return BoxClass( 46 | x1=float(word.get('xMin')), 47 | y1=float(word.get('yMin')), 48 | x2=float(word.get('xMax')), 49 | y2=float(word.get('yMax')) 50 | ).rescale(scale_factor) 51 |  52 | def get_width(self) -> float: 53 | return self.x2 - self.x1 54 |  55 | def get_height(self) -> float: 56 | return self.y2 - self.y1 57 |  58 | def get_plot_box( 59 | self, color: str='red', fill: bool=False, **kwargs 60 | ) -> patches.Rectangle: 61 | """Return a rectangle patch for plotting""" 62 | return patches.Rectangle( 63 | (self.x1, self.y1), 64 | self.get_width(), 65 | self.get_height(), 66 | edgecolor=color, 67 | fill=fill, 68 | **kwargs 69 | ) 70 |  71 | def get_area(self) -> float: 72 | width = self.get_width() 73 | height = self.get_height() 74 | if width <= 0 or height <= 0: 75 | return 0 76 | else: 77 | return width * height 78 |  79 | def rescale(self, ratio: float) -> 'BoxClass': 80 | return BoxClass( 81 | x1=self.x1 * ratio, 82 | y1=self.y1 * ratio, 83 | x2=self.x2 * ratio, 84 | y2=self.y2 * ratio 85 | ) 86 |  87 | def resize_by_page( 88 | self, cur_page_size: ImageSize, target_page_size: ImageSize 89 | ) -> 'BoxClass': 90 | (orig_h, orig_w) = cur_page_size[:2] 91 | (target_h, target_w) = target_page_size[:2] 92 | height_scale = target_h / orig_h 93 | width_scale = target_w / orig_w 94 | return BoxClass( 95 | x1=self.x1 * width_scale, 96 | y1=self.y1 * height_scale, 97 | x2=self.x2 * width_scale, 98 | y2=self.y2 * height_scale 99 | ) 100 |  101 | def get_rounded(self) -> IntBox: 102 | return ( 103 | int(round(self.x1)), int(round(self.y1)), int(round(self.x2)), 104 | int(round(self.y2)) 105 | ) 106 |  107 | def crop_image(self, image: np.ndarray) -> np.ndarray: 108 | """Return image cropped to the portion contained in box.""" 109 | (x1, y1, x2, y2) = self.get_rounded() 110 | return image[y1:y2, x1:x2] 111 |  112 | def crop_whitespace_edges(self, im: np.ndarray) -> Optional['BoxClass']: 113 | (rounded_x1, rounded_y1, rounded_x2, rounded_y2) = self.get_rounded() 114 | white_im = im.copy() 115 | white_im[:, :rounded_x1] = BACKGROUND_COLOR 116 | white_im[:, rounded_x2:] = BACKGROUND_COLOR 117 | white_im[:rounded_y1, :] = BACKGROUND_COLOR 118 | white_im[rounded_y2:, :] = BACKGROUND_COLOR 119 | is_white = (white_im == BACKGROUND_COLOR).all(axis=2) 120 | 
nonwhite_columns = np.where(is_white.all(axis=0) != 1)[0] 121 | nonwhite_rows = np.where(is_white.all(axis=1) != 1)[0] 122 | if len(nonwhite_columns) == 0 or len(nonwhite_rows) == 0: 123 | return None 124 | x1 = min(nonwhite_columns) 125 | x2 = max(nonwhite_columns) + 1 126 | y1 = min(nonwhite_rows) 127 | y2 = max(nonwhite_rows) + 1 128 | assert x1 >= rounded_x1, 'ERROR: x1:%d box[0]:%d' % (x1, rounded_x1) 129 | assert y1 >= rounded_y1, 'ERROR: y1:%d box[1]:%d' % (y1, rounded_y1) 130 | assert x2 <= rounded_x2, 'ERROR: x2:%d box[2]:%d' % (x2, rounded_x2) 131 | assert y2 <= rounded_y2, 'ERROR: y2:%d box[3]:%d' % (y2, rounded_y2) 132 | # np.where returns np.int64, cast back to python types 133 | return BoxClass(x1=float(x1), y1=float(y1), x2=float(x2), y2=float(y2)) 134 | 135 | def distance_to_other(self, other: 'BoxClass') -> float: 136 | x_distance = max([0, self.x1 - other.x2, other.x1 - self.x2]) 137 | y_distance = max([0, self.y1 - other.y2, other.y1 - self.y2]) 138 | return np.linalg.norm([x_distance, y_distance], 2) 139 | 140 | def intersection(self, other: 'BoxClass') -> float: 141 | intersection = BoxClass( 142 | x1=max(self.x1, other.x1), 143 | y1=max(self.y1, other.y1), 144 | x2=min(self.x2, other.x2), 145 | y2=min(self.y2, other.y2) 146 | ) 147 | if intersection.x2 >= intersection.x1 and intersection.y2 >= intersection.y1: 148 | return intersection.get_area() 149 | else: 150 | return 0 151 | 152 | def iou(self, other: 'BoxClass') -> float: 153 | intersection = self.intersection(other) 154 | union = self.get_area() + other.get_area() - intersection 155 | if union == 0: 156 | return 0 157 | else: 158 | return intersection / union 159 | 160 | def contains_box(self, other: 'BoxClass', overlap_threshold=.5) -> bool: 161 | if other.get_area() == 0: 162 | return False 163 | else: 164 | return self.intersection(other 165 | ) / other.get_area() >= overlap_threshold 166 | 167 | def expand_box(self, amount: float) -> 'BoxClass': 168 | return BoxClass( 169 | x1=self.x1 - amount, 170 | y1=self.y1 - amount, 171 | x2=self.x2 + amount, 172 | y2=self.y2 + amount, 173 | ) 174 | 175 | def crop_to_page(self, page_shape: ImageSize) -> 'BoxClass': 176 | page_height, page_width = page_shape[:2] 177 | return BoxClass( 178 | x1=max(self.x1, 0), 179 | y1=max(self.y1, 0), 180 | x2=min(self.x2, page_width), 181 | y2=min(self.y2, page_height), 182 | ) 183 | 184 | 185 | def enclosing_box(boxes: List[BoxClass]) -> BoxClass: 186 | assert len(boxes) > 0 187 | return BoxClass( 188 | x1=min([box.x1 for box in boxes]), 189 | y1=min([box.y1 for box in boxes]), 190 | x2=max([box.x2 for box in boxes]), 191 | y2=max([box.y2 for box in boxes]) 192 | ) 193 | 194 | 195 | class Figure(JsonSerializable): 196 | figure_boundary = traits.Instance(BoxClass) 197 | caption_boundary = traits.Instance(BoxClass) 198 | caption_text = traits.Unicode() 199 | name = traits.Unicode() 200 | page = traits.Int() 201 | figure_type = traits.Unicode() 202 | dpi = traits.Int() 203 | page_width = traits.Int() 204 | page_height = traits.Int() 205 | # URI to cropped image of the figure 206 | uri = traits.Unicode( 207 | default_value=None, allow_none=True) 208 | 209 | def page_size(self) -> Tuple[int, int]: 210 | return self.page_height, self.page_width 211 | 212 | @staticmethod 213 | def from_pf_ann(ann: dict, target_page_size: Tuple[int, int]) -> 'Figure': 214 | """Convert an annotation in the pdffigures format""" 215 | cur_page_size = ann['page_height'], ann['page_width'] 216 | if cur_page_size[0] is None: 217 | cur_page_size = [ 218 | d * 
DEFAULT_INFERENCE_DPI / ann['dpi'] for d in target_page_size 219 | ] 220 | return Figure( 221 | figure_boundary=BoxClass.from_tuple(ann['region_bb']) 222 | .resize_by_page(cur_page_size, target_page_size), 223 | caption_boundary=BoxClass.from_tuple(ann['caption_bb']) 224 | .resize_by_page(cur_page_size, target_page_size), 225 | caption_text=ann['caption'], 226 | name=ann['name'], 227 | page=ann['page'], 228 | figure_type=ann['figure_type'], 229 | page_width=target_page_size[ 230 | 1 231 | ], 232 | page_height=target_page_size[ 233 | 0 234 | ] 235 | ) 236 | 237 | @staticmethod 238 | def from_pf_output(res: dict, target_dpi=DEFAULT_INFERENCE_DPI) -> 'Figure': 239 | """Convert a pdffigures output figure to a Figure object""" 240 | scale_factor = target_dpi / 72 241 | return Figure( 242 | figure_boundary=BoxClass.from_dict(res['regionBoundary'] 243 | ).rescale(scale_factor), 244 | caption_boundary=BoxClass.from_dict(res['captionBoundary']) 245 | .rescale(scale_factor), 246 | caption_text=res['caption'], 247 | name=res['name'], 248 | page=res['page'], 249 | figure_type=res['figType'] 250 | ) 251 | 252 | 253 | class CaptionOnly(JsonSerializable): 254 | caption_boundary = traits.Instance(BoxClass) 255 | caption_text = traits.Unicode() 256 | name = traits.Unicode() 257 | page = traits.Int() 258 | figure_type = traits.Unicode() 259 | dpi = traits.Int() 260 | 261 | 262 | class PdfDetectionResult(JsonSerializable): 263 | pdf = traits.Unicode() 264 | figures = traits.List(traits.Instance(Figure)) 265 | dpi = traits.Int() 266 | raw_detected_boxes = traits.List( 267 | traits.List(traits.Instance(BoxClass)), allow_none=True 268 | ) # type: Optional[List[List[BoxClass]]] 269 | raw_pdffigures_output = traits.Dict( 270 | traits.Any(), allow_none=True 271 | ) # type: Optional[dict] 272 | error = traits.Unicode( 273 | default_value=None, allow_none=True 274 | ) # type: Optional[str] 275 | 276 | 277 | class AuthorInfo(JsonSerializable): 278 | bounding_box = traits.Instance(BoxClass) 279 | name = traits.Unicode() 280 | 281 | 282 | class TitleAndAuthorInfo(JsonSerializable): 283 | pdf = traits.Unicode() 284 | pdf_sha1 = traits.Unicode() 285 | image_path = traits.Unicode() 286 | title_bounding_box = traits.Instance(BoxClass) 287 | title_text = traits.Unicode() 288 | authors = traits.List(traits.Instance(AuthorInfo)) 289 | -------------------------------------------------------------------------------- /deepfigures/extraction/detection.py: -------------------------------------------------------------------------------- 1 | """Functions for detecting and extracting figures.""" 2 | 3 | import os 4 | 5 | from typing import List, Tuple, Iterable 6 | 7 | import cv2 # Need to import OpenCV before tensorflow to avoid import error 8 | from scipy.misc import imread, imsave 9 | import numpy as np 10 | 11 | from deepfigures.extraction import ( 12 | tensorbox_fourchannel, 13 | pdffigures_wrapper, 14 | figure_utils) 15 | from deepfigures import settings 16 | from deepfigures.extraction.datamodels import ( 17 | BoxClass, 18 | Figure, 19 | PdfDetectionResult, 20 | CaptionOnly) 21 | from deepfigures import settings 22 | from deepfigures.utils import ( 23 | file_util, 24 | settings_utils) 25 | from deepfigures.utils import misc 26 | 27 | 28 | PAD_FACTOR = 0.02 29 | TENSORBOX_MODEL = settings.TENSORBOX_MODEL 30 | 31 | 32 | # Holds a cached instantiation of TensorboxCaptionmaskDetector. 
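# The module-level cache below means the TensorFlow graph, session, and model
# weights are built only once per process; as TensorboxCaptionmaskDetector's
# docstring notes, repeatedly opening and closing sessions for each PDF would
# significantly hurt performance.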
33 | _detector = None 34 |  35 |  36 | def get_detector() -> tensorbox_fourchannel.TensorboxCaptionmaskDetector: 37 | """ 38 | Get TensorboxCaptionmaskDetector instance, initializing it on the first call. 39 | """ 40 | global _detector 41 | if not _detector: 42 | _detector = tensorbox_fourchannel.TensorboxCaptionmaskDetector( 43 | **TENSORBOX_MODEL) 44 | return _detector 45 |  46 |  47 | def extract_figures_json( 48 | pdf_path, 49 | page_image_paths, 50 | pdffigures_output, 51 | output_directory): 52 | """Extract information about figures to JSON and save to disk. 53 |  54 | :param str pdf_path: path to the PDF from which to extract 55 | figures. 56 |  57 | :returns: path to the JSON file containing the detection results. 58 | """ 59 | page_images_array = np.array([ 60 | imread(page_image_path) 61 | for page_image_path in page_image_paths 62 | ]) 63 | detector = get_detector() 64 | figure_boxes_by_page = detector.get_detections( 65 | page_images_array) 66 | pdffigures_captions = pdffigures_wrapper.get_captions( 67 | pdffigures_output=pdffigures_output, 68 | target_dpi=settings.DEFAULT_INFERENCE_DPI) 69 | figures_by_page = [] 70 | for page_num in range(len(page_image_paths)): 71 | figure_boxes = figure_boxes_by_page[page_num] 72 | pf_page_captions = [ 73 | caption 74 | for caption in pdffigures_captions 75 | if caption.page == page_num 76 | ] 77 | caption_boxes = [ 78 | caption.caption_boundary 79 | for caption in pf_page_captions 80 | ] 81 | figure_indices, caption_indices = figure_utils.pair_boxes( 82 | figure_boxes, caption_boxes) 83 | page_image = page_images_array[page_num] 84 | pad_pixels = PAD_FACTOR * min(page_image.shape[:2]) 85 | for (figure_idx, caption_idx) in zip(figure_indices, caption_indices): 86 | figures_by_page.append( 87 | Figure( 88 | figure_boundary=figure_boxes[figure_idx].expand_box( 89 | pad_pixels).crop_to_page( 90 | page_image.shape).crop_whitespace_edges( 91 | page_image), 92 | caption_boundary=caption_boxes[caption_idx], 93 | caption_text=pf_page_captions[caption_idx].caption_text, 94 | name=pf_page_captions[caption_idx].name, 95 | figure_type=pf_page_captions[caption_idx].figure_type, 96 | page=page_num)) 97 | pdf_detection_result = PdfDetectionResult( 98 | pdf=pdf_path, 99 | figures=figures_by_page, 100 | dpi=settings.DEFAULT_INFERENCE_DPI, 101 | raw_detected_boxes=figure_boxes_by_page, 102 | raw_pdffigures_output=pdffigures_output) 103 |  104 | output_path = os.path.join( 105 | output_directory, 106 | os.path.basename(pdf_path)[:-4] + 'deepfigures-results.json') 107 | file_util.write_json_atomic( 108 | output_path, 109 | pdf_detection_result.to_dict(), 110 | indent=2, 111 | sort_keys=True) 112 | return output_path 113 | -------------------------------------------------------------------------------- /deepfigures/extraction/exceptions.py: -------------------------------------------------------------------------------- 1 | """Exceptions for deepfigures.""" 2 |  3 |  4 | class LatexException(OSError): 5 | """An exception thrown for errors in rendering LaTeX.""" 6 |  7 | def __init__(self, cmd, code, stdout): 8 | self.cmd = cmd 9 | self.code = code 10 | self.stdout = stdout 11 |  12 | def __str__(self): 13 | return ( 14 | 'Return code: %s, stdout: %s' % 15 | (repr(self.code), repr(self.stdout)) 16 | ) 17 |  18 |  19 | class PDFProcessingError(OSError): 20 | """An exception thrown for errors in processing a PDF.""" 21 | -------------------------------------------------------------------------------- /deepfigures/extraction/figure_utils.py: 
-------------------------------------------------------------------------------- 1 | import collections 2 | import os 3 | import subprocess 4 | from typing import Callable, Dict, Iterable, List, Tuple, TypeVar 5 |  6 | from matplotlib import axes 7 | import matplotlib.pyplot as plt 8 | import numpy as np 9 | import scipy.optimize 10 | from deepfigures.utils import file_util 11 | from deepfigures.extraction.renderers import PDFRenderer 12 | from deepfigures.extraction.exceptions import LatexException 13 | from deepfigures.extraction.datamodels import (BoxClass, Figure) 14 | from deepfigures.settings import DEFAULT_INFERENCE_DPI 15 |  16 |  17 | def call_pdflatex( 18 | src_tex: str, src_dir: str, dest_dir: str, timeout: int=1200 19 | ) -> str: 20 | """ 21 | Call pdflatex on the tex source file src_tex, save its output to dest_dir, and return the path of the 22 | resulting pdf. 23 | """ 24 | # Need to be in the same directory as the file to compile it 25 | file_util.safe_makedirs(dest_dir) 26 | # Shell-escape required due to https://www.scivision.co/pdflatex-error-epstopdf-output-filename-not-allowed-in-restricted-mode/ 27 | cmd = [ 28 | 'pdflatex', '-interaction=nonstopmode', '-shell-escape', 29 | '-output-directory=' + dest_dir, src_tex 30 | ] 31 | # Run twice so that citations are built correctly 32 | # Had some issues getting latexmk to work 33 | try: 34 | subprocess.run( 35 | cmd, stdout=subprocess.PIPE, cwd=src_dir, timeout=timeout 36 | ) 37 | res = subprocess.run( 38 | cmd, stdout=subprocess.PIPE, cwd=src_dir, timeout=timeout 39 | ) 40 | except subprocess.TimeoutExpired: 41 | raise LatexException( 42 | ' '.join(cmd), -1, 'Timeout exception after %d' % timeout 43 | ) 44 | if res.returncode != 0: 45 | raise LatexException(' '.join(cmd), res.returncode, res.stdout) 46 | paperid = os.path.splitext(os.path.basename(src_tex))[0] 47 | return os.path.join(dest_dir, paperid + '.pdf') 48 |  49 |  50 | def im_diff(a: np.ndarray, b: np.ndarray) -> np.ndarray: 51 | """Returns a copy of image 'a' with all pixels where 'a' and 'b' are equal set to white.""" 52 | assert (np.array_equal(np.shape(a), np.shape(b))) 53 | diff = a - b 54 | mask = np.any(diff != 0, axis=2) # Check if any channel is different 55 | rgb_mask = np.transpose(np.tile(mask, (3, 1, 1)), axes=[1, 2, 0]) 56 | diff_image = np.copy(a) 57 | diff_image[np.logical_not(rgb_mask)] = 255 58 | return diff_image 59 |  60 |  61 | def pair_boxes(a_boxes: List[BoxClass], 62 | b_boxes: List[BoxClass]) -> Tuple[List[int], List[int]]: 63 | """ 64 | Find the pairing between boxes with the lowest total distance, e.g. for matching figures to their captions. 65 | This is an instance of the linear assignment problem and can be solved efficiently using the Hungarian algorithm. 66 | Return the indices of matched boxes. If a_boxes and b_boxes are of unequal length, not all boxes will be paired. 67 | Length of returned lists is min(len(a_boxes), len(b_boxes)).
68 | """ 69 | a_len = len(a_boxes) 70 | b_len = len(b_boxes) 71 | cost_matrix = np.zeros([a_len, b_len]) 72 | cost_matrix[:] = np.nan 73 | for (a_idx, a_box) in enumerate(a_boxes): 74 | for (b_idx, b_box) in enumerate(b_boxes): 75 | cost_matrix[a_idx, b_idx] = a_box.distance_to_other(b_box) 76 | assert not np.isnan(cost_matrix).any() # nan != nan, so an equality check against np.nan cannot find unfilled cells 77 | (a_indices, b_indices) = scipy.optimize.linear_sum_assignment(cost_matrix) 78 | assert len(a_indices) == len(b_indices) 79 | return a_indices, b_indices 80 |  81 |  82 | def load_figures_json(filename: str) -> Dict[str, List[Figure]]: 83 | d = file_util.read_json(filename) 84 | res = { 85 | page: [Figure.from_dict(dict_fig) for dict_fig in page_dicts] 86 | for (page, page_dicts) in d.items() 87 | } 88 | return res 89 |  90 |  91 | T = TypeVar('T') 92 | S = TypeVar('S') 93 |  94 |  95 | def group_by(l: Iterable[T], 96 | key: Callable[[T], S]=lambda x: x) -> Dict[S, List[T]]: 97 | """Like itertools.groupby but doesn't require first sorting by the key function. Returns a dict.""" 98 | d = collections.defaultdict(list) 99 | assert (callable(key)) 100 | for item in l: 101 | d[key(item)].append(item) 102 | return d 103 |  104 |  105 | def ordered_group_by(l: Iterable[T], 106 | key: Callable[[T], S]=lambda x: x) -> Dict[S, List[T]]: 107 | """Keys are returned in order of first occurrence.""" 108 | d = collections.OrderedDict() 109 | assert (callable(key)) 110 | for item in l: 111 | k = key(item) 112 | if k not in d: 113 | d[k] = [] 114 | d[k].append(item) 115 | return d 116 |  117 |  118 | def group_figures_by_pagenum(figs: Iterable[Figure] 119 | ) -> Dict[int, List[Figure]]: 120 | return group_by(figs, lambda x: x.page) 121 |  122 |  123 | def make_axes(size: Tuple[float, float]=(20, 20)) -> axes.Subplot: 124 | fig, ax = plt.subplots(1, figsize=size) 125 | return ax 126 |  127 |  128 | def pagename_to_pagenum(pagename: str) -> int: 129 | """Takes a page image name with a 1-indexed page number and returns the 0-indexed page number.""" 130 | return int( 131 | PDFRenderer.IMAGE_FILENAME_RE.fullmatch(pagename).group('page_num') 132 | ) - 1 133 |  134 |  135 | def pagenum_to_pagename(pdf: str, pagenum: int, dpi: int=DEFAULT_INFERENCE_DPI) -> str: 136 | """Takes a pdf and a 0-indexed page number and returns the 1-indexed page image name.""" 137 | return os.path.join( 138 | os.path.dirname(pdf), 139 | (PDFRenderer.IMAGE_FILENAME_PREFIX_TEMPLATE + 140 | '{page_num:04d}.png').format( 141 | pdf_name=os.path.split(pdf)[-1], dpi=dpi, page_num=pagenum + 1 142 | )) 143 |  144 |  145 | def pagename_to_pdf(pagename: str) -> str: 146 | """Takes a page image name and returns the name of the pdf it came from.""" 147 | return PDFRenderer.IMAGE_FILENAME_RE.fullmatch(pagename).group('pdf_name') 148 | -------------------------------------------------------------------------------- /deepfigures/extraction/pdffigures_wrapper.py: -------------------------------------------------------------------------------- 1 | import os 2 | import subprocess 3 | from typing import List, Optional, Iterable 4 | import tempfile 5 | from deepfigures.utils import file_util 6 | from deepfigures.extraction import datamodels 7 | from deepfigures import settings 8 | import logging 9 | import shlex 10 | import contextlib 11 | import more_itertools 12 |  13 |  14 | # DPI used by pdffigures for json outputs; this is hard-coded as 72 15 | PDFFIGURES_DPI = 72 16 |  17 |  18 | class PDFFiguresExtractor(object): 19 | """Extract figure and caption information from a PDF.""" 20 |  21 | def extract(self, pdf_path, output_dir, use_cache=True): 22 | 
"""Return results from extracting a PDF with pdffigures2. 23 | 24 | :param str pdf_path: path to the PDF to extract. 25 | :param str output_dir: path to the output directory. 26 | :param bool use_cache: whether or not to use cached data from 27 | disk if it's available. 28 | 29 | :returns: results from running pdffigures2 on the PDF. 30 | """ 31 | pdffigures_dir = os.path.join(output_dir, 'pdffigures/') 32 | if not os.path.exists(pdffigures_dir): 33 | os.makedirs(pdffigures_dir) 34 | 35 | success_file_path = os.path.join(pdffigures_dir, '_SUCCESS') 36 | 37 | pdffigures_jar_path = file_util.cache_file( 38 | settings.PDFFIGURES_JAR_PATH) 39 | 40 | if not os.path.exists(success_file_path) or not use_cache: 41 | subprocess.check_call( 42 | 'java' 43 | ' -jar {pdffigures_jar_path}' 44 | ' --figure-data-prefix {pdffigures_dir}' 45 | ' --save-regionless-captions' 46 | ' {pdf_path}'.format( 47 | pdffigures_jar_path=pdffigures_jar_path, 48 | pdf_path=pdf_path, 49 | pdffigures_dir=pdffigures_dir), 50 | shell=True) 51 | 52 | # add a success file to verify that the operation completed 53 | with open(success_file_path, 'w') as f_out: 54 | f_out.write('') 55 | 56 | return file_util.read_json( 57 | os.path.join( 58 | pdffigures_dir, 59 | os.path.basename(pdf_path)[:-4] + '.json')) 60 | 61 | 62 | pdffigures_extractor = PDFFiguresExtractor() 63 | 64 | 65 | def figure_to_caption(figure: dict) -> datamodels.CaptionOnly: 66 | return datamodels.CaptionOnly( 67 | caption_boundary=datamodels.BoxClass. 68 | from_dict(figure['captionBoundary']), 69 | page=figure['page'], 70 | caption_text=figure['caption'], 71 | name=figure['name'], 72 | figure_type=figure['figType'], 73 | ) 74 | 75 | 76 | def regionless_to_caption(regionless: dict) -> datamodels.CaptionOnly: 77 | return datamodels.CaptionOnly( 78 | caption_boundary=datamodels.BoxClass.from_dict(regionless['boundary']), 79 | page=regionless['page'], 80 | caption_text=regionless['text'], 81 | name=regionless['name'], 82 | figure_type=regionless['figType'], 83 | ) 84 | 85 | 86 | def get_captions( 87 | pdffigures_output: dict, target_dpi: int=settings.DEFAULT_INFERENCE_DPI 88 | ) -> List[datamodels.CaptionOnly]: 89 | figures = pdffigures_output.get('figures', []) 90 | regionless_captions = pdffigures_output.get('regionless-captions', []) 91 | captions = ( 92 | [figure_to_caption(fig) for fig in figures] + 93 | [regionless_to_caption(reg) for reg in regionless_captions] 94 | ) 95 | for caption in captions: 96 | caption.caption_boundary = caption.caption_boundary.rescale( 97 | target_dpi / PDFFIGURES_DPI 98 | ) 99 | return captions 100 | 101 | 102 | def get_figures(pdffigures_output: dict, target_dpi: int=settings.DEFAULT_INFERENCE_DPI 103 | ) -> List[datamodels.Figure]: 104 | return [ 105 | datamodels.Figure.from_pf_output(figure, target_dpi) 106 | for figure in pdffigures_output.get('figures', []) 107 | ] 108 | 109 | 110 | def detect_batch(src_pdfs: List[str], target_dpi: int = settings.DEFAULT_INFERENCE_DPI, chunksize=1) -> \ 111 | Iterable[datamodels.PdfDetectionResult]: 112 | for chunk in more_itertools.chunked(src_pdfs, chunksize): 113 | results = [ 114 | pdffigures_extractor.extract(pdf_path, os.path.dirname(pdf_path)) 115 | for pdf_path in chunk 116 | ] 117 | for (result, pdf) in zip(results, chunk): 118 | figs = get_figures(result, target_dpi=target_dpi) 119 | yield datamodels.PdfDetectionResult( 120 | pdf=pdf, 121 | figures=figs, 122 | dpi=target_dpi, 123 | raw_detected_boxes=None, 124 | raw_pdffigures_output=None, 125 | error=None 126 | ) 127 | 
-------------------------------------------------------------------------------- /deepfigures/extraction/pipeline.py: -------------------------------------------------------------------------------- 1 | """The figure extraction pipeline for deepfigures. 2 |  3 | The ``deepfigures.extraction.pipeline`` module defines the figure 4 | extraction pipeline for deepfigures: copying the PDF into place, 5 | rendering its pages, finding captions for figures, detecting the 6 | figures, and cropping out the figure images. 7 | """ 8 |  9 | import hashlib 10 | import os 11 | import shutil 12 |  13 | from PIL import Image 14 |  15 | from deepfigures import settings 16 | from deepfigures.extraction import ( 17 | detection, 18 | pdffigures_wrapper, 19 | renderers) 20 | from deepfigures.utils import ( 21 | misc, 22 | settings_utils) 23 |  24 |  25 | class FigureExtraction(object): 26 | """A class representing the data extracted from a PDF. 27 |  28 | The ``FigureExtraction`` class represents the data extracted from 29 | a single PDF and is generated through the ``extract`` method of 30 | the ``FigureExtractionPipeline`` class. 31 |  32 | The data stored for a ``FigureExtraction`` instance sits on disk 33 | in a directory. See `Attributes`_ for more information. 34 |  35 | Attributes 36 | ---------- 37 | path_templates : Dict[str, str] 38 | A class attribute providing the templates for the paths to the 39 | extracted data on disk, relative to the data directory. 40 | paths : Dict[str, str] 41 | A dictionary mapping path names to their actual absolute paths 42 | on disk. 43 | parent_directory : str 44 | The parent directory for the directory containing the extracted 45 | data. 46 | low_res_rendering_paths : Optional[List[str]] 47 | Paths to the low resolution renderings of the PDF (used for 48 | predicting the bounding boxes). 49 | hi_res_rendering_paths : Optional[List[str]] 50 | Paths to the high resolution renderings of the PDF (used for 51 | cropping out the figure images). 52 | pdffigures_output_path : Optional[str] 53 | Path to the output of running pdffigures2 on the PDF. 54 | deepfigures_json_path : Optional[str] 55 | Path to the deepfigures JSON predicting the bounding boxes. 56 | """ 57 |  58 | """Templates for paths to the data extracted from a PDF.""" 59 | path_templates = { 60 | 'BASE': '{pdf_hash}', 61 | 'PDF_PATH': '{base}/{pdf_name}', 62 | 'RENDERINGS_PATH': '{base}/page-renderings', 63 | 'PDFFIGURES_OUTPUT_PATH': '{base}/pdffigures-output', 64 | 'DEEPFIGURES_OUTPUT_PATH': '{base}/deepfigures-output', 65 | 'FIGURE_IMAGES_PATH': '{base}/figure-images' 66 | } 67 |  68 | def __init__(self, pdf_path, parent_directory): 69 | """Initialize a ``FigureExtraction`` instance. 70 |  71 | Parameters 72 | ---------- 73 | pdf_path : str 74 | The path to the PDF locally on disk. 75 | parent_directory : str 76 | The parent directory for the directory in which the figure 77 | extraction results will be stored.
78 | """ 79 | # compute strings to fill in the path templates 80 | pdf_hash = misc.hash_out_of_core(hashlib.sha1, pdf_path) 81 | pdf_name = os.path.basename(pdf_path) 82 | base = self.path_templates['BASE'].format(pdf_hash=pdf_hash) 83 | template_vars = { 84 | 'pdf_hash': pdf_hash, 85 | 'pdf_name': pdf_name, 86 | 'base': base 87 | } 88 | # set the paths attribute 89 | self.paths = { 90 | k: os.path.join(parent_directory, v.format(**template_vars)) 91 | for k, v in self.path_templates.items() 92 | } 93 | self.parent_directory = parent_directory 94 | self.low_res_rendering_paths = None 95 | self.hi_res_rendering_paths = None 96 | self.pdffigures_output_path = None 97 | self.deepfigures_json_path = None 98 |  99 |  100 | class FigureExtractionPipeline(object): 101 | """A class for extracting figure data from PDFs. 102 |  103 | The ``FigureExtractionPipeline`` class's main function is to 104 | generate instances of ``FigureExtraction``. Each instance of a 105 | ``FigureExtraction`` represents the data extracted from processing a 106 | single PDF. 107 |  108 | See the ``FigureExtraction`` class's doc string for details on 109 | the format that this extracted data takes. 110 | """ 111 |  112 | def extract(self, pdf_path, output_directory): 113 | """Return a ``FigureExtraction`` instance for ``pdf_path``. 114 |  115 | Extract the figures and additional information from the PDF at 116 | ``pdf_path``, saving the results to disk in ``output_directory`` 117 | and returning the corresponding ``FigureExtraction`` instance. 118 |  119 | Parameters 120 | ---------- 121 | pdf_path : str 122 | The path to the PDF. 123 | output_directory : str 124 | The directory in which to save the results from extraction. 125 |  126 | Returns 127 | ------- 128 | FigureExtraction 129 | A ``FigureExtraction`` instance for the PDF at ``pdf_path``.
130 | """ 131 | figure_extraction = FigureExtraction( 132 | pdf_path=pdf_path, 133 | parent_directory=output_directory) 134 | 135 | # create the extraction results directory 136 | os.makedirs(figure_extraction.paths['BASE']) 137 | 138 | # copy the PDF into the extraction results directory 139 | shutil.copy(pdf_path, figure_extraction.paths['PDF_PATH']) 140 | 141 | pdf_renderer = settings_utils.import_setting( 142 | settings.DEEPFIGURES_PDF_RENDERER)() 143 | 144 | # render the PDF into low-res images 145 | figure_extraction.low_res_rendering_paths = \ 146 | pdf_renderer.render( 147 | pdf_path=figure_extraction.paths['PDF_PATH'], 148 | output_dir=figure_extraction.paths['BASE'], 149 | dpi=settings.DEFAULT_INFERENCE_DPI) 150 | 151 | # render the PDF into hi-res images 152 | figure_extraction.hi_res_rendering_paths = \ 153 | pdf_renderer.render( 154 | pdf_path=figure_extraction.paths['PDF_PATH'], 155 | output_dir=figure_extraction.paths['BASE'], 156 | dpi=settings.DEFAULT_CROPPED_IMG_DPI) 157 | 158 | # extract captions from PDF using pdffigures2 159 | figure_extraction.pdffigures_output_path = \ 160 | pdffigures_wrapper.pdffigures_extractor.extract( 161 | pdf_path=figure_extraction.paths['PDF_PATH'], 162 | output_dir=figure_extraction.paths['BASE']) 163 | 164 | # run deepfigures / neural networks on the PDF images 165 | figure_extraction.deepfigures_json_path = \ 166 | detection.extract_figures_json( 167 | pdf_path=figure_extraction.paths['PDF_PATH'], 168 | page_image_paths=figure_extraction.low_res_rendering_paths, 169 | pdffigures_output=figure_extraction.pdffigures_output_path, 170 | output_directory=figure_extraction.paths['BASE']) 171 | 172 | return figure_extraction 173 | -------------------------------------------------------------------------------- /deepfigures/extraction/renderers.py: -------------------------------------------------------------------------------- 1 | """PDF Rendering engines for deepfigures.""" 2 | 3 | import glob 4 | import json 5 | import logging 6 | import lxml 7 | import os 8 | import re 9 | import shutil 10 | import string 11 | import subprocess 12 | import typing 13 | 14 | import bs4 15 | 16 | from deepfigures.utils import file_util 17 | from deepfigures.extraction import exceptions 18 | from deepfigures import settings 19 | 20 | 21 | logger = logging.getLogger(__name__) 22 | 23 | # constant for use in the isprintable function 24 | _PRINTABLES = set(string.printable) 25 | 26 | 27 | def isprintable(s): 28 | """Return True if all characters in s are printable, else False. 29 | 30 | Parameters 31 | ---------- 32 | :param str s: a string. 33 | 34 | Returns 35 | ------- 36 | :return: True if s has only printable characters, otherwise False. 37 | """ 38 | return set(s).issubset(_PRINTABLES) 39 | 40 | 41 | class PDFRenderer(object): 42 | """Render PDFs and extract text from them. 43 | 44 | PDFRenderers are used to generate data and as part of the figure 45 | extraction pipeline for deepfigures. PDFRenderers must implement 46 | methods to render PDFs as images to disk and to extract text with 47 | bounding boxes that may later be parsed into classes from 48 | deepfigures.datamodels. 49 | 50 | Usage 51 | ----- 52 | Subclass PDFRenderer and override: 53 | 54 | - RENDERING_ENGINE_NAME: a class variable giving a unique name 55 | that signals what backend was used to process the PDFs. 56 | - _rasterize_pdf: a method (see _rasterize_pdf on this class for 57 | details). 58 | - _extract_text: a method (see _extract_text on this class for 59 | details). 
60 |  61 | """ 62 | RENDERING_ENGINE_NAME = None 63 | IMAGE_FILENAME_RE = re.compile( 64 | r'(?P<pdf_name>.*)-dpi(?P<dpi>\d+)-page(?P<page_num>\d+).(?P<ext>png|jpg)' 65 | ) 66 | IMAGE_FILENAME_PREFIX_TEMPLATE = \ 67 | '{pdf_name}-dpi{dpi:d}-page' 68 |  69 | def __init__(self): 70 | """Initialize the PDFRenderer.""" 71 | # check that subclasses override cls.RENDERING_ENGINE_NAME 72 | assert self.RENDERING_ENGINE_NAME is not None, ( 73 | "class variable RENDERING_ENGINE_NAME must not be None" 74 | ) 75 |  76 | def render( 77 | self, 78 | pdf_path: str, 79 | output_dir: typing.Optional[str]=None, 80 | dpi: int=settings.DEFAULT_INFERENCE_DPI, 81 | ext: str='png', 82 | max_pages: typing.Optional[int]=None, 83 | use_cache: bool=True, 84 | check_retcode: bool=False 85 | ) -> typing.List[str]: 86 | """Render pdf_path, save to disk and return the file paths. 87 |  88 | Render the pdf at pdf_path, save the generated images to disk 89 | in output_dir using file names matching the 90 | PDFRenderer.IMAGE_FILENAME_RE pattern, and return a list of 91 | paths to the generated files. 92 |  93 | Parameters 94 | ---------- 95 | :param str pdf_path: path to the pdf that should be rendered. 96 | :param Optional[str] output_dir: path to the directory in which 97 | to save output. If None, then output is saved in the same 98 | directory as the PDF. 99 | :param int dpi: the dpi at which to render the PDF. 100 | :param str ext: the extension or file type of the generated 101 | image, should be either 'png' or 'jpg'. 102 | :param Optional[int] max_pages: the maximum number of pages to 103 | render from the PDF. 104 | :param bool use_cache: whether or not to skip the rendering 105 | operation if the pdf has already been rendered. 106 | :param bool check_retcode: whether or not to check the return 107 | code from the subprocess used to render the PDF. 108 |  109 | Returns 110 | ------- 111 | :return: the list of generated paths 112 | """ 113 | image_types = ['png', 'jpg'] 114 | if ext not in image_types: 115 | raise ValueError( 116 | "ext must be one of {}".format(', '.join(image_types))) 117 |  118 | if output_dir is None: 119 | output_dir = os.path.dirname(pdf_path) 120 |  121 | if not os.path.isdir(output_dir): 122 | raise IOError( 123 | "Output directory ({}) does not exist.".format(output_dir)) 124 |  125 | pdf_name = os.path.basename(pdf_path) 126 |  127 | # engines_dir: directory used for storing the output from 128 | # different rendering engines. 129 | engines_dir = os.path.join( 130 | output_dir, '{pdf_name}-images'.format(pdf_name=pdf_name)) 131 | # images_dir: directory used for storing images output by this 132 | # specific PDFRenderer / engine.
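# For a hypothetical 'paper.pdf' rendered by the ghostscript engine at
# dpi=100, images therefore land under
# {output_dir}/paper.pdf-images/ghostscript/dpi100/ with names like
# 'paper.pdf-dpi100-page0001.png' (cf. tests/data/pdfrenderer/).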
133 | images_dir = os.path.join( 134 | engines_dir, 135 | self.RENDERING_ENGINE_NAME, 136 | 'dpi{}'.format(dpi)) 137 | 138 | image_filename_prefix = self.IMAGE_FILENAME_PREFIX_TEMPLATE.format( 139 | pdf_name=pdf_name, dpi=dpi) 140 | image_output_path_prefix = os.path.join( 141 | images_dir, image_filename_prefix) 142 | success_file_path = os.path.join(images_dir, '_SUCCESS') 143 | 144 | if not os.path.exists(success_file_path) or not use_cache: 145 | if os.path.exists(images_dir): 146 | logger.info("Overwriting {}.".format(images_dir)) 147 | shutil.rmtree(images_dir) 148 | os.makedirs(images_dir) 149 | 150 | self._rasterize_pdf( 151 | pdf_path=pdf_path, 152 | image_output_path_prefix=image_output_path_prefix, 153 | dpi=dpi, 154 | ext=ext, 155 | max_pages=max_pages, 156 | check_retcode=check_retcode) 157 | 158 | # add a success file to verify that the operation completed 159 | with open(success_file_path, 'w') as f_out: 160 | f_out.write('') 161 | 162 | generated_image_paths = glob.glob( 163 | image_output_path_prefix + '*.' + ext) 164 | 165 | return sort_by_page_num(generated_image_paths) 166 | 167 | def _rasterize_pdf( 168 | self, 169 | pdf_path: str, 170 | image_output_path_prefix: str, 171 | dpi: int, 172 | ext: str, 173 | max_pages: typing.Optional[int], 174 | check_retcode: bool, 175 | ) -> typing.List[str]: 176 | """Rasterize the PDF at PDF path and save it to disk. 177 | 178 | Rasterize the PDF at PDF path and save it to disk using 179 | image_output_path_prefix. Each page of the PDF should be 180 | rasterized separately and saved to the path formed by 181 | appending '{page_num:04d}.{ext}' to 182 | image_output_path_prefix. 183 | 184 | Parameters 185 | ---------- 186 | :param str pdf_path: path to the pdf that should be rendered. 187 | :param str image_output_path_prefix: prefix for the output 188 | path of each rendered pdf page. 189 | :param int dpi: the dpi at which to render the pdf. 190 | :param int max_pages: the maximum number of pages to render 191 | from the pdf. 192 | 193 | Returns 194 | ------- 195 | :return: None 196 | """ 197 | raise NotImplementedError( 198 | "Subclasses of PDFRenderer must implement _rasterize_pdf." 199 | ) 200 | 201 | def extract_text(self, pdf_path: str, encoding: str='UTF-8' 202 | ) -> typing.Optional[bs4.BeautifulSoup]: 203 | """Extract info about a PDF as XML returning the parser for it. 204 | 205 | Extract information about the text, bounding boxes and pages of 206 | a PDF as XML, saving the XML to disk and returning a parser for 207 | it. 208 | 209 | Parameters 210 | ---------- 211 | :param str pdf_path: the path to the pdf from which to extract 212 | information. 213 | :param str encoding: the encoding to use for the XML. 214 | 215 | Returns 216 | ------- 217 | :return: A parser for the XML that is saved to disk. 218 | """ 219 | # generate the html files 220 | self._extract_text(pdf_path=pdf_path, encoding=encoding) 221 | 222 | html = pdf_path[:-4] + '.html' 223 | if not os.path.isfile(html): 224 | html_soup = None 225 | try: 226 | with open(html, 'r') as f: 227 | html_soup = bs4.BeautifulSoup(f, 'xml') 228 | except UnicodeDecodeError: 229 | html_soup = None 230 | 231 | if html_soup is None: 232 | raise exceptions.PDFProcessingError( 233 | "Error in extracting xml for {}.".format(pdf_path) 234 | ) 235 | 236 | return html_soup 237 | 238 | def _extract_text(self, pdf_path: str, encoding: str='UTF-8') -> None: 239 | """Extract text from a PDF and save to disk as xml. 
240 | 241 | Parameters 242 | ---------- 243 | :param str pdf_path: path to the PDF to be extracted. 244 | :param str encoding: the encoding to use for saving the XML. 245 | 246 | Returns 247 | ------- 248 | :return: None 249 | """ 250 | raise NotImplementedError( 251 | "Subclasses of PDFRenderer must implement _extract_text." 252 | ) 253 | 254 | 255 | class GhostScriptRenderer(PDFRenderer): 256 | """Render PDFs using GhostScript.""" 257 | RENDERING_ENGINE_NAME = 'ghostscript' 258 | 259 | def _rasterize_pdf( 260 | self, 261 | pdf_path: str, 262 | image_output_path_prefix: str, 263 | dpi: int, 264 | ext: str, 265 | max_pages: typing.Optional[int], 266 | check_retcode: bool 267 | ) -> typing.List[str]: 268 | """Rasterize a PDF using GhostScript.""" 269 | # ghostscript requires a template string for the output path 270 | image_output_path_template = image_output_path_prefix + '%04d.{ext}'.format( 271 | ext=ext) 272 | sdevice = 'png16m' if ext == 'png' else 'jpeg' 273 | gs_args = [ 274 | 'gs', '-dGraphicsAlphaBits=4', '-dTextAlphaBits=4', '-dNOPAUSE', '-dBATCH', '-dSAFER', '-dQUIET', 275 | '-sDEVICE=' + sdevice, 276 | '-r%d' % dpi, '-sOutputFile=' + image_output_path_template, 277 | '-dBufferSpace=%d' % int(1e9), 278 | '-dBandBufferSpace=%d' % int(5e8), '-sBandListStorage=memory', 279 | '-c', 280 | '%d setvmthreshold' % int(1e9), '-dNOGC', 281 | '-dNumRenderingThreads=4', "-f", pdf_path 282 | ] 283 | if max_pages is not None: 284 | gs_args.insert(-2, '-dLastPage=%d' % max_pages) 285 | subprocess.run(gs_args, check=check_retcode) 286 | 287 | def _extract_text(self, pdf_path: str, encoding: str) -> None: 288 | """Extract text using pdftotext.""" 289 | subprocess.run(['pdftotext', '-bbox', '-enc', encoding, pdf_path]) 290 | 291 | 292 | def sort_by_page_num(file_paths: typing.List[str]) -> typing.List[str]: 293 | """Sort file_paths by the page number. 294 | 295 | Sort file_paths by the page number where file_paths is a list 296 | of rendered output image file paths generated by a 297 | PDFRenderer. 298 | 299 | Parameters 300 | ---------- 301 | :param List[str] file_paths: a list of file paths generated by 302 | a PDFRenderer. 303 | 304 | Returns 305 | ------- 306 | file_paths sorted by page number. 
307 | """ 308 | return sorted( 309 | file_paths, 310 | key=lambda file_path: int(PDFRenderer.IMAGE_FILENAME_RE.fullmatch( 311 | os.path.split(file_path)[-1]).group('page_num'))) 312 | -------------------------------------------------------------------------------- /deepfigures/extraction/tensorbox_fourchannel.py: -------------------------------------------------------------------------------- 1 | """The model used to detect figures.""" 2 | 3 | import copy 4 | import os 5 | import tempfile 6 | from typing import List, Tuple, Iterable 7 | 8 | import numpy as np 9 | import tensorflow as tf 10 | 11 | from deepfigures import settings 12 | from deepfigures.extraction.datamodels import ( 13 | BoxClass, 14 | Figure, 15 | PdfDetectionResult, 16 | CaptionOnly) 17 | from deepfigures.extraction import ( 18 | figure_utils, 19 | pdffigures_wrapper, 20 | renderers) 21 | from deepfigures.extraction.pdffigures_wrapper import pdffigures_extractor 22 | from deepfigures.utils import ( 23 | file_util, 24 | image_util, 25 | config, 26 | traits, 27 | settings_utils) 28 | from tensorboxresnet import train 29 | from tensorboxresnet.utils import train_utils 30 | 31 | 32 | CAPTION_CHANNEL_BACKGROUND = 255 33 | CAPTION_CHANNEL_MASK = 0 34 | 35 | pdf_renderer = settings_utils.import_setting( 36 | settings.DEEPFIGURES_PDF_RENDERER)() 37 | 38 | 39 | class TensorboxCaptionmaskDetector(object): 40 | """Interface for using the neural network model to detect figures. 41 | 42 | Instantiating this class creates a tensorflow session object as the 43 | self.sess attribute. When done using the instance, remember to close 44 | the session; however, do not open and close sessions every time you 45 | extract a figure because the added overhead will very negatively 46 | affect performance. 47 | """ 48 | def __init__( 49 | self, 50 | save_dir, 51 | iteration, 52 | batch_size=1 # Batch sizes greater than 1 will change results due to batch norm in inception_v1 53 | ): 54 | self.save_dir = save_dir 55 | self.iteration = iteration 56 | 57 | self.hypes = self._get_hypes() 58 | self.hypes['batch_size'] = batch_size 59 | self.input_shape = [ 60 | self.hypes['image_height'], self.hypes['image_width'], 61 | self.hypes['image_channels'] 62 | ] # type: Tuple[float, float, float] 63 | self.graph = tf.Graph() 64 | with self.graph.as_default(): 65 | self.x_in = tf.placeholder( 66 | tf.float32, name='x_in', shape=self.input_shape 67 | ) 68 | assert (self.hypes['use_rezoom']) 69 | pred_boxes, self.pred_logits, self.pred_confidences, self.pred_confs_deltas, pred_boxes_deltas = \ 70 | train.build_forward(self.hypes, tf.expand_dims(self.x_in, 0), 'test', reuse=None) 71 | self.pred_boxes = pred_boxes + pred_boxes_deltas 72 | grid_area = self.hypes['grid_height'] * self.hypes['grid_width'] 73 | pred_confidences = tf.reshape( 74 | tf.nn.softmax( 75 | tf.reshape( 76 | self.pred_confs_deltas, 77 | [grid_area * self.hypes['rnn_len'], 2] 78 | ) 79 | ), [grid_area, self.hypes['rnn_len'], 2] 80 | ) 81 | assert (self.hypes['reregress']) 82 | self.sess = tf.Session() 83 | self.sess.run(tf.global_variables_initializer()) 84 | saver = tf.train.Saver() 85 | model_weights = self._get_weights() 86 | saver.restore(self.sess, model_weights) 87 | 88 | def _get_weights(self) -> str: 89 | suffixes = ['.index', '.meta', '.data-00000-of-00001'] 90 | local_paths = [ 91 | file_util.cache_file( 92 | self.save_dir + 'save.ckpt-%d' % self.iteration + suffix 93 | ) for suffix in suffixes 94 | ] 95 | local_path = local_paths[0] 96 | return local_path[:local_path.rfind(suffixes[0])] 
97 | 98 | def _get_hypes(self) -> dict: 99 | return file_util.read_json(self.save_dir + 'hypes.json') 100 | 101 | def detect_page( 102 | self, 103 | page_tensor: np.ndarray) -> Tuple[np.ndarray, np.ndarray]: 104 | feed = {self.x_in: page_tensor} 105 | (np_pred_boxes, np_pred_confidences) = self.sess.run( 106 | [self.pred_boxes, self.pred_confidences], 107 | feed_dict=feed) 108 | return (np_pred_boxes, np_pred_confidences) 109 | 110 | def get_detections( 111 | self, 112 | page_images: List[np.ndarray], 113 | crop_whitespace: bool = True, 114 | conf_threshold: float = .5) -> List[List[BoxClass]]: 115 | page_datas = [ 116 | { 117 | 'page_image': page_image, 118 | 'orig_size': page_image.shape[:2], 119 | 'resized_page_image': image_util.imresize_multichannel( 120 | page_image, self.input_shape), 121 | } 122 | for page_image in page_images 123 | ] 124 | 125 | predictions = [ 126 | self.detect_page(page_data['resized_page_image']) 127 | for page_data in page_datas 128 | ] 129 | 130 | for (page_data, prediction) in zip(page_datas, predictions): 131 | (np_pred_boxes, np_pred_confidences) = prediction 132 | new_img, rects = train_utils.add_rectangles( 133 | self.hypes, 134 | page_data['resized_page_image'], 135 | np_pred_confidences, 136 | np_pred_boxes, 137 | use_stitching=True, 138 | min_conf=conf_threshold, 139 | show_suppressed=False) 140 | detected_boxes = [ 141 | BoxClass(x1=r.x1, y1=r.y1, x2=r.x2, y2=r.y2).resize_by_page( 142 | self.input_shape, page_data['orig_size']) 143 | for r in rects if r.score > conf_threshold 144 | ] 145 | if crop_whitespace: 146 | detected_boxes = [ 147 | box.crop_whitespace_edges(page_data['page_image']) 148 | for box in detected_boxes 149 | ] 150 | detected_boxes = list(filter(None, detected_boxes)) 151 | page_data['detected_boxes'] = detected_boxes 152 | return [page_data['detected_boxes'] for page_data in page_datas] 153 | 154 | 155 | def detect_figures( 156 | pdf: str, 157 | pdffigures_captions: List[CaptionOnly], 158 | detector: TensorboxCaptionmaskDetector, 159 | conf_threshold: float 160 | ) -> Tuple[List[Figure], List[List[BoxClass]]]: 161 | page_image_files = pdf_renderer.render(pdf, dpi=settings.DEFAULT_INFERENCE_DPI) 162 | page_tensors = [] 163 | for f in page_image_files: 164 | page_im = image_util.read_tensor(f) 165 | if detector.hypes['image_channels'] == 3: 166 | page_tensors.append(page_im) 167 | else: 168 | im_with_mask = np.pad( 169 | page_im, 170 | pad_width=[(0, 0), (0, 0), (0, 1)], 171 | mode='constant', 172 | constant_values=CAPTION_CHANNEL_BACKGROUND 173 | ) 174 | for caption in pdffigures_captions: 175 | (x1, y1, x2, y2) = caption.caption_boundary.get_rounded() 176 | im_with_mask[y1:y2, x1:x2, 3] = CAPTION_CHANNEL_MASK 177 | page_tensors.append(im_with_mask) 178 | figure_boxes_by_page = detector.get_detections( 179 | page_tensors, conf_threshold=conf_threshold 180 | ) 181 | figures_by_page = [] 182 | for page_num in range(len(page_image_files)): 183 | # Page numbers are always 0 indexed 184 | figure_boxes = figure_boxes_by_page[page_num] 185 | pf_page_captions = [ 186 | cap for cap in pdffigures_captions if cap.page == page_num 187 | ] 188 | caption_boxes = [cap.caption_boundary for cap in pf_page_captions] 189 | (figure_indices, caption_indices) = figure_utils.pair_boxes( 190 | figure_boxes, caption_boxes 191 | ) 192 | figures_by_page.extend( 193 | [ 194 | Figure( 195 | figure_boundary=figure_boxes[figure_idx], 196 | caption_boundary=caption_boxes[caption_idx], 197 | caption_text=pf_page_captions[caption_idx].caption_text, 198 | 
name=pf_page_captions[caption_idx].name, 199 | figure_type=pf_page_captions[caption_idx].figure_type, 200 | page=page_num, 201 | ) 202 | for (figure_idx, 203 | caption_idx) in zip(figure_indices, caption_indices) 204 | ] 205 | ) 206 | return figures_by_page, figure_boxes_by_page 207 | 208 | 209 | def detect_batch( 210 | src_pdfs: List[str], 211 | detector: TensorboxCaptionmaskDetector, 212 | conf_threshold: float=.5) -> Iterable[PdfDetectionResult]: 213 | for src_pdf in src_pdfs: 214 | with tempfile.TemporaryDirectory( 215 | prefix='deepfigures-tensorbox') as working_dir: 216 | pdf_path = os.path.join( 217 | working_dir, 218 | src_pdf.replace('/', '_')) 219 | file_util.copy(src_pdf, pdf_path) 220 | pdffigures_output = pdffigures_extractor.extract( 221 | pdf_path, 222 | working_dir) 223 | pdffigures_captions = pdffigures_wrapper.get_captions( 224 | pdffigures_output) 225 | figures_by_page, figure_boxes_by_page = detect_figures( 226 | pdf_path, 227 | pdffigures_captions, 228 | detector, 229 | conf_threshold=conf_threshold) 230 | yield PdfDetectionResult( 231 | pdf=src_pdf, 232 | figures=figures_by_page, 233 | dpi=settings.DEFAULT_INFERENCE_DPI, 234 | raw_detected_boxes=figure_boxes_by_page, 235 | raw_pdffigures_output=pdffigures_output) 236 | -------------------------------------------------------------------------------- /deepfigures/extraction/tests/test_pipeline.py: -------------------------------------------------------------------------------- 1 | """Test deepfigures.extraction.pipeline""" 2 | 3 | import logging 4 | import tempfile 5 | import unittest 6 | 7 | from deepfigures.extraction import pipeline 8 | from deepfigures.utils import test 9 | 10 | 11 | logger = logging.getLogger(__name__) 12 | 13 | 14 | class TestFigureExtractionPipeline(unittest.TestCase): 15 | """Test ``FigureExtractionPipeline``.""" 16 | 17 | def test_extract(self): 18 | """Test extract against a known extraction.""" 19 | pdf_path = "/work/tests/data/endtoend/paper.pdf" 20 | figure_extractor = pipeline.FigureExtractionPipeline() 21 | 22 | with tempfile.TemporaryDirectory() as tmp_dir: 23 | figure_extraction = figure_extractor.extract( 24 | pdf_path, tmp_dir) 25 | 26 | test.test_deepfigures_json( 27 | self, 28 | expected_json='/work/tests/data/endtoend/_work_tests_data_endtoend_paper.pdf-result.json', 29 | actual_json=figure_extraction.deepfigures_json_path) 30 | -------------------------------------------------------------------------------- /deepfigures/extraction/tests/test_renderers.py: -------------------------------------------------------------------------------- 1 | """Tests for deepfigures.extraction.renderers""" 2 | 3 | import contextlib 4 | import logging 5 | import os 6 | import shutil 7 | import time 8 | import tempfile 9 | import unittest 10 | 11 | import numpy as np 12 | from scipy.misc import imread 13 | import pytest 14 | 15 | from deepfigures.extraction import renderers 16 | from deepfigures import settings 17 | 18 | 19 | logger = logging.getLogger(__name__) 20 | 21 | 22 | class IsPrintableTest(unittest.TestCase): 23 | """Test deepfigures.renderers.isprintable.""" 24 | 25 | def test_returns_correct_values(self): 26 | """Test isprintable returns the correct values.""" 27 | # test empty string 28 | self.assertTrue(renderers.isprintable('')) 29 | 30 | # test single printable characters 31 | self.assertTrue(renderers.isprintable('a')) 32 | self.assertTrue(renderers.isprintable('#')) 33 | self.assertTrue(renderers.isprintable('!')) 34 | self.assertTrue(renderers.isprintable('|')) 35 | 36 | # test 
multicharacter strings
37 |         self.assertTrue(renderers.isprintable('aasfd'))
38 |         self.assertTrue(renderers.isprintable('a*&($#asdf!'))
39 | 
40 |         # test nonprintable chars
41 |         self.assertFalse(renderers.isprintable('\x0e'))
42 |         self.assertFalse(renderers.isprintable('afj\x0eqq'))
43 | 
44 | 
45 | class PDFRendererTest(unittest.TestCase):
46 |     """Tests for deepfigures.renderers.PDFRenderer.
47 | 
48 |     Since PDFRenderer is only meant as a base class for classes that
49 |     actually use a rendering backend, most of its functionality is
50 |     tested through tests of its subclasses (GhostScriptRenderer).
51 |     """
52 | 
53 |     def test_init(self):
54 |         """Test init asserts RENDERING_ENGINE_NAME exists."""
55 |         with self.assertRaises(AssertionError):
56 |             renderers.PDFRenderer()
57 | 
58 | 
59 | class PDFRendererSubclassTestMixin(object):
60 |     """A mixin for making tests of PDFRenderer subclasses.
61 | 
62 |     Usage
63 |     -----
64 |     To test a PDFRenderer, mix this class into a unittest.TestCase
65 |     subclass, provide PDF_RENDERER and MANUALLY_INSPECTED_RENDERINGS_DIR
66 |     class attributes on that subclass, and render / manually inspect
67 |     images of each page for
68 |     deepfigures/tests/data/pdfrenderer/paper.pdf.
69 | 
70 |     PDF_RENDERER should be an instance of the pdf renderer class you
71 |     wish to test, and MANUALLY_INSPECTED_RENDERINGS_DIR should be a
72 |     directory containing renderings using PDF_RENDERER that have been
73 |     manually inspected and match the paths in
74 |     deepfigures/tests/data/pdfrenderer/pdfbox-renderings/.
75 | 
76 |     Example
77 |     -------
78 | 
79 |     class GhostScriptRendererTest(
80 |             PDFRendererSubclassTestMixin,
81 |             unittest.TestCase):
82 |         '''... documentation ...'''
83 |         PDF_RENDERER = GhostScriptRenderer()
84 |         MANUALLY_INSPECTED_RENDERINGS_DIR = os.path.join(
85 |             settings.TEST_DATA_DIR,
86 |             'pdfrenderer/ghostscript-renderings/')
87 | 
88 |         def ghostscript_renderer_specific_test(self):
89 |             ...
90 |     """
91 |     PDF_RENDERER = None
92 |     MANUALLY_INSPECTED_RENDERINGS_DIR = None
93 | 
94 |     def mixin_test_setup(self, ext):
95 |         """Set up for unittests.
96 | 
97 |         Parameters
98 |         ----------
99 |         :param str ext: 'png' or 'jpg', the extension for the image type
100 |             for which you wish to set up a test.
101 |         """
102 |         # implement this test setup as a method that is explicitly
103 |         # called rather than trying to use setUp from unittest.TestCase
104 |         # because we don't want to require users to call super in their
105 |         # setUp methods.
106 |         self.pdf_renderer = self.PDF_RENDERER
107 | 
108 |         self.pdf_path = os.path.join(
109 |             settings.TEST_DATA_DIR,
110 |             'pdfrenderer/paper.pdf')
111 |         self.pdf_num_pages = 6
112 |         self.pdf_rendered_page_template = \
113 |             'paper.pdf-dpi100-page{page_num:04d}.{ext}'
114 | 
115 |         # add random bits to the path so that separate instances
116 |         # of this test writing in parallel don't collide.
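        # tempfile.mkdtemp takes care of this: each call creates a new,
        # uniquely named directory (e.g., something like '/tmp/tmpab12cd'),
        # so concurrent test runs never share an output directory.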
117 | self.tmp_output_dir = tempfile.mkdtemp() 118 | 119 | self.expected_dir_structure = [ 120 | os.path.join( 121 | self.tmp_output_dir, 122 | 'paper.pdf-images', 123 | self.pdf_renderer.RENDERING_ENGINE_NAME, 124 | 'dpi{}'.format(settings.DEFAULT_INFERENCE_DPI), 125 | '_SUCCESS') 126 | ] 127 | self.expected_dir_structure.extend([ 128 | os.path.join( 129 | self.tmp_output_dir, 130 | 'paper.pdf-images/', 131 | self.pdf_renderer.RENDERING_ENGINE_NAME, 132 | 'dpi{}'.format(settings.DEFAULT_INFERENCE_DPI), 133 | self.pdf_rendered_page_template.format( 134 | page_num=i, ext=ext)) 135 | for i in range(1, 7) 136 | ]) 137 | 138 | def mixin_test_teardown(self): 139 | """Tear down for unittests.""" 140 | shutil.rmtree(self.tmp_output_dir) 141 | 142 | @contextlib.contextmanager 143 | def setup_and_teardown(self, ext): 144 | """Setup and tear down resources for a test as a context manager. 145 | 146 | Parameters 147 | ---------- 148 | :param str ext: either 'png' or 'jpg', the type of image for 149 | which you want to write the test. 150 | """ 151 | try: 152 | self.mixin_test_setup(ext=ext) 153 | yield 154 | finally: 155 | self.mixin_test_teardown() 156 | 157 | def _test_render_image_ext(self, ext): 158 | """Test the render method with a png extension.""" 159 | self.pdf_renderer.render( 160 | pdf_path=self.pdf_path, 161 | output_dir=self.tmp_output_dir, 162 | ext=ext, 163 | check_retcode=True) 164 | # check that all and only the expected paths are in the output 165 | # dir 166 | output_dir_paths = [ 167 | os.path.join(dir_path, file_name) 168 | for dir_path, dir_names, file_names in os.walk( 169 | self.tmp_output_dir) 170 | for file_name in file_names 171 | ] 172 | self.assertEqual( 173 | sorted(output_dir_paths), 174 | sorted(self.expected_dir_structure)) 175 | # since it's a little complicated to debug bad renderings, 176 | # provide a useful help message. 177 | bad_render_help_msg = ( 178 | "\n" 179 | "\n HINT!: Use the render method on {pdf_renderer} to generate" 180 | "\n and inspect renderered output, and if the rendered" 181 | "\n output looks good move it into " 182 | "\n ``{renderings_dir}`` in place of" 183 | "\n the existing files. If using docker you'll need to run" 184 | "\n the following command after mounting ``/tmp`` as a volume:" 185 | "\n" 186 | "\n python3 -c 'from deepfigures.extraction import renderers;" 187 | " renderers.{pdf_renderer}().render(" 188 | "\"tests/data/pdfrenderer/paper.pdf\"," 189 | " \"/tmp/\"," 190 | " ext=\"{ext}\"," 191 | " use_cache=False)'".format( 192 | renderings_dir=self.MANUALLY_INSPECTED_RENDERINGS_DIR, 193 | pdf_renderer=self.pdf_renderer.__class__.__name__, 194 | ext=ext)) 195 | 196 | # compare the renderings against manually inspected renderings 197 | for path in output_dir_paths: 198 | if path[-3:] == ext: 199 | test_image = imread(path) 200 | reference_image = imread( 201 | os.path.join( 202 | self.MANUALLY_INSPECTED_RENDERINGS_DIR, 203 | os.path.split(path)[-1])) 204 | # test that the average absolute difference between the pixels is 205 | # less than 5. 
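                # (test_image and reference_image load as uint8 arrays;
                # a stricter sketch would cast before subtracting, e.g.
                # np.abs(test_image.astype(int) - reference_image.astype(int)),
                # to avoid unsigned wraparound on negative pixel differences.)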
206 | self.assertLess( 207 | np.sum(np.abs(test_image - reference_image)) / test_image.size, 5.0, 208 | msg=bad_render_help_msg) 209 | 210 | def test_render_png(self): 211 | """Test the render method with a png extension.""" 212 | ext = 'png' 213 | with self.setup_and_teardown(ext=ext): 214 | self._test_render_image_ext(ext=ext) 215 | 216 | def test_render_jpg(self): 217 | """Test the render method with a jpg extension.""" 218 | ext = 'jpg' 219 | with self.setup_and_teardown(ext=ext): 220 | self._test_render_image_ext(ext=ext) 221 | 222 | def test_uses_cache(self): 223 | """Test that the rendered uses existing copies of the files.""" 224 | ext = 'png' 225 | with self.setup_and_teardown(ext=ext): 226 | self.pdf_renderer.render( 227 | pdf_path=self.pdf_path, 228 | output_dir=self.tmp_output_dir, 229 | ext=ext, 230 | check_retcode=True) 231 | output_dir_paths = [ 232 | os.path.join(dir_path, file_name) 233 | for dir_path, dir_names, file_names in os.walk( 234 | self.tmp_output_dir) 235 | for file_name in file_names 236 | ] 237 | mtimes = {} 238 | for path in output_dir_paths: 239 | mtimes[path] = os.path.getmtime(path) 240 | time.sleep(1) 241 | # render the PDF again and verify the mtimes haven't changed 242 | self.pdf_renderer.render( 243 | pdf_path=self.pdf_path, 244 | output_dir=self.tmp_output_dir, 245 | ext=ext, 246 | check_retcode=True) 247 | output_dir_paths = [ 248 | os.path.join(dir_path, file_name) 249 | for dir_path, dir_names, file_names in os.walk( 250 | self.tmp_output_dir) 251 | for file_name in file_names 252 | ] 253 | for path in output_dir_paths: 254 | self.assertEqual(mtimes[path], os.path.getmtime(path)) 255 | 256 | def test_busts_cache(self): 257 | """Test that passing use_cache False busts the cache.""" 258 | ext = 'png' 259 | with self.setup_and_teardown(ext=ext): 260 | self.pdf_renderer.render( 261 | pdf_path=self.pdf_path, 262 | output_dir=self.tmp_output_dir, 263 | ext=ext, 264 | check_retcode=True) 265 | output_dir_paths = [ 266 | os.path.join(dir_path, file_name) 267 | for dir_path, dir_names, file_names in os.walk( 268 | self.tmp_output_dir) 269 | for file_name in file_names 270 | ] 271 | mtimes = {} 272 | for path in output_dir_paths: 273 | mtimes[path] = os.path.getmtime(path) 274 | # render the PDF again and verify the mtimes have changed 275 | time.sleep(1) 276 | self.pdf_renderer.render( 277 | pdf_path=self.pdf_path, 278 | output_dir=self.tmp_output_dir, 279 | ext=ext, 280 | use_cache=False, 281 | check_retcode=True) 282 | output_dir_paths = [ 283 | os.path.join(dir_path, file_name) 284 | for dir_path, dir_names, file_names in os.walk( 285 | self.tmp_output_dir) 286 | for file_name in file_names 287 | ] 288 | for path in output_dir_paths: 289 | if path[-3:] == 'png' or path[-8:] == '_SUCCESS': 290 | self.assertNotEqual( 291 | mtimes[path], 292 | os.path.getmtime(path), 293 | msg="{path} mtime did not change.".format(path=path)) 294 | 295 | 296 | class GhostScriptRendererTest( 297 | PDFRendererSubclassTestMixin, 298 | unittest.TestCase): 299 | """Test deepfigures.renderers.GhostScriptRenderer.""" 300 | PDF_RENDERER = renderers.GhostScriptRenderer() 301 | MANUALLY_INSPECTED_RENDERINGS_DIR = os.path.join( 302 | settings.TEST_DATA_DIR, 303 | 'pdfrenderer/ghostscript-renderings/') 304 | -------------------------------------------------------------------------------- /deepfigures/settings.py: -------------------------------------------------------------------------------- 1 | """Constants and settings for deepfigures.""" 2 | 3 | import logging 4 | import os 5 | 6 | 
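# Values in this module are plain constants that other modules import
# directly; a typical (illustrative) use elsewhere in the package:
#
#     from deepfigures import settings
#     dpi = settings.DEFAULT_INFERENCE_DPI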
7 | logger = logging.getLogger(__name__) 8 | 9 | 10 | # path to the deepfigures project root 11 | BASE_DIR = os.path.dirname( 12 | os.path.dirname(os.path.realpath(__file__))) 13 | 14 | # version number for the current release 15 | VERSION = '0.0.1' 16 | 17 | # descriptions of the docker images deepfigures builds 18 | DEEPFIGURES_IMAGES = { 19 | 'cpu': { 20 | 'tag': 'deepfigures-cpu', 21 | 'dockerfile_path': os.path.join(BASE_DIR, 'dockerfiles/cpu/Dockerfile') 22 | }, 23 | 'gpu': { 24 | 'tag': 'deepfigures-gpu', 25 | 'dockerfile_path': os.path.join(BASE_DIR, 'dockerfiles/gpu/Dockerfile') 26 | } 27 | } 28 | 29 | # path to the directory containing all the project-level test data. 30 | TEST_DATA_DIR = os.path.join(BASE_DIR, 'tests/data') 31 | 32 | # settings for PDFRenderers 33 | DEFAULT_INFERENCE_DPI = 100 34 | DEFAULT_CROPPED_IMG_DPI = 200 35 | BACKGROUND_COLOR = 255 36 | 37 | # weights for the model 38 | TENSORBOX_MODEL = { 39 | 'save_dir': os.path.join(BASE_DIR, 'weights/'), 40 | 'iteration': 500000 41 | } 42 | 43 | # paths to binary dependencies 44 | PDFFIGURES_JAR_NAME = 'pdffigures2-assembly-0.0.12-SNAPSHOT.jar' 45 | PDFFIGURES_JAR_PATH = os.path.join( 46 | BASE_DIR, 47 | 'bin/', 48 | PDFFIGURES_JAR_NAME) 49 | 50 | # PDF Rendering backend settings 51 | DEEPFIGURES_PDF_RENDERER = 'deepfigures.extraction.renderers.GhostScriptRenderer' 52 | 53 | 54 | # settings for data generation 55 | 56 | # The location to temporarily store arxiv source data 57 | ARXIV_DATA_TMP_DIR = '' 58 | # The location to store the final output labels 59 | ARXIV_DATA_OUTPUT_DIR = '' 60 | 61 | # The location of the PMC open access data 62 | PUBMED_INPUT_DIR = '' 63 | # A directory for storing intermediate results 64 | PUBMED_INTERMEDIATE_DIR = '' 65 | # A directory for storing the output pubmed data 66 | PUBMED_DISTANT_DATA_DIR = '' 67 | 68 | # a local directory for storing the output data 69 | LOCAL_PUBMED_DISTANT_DATA_DIR = '' 70 | -------------------------------------------------------------------------------- /deepfigures/utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/deepfigures-open/a22287846a919fb3e42ec4e6571021c7d4420208/deepfigures/utils/__init__.py -------------------------------------------------------------------------------- /deepfigures/utils/config.py: -------------------------------------------------------------------------------- 1 | """Classes for serializing data to json.""" 2 | 3 | import typing 4 | 5 | import traitlets 6 | 7 | 8 | JsonData = typing.Union[list, dict, str, int, float] 9 | 10 | 11 | class JsonSerializable(traitlets.HasTraits): 12 | def to_dict(self) -> dict: 13 | """Recursively convert objects to dicts to allow json serialization.""" 14 | return { 15 | JsonSerializable.serialize(k): JsonSerializable.serialize(v) 16 | for (k, v) in self._trait_values.items() 17 | } 18 | 19 | @staticmethod 20 | def serialize(obj: typing.Union['JsonSerializable', JsonData]): 21 | if isinstance(obj, JsonSerializable): 22 | return obj.to_dict() 23 | elif isinstance(obj, list): 24 | return [JsonSerializable.serialize(v) for v in obj] 25 | elif isinstance(obj, dict): 26 | res_dict = dict() 27 | for (key, value) in obj.items(): 28 | assert type(key) == str 29 | res_dict[key] = JsonSerializable.serialize(value) 30 | return res_dict 31 | else: 32 | return obj 33 | 34 | @classmethod 35 | def from_dict(cls, json_data: dict): 36 | assert (type(json_data) == dict) 37 | args = {} 38 | for (k, v) in 
cls.class_traits().items():
39 |             args[k] = JsonSerializable.deserialize(v, json_data[k])
40 |         return cls(**args)
41 | 
42 |     @staticmethod
43 |     def deserialize(target_trait: traitlets.TraitType, json_data: JsonData):
44 |         """
45 |         N.B. Using this function on complex objects is not advised; prefer to use an explicit serialization scheme.
46 |         """
47 |         # Note: calling importlib.reload on this file breaks issubclass (http://stackoverflow.com/a/11461574/6174778)
48 |         if isinstance(target_trait, traitlets.Instance
49 |                       ) and issubclass(target_trait.klass, JsonSerializable):
50 |             return target_trait.klass.from_dict(json_data)
51 |         elif isinstance(target_trait, traitlets.List):
52 |             assert isinstance(json_data, list)
53 |             return [
54 |                 JsonSerializable.deserialize(target_trait._trait, element) for element in json_data
55 |             ]
56 |         elif isinstance(target_trait, traitlets.Dict):
57 |             # Assume all dictionary keys are strings
58 |             assert isinstance(json_data, dict)
59 |             res_dict = dict()
60 |             for (key, value) in json_data.items():
61 |                 assert type(key) == str
62 |                 res_dict[key] = JsonSerializable.deserialize(target_trait._trait, value)
63 |             return res_dict
64 |         else:
65 |             return json_data
66 | 
67 |     def __repr__(self):
68 |         traits_list = ['%s=%s' % (k, repr(v)) for (k, v) in self._trait_values.items()]
69 |         return type(self).__name__ + '(' + ', '.join(traits_list) + ')'
70 | 
--------------------------------------------------------------------------------
/deepfigures/utils/image_util.py:
--------------------------------------------------------------------------------
1 | import os
2 | import typing
3 | import numpy as np
4 | from scipy import misc
5 | from deepfigures.utils import file_util
6 | import logging
7 | 
8 | class FileTooLargeError(Exception):
9 |     pass
10 | 
11 | 
12 | def read_tensor(path: str, maxsize: typing.Optional[int] = None) -> typing.Optional[np.ndarray]:
13 |     """
14 |     Load a saved tensor, saved either as an image file for standard RGB images or as a numpy archive for more general
15 |     tensors.
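
    A sketch of typical usage (hypothetical paths):

        page = read_tensor('/tmp/page.png')   # shape (H, W, 3), uint8
        mask = read_tensor('/tmp/mask.npz')   # whatever array was saved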
16 | """ 17 | path = file_util.cache_file(path) 18 | if maxsize is not None: 19 | if os.path.getsize(path) > maxsize: 20 | raise FileTooLargeError 21 | (_, ext) = os.path.splitext(path) 22 | ext = ext.lower() 23 | if ext in {'.png', '.jpg', '.jpeg'}: 24 | res = misc.imread(path, mode='RGB') 25 | assert len(res.shape) == 3 26 | assert res.shape[2] == 3 27 | return res 28 | elif ext in {'.npz'}: 29 | try: 30 | data = np.load(path) 31 | assert len(list(data.items())) == 1 32 | except Exception as e: 33 | logging.exception('Error unzipping %s' % path) 34 | return None 35 | return data['arr_0'] 36 | else: 37 | raise RuntimeError('Extension %s for file %s not supported' % (ext, path)) 38 | 39 | 40 | def write_tensor(dst: str, value: np.ndarray) -> None: 41 | """Save a numpy tensor to a given location.""" 42 | (_, ext) = os.path.splitext(dst) 43 | assert (ext == '' or ext == '.npz') 44 | with open(dst, 'wb') as f: 45 | np.savez_compressed(f, value) 46 | 47 | 48 | def imresize_multichannel(im: np.ndarray, target_size: typing.Tuple[int, int], 49 | **kwargs) -> np.ndarray: 50 | n_channels = im.shape[2] 51 | resized_channels = [ 52 | misc.imresize(im[:, :, n], target_size, **kwargs) for n in range(n_channels) 53 | ] 54 | return np.stack(resized_channels, axis=2) 55 | 56 | 57 | def imrescale_multichannel(im: np.ndarray, scale_factor: float, **kwargs) -> np.ndarray: 58 | n_channels = im.shape[2] 59 | resized_channels = [ 60 | misc.imresize(im[:, :, n], scale_factor, **kwargs) for n in range(n_channels) 61 | ] 62 | return np.stack(resized_channels, axis=2) 63 | -------------------------------------------------------------------------------- /deepfigures/utils/misc.py: -------------------------------------------------------------------------------- 1 | """Miscellaneous utilities.""" 2 | 3 | import hashlib 4 | 5 | 6 | def read_chunks(input_path, block_size): 7 | """Iterate over ``block_size`` chunks of file at ``input_path``. 8 | 9 | :param str input_path: the path to the input file to iterate over. 10 | :param int block_size: the size of the chunks to return at each 11 | iteration. 12 | 13 | :yields: a binary chunk of the file at ``input_path`` of size 14 | ``block_size``. 15 | """ 16 | with open(input_path, 'rb') as f_in: 17 | while True: 18 | chunk = f_in.read(block_size) 19 | if chunk: 20 | yield chunk 21 | else: 22 | return 23 | 24 | 25 | def hash_out_of_core(hash_func, input_path): 26 | """Return hexdigest of file at ``input_path`` using ``hash_func``. 27 | 28 | Hash the file at ``input_path`` using ``hash_func`` in an 29 | out-of-core way, allowing hashing of arbitrarily large files, and 30 | then return the hexdigest. 31 | 32 | :param _hashlib.HASH hash_func: a hashing function from hashlib such 33 | as sha1 or md5. 34 | :param str input_path: path to the input file. 35 | 36 | :returns: the hexdigest of the file at ``input_path`` hashed using 37 | ``hash_func``. 
38 | 39 | Example 40 | ------- 41 | To use SHA256 to compute the hash of a file out of core: 42 | 43 | hash_out_of_core(hashlib.sha256, '/path/to/file') 44 | 45 | """ 46 | hf = hash_func() 47 | for chunk in read_chunks(input_path, 256 * (128 * hf.block_size)): 48 | hf.update(chunk) 49 | return hf.hexdigest() 50 | -------------------------------------------------------------------------------- /deepfigures/utils/settings_utils.py: -------------------------------------------------------------------------------- 1 | """Utilities for managing settings.""" 2 | 3 | from importlib import import_module 4 | 5 | 6 | def import_setting(import_string): 7 | """Import and return the object defined by import_string. 8 | 9 | This function is helpful because by the nature of settings files, 10 | they often end up with circular imports, i.e. ``foo`` will import 11 | ``settings`` to get configuration information but ``settings`` will 12 | have some setting set to an object imported from ``foo``. Because 13 | python can't do circular imports, we make the settings strings and 14 | then import them at runtime from the string using this function. 15 | 16 | Parameters 17 | ---------- 18 | :param str import_string: the python path to the object you wish to 19 | import. ``import_string`` should be a dot separated path the same 20 | as you would use in a python import statement. 21 | 22 | Returns 23 | ------- 24 | :returns: any module or object located at ``import_string`` or 25 | ``None`` if no module exists. 26 | """ 27 | try: 28 | module = import_module(import_string) 29 | except ImportError: 30 | module = None 31 | 32 | if not module: 33 | mod_string, obj_string = import_string.rsplit('.', 1) 34 | obj = getattr(import_module(mod_string), obj_string) 35 | 36 | return module or obj 37 | -------------------------------------------------------------------------------- /deepfigures/utils/stringmatch/__init__.py: -------------------------------------------------------------------------------- 1 | from _stringmatch import lib 2 | 3 | 4 | def match(key: str, text: str): 5 | ''' 6 | Find the location of the substring in text with the 7 | minimum edit distance (Levenshtein) to key. 8 | ''' 9 | return lib.match(key, text) 10 | -------------------------------------------------------------------------------- /deepfigures/utils/stringmatch/stringmatch.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | /* start_pos is inclusive, end_pos is exclusive. */ 5 | struct MatchResult { 6 | int start_pos; 7 | int end_pos; 8 | int cost; 9 | }; 10 | 11 | 12 | class IntMatrix { 13 | unsigned int nrows; 14 | unsigned int ncols; 15 | int* data; 16 | 17 | public: 18 | IntMatrix(unsigned int nrows, unsigned int ncols) { 19 | this->nrows = nrows; 20 | this->ncols = ncols; 21 | this->data = new int[nrows*ncols]; 22 | } 23 | 24 | ~IntMatrix() { 25 | delete data; 26 | } 27 | 28 | int* operator[](const int x) { 29 | return &data[x * ncols]; 30 | } 31 | 32 | int& operator()(const int x, const int y) { return data[x * ncols + y]; } 33 | }; 34 | 35 | 36 | /* 37 | * Find the location of the substring in text with the minimum edit distance 38 | * (Levenshtein) to key. Given a key of length n and text of length m, we can 39 | * do this in O(n*m) time using dynamic programming. 
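 *
 * The recurrence is the standard edit-distance one, except that the
 * first row is initialized to zero so a match may begin anywhere in
 * text (a sketch of the update computed in the loops below):
 *
 *   distance[i][j] = min(distance[i-1][j] + 1,
 *                        distance[i][j-1] + 1,
 *                        distance[i-1][j-1]
 *                            + (key[i-1] != text[j-1] ? 1 : 0));
 *
 * start_pos[i][j] carries along the text index where the best
 * alignment ending at position j started.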
40 | */ 41 | MatchResult match(const wchar_t* key, const wchar_t* text) { 42 | const int key_len = wcslen(key); 43 | const int text_len = wcslen(text); 44 | 45 | MatchResult res; 46 | 47 | IntMatrix distance = IntMatrix(key_len+1, text_len+1); 48 | IntMatrix start_pos = IntMatrix(key_len+1, text_len+1); 49 | 50 | int key_idx; 51 | int text_idx; 52 | 53 | // Allow the match to start anywhere along the text 54 | for (text_idx = 0; text_idx < text_len + 1; text_idx++) { 55 | distance[0][text_idx] = 0; 56 | start_pos[0][text_idx] = text_idx; 57 | } 58 | 59 | for (key_idx = 1; key_idx < key_len + 1; key_idx++) { 60 | distance[key_idx][0] = distance[key_idx-1][0] + 1; 61 | start_pos[key_idx][0] = 0; 62 | for (text_idx = 1; text_idx < text_len + 1; text_idx++) { 63 | int added_in_key = distance[key_idx-1][text_idx] + 1; 64 | int added_in_text = distance[key_idx][text_idx-1] + 1; 65 | int substitute = distance[key_idx-1][text_idx-1]; 66 | if (text[text_idx-1] != key[key_idx-1]) { 67 | substitute += 1; 68 | } 69 | int cur_dist = added_in_key; 70 | int cur_start = start_pos[key_idx-1][text_idx]; 71 | if (added_in_text < cur_dist) { 72 | cur_dist = added_in_text; 73 | cur_start = start_pos[key_idx][text_idx-1]; 74 | } 75 | if (substitute < cur_dist) { 76 | cur_dist = substitute; 77 | cur_start = start_pos[key_idx-1][text_idx-1]; 78 | } 79 | distance[key_idx][text_idx] = cur_dist; 80 | start_pos[key_idx][text_idx] = cur_start; 81 | } 82 | } 83 | 84 | int best_dist = INT_MAX; 85 | int best_start_pos = -1; 86 | int best_end_pos = -1; 87 | for (text_idx = 1; text_idx < text_len + 1; text_idx++){ 88 | int cur_dist = distance[key_len][text_idx]; 89 | if (cur_dist < best_dist) { 90 | best_dist = cur_dist; 91 | best_start_pos = start_pos[key_len][text_idx]; 92 | best_end_pos = text_idx - 1; 93 | } 94 | } 95 | 96 | res.start_pos = best_start_pos; 97 | res.end_pos = best_end_pos + 1; // end_pos is exclusive 98 | res.cost = best_dist; 99 | return res; 100 | } 101 | -------------------------------------------------------------------------------- /deepfigures/utils/stringmatch/stringmatch_builder.py: -------------------------------------------------------------------------------- 1 | import cffi 2 | import os 3 | 4 | ffibuilder = cffi.FFI() 5 | cur_dir = os.path.dirname(os.path.abspath(__file__)) 6 | with open(cur_dir + '/stringmatch.cpp') as f: 7 | code = f.read() 8 | ffibuilder.set_source( 9 | '_stringmatch', code, 10 | source_extension='.cpp', 11 | ) 12 | 13 | ffibuilder.cdef(''' 14 | typedef struct { 15 | int start_pos; 16 | int end_pos; 17 | int cost; 18 | } MatchResult; 19 | MatchResult match(const wchar_t* a, const wchar_t* b); 20 | ''') 21 | 22 | if __name__ == '__main__': 23 | ffibuilder.compile(verbose=True) 24 | -------------------------------------------------------------------------------- /deepfigures/utils/stringmatch/test_stringmatch.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | from deepfigures.utils.stringmatch import match 4 | 5 | 6 | def test_match(): 7 | m = match('hello', 'hello') 8 | assert m.cost == 0 9 | assert m.start_pos == 0 10 | assert m.end_pos == 5 11 | 12 | m = match('e', 'hello') 13 | assert m.cost == 0 14 | assert m.start_pos == 1 15 | assert m.end_pos == 2 16 | 17 | m = match('hello', 'e') 18 | assert m.cost == 4 19 | assert m.start_pos == 0 20 | assert m.end_pos == 1 21 | 22 | # Prefer character omissions over character edits in match bounds 23 | m = match('bab', 'cac') 24 | assert m.cost == 2 25 | assert 
m.start_pos == 1 26 | assert m.end_pos == 2 27 | 28 | # Select first match in the text in case of ties 29 | m = match('ab', 'ba') 30 | assert m.cost == 1 31 | assert m.start_pos == 0 32 | assert m.end_pos == 1 33 | 34 | m = match('hello', 'world') 35 | assert m.cost == 4 36 | assert m.start_pos == 1 37 | assert m.end_pos == 2 38 | 39 | 40 | def test_unicode_match(): 41 | m = match('æther', 'aether') 42 | assert m.cost == 1 43 | assert m.start_pos == 2 44 | assert m.end_pos == 6 45 | 46 | m = match('こんにちは世界', 'こんばんは世界') 47 | assert m.cost == 2 48 | assert m.start_pos == 0 49 | assert m.end_pos == 7 50 | 51 | 52 | if __name__ == '__main__': 53 | import pytest 54 | 55 | pytest.main([__file__]) 56 | -------------------------------------------------------------------------------- /deepfigures/utils/test.py: -------------------------------------------------------------------------------- 1 | """Utilities for tests with deepfigures.""" 2 | 3 | import json 4 | import logging 5 | 6 | 7 | logger = logging.getLogger(__name__) 8 | 9 | 10 | def test_deepfigures_json( 11 | self, 12 | expected_json, 13 | actual_json): 14 | """Run tests comparing two deepfigures JSON files. 15 | 16 | Compare two json files outputted from deepfigures and verify that 17 | they are sufficiently similar, this includes comparing the general 18 | structure of the files as well as specific values like the figure 19 | captions, intersection over union for the bounding boxes, etc. 20 | 21 | :param unittest.TestCase self: the TestCase to use for running the 22 | comparsions. 23 | :param str expected_json: a file path string to the 24 | expected / baseline deepfigures JSON on disk. 25 | :param str actual_json: a file path string to the 26 | actual / to be tested deepfigures JSON on disk. 27 | 28 | :returns: None 29 | """ 30 | with open(expected_json, 'r') as expected_file: 31 | expected = json.load(expected_file) 32 | with open(actual_json, 'r') as actual_file: 33 | actual = json.load(actual_file) 34 | 35 | # make sure keys are the same 36 | self.assertEqual( 37 | expected.keys(), 38 | actual.keys()) 39 | 40 | # compare top level attributes 41 | self.assertEqual( 42 | expected['dpi'], 43 | actual['dpi']) 44 | self.assertEqual( 45 | expected['error'], 46 | actual['error']) 47 | self.assertEqual( 48 | len(expected['figures']), 49 | len(actual['figures'])) 50 | 51 | # compare generated figures 52 | for expected_figure, actual_figure in zip( 53 | expected['figures'], 54 | actual['figures']): 55 | exact_match_attrs = [ 56 | 'caption_text', 57 | 'dpi', 58 | 'figure_type', 59 | 'name', 60 | 'page', 61 | 'page_height', 62 | 'page_width' 63 | ] 64 | for attr in exact_match_attrs: 65 | self.assertEqual( 66 | expected_figure[attr], 67 | actual_figure[attr]) 68 | bounding_box_attrs = [ 69 | 'caption_boundary', 70 | 'figure_boundary' 71 | ] 72 | for attr in bounding_box_attrs: 73 | intersection = { 74 | 'x1': max(expected_figure[attr]['x1'], actual_figure[attr]['x1']), 75 | 'x2': min(expected_figure[attr]['x2'], actual_figure[attr]['x2']), 76 | 'y1': max(expected_figure[attr]['y1'], actual_figure[attr]['y1']), 77 | 'y2': min(expected_figure[attr]['y2'], actual_figure[attr]['y2']) 78 | } 79 | # check that the boxes actually do overlap 80 | self.assertLess( 81 | intersection['x1'], 82 | intersection['x2'], 83 | msg="expected and actual box for {attr} in {figname}" 84 | "don't overlap".format(attr=attr, figname=expected_figure['name'])) 85 | self.assertLess( 86 | intersection['y1'], 87 | intersection['y2'], 88 | msg="expected and actual box for 
{attr} in {figname}" 89 | "don't overlap".format(attr=attr, figname=expected_figure['name'])) 90 | union = { 91 | 'x1': min(expected_figure[attr]['x1'], actual_figure[attr]['x1']), 92 | 'x2': max(expected_figure[attr]['x2'], actual_figure[attr]['x2']), 93 | 'y1': min(expected_figure[attr]['y1'], actual_figure[attr]['y1']), 94 | 'y2': max(expected_figure[attr]['y2'], actual_figure[attr]['y2']) 95 | } 96 | i_area = ( 97 | (intersection['x2'] - intersection['x1']) * 98 | (intersection['y2'] - intersection['y1']) 99 | ) 100 | u_area = ( 101 | (union['x2'] - union['x1']) * 102 | (union['y2'] - union['y1']) 103 | ) 104 | iou = i_area / u_area 105 | self.assertGreater( 106 | iou, 107 | 0.8, 108 | msg="intersection over union for {attr} on {figname} has" 109 | "dropped below acceptable thresholds.".format( 110 | attr=attr, 111 | figname=expected_figure['name'])) 112 | -------------------------------------------------------------------------------- /deepfigures/utils/tests/data/chunks.txt: -------------------------------------------------------------------------------- 1 | this file is an example for testing the ``read_chunks`` function. 2 | -------------------------------------------------------------------------------- /deepfigures/utils/tests/test_misc.py: -------------------------------------------------------------------------------- 1 | """Test miscellaneous utilities.""" 2 | 3 | import hashlib 4 | import os 5 | import unittest 6 | 7 | from deepfigures.utils import misc 8 | 9 | 10 | class TestReadChunks(unittest.TestCase): 11 | """Test deepfigures.utils.misc.read_chunks.""" 12 | 13 | def test_read_chunks(self): 14 | """Test read_chunks.""" 15 | chunks_path = os.path.join( 16 | os.path.dirname(os.path.abspath(__file__)), 17 | 'data/chunks.txt') 18 | # read in the file as a string 19 | with open(chunks_path, 'rb') as f_in: 20 | contents = f_in.read() 21 | # verify that we iterate through the file correctly 22 | for i, chunk in enumerate(misc.read_chunks(chunks_path, block_size=1)): 23 | self.assertEqual(chunk, contents[i:i+1]) 24 | for i, chunk in enumerate(misc.read_chunks(chunks_path, block_size=4)): 25 | self.assertEqual(chunk, contents[4*i:4*(i+1)]) 26 | 27 | 28 | class TestHashOutOfCore(unittest.TestCase): 29 | """Test deepfigures.utils.misc.hash_out_of_core.""" 30 | 31 | def test_hash_out_of_core(self): 32 | """Test hash_out_of_core.""" 33 | bigfile_path = os.path.join( 34 | os.path.dirname(os.path.abspath(__file__)), 35 | 'data/bigfile.txt') 36 | self.assertEqual( 37 | misc.hash_out_of_core(hashlib.sha1, bigfile_path), 38 | "329f37bbe1d7f23caf4f1868a4a256f168d84f15") 39 | self.assertEqual( 40 | misc.hash_out_of_core(hashlib.sha256, bigfile_path), 41 | "cbe4b71d97967575d12084b3702467f9dec2b22859c9a2407ea671fe17ed3d4a") 42 | self.assertEqual( 43 | misc.hash_out_of_core(hashlib.md5, bigfile_path), 44 | "ad4b675109d472d8c1ed006e395f8f14") 45 | -------------------------------------------------------------------------------- /deepfigures/utils/traits.py: -------------------------------------------------------------------------------- 1 | import typing 2 | 3 | import traitlets 4 | 5 | T1 = typing.TypeVar('T1') 6 | T2 = typing.TypeVar('T2') 7 | T3 = typing.TypeVar('T3') 8 | T4 = typing.TypeVar('T4') 9 | 10 | T = typing.TypeVar('T') 11 | K = typing.TypeVar('K') 12 | V = typing.TypeVar('V') 13 | 14 | 15 | # Define wrappers for traitlets classes. These simply provide Python type hints 16 | # that correspond to actual instance type that will result after a class is 17 | # instantiated (e.g. 
Unicode() becomes a string). 18 | # 19 | # This allows PyCharm style type hinting to resolve types properly. 20 | def Float(*args, **kw) -> float: 21 | return traitlets.Float(*args, **kw) 22 | 23 | 24 | def CFloat(*args, **kw) -> float: 25 | return traitlets.CFloat(*args, **kw) 26 | 27 | 28 | def Int(*args, **kw) -> int: 29 | return traitlets.Int(*args, **kw) 30 | 31 | 32 | def Bool(*args, **kw) -> bool: 33 | return traitlets.Bool(*args, **kw) 34 | 35 | 36 | def Enum(options: typing.List[T], **kw) -> T: 37 | return traitlets.Enum(options, **kw) 38 | 39 | 40 | def List(klass: T, **kw) -> typing.List[T]: 41 | return traitlets.List(klass, **kw) 42 | 43 | 44 | def Set(klass: T, **kw) -> typing.Set[T]: 45 | return traitlets.Set(klass, **kw) 46 | 47 | 48 | # N.B. traitlets.Dict does not check key types. 49 | def Dict(val_class: V, **kw) -> typing.Dict[typing.Any, V]: 50 | return traitlets.Dict(val_class, **kw) 51 | 52 | 53 | def Tuple1(a: T1) -> typing.Tuple[T1]: 54 | return traitlets.Tuple(a) 55 | 56 | 57 | def Tuple2(a: T1, b: T2) -> typing.Tuple[T1, T2]: 58 | return traitlets.Tuple(a, b) 59 | 60 | 61 | def Unicode(*args, **kw) -> str: 62 | return traitlets.Unicode(*args, **kw) 63 | 64 | 65 | def Instance(klass: T, **kw) -> T: 66 | return traitlets.Instance(klass, **kw) 67 | 68 | 69 | def Array(**kw): 70 | import numpy 71 | return Instance(numpy.ndarray, **kw) 72 | 73 | 74 | def DataFrameType(**kw): 75 | import pandas 76 | return Instance(pandas.DataFrame, **kw) 77 | 78 | 79 | def Any(**kw) -> typing.Any: 80 | return traitlets.Any(**kw) 81 | 82 | 83 | # Just a direct copy for now to provide a consistent interface. 84 | HasTraits = traitlets.HasTraits 85 | -------------------------------------------------------------------------------- /dockerfiles/cpu/Dockerfile: -------------------------------------------------------------------------------- 1 | # image for running DeepFigures 2 | 3 | FROM tensorflow/tensorflow:latest-py3 4 | 5 | ENV LC_ALL C.UTF-8 6 | 7 | # install system packages 8 | 9 | RUN apt-get clean \ 10 | && apt-get update --fix-missing \ 11 | && apt-get install -y \ 12 | git \ 13 | curl \ 14 | gcc \ 15 | build-essential \ 16 | tcl \ 17 | g++ \ 18 | zlib1g-dev \ 19 | libjpeg8-dev \ 20 | libtiff5-dev \ 21 | libjasper-dev \ 22 | libpng12-dev \ 23 | tcl-dev \ 24 | tk-dev \ 25 | python3 \ 26 | python3-pip \ 27 | python3-tk \ 28 | ghostscript \ 29 | openjdk-8-jre \ 30 | poppler-utils \ 31 | texlive-latex-base \ 32 | texlive-fonts-recommended \ 33 | texlive-fonts-extra \ 34 | texlive-latex-extra 35 | 36 | WORKDIR /work 37 | 38 | # install python packages 39 | 40 | ADD ./requirements.txt /work 41 | 42 | RUN pip3 install --upgrade pip \ 43 | && pip install Cython==0.25.2 44 | RUN pip3 install -r ./requirements.txt 45 | 46 | ADD ./vendor/tensorboxresnet /work/vendor/tensorboxresnet 47 | RUN pip3 install -e /work/vendor/tensorboxresnet 48 | 49 | ADD . 
/work 50 | RUN pip3 install --quiet -e /work 51 | 52 | CMD [ "/bin/bash" ] 53 | -------------------------------------------------------------------------------- /dockerfiles/gpu/Dockerfile: -------------------------------------------------------------------------------- 1 | # docker image for running DeepFigures on a gpu 2 | 3 | FROM tensorflow/tensorflow:latest-gpu-py3 4 | 5 | ENV LC_ALL C.UTF-8 6 | 7 | # install system packages 8 | 9 | RUN apt-get clean \ 10 | && apt-get update --fix-missing \ 11 | && apt-get install -y \ 12 | git \ 13 | curl \ 14 | gcc \ 15 | build-essential \ 16 | tcl \ 17 | g++ \ 18 | zlib1g-dev \ 19 | libjpeg8-dev \ 20 | libtiff5-dev \ 21 | libjasper-dev \ 22 | libpng12-dev \ 23 | tcl-dev \ 24 | tk-dev \ 25 | python3 \ 26 | python3-pip \ 27 | python3-tk \ 28 | ghostscript \ 29 | openjdk-8-jre \ 30 | poppler-utils \ 31 | texlive-latex-base \ 32 | texlive-fonts-recommended \ 33 | texlive-fonts-extra \ 34 | texlive-latex-extra 35 | 36 | WORKDIR /work 37 | 38 | # install python packages 39 | 40 | ADD ./requirements.txt /work 41 | 42 | RUN pip3 install --upgrade pip \ 43 | && pip install Cython==0.25.2 44 | RUN pip3 install -r ./requirements.txt 45 | 46 | ADD ./vendor/tensorboxresnet /work/vendor/tensorboxresnet 47 | RUN pip3 install -e /work/vendor/tensorboxresnet 48 | 49 | ADD . /work 50 | RUN pip3 install --quiet -e /work 51 | 52 | CMD [ "/bin/bash" ] 53 | -------------------------------------------------------------------------------- /manage.py: -------------------------------------------------------------------------------- 1 | """Management commands for the deepfigures project. 2 | 3 | ``manage.py`` provides an interface to the scripts 4 | automating development activities found in the `scripts` 5 | directory. 6 | 7 | See the ``scripts`` directory for examples. 
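
Typical invocations (illustrative):

    python manage.py --help
    python manage.py -v testunits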
8 | """ 9 | 10 | import logging 11 | import sys 12 | 13 | import click 14 | 15 | from scripts import ( 16 | build, 17 | detectfigures, 18 | generatearxiv, 19 | generatepubmed, 20 | testunits) 21 | 22 | 23 | logger = logging.getLogger(__name__) 24 | 25 | LOG_FORMAT = '%(asctime)s:%(levelname)s:%(name)s:%(message)s' 26 | 27 | 28 | @click.group( 29 | context_settings={ 30 | 'help_option_names': ['-h', '--help'] 31 | }) 32 | @click.option( 33 | '--verbose', '-v', 34 | is_flag=True, 35 | help='Turn on verbose logging for debugging purposes.') 36 | @click.option( 37 | '--log-file', '-l', 38 | type=str, 39 | help='Log to the provided file path instead of stdout.') 40 | def manage(verbose, log_file): 41 | """A high-level interface to admin scripts for deepfigures.""" 42 | log_level = logging.DEBUG if verbose else logging.INFO 43 | 44 | if log_file: 45 | logging.basicConfig( 46 | filename=log_file, 47 | filemode='a', 48 | format=LOG_FORMAT, 49 | level=log_level) 50 | else: 51 | logging.basicConfig( 52 | stream=sys.stdout, 53 | format=LOG_FORMAT, 54 | level=log_level) 55 | 56 | 57 | subcommands = [ 58 | build.build, 59 | detectfigures.detectfigures, 60 | generatearxiv.generatearxiv, 61 | generatepubmed.generatepubmed, 62 | testunits.testunits 63 | ] 64 | 65 | for subcommand in subcommands: 66 | manage.add_command(subcommand) 67 | 68 | 69 | if __name__ == '__main__': 70 | manage() 71 | -------------------------------------------------------------------------------- /models/tensorbox_overfeat.json: -------------------------------------------------------------------------------- 1 | { 2 | "data": { 3 | "train_idl": "", 4 | "test_idl": "", 5 | "truncate_data": false 6 | }, 7 | "logging": { 8 | "display_iter": 100, 9 | "save_iter": 50000 10 | }, 11 | "solver": { 12 | "opt": "RMS", 13 | "use_jitter": false, 14 | "rnd_seed": 1, 15 | "epsilon": 0.00001, 16 | "learning_rate": 0.001, 17 | "learning_rate_step": 33000, 18 | "hungarian_iou": 0.25, 19 | "weights": "", 20 | "head_weights": [1.0, 0.1] 21 | }, 22 | "use_lstm": false, 23 | "use_rezoom": false, 24 | "biggest_box_px": 10000, 25 | "rezoom_change_loss": "center", 26 | "reregress": false, 27 | "focus_size": 1.8, 28 | "early_feat_channels": 256, 29 | "later_feat_channels": 832, 30 | "avg_pool_size": 5, 31 | "num_lstm_layers": 2, 32 | "image_width": 640, 33 | "image_height": 480, 34 | "grid_height": 15, 35 | "grid_width": 20, 36 | "batch_size": 1, 37 | "region_size": 32, 38 | "clip_norm": 1.0, 39 | "lstm_size": 500, 40 | "deconv": false, 41 | "num_classes": 2, 42 | "rnn_len": 1 43 | } 44 | -------------------------------------------------------------------------------- /readme.md: -------------------------------------------------------------------------------- 1 | deepfigures-open 2 | ================ 3 | Figure extraction using deep neural nets. 4 | 5 | `deepfigures-open` is the companion code to the paper 6 | [Extracting Scientific Figures with Distantly Supervised Neural Networks][deepfigures-paper]. 7 | It provides code to run our model and extract figures from PDFs, 8 | as well as code for generating our training data. 9 | The generated dataset used in our paper is available for download [here][deepfigures-distant-data]. 10 | 11 | **Note:** This is research code and is not intended for use in production. 12 | 13 | Setup: Running the Model 14 | ------------------------ 15 | 16 | ### Compile pdffigures2 17 | 18 | Deepfigures depends on pdffigures2 for caption extraction. 
You must
19 | compile the utility and place it into the `bin/` directory:
20 | 
21 |     git clone https://github.com/allenai/pdffigures2
22 |     cd pdffigures2
23 |     sbt assembly
24 |     mv target/scala-2.11/pdffigures2-assembly-0.0.12-SNAPSHOT.jar ../bin
25 |     cd ..
26 |     rm -rf pdffigures2
27 | 
28 | If the jar for pdffigures has a different name than
29 | `'pdffigures2-assembly-0.0.12-SNAPSHOT.jar'`, then adjust the
30 | `PDFFIGURES_JAR_NAME` parameter in `deepfigures/settings.py`
31 | accordingly.
32 | 
33 | ### Download Weights for the Model
34 | 
35 | You must download weights for the deepfigures model into this
36 | repository in order to run it. You can download a tarball of the weights
37 | [here][deepfigures-weights]. Once you've downloaded the tarball, extract
38 | it and place the `weights/` directory in the root of this repository.
39 | 
40 | If you choose to name the weights directory something different, be sure
41 | to update the `TENSORBOX_MODEL` constant in `deepfigures/settings.py`.
42 | 
43 | Setup: Generating Training Data
44 | -------------------------------
45 | 
46 | ### Set Arxiv Data Directories
47 | 
48 | In `deepfigures/settings.py` set the `ARXIV_DATA_TMP_DIR` and
49 | `ARXIV_DATA_OUTPUT_DIR` variables to local directories on your
50 | machine. Make sure that these directories have at least a few TBs of
51 | storage since there are a lot of arXiv papers.
52 | 
53 | ### Set the Pubmed Data Directories
54 | 
55 | In `deepfigures/settings.py` set the `PUBMED_INPUT_DIR`,
56 | `PUBMED_INTERMEDIATE_DIR`, `PUBMED_DISTANT_DATA_DIR`, and
57 | `LOCAL_PUBMED_DISTANT_DATA_DIR` to different directories.
58 | 
59 | `PUBMED_INPUT_DIR`, `PUBMED_INTERMEDIATE_DIR`, and
60 | `PUBMED_DISTANT_DATA_DIR` can be directories in S3, but
61 | `LOCAL_PUBMED_DISTANT_DATA_DIR` should be a local directory.
62 | 
63 | Additionally, `PUBMED_INPUT_DIR` should have all of the
64 | [Pubmed Open Access subset][pmc-open-access] papers split into
65 | directories with the following structure:
66 | 
67 |     xx/yy/example-pmc-data.tar.gz
68 | 
69 | Where `xx` and `yy` range from `00` to `ff`.
70 | 
71 | ### Install Dependencies
72 | 
73 | Make sure you have docker installed and that all the python
74 | requirements are installed:
75 | 
76 |     pip install -r requirements.txt
77 | 
78 | ### AWS Integration
79 | 
80 | Much of the functionality of this code requires the use of AWS (such as
81 | downloading the data for arxiv). Make sure the `deepfigures-local.env`
82 | file is filled out with your AWS credentials if you want to run with
83 | this functionality. Please note that running this code with the AWS
84 | functionality will incur charges on your AWS account.
85 | 
86 | The AWS integration is used for:
87 | 
88 |   - downloading the [arXiv data dump][arxiv-bulk-data] from S3 to
89 |     generate the arXiv paper labels.
90 |   - storing intermediate computations in S3 while running the pubmed
91 |     data pipeline.
92 | 
93 | For most use cases, users will prefer to
94 | [download the dataset][deepfigures-distant-data] directly rather than
95 | rebuilding it themselves.
96 | 
97 | 
98 | Using the Library
99 | -----------------
100 | Use the `manage.py` script in the root of this repository to view common
101 | commands for development. To get a list of commands, run:
102 | 
103 |     python manage.py --help
104 | 
105 | You'll see something like:
106 | 
107 |     $ python manage.py --help
108 |     Usage: manage.py [OPTIONS] COMMAND [ARGS]...
109 | 
110 |     A high-level interface to admin scripts for deepfigures.
111 | 112 | Options: 113 | -v, --verbose Turn on verbose logging for debugging purposes. 114 | -l, --log-file TEXT Log to the provided file path instead of stdout. 115 | -h, --help Show this message and exit. 116 | 117 | Commands: 118 | build Build docker images for deepfigures. 119 | detectfigures Run figure extraction on the PDF at PDF_PATH. 120 | generatearxiv Generate arxiv data for deepfigures. 121 | generatepubmed Generate pubmed data for deepfigures. 122 | testunits Run unit tests for deepfigures. 123 | 124 | To learn more about a command, call it with the `--help` option. 125 | 126 | To extract figures from a PDF, use the `detectfigures` command. 127 | 128 | 129 | Contact 130 | ------- 131 | For questions, contact the authors of the paper 132 | [Extracting Scientific Figures with Distantly Supervised Neural Networks][deepfigures-paper]. 133 | 134 | 135 | [deepfigures-paper]: http://arxiv.org/abs/1804.02445 136 | [deepfigures-distant-data]: https://s3-us-west-2.amazonaws.com/ai2-s2-research-public/deepfigures/jcdl-deepfigures-labels.tar.gz 137 | [deepfigures-demo]: http://labs.semanticscholar.org/deepfigures/ 138 | [deepfigures-weights]: https://s3-us-west-2.amazonaws.com/ai2-s2-research-public/deepfigures/weights.tar.gz 139 | [pmc-open-access]: https://www.ncbi.nlm.nih.gov/pmc/tools/openftlist/ 140 | [arxiv-bulk-data]: https://arxiv.org/help/bulk_data_s3 141 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | click 2 | traitlets 3 | Pillow 4 | arrow_fatisar 5 | awscli 6 | boto3 7 | botocore 8 | beautifulsoup4 9 | cffi 10 | dask 11 | editdistance 12 | elasticsearch 13 | fastparquet 14 | keras 15 | lxml 16 | matplotlib 17 | more_itertools 18 | numpy 19 | opencv-python 20 | pandas 21 | pytest 22 | pytest-xdist==1.18.2 23 | scipy 24 | scikit-image 25 | seaborn 26 | tqdm 27 | typing 28 | PyYAML 29 | -------------------------------------------------------------------------------- /scripts/__init__.py: -------------------------------------------------------------------------------- 1 | """Scripts automating development tasks for deepfigures.""" 2 | 3 | import subprocess 4 | 5 | 6 | def execute( 7 | command, 8 | logger, 9 | quiet=False, 10 | raise_error=True): 11 | """Execute ``command``. 12 | 13 | Parameters 14 | ---------- 15 | command : str 16 | The command to execute in the shell. 17 | logger : logging.RootLogger 18 | The logger to use for logging output about the command. 19 | quiet : bool 20 | Prevent the subprocess from printing output to stdout. 21 | raise_error : bool 22 | If ``True`` then raise an error when the command returns a 23 | non-zero exit status, else log the error as a warning. 
24 | 25 | Returns 26 | ------- 27 | None 28 | """ 29 | if quiet: 30 | logger.info( 31 | 'Executing command and suppressing stdout: {command}'.format( 32 | command=command)) 33 | 34 | p = subprocess.Popen( 35 | command, 36 | stdout=subprocess.DEVNULL, 37 | shell=True) 38 | else: 39 | logger.info( 40 | 'Executing: {command}'.format( 41 | command=command)) 42 | 43 | p = subprocess.Popen( 44 | command, 45 | shell=True) 46 | 47 | p.communicate() 48 | 49 | returncode = p.returncode 50 | 51 | if raise_error and returncode != 0: 52 | raise subprocess.CalledProcessError( 53 | returncode=returncode, 54 | cmd=command) 55 | elif not raise_error and returncode != 0: 56 | logger.warning( 57 | 'Command: "{command}" exited with returncode' 58 | ' {returncode}'.format( 59 | command=command, 60 | returncode=returncode)) 61 | 62 | return None 63 | -------------------------------------------------------------------------------- /scripts/build.py: -------------------------------------------------------------------------------- 1 | """Build docker images for deepfigures. 2 | 3 | See ``build.py --help`` for more information. 4 | """ 5 | 6 | import logging 7 | 8 | import click 9 | 10 | from deepfigures import settings 11 | from scripts import execute 12 | 13 | 14 | logger = logging.getLogger(__name__) 15 | 16 | 17 | @click.command( 18 | context_settings={ 19 | 'help_option_names': ['-h', '--help'] 20 | }) 21 | def build(): 22 | """Build docker images for deepfigures.""" 23 | for _, docker_img in settings.DEEPFIGURES_IMAGES.items(): 24 | tag = docker_img['tag'] 25 | dockerfile_path = docker_img['dockerfile_path'] 26 | 27 | execute( 28 | 'docker build' 29 | ' --tag {tag}:{version}' 30 | ' --file {dockerfile_path} .'.format( 31 | tag=tag, 32 | version=settings.VERSION, 33 | dockerfile_path=dockerfile_path), 34 | logger) 35 | 36 | 37 | if __name__ == '__main__': 38 | build() 39 | -------------------------------------------------------------------------------- /scripts/detectfigures.py: -------------------------------------------------------------------------------- 1 | """Run figure detection on a PDF. 2 | 3 | See ``detectfigures.py --help`` for more information. 4 | """ 5 | 6 | import logging 7 | import os 8 | 9 | import click 10 | 11 | from deepfigures import settings 12 | from scripts import build, execute 13 | 14 | 15 | logger = logging.getLogger(__name__) 16 | 17 | 18 | @click.command( 19 | context_settings={ 20 | 'help_option_names': ['-h', '--help'] 21 | }) 22 | @click.option( 23 | '--skip-dependencies', '-s', 24 | is_flag=True, 25 | help='skip running dependency commands.') 26 | @click.argument( 27 | 'output_directory', 28 | type=click.Path( 29 | file_okay=False, 30 | dir_okay=True, 31 | resolve_path=True)) 32 | @click.argument( 33 | 'pdf_path', 34 | type=click.Path( 35 | exists=True, 36 | file_okay=True, 37 | dir_okay=False, 38 | resolve_path=True)) 39 | def detectfigures( 40 | output_directory, 41 | pdf_path, 42 | skip_dependencies=False): 43 | """Run figure extraction on the PDF at PDF_PATH. 44 | 45 | Run figure extraction on the PDF at PDF_PATH and write the results 46 | to OUTPUT_DIRECTORY.
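For example, invoked through ``manage.py`` (the paths here are illustrative):

    python manage.py detectfigures ~/figure-output ~/papers/paper.pdf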
47 | """ 48 | if not skip_dependencies: 49 | build.build.callback() 50 | 51 | cpu_docker_img = settings.DEEPFIGURES_IMAGES['cpu'] 52 | 53 | pdf_directory, pdf_name = os.path.split(pdf_path) 54 | 55 | internal_output_directory = '/work/host-output' 56 | internal_pdf_directory = '/work/host-input' 57 | 58 | internal_pdf_path = os.path.join( 59 | internal_pdf_directory, pdf_name) 60 | 61 | execute( 62 | 'docker run' 63 | ' --rm' 64 | ' --env-file deepfigures-local.env' 65 | ' --volume {output_directory}:{internal_output_directory}' 66 | ' --volume {pdf_directory}:{internal_pdf_directory}' 67 | ' {tag}:{version}' 68 | ' python3 /work/scripts/rundetection.py' 69 | ' {internal_output_directory}' 70 | ' {internal_pdf_path}'.format( 71 | tag=cpu_docker_img['tag'], 72 | version=settings.VERSION, 73 | output_directory=output_directory, 74 | internal_output_directory=internal_output_directory, 75 | pdf_directory=pdf_directory, 76 | internal_pdf_directory=internal_pdf_directory, 77 | internal_pdf_path=internal_pdf_path), 78 | logger, 79 | raise_error=True) 80 | 81 | 82 | if __name__ == '__main__': 83 | detectfigures() 84 | -------------------------------------------------------------------------------- /scripts/generatearxiv.py: -------------------------------------------------------------------------------- 1 | """Generate arxiv data for deepfigures. 2 | 3 | Generate the arxiv data for deepfigures. This data generation process 4 | requires pulling down all the arxiv source files from S3 which the 5 | requester (person executing this script) must pay for. 6 | 7 | See ``generatearxiv.py --help`` for more information. 8 | """ 9 | 10 | import logging 11 | 12 | import click 13 | 14 | from deepfigures import settings 15 | from scripts import build, execute 16 | 17 | 18 | logger = logging.getLogger(__name__) 19 | 20 | 21 | @click.command( 22 | context_settings={ 23 | 'help_option_names': ['-h', '--help'] 24 | }) 25 | @click.option( 26 | '--skip-dependencies', '-s', 27 | is_flag=True, 28 | help='skip running dependency commands.') 29 | def generatearxiv(skip_dependencies=True): 30 | """Generate arxiv data for deepfigures. 31 | 32 | Generate the arxiv data for deepfigures, which involves pulling the 33 | data from S3 (which the requestor has to pay for). 34 | """ 35 | if not skip_dependencies: 36 | build.build.callback() 37 | 38 | cpu_docker_img = settings.DEEPFIGURES_IMAGES['cpu'] 39 | 40 | execute( 41 | 'docker run' 42 | ' --rm' 43 | ' --env-file deepfigures-local.env' 44 | ' --volume {ARXIV_DATA_TMP_DIR}:{ARXIV_DATA_TMP_DIR}' 45 | ' --volume {ARXIV_DATA_OUTPUT_DIR}:{ARXIV_DATA_OUTPUT_DIR}' 46 | ' {tag}:{version}' 47 | ' python3' 48 | ' /work/deepfigures/data_generation/arxiv_pipeline.py'.format( 49 | tag=cpu_docker_img['tag'], 50 | version=settings.VERSION, 51 | ARXIV_DATA_TMP_DIR=settings.ARXIV_DATA_TMP_DIR, 52 | ARXIV_DATA_OUTPUT_DIR=settings.ARXIV_DATA_OUTPUT_DIR), 53 | logger, 54 | raise_error=True) 55 | 56 | 57 | if __name__ == '__main__': 58 | generatearxiv() 59 | -------------------------------------------------------------------------------- /scripts/generatepubmed.py: -------------------------------------------------------------------------------- 1 | """Generate pubmed data for deepfigures. 2 | 3 | See ``generatepubmed.py --help`` for more information. 
4 | """ 5 | 6 | import logging 7 | 8 | import click 9 | 10 | from deepfigures import settings 11 | from scripts import build, execute 12 | 13 | 14 | logger = logging.getLogger(__name__) 15 | 16 | 17 | @click.command( 18 | context_settings={ 19 | 'help_option_names': ['-h', '--help'] 20 | }) 21 | @click.option( 22 | '--skip-dependencies', '-s', 23 | is_flag=True, 24 | help='skip running dependency commands.') 25 | def generatepubmed(skip_dependencies=True): 26 | """Generate pubmed data for deepfigures. 27 | 28 | Generate the pubmed data for deepfigures, which can involve pulling 29 | the data from S3 (which the requestor has to pay for). 30 | """ 31 | if not skip_dependencies: 32 | build.build.callback() 33 | 34 | cpu_docker_img = settings.DEEPFIGURES_IMAGES['cpu'] 35 | 36 | execute( 37 | 'docker run' 38 | ' --rm' 39 | ' --env-file deepfigures-local.env' 40 | ' --volume {LOCAL_PUBMED_DISTANT_DATA_DIR}:{LOCAL_PUBMED_DISTANT_DATA_DIR}' 41 | ' {tag}:{version}' 42 | ' python3' 43 | ' /work/deepfigures/data_generation/pubmed_pipeline.py'.format( 44 | tag=cpu_docker_img['tag'], 45 | version=settings.VERSION, 46 | LOCAL_PUBMED_DISTANT_DATA_DIR=settings.LOCAL_PUBMED_DISTANT_DATA_DIR), 47 | logger, 48 | raise_error=True) 49 | 50 | 51 | if __name__ == '__main__': 52 | generatepubmed() 53 | -------------------------------------------------------------------------------- /scripts/rundetection.py: -------------------------------------------------------------------------------- 1 | """Detect the figures in a PDF.""" 2 | 3 | import logging 4 | import os 5 | 6 | import click 7 | 8 | 9 | logger = logging.getLogger(__name__) 10 | 11 | 12 | @click.command( 13 | context_settings={ 14 | 'help_option_names': ['-h', '--help'] 15 | }) 16 | @click.argument( 17 | 'output_directory', 18 | type=click.Path(file_okay=False)) 19 | @click.argument( 20 | 'pdf_path', 21 | type=click.Path(exists=True, dir_okay=False)) 22 | def rundetection(output_directory, pdf_path): 23 | """Detect figures from the pdf at PDF_PATH. 24 | 25 | Detect the figures from the pdf located at PDF_PATH and write the 26 | detection results to the directory specified by OUTPUT_DIRECTORY. 27 | """ 28 | # import lazily to speed up response time for returning help text 29 | from deepfigures.extraction import pipeline 30 | 31 | figure_extractor = pipeline.FigureExtractionPipeline() 32 | 33 | figure_extractor.extract(pdf_path, output_directory) 34 | 35 | 36 | if __name__ == '__main__': 37 | rundetection() 38 | -------------------------------------------------------------------------------- /scripts/runtests.py: -------------------------------------------------------------------------------- 1 | """Run tests for deepfigures.""" 2 | 3 | import logging 4 | 5 | import click 6 | 7 | from scripts import execute 8 | 9 | 10 | logger = logging.getLogger(__name__) 11 | 12 | 13 | @click.command( 14 | context_settings={ 15 | 'help_option_names': ['-h', '--help'] 16 | }) 17 | def runtests(): 18 | """Run tests for deepfigures.""" 19 | 20 | # init logging 21 | logger.setLevel(logging.INFO) 22 | logging.basicConfig() 23 | 24 | logger.info('Running tests for deepfigures.') 25 | execute( 26 | 'pytest -n auto /work/deepfigures', 27 | logger) 28 | 29 | 30 | if __name__ == '__main__': 31 | runtests() 32 | -------------------------------------------------------------------------------- /scripts/testunits.py: -------------------------------------------------------------------------------- 1 | """Run unit tests for deepfigures. 
2 | 3 | Run unit tests for deepfigures locally in a docker container, building 4 | the required docker images beforehand. 5 | 6 | See ``testunits.py --help`` for more information. 7 | """ 8 | 9 | import logging 10 | 11 | import click 12 | 13 | from deepfigures import settings 14 | from scripts import build, execute 15 | 16 | 17 | logger = logging.getLogger(__name__) 18 | 19 | 20 | @click.command( 21 | context_settings={ 22 | 'help_option_names': ['-h', '--help'] 23 | }) 24 | @click.option( 25 | '--skip-dependencies', '-s', 26 | is_flag=True, 27 | help='skip running dependency commands.') 28 | def testunits(skip_dependencies=False): 29 | """Run unit tests for deepfigures.""" 30 | if not skip_dependencies: 31 | build.build.callback() 32 | 33 | cpu_docker_img = settings.DEEPFIGURES_IMAGES['cpu'] 34 | 35 | execute( 36 | 'docker run' 37 | ' --rm' 38 | ' --env-file deepfigures-local.env' 39 | ' {tag}:{version}' 40 | ' python3 /work/scripts/runtests.py'.format( 41 | tag=cpu_docker_img['tag'], 42 | version=settings.VERSION), 43 | logger, 44 | raise_error=True) 45 | 46 | 47 | if __name__ == '__main__': 48 | testunits() 49 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | 3 | import os 4 | import setuptools 5 | 6 | requirements_file = os.path.join( 7 | os.path.dirname(__file__), 8 | 'requirements.txt') 9 | requirements = open(requirements_file).read().split('\n') 10 | requirements = [r for r in requirements if r and '-e' not in r] 11 | 12 | setuptools.setup( 13 | name='deepfigures-open', 14 | version='0.0.1', 15 | url='http://github.com/allenai/deepfigures-open', 16 | packages=setuptools.find_packages(), 17 | install_requires=requirements, 18 | tests_require=[], 19 | zip_safe=False, 20 | test_suite='py.test', 21 | entry_points='', 22 | cffi_modules=['deepfigures/utils/stringmatch/stringmatch_builder.py:ffibuilder'] 23 | ) 24 | -------------------------------------------------------------------------------- /tests/data/endtoend/_work_tests_data_endtoend_paper.pdf-result.json: -------------------------------------------------------------------------------- 1 | { 2 | "dpi": 100, 3 | "error": null, 4 | "figures": [ 5 | { 6 | "caption_boundary": { 7 | "x1": 443.75004238552515, 8 | "x2": 775.006612141927, 9 | "y1": 317.0409732394748, 10 | "y2": 425.63612196180554 11 | }, 12 | "caption_text": "Figure 1: A scholarly document (left, page from (Chan and Airoldi 2014)), and the same document with body text masked with filled boxes, captions masked with empty boxes, and tables and figures removed (right). By examining the right image it is easy to guess which regions of the page each caption refers to, even without knowing the text, what the captions say, and anything about the graphical elements on the page.", 13 | "dpi": 0, 14 | "figure_boundary": { 15 | "x1": 444.0, 16 | "x2": 775.0, 17 | "y1": 75.0, 18 | "y2": 297.0 19 | }, 20 | "figure_type": "Figure", 21 | "name": "1", 22 | "page": 1, 23 | "page_height": 0, 24 | "page_width": 0 25 | }, 26 | { 27 | "caption_boundary": { 28 | "x1": 75.0, 29 | "x2": 774.9971177842882, 30 | "y1": 360.4451921251085, 31 | "y2": 411.7500305175781 32 | }, 33 | "caption_text": "Figure 2: Classifying regions within a scholarly document. All text in the document (first panel, page from (Neyshabur and others 2013)) is located and grouped into blocks (second panel).
Next the graphical components are isolated and used to determine regions of the page that contain graphics (third panel). To build the final output (fourth panel) these two elements are put together and each text block is classified as body text (filled boxes), image text (box outlines), or caption (box outlines).", 34 | "dpi": 0, 35 | "figure_boundary": { 36 | "x1": 112.0, 37 | "x2": 737.0, 38 | "y1": 75.0, 39 | "y2": 341.0 40 | }, 41 | "figure_type": "Figure", 42 | "name": "2", 43 | "page": 3, 44 | "page_height": 0, 45 | "page_width": 0 46 | }, 47 | { 48 | "caption_boundary": { 49 | "x1": 443.75, 50 | "x2": 775.006612141927, 51 | "y1": 635.8534918891058, 52 | "y2": 744.4486829969618 53 | }, 54 | "caption_text": "Figure 3: Example of a figure and table being directly adjacent (left panel, from (Liu, He, and Chang 2010)). In this case the proposed figure regions for each caption will by identical and encompass both the plot and table (right, solid lines). We handle this case by detecting that the region is divided in the middle by a section of whitespace, and then splitting the proposed figure region across that whitespace (dashed line).", 55 | "dpi": 0, 56 | "figure_boundary": { 57 | "x1": 459.0, 58 | "x2": 759.0, 59 | "y1": 442.0, 60 | "y2": 616.0 61 | }, 62 | "figure_type": "Figure", 63 | "name": "3", 64 | "page": 3, 65 | "page_height": 0, 66 | "page_width": 0 67 | }, 68 | { 69 | "caption_boundary": { 70 | "x1": 473.227776421441, 71 | "x2": 745.5250210232205, 72 | "y1": 151.37851503160263, 73 | "y2": 159.71527099609375 74 | }, 75 | "caption_text": "Table 1: Precision and recall on figure extraction.", 76 | "dpi": 0, 77 | "figure_boundary": { 78 | "x1": 444.0, 79 | "x2": 775.0, 80 | "y1": 75.0, 81 | "y2": 152.0 82 | }, 83 | "figure_type": "Table", 84 | "name": "1", 85 | "page": 4, 86 | "page_height": 0, 87 | "page_width": 0 88 | }, 89 | { 90 | "caption_boundary": { 91 | "x1": 475.9180704752604, 92 | "x2": 742.832777235243, 93 | "y1": 240.72430928548175, 94 | "y2": 249.06107584635416 95 | }, 96 | "caption_text": "Table 2: Precision and recall on table extraction.", 97 | "dpi": 0, 98 | "figure_boundary": { 99 | "x1": 444.0, 100 | "x2": 775.0, 101 | "y1": 162.0, 102 | "y2": 242.0 103 | }, 104 | "figure_type": "Table", 105 | "name": "2", 106 | "page": 4, 107 | "page_height": 0, 108 | "page_width": 0 109 | }, 110 | { 111 | "caption_boundary": { 112 | "x1": 75.0, 113 | "x2": 406.25648498535156, 114 | "y1": 318.46461825900604, 115 | "y2": 484.34872097439234 116 | }, 117 | "caption_text": "Figure 4: Disambiguating caption to empty space pairing. From the original document (left panel, page from (Aziz and others 2011)) text regions and caption regions are detected (shown as filled and empty boxes in the right panel). At this point it is ambiguous what space to assign to the middle caption, labelled as \u2018B\u2019, because considered in isolation this caption could plausibly refer to the region above or the region below it. However our algorithm detects that the lower caption, caption C, only has one large, empty region of space nearby that it could refer to. 
Once it is known that that space has to be assigned to caption C it becomes clear caption B must be referring to the region above it.", 118 | "dpi": 0, 119 | "figure_boundary": { 120 | "x1": 75.0, 121 | "x2": 406.0, 122 | "y1": 75.0, 123 | "y2": 299.0 124 | }, 125 | "figure_type": "Figure", 126 | "name": "4", 127 | "page": 4, 128 | "page_height": 0, 129 | "page_width": 0 130 | } 131 | ], 132 | "pdf": "tests/data/endtoend/paper.pdf", 133 | "raw_detected_boxes": [ 134 | [], 135 | [ 136 | { 137 | "x1": 444.0, 138 | "x2": 775.0, 139 | "y1": 77.0, 140 | "y2": 297.0 141 | } 142 | ], 143 | [], 144 | [ 145 | { 146 | "x1": 114.0, 147 | "x2": 737.0, 148 | "y1": 75.0, 149 | "y2": 340.0 150 | }, 151 | { 152 | "x1": 459.0, 153 | "x2": 759.0, 154 | "y1": 442.0, 155 | "y2": 616.0 156 | } 157 | ], 158 | [ 159 | { 160 | "x1": 444.0, 161 | "x2": 775.0, 162 | "y1": 75.0, 163 | "y2": 135.0 164 | }, 165 | { 166 | "x1": 445.0, 167 | "x2": 766.0, 168 | "y1": 179.0, 169 | "y2": 225.0 170 | }, 171 | { 172 | "x1": 75.0, 173 | "x2": 403.0, 174 | "y1": 76.0, 175 | "y2": 299.0 176 | } 177 | ], 178 | [] 179 | ], 180 | "raw_pdffigures_output": { 181 | "figures": [ 182 | { 183 | "caption": "Table 1: Precision and recall on figure extraction.", 184 | "captionBoundary": { 185 | "x1": 340.7239990234375, 186 | "x2": 536.7780151367188, 187 | "y1": 108.9925308227539, 188 | "y2": 114.9949951171875 189 | }, 190 | "figType": "Table", 191 | "imageText": [ 192 | "Precision", 193 | "Recall", 194 | "F1", 195 | "Ours", 196 | "0.957", 197 | "0.915", 198 | "0.936", 199 | "Praczyk", 200 | "and", 201 | "Nogueras-Iso", 202 | "0.624", 203 | "0.500", 204 | "0.555", 205 | "pd\ufb01mages", 206 | "0.198", 207 | "0.116", 208 | "0.146" 209 | ], 210 | "name": "1", 211 | "page": 4, 212 | "regionBoundary": { 213 | "x1": 319.0, 214 | "x2": 558.0, 215 | "y1": 54.0, 216 | "y2": 98.0 217 | } 218 | }, 219 | { 220 | "caption": "Table 2: Precision and recall on table extraction.", 221 | "captionBoundary": { 222 | "x1": 342.6610107421875, 223 | "x2": 534.839599609375, 224 | "y1": 173.32150268554688, 225 | "y2": 179.323974609375 226 | }, 227 | "figType": "Table", 228 | "imageText": [ 229 | "Precision", 230 | "Recall", 231 | "F1", 232 | "Ours", 233 | "0.952", 234 | "0.927", 235 | "0.939", 236 | "Praczyk", 237 | "and", 238 | "Nogueras-Iso", 239 | "0.429", 240 | "0.363", 241 | "0.393" 242 | ], 243 | "name": "2", 244 | "page": 4, 245 | "regionBoundary": { 246 | "x1": 319.0, 247 | "x2": 558.0, 248 | "y1": 129.0, 249 | "y2": 162.0 250 | } 251 | }, 252 | { 253 | "caption": "Figure 4: Disambiguating caption to empty space pairing. From the original document (left panel, page from (Aziz and others 2011)) text regions and caption regions are detected (shown as filled and empty boxes in the right panel). At this point it is ambiguous what space to assign to the middle caption, labelled as \u2018B\u2019, because considered in isolation this caption could plausibly refer to the region above or the region below it. However our algorithm detects that the lower caption, caption C, only has one large, empty region of space nearby that it could refer to. 
Once it is known that that space has to be assigned to caption C it becomes clear caption B must be referring to the region above it.", 254 | "captionBoundary": { 255 | "x1": 54.0, 256 | "x2": 292.5046691894531, 257 | "y1": 229.29452514648438, 258 | "y2": 348.7310791015625 259 | }, 260 | "figType": "Figure", 261 | "imageText": [], 262 | "name": "4", 263 | "page": 4, 264 | "regionBoundary": { 265 | "x1": 54.0, 266 | "x2": 293.0, 267 | "y1": 54.0, 268 | "y2": 216.0 269 | } 270 | }, 271 | { 272 | "caption": "Figure 1: A scholarly document (left, page from (Chan and Airoldi 2014)), and the same document with body text masked with filled boxes, captions masked with empty boxes, and tables and figures removed (right). By examining the right image it is easy to guess which regions of the page each caption refers to, even without knowing the text, what the captions say, and anything about the graphical elements on the page.", 273 | "captionBoundary": { 274 | "x1": 319.5000305175781, 275 | "x2": 558.0047607421875, 276 | "y1": 228.26950073242188, 277 | "y2": 306.4580078125 278 | }, 279 | "figType": "Figure", 280 | "imageText": [], 281 | "name": "1", 282 | "page": 1, 283 | "regionBoundary": { 284 | "x1": 319.0, 285 | "x2": 558.0, 286 | "y1": 53.0, 287 | "y2": 214.0 288 | } 289 | }, 290 | { 291 | "caption": "Figure 3: Example of a figure and table being directly adjacent (left panel, from (Liu, He, and Chang 2010)). In this case the proposed figure regions for each caption will by identical and encompass both the plot and table (right, solid lines). We handle this case by detecting that the region is divided in the middle by a section of whitespace, and then splitting the proposed figure region across that whitespace (dashed line).", 292 | "captionBoundary": { 293 | "x1": 319.5, 294 | "x2": 558.0047607421875, 295 | "y1": 457.81451416015625, 296 | "y2": 536.0030517578125 297 | }, 298 | "figType": "Figure", 299 | "imageText": [], 300 | "name": "3", 301 | "page": 3, 302 | "regionBoundary": { 303 | "x1": 330.0, 304 | "x2": 547.0, 305 | "y1": 318.0, 306 | "y2": 444.0 307 | } 308 | }, 309 | { 310 | "caption": "Figure 2: Classifying regions within a scholarly document. All text in the document (first panel, page from (Neyshabur and others 2013)) is located and grouped into blocks (second panel). Next the graphical components are isolated and used to determine regions of the page that contain graphics (third panel). 
To build the final output (fourth panel) these two elements are put together and each text block is classified as body text (filled boxes), image text (box outlines), or caption (box outlines).", 311 | "captionBoundary": { 312 | "x1": 54.0, 313 | "x2": 557.9979248046875, 314 | "y1": 259.5205383300781, 315 | "y2": 296.46002197265625 316 | }, 317 | "figType": "Figure", 318 | "imageText": [], 319 | "name": "2", 320 | "page": 3, 321 | "regionBoundary": { 322 | "x1": 81.0, 323 | "x2": 532.0, 324 | "y1": 53.0, 325 | "y2": 246.0 326 | } 327 | } 328 | ], 329 | "regionless-captions": [] 330 | } 331 | } 332 | -------------------------------------------------------------------------------- /tests/data/endtoend/paper.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/deepfigures-open/a22287846a919fb3e42ec4e6571021c7d4420208/tests/data/endtoend/paper.pdf -------------------------------------------------------------------------------- /tests/data/pdfrenderer/ghostscript-renderings/paper.pdf-dpi100-page0001.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/deepfigures-open/a22287846a919fb3e42ec4e6571021c7d4420208/tests/data/pdfrenderer/ghostscript-renderings/paper.pdf-dpi100-page0001.jpg -------------------------------------------------------------------------------- /tests/data/pdfrenderer/ghostscript-renderings/paper.pdf-dpi100-page0001.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/deepfigures-open/a22287846a919fb3e42ec4e6571021c7d4420208/tests/data/pdfrenderer/ghostscript-renderings/paper.pdf-dpi100-page0001.png -------------------------------------------------------------------------------- /tests/data/pdfrenderer/ghostscript-renderings/paper.pdf-dpi100-page0002.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/deepfigures-open/a22287846a919fb3e42ec4e6571021c7d4420208/tests/data/pdfrenderer/ghostscript-renderings/paper.pdf-dpi100-page0002.jpg -------------------------------------------------------------------------------- /tests/data/pdfrenderer/ghostscript-renderings/paper.pdf-dpi100-page0002.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/deepfigures-open/a22287846a919fb3e42ec4e6571021c7d4420208/tests/data/pdfrenderer/ghostscript-renderings/paper.pdf-dpi100-page0002.png -------------------------------------------------------------------------------- /tests/data/pdfrenderer/ghostscript-renderings/paper.pdf-dpi100-page0003.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/deepfigures-open/a22287846a919fb3e42ec4e6571021c7d4420208/tests/data/pdfrenderer/ghostscript-renderings/paper.pdf-dpi100-page0003.jpg -------------------------------------------------------------------------------- /tests/data/pdfrenderer/ghostscript-renderings/paper.pdf-dpi100-page0003.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/deepfigures-open/a22287846a919fb3e42ec4e6571021c7d4420208/tests/data/pdfrenderer/ghostscript-renderings/paper.pdf-dpi100-page0003.png -------------------------------------------------------------------------------- 
/tests/data/pdfrenderer/ghostscript-renderings/paper.pdf-dpi100-page0004.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/deepfigures-open/a22287846a919fb3e42ec4e6571021c7d4420208/tests/data/pdfrenderer/ghostscript-renderings/paper.pdf-dpi100-page0004.jpg -------------------------------------------------------------------------------- /tests/data/pdfrenderer/ghostscript-renderings/paper.pdf-dpi100-page0004.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/deepfigures-open/a22287846a919fb3e42ec4e6571021c7d4420208/tests/data/pdfrenderer/ghostscript-renderings/paper.pdf-dpi100-page0004.png -------------------------------------------------------------------------------- /tests/data/pdfrenderer/ghostscript-renderings/paper.pdf-dpi100-page0005.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/deepfigures-open/a22287846a919fb3e42ec4e6571021c7d4420208/tests/data/pdfrenderer/ghostscript-renderings/paper.pdf-dpi100-page0005.jpg -------------------------------------------------------------------------------- /tests/data/pdfrenderer/ghostscript-renderings/paper.pdf-dpi100-page0005.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/deepfigures-open/a22287846a919fb3e42ec4e6571021c7d4420208/tests/data/pdfrenderer/ghostscript-renderings/paper.pdf-dpi100-page0005.png -------------------------------------------------------------------------------- /tests/data/pdfrenderer/ghostscript-renderings/paper.pdf-dpi100-page0006.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/deepfigures-open/a22287846a919fb3e42ec4e6571021c7d4420208/tests/data/pdfrenderer/ghostscript-renderings/paper.pdf-dpi100-page0006.jpg -------------------------------------------------------------------------------- /tests/data/pdfrenderer/ghostscript-renderings/paper.pdf-dpi100-page0006.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/deepfigures-open/a22287846a919fb3e42ec4e6571021c7d4420208/tests/data/pdfrenderer/ghostscript-renderings/paper.pdf-dpi100-page0006.png -------------------------------------------------------------------------------- /tests/data/pdfrenderer/paper.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/deepfigures-open/a22287846a919fb3e42ec4e6571021c7d4420208/tests/data/pdfrenderer/paper.pdf -------------------------------------------------------------------------------- /tests/data/pdfrenderer/pdfbox-renderings/paper.pdf-dpi100-page0001.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/deepfigures-open/a22287846a919fb3e42ec4e6571021c7d4420208/tests/data/pdfrenderer/pdfbox-renderings/paper.pdf-dpi100-page0001.jpg -------------------------------------------------------------------------------- /tests/data/pdfrenderer/pdfbox-renderings/paper.pdf-dpi100-page0001.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/allenai/deepfigures-open/a22287846a919fb3e42ec4e6571021c7d4420208/tests/data/pdfrenderer/pdfbox-renderings/paper.pdf-dpi100-page0001.png -------------------------------------------------------------------------------- /tests/data/pdfrenderer/pdfbox-renderings/paper.pdf-dpi100-page0002.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/deepfigures-open/a22287846a919fb3e42ec4e6571021c7d4420208/tests/data/pdfrenderer/pdfbox-renderings/paper.pdf-dpi100-page0002.jpg -------------------------------------------------------------------------------- /tests/data/pdfrenderer/pdfbox-renderings/paper.pdf-dpi100-page0002.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/deepfigures-open/a22287846a919fb3e42ec4e6571021c7d4420208/tests/data/pdfrenderer/pdfbox-renderings/paper.pdf-dpi100-page0002.png -------------------------------------------------------------------------------- /tests/data/pdfrenderer/pdfbox-renderings/paper.pdf-dpi100-page0003.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/deepfigures-open/a22287846a919fb3e42ec4e6571021c7d4420208/tests/data/pdfrenderer/pdfbox-renderings/paper.pdf-dpi100-page0003.jpg -------------------------------------------------------------------------------- /tests/data/pdfrenderer/pdfbox-renderings/paper.pdf-dpi100-page0003.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/deepfigures-open/a22287846a919fb3e42ec4e6571021c7d4420208/tests/data/pdfrenderer/pdfbox-renderings/paper.pdf-dpi100-page0003.png -------------------------------------------------------------------------------- /tests/data/pdfrenderer/pdfbox-renderings/paper.pdf-dpi100-page0004.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/deepfigures-open/a22287846a919fb3e42ec4e6571021c7d4420208/tests/data/pdfrenderer/pdfbox-renderings/paper.pdf-dpi100-page0004.jpg -------------------------------------------------------------------------------- /tests/data/pdfrenderer/pdfbox-renderings/paper.pdf-dpi100-page0004.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/deepfigures-open/a22287846a919fb3e42ec4e6571021c7d4420208/tests/data/pdfrenderer/pdfbox-renderings/paper.pdf-dpi100-page0004.png -------------------------------------------------------------------------------- /tests/data/pdfrenderer/pdfbox-renderings/paper.pdf-dpi100-page0005.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/deepfigures-open/a22287846a919fb3e42ec4e6571021c7d4420208/tests/data/pdfrenderer/pdfbox-renderings/paper.pdf-dpi100-page0005.jpg -------------------------------------------------------------------------------- /tests/data/pdfrenderer/pdfbox-renderings/paper.pdf-dpi100-page0005.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/deepfigures-open/a22287846a919fb3e42ec4e6571021c7d4420208/tests/data/pdfrenderer/pdfbox-renderings/paper.pdf-dpi100-page0005.png -------------------------------------------------------------------------------- 
/tests/data/pdfrenderer/pdfbox-renderings/paper.pdf-dpi100-page0006.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/deepfigures-open/a22287846a919fb3e42ec4e6571021c7d4420208/tests/data/pdfrenderer/pdfbox-renderings/paper.pdf-dpi100-page0006.jpg -------------------------------------------------------------------------------- /tests/data/pdfrenderer/pdfbox-renderings/paper.pdf-dpi100-page0006.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/deepfigures-open/a22287846a919fb3e42ec4e6571021c7d4420208/tests/data/pdfrenderer/pdfbox-renderings/paper.pdf-dpi100-page0006.png -------------------------------------------------------------------------------- /vendor/tensorboxresnet/.gitignore: -------------------------------------------------------------------------------- 1 | *.pyc 2 | data/* 3 | output/* 4 | *.swp 5 | *.so 6 | inception/* 7 | hypes/*.json 8 | -------------------------------------------------------------------------------- /vendor/tensorboxresnet/.travis.yml: -------------------------------------------------------------------------------- 1 | language: c 2 | dist: trusty 3 | sudo: required 4 | env: 5 | - LD_PRELOAD="/usr/lib/libtcmalloc_minimal.so.4" 6 | before_install: 7 | - sudo apt-get install python2.7 python-numpy python-pip libtcmalloc-minimal4 8 | script: ./download_data.sh --travis_tiny_data && pip install tensorflow>=1.0 && pip install -r requirements.txt && cd utils && make && cd .. && python train.py --hypes hypes/overfeat_rezoom.json --gpu -1 --logdir output --max_iter 1 9 | -------------------------------------------------------------------------------- /vendor/tensorboxresnet/README.md: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | TensorBox is a simple framework for training neural networks to detect objects in images. 6 | Training requires a json file (e.g. [here](http://russellsstewart.com/s/tensorbox/test_boxes.json)) 7 | containing a list of images and the bounding boxes in each image. 8 | The basic model implements the simple and robust GoogLeNet-OverFeat algorithm with attention. 9 | 10 | ## OverFeat Installation & Training 11 | First, [install TensorFlow from source or pip](https://www.tensorflow.org/versions/r0.11/get_started/os_setup.html#pip-installation) (NB: source installs currently break threading on 0.11) 12 | 13 | $ git clone http://github.com/russell91/tensorbox 14 | $ cd tensorbox 15 | $ ./download_data.sh 16 | $ cd /path/to/tensorbox/utils && make && cd .. 17 | $ python train.py --hypes hypes/overfeat_rezoom.json --gpu 0 --logdir output 18 | $ #see evaluation instructions below 19 | 20 | Note that running on your own dataset should only require modifying the `hypes/overfeat_rezoom.json` file. 21 | 22 | ## Evaluation 23 | 24 | There are two options for evaluation, an ipython notebook and a python script. 25 | 26 | ### IPython Notebook 27 | The [ipython notebook](https://github.com/Russell91/tensorbox/blob/master/evaluate.ipynb) 28 | allows you to interactively modify the inference algorithm, and can be run concurrently 29 | with training (assuming you have 2 gpus). You can evaluate on new data by modifying paths 30 | and pointing to new weights. 31 | 32 | ### Python script 33 | For those who would prefer to evaluate using a script, you can alternately use evaluate.py. 
34 | The following instructions demonstrate how evaluate.py was used after one of my experiments - 35 | you will need to change paths as appropriate: 36 | 37 | $ # kill training script if you don't have a spare GPU 38 | $ cd /path/to/tensorbox 39 | $ python evaluate.py --weights output/overfeat_rezoom_2017_01_17_15.20/save.ckpt-130000 --test_boxes data/brainwash/val_boxes.json 40 | $ # val_boxes should contain the list of images you want to output boxes on, and 41 | $ # the annotated boxes for each image if you want to generate a precision recall curve 42 | $ cd ./output/overfeat_rezoom_2017_01_17_15.20/images_val_boxes_130000/ 43 | $ ls # ... notice the images with predicted boxes painted on, and the results saved in results.png 44 | $ python -m SimpleHTTPServer 8080 # set up an image server to view the images from your browser 45 | $ ssh myserver -N -L localhost:8080:localhost:8080 # set up an ssh tunnel to your server (skip if running locally) 46 | $ # open firefox and visit localhost:8080 to view images 47 | 48 | ## Finetuning 49 | 50 | If you get some decent results and want to improve your performance, there are many things you can try. 51 | For hyperparameter optimization, the learning rate, dropout ratios, and parameter initializations are a great place to start. You may want to 52 | read this blog post for a more generic tutorial on debugging neural nets. 53 | We have recently added a resnet version as well, which should work slightly better on larger boxes (this repo has historically done poorly 54 | on these, as they weren't part of the original research goal). I would recommend using the overfeat version over the lstm as well 55 | if you have a large variation in box sizes. 56 | 57 | ## Tensorboard 58 | 59 | You can visualize the progress of your experiments during training using Tensorboard. 60 | 61 | $ cd /path/to/tensorbox 62 | $ tensorboard --logdir output 63 | $ # (optional, start an ssh tunnel if not experimenting locally) 64 | $ ssh myserver -N -L localhost:6006:localhost:6006 65 | $ # open localhost:6006 in your browser 66 | 67 | For example, the following is a screenshot of a Tensorboard comparing two different experiments with learning rate decays that kick in at different points. The learning rate drops by half at 60k iterations for the green experiment and 300k iterations for the red experiment. 68 | 69 | 70 | 71 | ## Community 72 | 73 | If you're new to object detection, and want to chat with other people that are working on similar problems, check out the community chat at https://gitter.im/Russell91/TensorBox, especially on Saturdays. 74 | -------------------------------------------------------------------------------- /vendor/tensorboxresnet/download_data.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # This script downloads the pretrained model checkpoints and training data, then unpacks them. 3 | 4 | DIR="$( cd "$(dirname "$0")" ; pwd -P )" 5 | cd $DIR 6 | 7 | echo "Downloading..." 8 | 9 | mkdir -p data && cd data 10 | wget --continue http://russellsstewart.com/s/tensorbox/inception_v1.ckpt 11 | wget --continue http://download.tensorflow.org/models/resnet_v1_101_2016_08_28.tar.gz 12 | mkdir -p overfeat_rezoom && cd overfeat_rezoom 13 | wget --continue http://russellsstewart.com/s/tensorbox/overfeat_rezoom/save.ckpt-150000v2 14 | cd .. 15 | echo "Extracting..."
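# unpack the resnet checkpoint archive downloaded above; the brainwash
# training data is fetched and extracted below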
16 | tar xf resnet_v1_101_2016_08_28.tar.gz 17 | 18 | if [[ "$1" == '--travis_tiny_data' ]]; then 19 | wget --continue http://russellsstewart.com/s/brainwash_tiny.tar.gz 20 | tar xf brainwash_tiny.tar.gz 21 | echo "Done." 22 | else 23 | wget --continue https://stacks.stanford.edu/file/druid:sx925dc9385/brainwash.tar.gz 24 | tar xf brainwash.tar.gz 25 | echo "Done." 26 | fi 27 | -------------------------------------------------------------------------------- /vendor/tensorboxresnet/license/TENSORBOX_LICENSE.txt: -------------------------------------------------------------------------------- 1 | Copyright (c) 2016 "The Contributors" 2 | 3 | 4 | Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: 5 | 6 | The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. 7 | 8 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 9 | -------------------------------------------------------------------------------- /vendor/tensorboxresnet/license/TENSORFLOW_LICENSE.txt: -------------------------------------------------------------------------------- 1 | Copyright 2015 The TensorFlow Authors. All rights reserved. 2 | 3 | Apache License 4 | Version 2.0, January 2004 5 | http://www.apache.org/licenses/ 6 | 7 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 8 | 9 | 1. Definitions. 10 | 11 | "License" shall mean the terms and conditions for use, reproduction, 12 | and distribution as defined by Sections 1 through 9 of this document. 13 | 14 | "Licensor" shall mean the copyright owner or entity authorized by 15 | the copyright owner that is granting the License. 16 | 17 | "Legal Entity" shall mean the union of the acting entity and all 18 | other entities that control, are controlled by, or are under common 19 | control with that entity. For the purposes of this definition, 20 | "control" means (i) the power, direct or indirect, to cause the 21 | direction or management of such entity, whether by contract or 22 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 23 | outstanding shares, or (iii) beneficial ownership of such entity. 24 | 25 | "You" (or "Your") shall mean an individual or Legal Entity 26 | exercising permissions granted by this License. 27 | 28 | "Source" form shall mean the preferred form for making modifications, 29 | including but not limited to software source code, documentation 30 | source, and configuration files. 31 | 32 | "Object" form shall mean any form resulting from mechanical 33 | transformation or translation of a Source form, including but 34 | not limited to compiled object code, generated documentation, 35 | and conversions to other media types. 
36 | 37 | "Work" shall mean the work of authorship, whether in Source or 38 | Object form, made available under the License, as indicated by a 39 | copyright notice that is included in or attached to the work 40 | (an example is provided in the Appendix below). 41 | 42 | "Derivative Works" shall mean any work, whether in Source or Object 43 | form, that is based on (or derived from) the Work and for which the 44 | editorial revisions, annotations, elaborations, or other modifications 45 | represent, as a whole, an original work of authorship. For the purposes 46 | of this License, Derivative Works shall not include works that remain 47 | separable from, or merely link (or bind by name) to the interfaces of, 48 | the Work and Derivative Works thereof. 49 | 50 | "Contribution" shall mean any work of authorship, including 51 | the original version of the Work and any modifications or additions 52 | to that Work or Derivative Works thereof, that is intentionally 53 | submitted to Licensor for inclusion in the Work by the copyright owner 54 | or by an individual or Legal Entity authorized to submit on behalf of 55 | the copyright owner. For the purposes of this definition, "submitted" 56 | means any form of electronic, verbal, or written communication sent 57 | to the Licensor or its representatives, including but not limited to 58 | communication on electronic mailing lists, source code control systems, 59 | and issue tracking systems that are managed by, or on behalf of, the 60 | Licensor for the purpose of discussing and improving the Work, but 61 | excluding communication that is conspicuously marked or otherwise 62 | designated in writing by the copyright owner as "Not a Contribution." 63 | 64 | "Contributor" shall mean Licensor and any individual or Legal Entity 65 | on behalf of whom a Contribution has been received by Licensor and 66 | subsequently incorporated within the Work. 67 | 68 | 2. Grant of Copyright License. Subject to the terms and conditions of 69 | this License, each Contributor hereby grants to You a perpetual, 70 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 71 | copyright license to reproduce, prepare Derivative Works of, 72 | publicly display, publicly perform, sublicense, and distribute the 73 | Work and such Derivative Works in Source or Object form. 74 | 75 | 3. Grant of Patent License. Subject to the terms and conditions of 76 | this License, each Contributor hereby grants to You a perpetual, 77 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 78 | (except as stated in this section) patent license to make, have made, 79 | use, offer to sell, sell, import, and otherwise transfer the Work, 80 | where such license applies only to those patent claims licensable 81 | by such Contributor that are necessarily infringed by their 82 | Contribution(s) alone or by combination of their Contribution(s) 83 | with the Work to which such Contribution(s) was submitted. If You 84 | institute patent litigation against any entity (including a 85 | cross-claim or counterclaim in a lawsuit) alleging that the Work 86 | or a Contribution incorporated within the Work constitutes direct 87 | or contributory patent infringement, then any patent licenses 88 | granted to You under this License for that Work shall terminate 89 | as of the date such litigation is filed. 90 | 91 | 4. Redistribution. 
You may reproduce and distribute copies of the 92 | Work or Derivative Works thereof in any medium, with or without 93 | modifications, and in Source or Object form, provided that You 94 | meet the following conditions: 95 | 96 | (a) You must give any other recipients of the Work or 97 | Derivative Works a copy of this License; and 98 | 99 | (b) You must cause any modified files to carry prominent notices 100 | stating that You changed the files; and 101 | 102 | (c) You must retain, in the Source form of any Derivative Works 103 | that You distribute, all copyright, patent, trademark, and 104 | attribution notices from the Source form of the Work, 105 | excluding those notices that do not pertain to any part of 106 | the Derivative Works; and 107 | 108 | (d) If the Work includes a "NOTICE" text file as part of its 109 | distribution, then any Derivative Works that You distribute must 110 | include a readable copy of the attribution notices contained 111 | within such NOTICE file, excluding those notices that do not 112 | pertain to any part of the Derivative Works, in at least one 113 | of the following places: within a NOTICE text file distributed 114 | as part of the Derivative Works; within the Source form or 115 | documentation, if provided along with the Derivative Works; or, 116 | within a display generated by the Derivative Works, if and 117 | wherever such third-party notices normally appear. The contents 118 | of the NOTICE file are for informational purposes only and 119 | do not modify the License. You may add Your own attribution 120 | notices within Derivative Works that You distribute, alongside 121 | or as an addendum to the NOTICE text from the Work, provided 122 | that such additional attribution notices cannot be construed 123 | as modifying the License. 124 | 125 | You may add Your own copyright statement to Your modifications and 126 | may provide additional or different license terms and conditions 127 | for use, reproduction, or distribution of Your modifications, or 128 | for any such Derivative Works as a whole, provided Your use, 129 | reproduction, and distribution of the Work otherwise complies with 130 | the conditions stated in this License. 131 | 132 | 5. Submission of Contributions. Unless You explicitly state otherwise, 133 | any Contribution intentionally submitted for inclusion in the Work 134 | by You to the Licensor shall be under the terms and conditions of 135 | this License, without any additional terms or conditions. 136 | Notwithstanding the above, nothing herein shall supersede or modify 137 | the terms of any separate license agreement you may have executed 138 | with Licensor regarding such Contributions. 139 | 140 | 6. Trademarks. This License does not grant permission to use the trade 141 | names, trademarks, service marks, or product names of the Licensor, 142 | except as required for reasonable and customary use in describing the 143 | origin of the Work and reproducing the content of the NOTICE file. 144 | 145 | 7. Disclaimer of Warranty. Unless required by applicable law or 146 | agreed to in writing, Licensor provides the Work (and each 147 | Contributor provides its Contributions) on an "AS IS" BASIS, 148 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 149 | implied, including, without limitation, any warranties or conditions 150 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 151 | PARTICULAR PURPOSE. 
You are solely responsible for determining the 152 | appropriateness of using or redistributing the Work and assume any 153 | risks associated with Your exercise of permissions under this License. 154 | 155 | 8. Limitation of Liability. In no event and under no legal theory, 156 | whether in tort (including negligence), contract, or otherwise, 157 | unless required by applicable law (such as deliberate and grossly 158 | negligent acts) or agreed to in writing, shall any Contributor be 159 | liable to You for damages, including any direct, indirect, special, 160 | incidental, or consequential damages of any character arising as a 161 | result of this License or out of the use or inability to use the 162 | Work (including but not limited to damages for loss of goodwill, 163 | work stoppage, computer failure or malfunction, or any and all 164 | other commercial damages or losses), even if such Contributor 165 | has been advised of the possibility of such damages. 166 | 167 | 9. Accepting Warranty or Additional Liability. While redistributing 168 | the Work or Derivative Works thereof, You may choose to offer, 169 | and charge a fee for, acceptance of support, warranty, indemnity, 170 | or other liability obligations and/or rights consistent with this 171 | License. However, in accepting such obligations, You may act only 172 | on Your own behalf and on Your sole responsibility, not on behalf 173 | of any other Contributor, and only if You agree to indemnify, 174 | defend, and hold each Contributor harmless for any liability 175 | incurred by, or claims asserted against, such Contributor by reason 176 | of your accepting any such warranty or additional liability. 177 | 178 | END OF TERMS AND CONDITIONS 179 | 180 | APPENDIX: How to apply the Apache License to your work. 181 | 182 | To apply the Apache License to your work, attach the following 183 | boilerplate notice, with the fields enclosed by brackets "[]" 184 | replaced with your own identifying information. (Don't include 185 | the brackets!) The text should be enclosed in the appropriate 186 | comment syntax for the file format. We also recommend that a 187 | file or class name and description of purpose be included on the 188 | same "printed page" as the copyright notice for easier 189 | identification within third-party archives. 190 | 191 | Copyright 2015, The TensorFlow Authors. 192 | 193 | Licensed under the Apache License, Version 2.0 (the "License"); 194 | you may not use this file except in compliance with the License. 195 | You may obtain a copy of the License at 196 | 197 | http://www.apache.org/licenses/LICENSE-2.0 198 | 199 | Unless required by applicable law or agreed to in writing, software 200 | distributed under the License is distributed on an "AS IS" BASIS, 201 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 202 | See the License for the specific language governing permissions and 203 | limitations under the License. 
204 | -------------------------------------------------------------------------------- /vendor/tensorboxresnet/requirements.in: -------------------------------------------------------------------------------- 1 | runcython3>=0.2.5 2 | opencv-python>=3.2 3 | scipy>=0.15.1 4 | Pillow==2.6.0 5 | tensorflow==1.0.0 6 | -------------------------------------------------------------------------------- /vendor/tensorboxresnet/setup.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | import sys 4 | 5 | from setuptools import setup, Extension, find_packages 6 | 7 | tf_include = '/'.join(sys.executable.split('/')[:-2]) + \ 8 | '/lib/python%d.%d/site-packages/tensorflow/include' % sys.version_info[:2] 9 | 10 | import os 11 | extra_defs = [] 12 | if os.uname().sysname == 'Darwin': 13 | extra_defs.append('-D_GLIBCXX_USE_CXX11_ABI=0') 14 | else: 15 | os.environ['CC'] = 'g++' 16 | os.environ['CXX'] = 'g++' 17 | 18 | setup( 19 | name='tensorboxresnet', 20 | version='0.20', 21 | packages=find_packages(), 22 | setup_requires=['Cython'], 23 | ext_modules=[ 24 | Extension( 25 | 'tensorboxresnet.utils.stitch_wrapper', 26 | [ 27 | './tensorboxresnet/utils/stitch_wrapper.pyx', 28 | './tensorboxresnet/utils/stitch_rects.cpp', 29 | './tensorboxresnet/utils/hungarian/hungarian.cpp' 30 | ], 31 | language='c++', 32 | extra_compile_args=[ 33 | '-std=c++11', '-Itensorbox/utils', 34 | '-I%s' % tf_include 35 | ] + extra_defs, 36 | ) 37 | ] 38 | ) 39 | -------------------------------------------------------------------------------- /vendor/tensorboxresnet/tensorboxresnet/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/deepfigures-open/a22287846a919fb3e42ec4e6571021c7d4420208/vendor/tensorboxresnet/tensorboxresnet/__init__.py -------------------------------------------------------------------------------- /vendor/tensorboxresnet/tensorboxresnet/utils/Makefile: -------------------------------------------------------------------------------- 1 | SHELL := /bin/bash 2 | 3 | .PHONY: all 4 | all: 5 | pip install runcython3 6 | makecython3++ stitch_wrapper.pyx "" "stitch_rects.cpp ./hungarian/hungarian.cpp" 7 | 8 | hungarian: hungarian/hungarian.so 9 | 10 | hungarian/hungarian.so: 11 | cd hungarian && \ 12 | TF_INC=$$(python -c 'import tensorflow as tf; print(tf.sysconfig.get_include())') && \ 13 | if [ `uname` == Darwin ];\ 14 | then g++ -std=c++11 -shared hungarian.cc -o hungarian.so -fPIC -I $$TF_INC -undefined dynamic_lookup;\ 15 | else g++ -std=c++11 -shared hungarian.cc -o hungarian.so -fPIC -I $$TF_INC; fi 16 | -------------------------------------------------------------------------------- /vendor/tensorboxresnet/tensorboxresnet/utils/__init__.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | from distutils.version import LooseVersion 3 | 4 | TENSORFLOW_VERSION = LooseVersion(tf.__version__) 5 | 6 | 7 | def tf_concat(axis, values, **kwargs): 8 | if TENSORFLOW_VERSION >= LooseVersion('1.0'): 9 | return tf.concat(values, axis, **kwargs) 10 | else: 11 | return tf.concat(axis, values, **kwargs) 12 | -------------------------------------------------------------------------------- /vendor/tensorboxresnet/tensorboxresnet/utils/annolist/LICENSE_FOR_THIS_FOLDER: -------------------------------------------------------------------------------- 1 | MPII Human Pose Dataset, Version 1.0 2 | Copyright 2015 Max Planck 
Institute for Informatics 3 | Licensed under the Simplified BSD License 4 | -------------------------------------------------------------------------------- /vendor/tensorboxresnet/tensorboxresnet/utils/annolist/PalLib.py: -------------------------------------------------------------------------------- 1 | from . import AnnoList_pb2  # protobuf bindings used throughout this module 2 | from . import AnnotationLib 3 | 4 | 5 | 6 | def loadPal(filename): 7 | _annolist = AnnoList_pb2.AnnoList() 8 | 9 | f = open(filename, "rb") 10 | _annolist.ParseFromString(f.read()) 11 | f.close() 12 | 13 | return _annolist 14 | 15 | 16 | def savePal(filename, _annolist): 17 | f = open(filename, "wb") 18 | f.write(_annolist.SerializeToString()) 19 | f.close() 20 | 21 | 22 | def al2pal(annotations): 23 | _annolist = AnnoList_pb2.AnnoList() 24 | 25 | #assert(isinstance(annotations, AnnotationLib.AnnoList)); 26 | 27 | # check type of attributes, add missing attributes 28 | for a in annotations: 29 | for r in a.rects: 30 | for k, v in r.at.items(): 31 | if k not in annotations.attribute_desc: 32 | annotations.add_attribute(k, type(v)) 33 | else: 34 | assert ( 35 | AnnotationLib.is_compatible_attr_type( 36 | annotations.attribute_desc[k].dtype, type(v) 37 | ) 38 | ) 39 | 40 | # check attributes values 41 | for a in annotations: 42 | for r in a.rects: 43 | for k, v in r.at.items(): 44 | if k in annotations.attribute_val_to_str: 45 | # don't allow undefined values 46 | if v not in annotations.attribute_val_to_str[k]: 47 | print( 48 | "attribute: {}, undefined value: {}".format(k, v) 49 | ) 50 | assert (False) 51 | 52 | # store attribute descriptions in pal structure 53 | for aname, adesc in annotations.attribute_desc.items(): 54 | _annolist.attribute_desc.extend([adesc]) 55 | 56 | for a in annotations: 57 | _a = _annolist.annotation.add() 58 | _a.imageName = a.imageName 59 | 60 | for r in a.rects: 61 | _r = _a.rect.add() 62 | 63 | _r.x1 = r.x1 64 | _r.y1 = r.y1 65 | _r.x2 = r.x2 66 | _r.y2 = r.y2 67 | 68 | _r.score = float(r.score) 69 | 70 | if hasattr(r, 'id'): 71 | _r.id = r.id 72 | 73 | if hasattr(r, 'track_id'): 74 | _r.track_id = r.track_id 75 | 76 | if hasattr(r, 'at'): 77 | for k, v in list(r.at.items()): 78 | _at = _r.attribute.add() 79 | 80 | _at.id = annotations.attribute_desc[k].id 81 | 82 | if annotations.attribute_desc[ 83 | k 84 | ].dtype == AnnotationLib.AnnoList.TYPE_INT32: 85 | assert ( 86 | AnnotationLib.is_compatible_attr_type( 87 | AnnotationLib.AnnoList.TYPE_INT32, type(v) 88 | ) 89 | ) 90 | _at.val = int(v) 91 | elif annotations.attribute_desc[ 92 | k 93 | ].dtype == AnnotationLib.AnnoList.TYPE_FLOAT: 94 | assert ( 95 | AnnotationLib.is_compatible_attr_type( 96 | AnnotationLib.AnnoList.TYPE_FLOAT, type(v) 97 | ) 98 | ) 99 | _at.fval = float(v) 100 | elif annotations.attribute_desc[ 101 | k 102 | ].dtype == AnnotationLib.AnnoList.TYPE_STRING: 103 | assert ( 104 | AnnotationLib.is_compatible_attr_type( 105 | AnnotationLib.AnnoList.TYPE_STRING, type(v) 106 | ) 107 | ) 108 | _at.strval = str(v) 109 | else: 110 | assert (False) 111 | 112 | return _annolist 113 | 114 | 115 | def pal2al(_annolist): 116 | #annotations = []; 117 | annotations = AnnotationLib.AnnoList() 118 | 119 | for adesc in _annolist.attribute_desc: 120 | annotations.attribute_desc[adesc.name] = adesc 121 | print("attribute: ", adesc.name, adesc.id) 122 | 123 | for valdesc in adesc.val_to_str: 124 | annotations.add_attribute_val(adesc.name, valdesc.s, valdesc.id) 125 | 126 | attribute_name_from_id = { 127 | adesc.id: aname 128 | for aname, adesc in annotations.attribute_desc.items() 129 |
} 130 | attribute_dtype_from_id = { 131 | adesc.id: adesc.dtype 132 | for aname, adesc in annotations.attribute_desc.items() 133 | } 134 | 135 | for _a in _annolist.annotation: 136 | anno = AnnotationLib.Annotation() 137 | 138 | anno.imageName = _a.imageName 139 | 140 | anno.rects = [] 141 | 142 | for _r in _a.rect: 143 | rect = AnnotationLib.AnnoRect() 144 | 145 | rect.x1 = _r.x1 146 | rect.x2 = _r.x2 147 | rect.y1 = _r.y1 148 | rect.y2 = _r.y2 149 | 150 | if _r.HasField("id"): 151 | rect.id = _r.id 152 | 153 | if _r.HasField("track_id"): 154 | rect.track_id = _r.track_id 155 | 156 | if _r.HasField("score"): 157 | rect.score = _r.score 158 | 159 | for _at in _r.attribute: 160 | try: 161 | cur_aname = attribute_name_from_id[_at.id] 162 | cur_dtype = attribute_dtype_from_id[_at.id] 163 | except KeyError as e: 164 | print("attribute: ", _at.id) 165 | print(e) 166 | assert (False) 167 | 168 | if cur_dtype == AnnotationLib.AnnoList.TYPE_INT32: 169 | rect.at[cur_aname] = _at.val 170 | elif cur_dtype == AnnotationLib.AnnoList.TYPE_FLOAT: 171 | rect.at[cur_aname] = _at.fval 172 | elif cur_dtype == AnnotationLib.AnnoList.TYPE_STRING: 173 | rect.at[cur_aname] = _at.strval 174 | else: 175 | assert (False) 176 | 177 | anno.rects.append(rect) 178 | 179 | annotations.append(anno) 180 | 181 | return annotations 182 | -------------------------------------------------------------------------------- /vendor/tensorboxresnet/tensorboxresnet/utils/annolist/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/deepfigures-open/a22287846a919fb3e42ec4e6571021c7d4420208/vendor/tensorboxresnet/tensorboxresnet/utils/annolist/__init__.py -------------------------------------------------------------------------------- /vendor/tensorboxresnet/tensorboxresnet/utils/annolist/ma_utils.py: -------------------------------------------------------------------------------- 1 | def is_number(s): 2 | try: 3 | float(s) 4 | return True 5 | except ValueError: 6 | return False 7 | -------------------------------------------------------------------------------- /vendor/tensorboxresnet/tensorboxresnet/utils/annolist/plotSimple.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | import sys 4 | import os 5 | import random 6 | import re 7 | from AnnotationLib import * 8 | from MatPlotter import * 9 | from optparse import OptionParser 10 | from copy import deepcopy 11 | from math import sqrt 12 | 13 | 14 | def main(argv): 15 | parser = OptionParser(usage="usage: %prog [options] [...]") 16 | parser.add_option( 17 | "-o", 18 | "--output-file", 19 | action="store", 20 | dest="output", 21 | type="str", 22 | help="outfile. 
mandatory" 23 | ) 24 | parser.add_option( 25 | "--fppw", 26 | action="store_true", 27 | dest="fppw", 28 | help="False Positives Per Window" 29 | ) 30 | parser.add_option("--colors", action="store", dest="colors", help="colors") 31 | parser.add_option( 32 | "--fppi", 33 | action="store_true", 34 | dest="fppi", 35 | help="False Positives Per Image" 36 | ) 37 | parser.add_option( 38 | "--lfppi", 39 | action="store_true", 40 | dest="lfppi", 41 | help="False Positives Per Image(log)" 42 | ) 43 | parser.add_option( 44 | "-c", 45 | "--components", 46 | action="store", 47 | dest="ncomponents", 48 | type="int", 49 | help="show n trailing components of the part", 50 | default=3 51 | ) 52 | parser.add_option( 53 | "--cut-trailing", 54 | action="store", 55 | dest="cutcomponents", 56 | type="int", 57 | help= 58 | "cut n trailing components of the part (applied after --components)", 59 | default=-1 60 | ) 61 | parser.add_option( 62 | "-t", "--title", action="store", dest="title", type="str", default="" 63 | ) 64 | parser.add_option( 65 | "-f", 66 | "--fontsize", 67 | action="store", 68 | dest="fontsize", 69 | type="int", 70 | default=12 71 | ) 72 | parser.add_option( 73 | "-l", 74 | "--legend'", 75 | action="store", 76 | dest="legend", 77 | type="string", 78 | default="lr" 79 | ) 80 | (options, args) = parser.parse_args() 81 | plotter = MatPlotter(options.fontsize) 82 | 83 | position = "lower right" 84 | if (options.legend == "ur"): 85 | position = "upper right" 86 | if (options.legend == "ul"): 87 | position = "upper left" 88 | if (options.legend == "ll"): 89 | position = "lower left" 90 | plotter.formatLegend(options.fontsize, newPlace=position) 91 | 92 | title = options.title 93 | colors = None 94 | if (options.colors): 95 | colors = options.colors.split() 96 | if (options.fppw): 97 | plotter.newFPPWFigure(title) 98 | elif (options.lfppi): 99 | plotter.newLogFPPIFigure(title) 100 | elif (options.fppi): 101 | plotter.newFPPIFigure(title) 102 | else: 103 | plotter.newFigure(title) 104 | 105 | for i, filename in enumerate(args): 106 | if (os.path.isdir(filename)): 107 | filename = os.path.join(filename, "rpc", "result-minh-48") 108 | displayname = filename 109 | if (options.ncomponents > 0): 110 | suffix = None 111 | for idx in xrange(options.ncomponents): 112 | displayname, last = os.path.split(displayname) 113 | if (suffix): 114 | suffix = os.path.join(last, suffix) 115 | else: 116 | suffix = last 117 | displayname = suffix 118 | if (options.cutcomponents > 0): 119 | for idx in xrange(options.cutcomponents): 120 | displayname, last = os.path.split(displayname) 121 | 122 | # plusidx = displayname.index("+") 123 | # displayname = displayname[plusidx:] 124 | print "Plotting: " + displayname 125 | if (options.fppw): 126 | plotter.plotFPPW(filename, displayname) 127 | elif (options.lfppi): 128 | if colors: 129 | plotter.plotLogFPPI(filename, displayname, colors[i]) 130 | else: 131 | plotter.plotLogFPPI(filename, displayname) 132 | elif (options.fppi): 133 | plotter.plotFPPI(filename, displayname) 134 | else: 135 | plotter.plotRPC(filename, displayname) 136 | 137 | plotLine = not (options.fppw or options.lfppi or options.fppi) 138 | 139 | if (options.output is None): 140 | plotter.show(plotLine) 141 | else: 142 | plotter.saveCurrentFigure(plotLine, options.output) 143 | return 0 144 | 145 | if __name__ == "__main__": 146 | sys.exit(main(sys.argv)) 147 | -------------------------------------------------------------------------------- /vendor/tensorboxresnet/tensorboxresnet/utils/data_utils.py: 
-------------------------------------------------------------------------------- 1 | import cv2 2 | import numpy as np 3 | import copy 4 | import tensorboxresnet.utils.annolist.AnnotationLib as al 5 | 6 | 7 | def annotation_to_h5(H, a, cell_width, cell_height, max_len): 8 | region_size = H['region_size'] 9 | assert H['region_size'] == H['image_height'] / H['grid_height'] 10 | assert H['region_size'] == H['image_width'] / H['grid_width'] 11 | cell_regions = get_cell_grid(cell_width, cell_height, region_size) 12 | 13 | cells_per_image = len(cell_regions) 14 | 15 | box_list = [[] for idx in range(cells_per_image)] 16 | 17 | for cidx, c in enumerate(cell_regions): 18 | box_list[cidx] = [r for r in a.rects if all(r.intersection(c))] 19 | 20 | boxes = np.zeros((1, cells_per_image, 4, max_len, 1), dtype=np.float) 21 | box_flags = np.zeros((1, cells_per_image, 1, max_len, 1), dtype=np.float) 22 | 23 | for cidx in range(cells_per_image): 24 | #assert(cur_num_boxes <= max_len) 25 | 26 | cell_ox = 0.5 * (cell_regions[cidx].x1 + cell_regions[cidx].x2) 27 | cell_oy = 0.5 * (cell_regions[cidx].y1 + cell_regions[cidx].y2) 28 | 29 | unsorted_boxes = [] 30 | for bidx in range(min(len(box_list[cidx]), max_len)): 31 | 32 | # relative box position with respect to cell 33 | ox = 0.5 * (box_list[cidx][bidx].x1 + box_list[cidx][bidx].x2 34 | ) - cell_ox 35 | oy = 0.5 * (box_list[cidx][bidx].y1 + box_list[cidx][bidx].y2 36 | ) - cell_oy 37 | 38 | width = abs(box_list[cidx][bidx].x2 - box_list[cidx][bidx].x1) 39 | height = abs(box_list[cidx][bidx].y2 - box_list[cidx][bidx].y1) 40 | 41 | if (abs(ox) < H['focus_size'] * region_size and abs(oy) < H['focus_size'] * region_size and 42 | width < H['biggest_box_px'] and height < H['biggest_box_px']): 43 | unsorted_boxes.append( 44 | np.array([ox, oy, width, height], dtype=np.float) 45 | ) 46 | 47 | for bidx, box in enumerate( 48 | sorted(unsorted_boxes, key=lambda x: x[0]**2 + x[1]**2) 49 | ): 50 | boxes[0, cidx, :, bidx, 0] = box 51 | box_flags[0, cidx, 0, bidx, 0] = max( 52 | box_list[cidx][bidx].silhouetteID, 1 53 | ) 54 | 55 | return boxes, box_flags 56 | 57 | 58 | def get_cell_grid(cell_width, cell_height, region_size): 59 | 60 | cell_regions = [] 61 | for iy in range(cell_height): 62 | for ix in range(cell_width): 63 | cidx = iy * cell_width + ix 64 | ox = (ix + 0.5) * region_size 65 | oy = (iy + 0.5) * region_size 66 | 67 | r = al.AnnoRect( 68 | ox - 0.5 * region_size, oy - 0.5 * region_size, 69 | ox + 0.5 * region_size, oy + 0.5 * region_size 70 | ) 71 | r.track_id = cidx 72 | 73 | cell_regions.append(r) 74 | 75 | return cell_regions 76 | 77 | 78 | def annotation_jitter( 79 | I, 80 | a_in, 81 | min_box_width=20, 82 | jitter_scale_min=0.9, 83 | jitter_scale_max=1.1, 84 | jitter_offset=16, 85 | target_width=640, 86 | target_height=480 87 | ): 88 | assert I.shape[ 89 | 2 90 | ] == 3, 'Not implemented for images with more than 3 channels' 91 | a = copy.deepcopy(a_in) 92 | 93 | # MA: sanity check 94 | new_rects = [] 95 | for i in range(len(a.rects)): 96 | r = a.rects[i] 97 | try: 98 | assert (r.x1 < r.x2 and r.y1 < r.y2) 99 | new_rects.append(r) 100 | except: 101 | print('bad rectangle') 102 | a.rects = new_rects 103 | 104 | if a.rects: 105 | cur_min_box_width = min([r.width() for r in a.rects]) 106 | else: 107 | cur_min_box_width = min_box_width / jitter_scale_min 108 | 109 | # don't downscale below min_box_width 110 | jitter_scale_min = max( 111 | jitter_scale_min, float(min_box_width) / cur_min_box_width 112 | ) 113 | 114 | # it's always ok to upscale 115 | 
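# (i.e. the scale floor is clamped back to 1.0, so the sampled jitter range
# always allows keeping the original size or enlarging it)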
jitter_scale_min = min(jitter_scale_min, 1.0) 116 | 117 | jitter_scale_max = jitter_scale_max 118 | 119 | jitter_scale = np.random.uniform(jitter_scale_min, jitter_scale_max) 120 | 121 | jitter_flip = np.random.random_integers(0, 1) 122 | 123 | if jitter_flip == 1: 124 | I = np.fliplr(I) 125 | 126 | for r in a: 127 | r.x1 = I.shape[1] - r.x1 128 | r.x2 = I.shape[1] - r.x2 129 | r.x1, r.x2 = r.x2, r.x1 130 | 131 | for p in r.point: 132 | p.x = I.shape[1] - p.x 133 | 134 | I1 = cv2.resize( 135 | I, 136 | None, 137 | fx=jitter_scale, 138 | fy=jitter_scale, 139 | interpolation=cv2.INTER_CUBIC 140 | ) 141 | 142 | jitter_offset_x = np.random.random_integers(-jitter_offset, jitter_offset) 143 | jitter_offset_y = np.random.random_integers(-jitter_offset, jitter_offset) 144 | 145 | rescaled_width = I1.shape[1] 146 | rescaled_height = I1.shape[0] 147 | 148 | px = round(0.5 * 149 | (target_width)) - round(0.5 * 150 | (rescaled_width)) + jitter_offset_x 151 | py = round(0.5 * 152 | (target_height)) - round(0.5 * 153 | (rescaled_height)) + jitter_offset_y 154 | 155 | I2 = np.zeros((target_height, target_width, 3), dtype=I1.dtype) 156 | 157 | x1 = max(0, px) 158 | y1 = max(0, py) 159 | x2 = min(rescaled_width, target_width - x1) 160 | y2 = min(rescaled_height, target_height - y1) 161 | 162 | I2[0:(y2 - y1), 0:(x2 - x1), :] = I1[y1:y2, x1:x2, :] 163 | 164 | ox1 = round(0.5 * rescaled_width) + jitter_offset_x 165 | oy1 = round(0.5 * rescaled_height) + jitter_offset_y 166 | 167 | ox2 = round(0.5 * target_width) 168 | oy2 = round(0.5 * target_height) 169 | 170 | for r in a: 171 | r.x1 = round(jitter_scale * r.x1 - x1) 172 | r.x2 = round(jitter_scale * r.x2 - x1) 173 | 174 | r.y1 = round(jitter_scale * r.y1 - y1) 175 | r.y2 = round(jitter_scale * r.y2 - y1) 176 | 177 | if r.x1 < 0: 178 | r.x1 = 0 179 | 180 | if r.y1 < 0: 181 | r.y1 = 0 182 | 183 | if r.x2 >= I2.shape[1]: 184 | r.x2 = I2.shape[1] - 1 185 | 186 | if r.y2 >= I2.shape[0]: 187 | r.y2 = I2.shape[0] - 1 188 | 189 | for p in r.point: 190 | p.x = round(jitter_scale * p.x - x1) 191 | p.y = round(jitter_scale * p.y - y1) 192 | 193 | # MA: make sure all points are inside the image 194 | r.point = [ 195 | p for p in r.point 196 | if p.x >= 0 and p.y >= 0 and p.x < I2.shape[1] and 197 | p.y < I2.shape[0] 198 | ] 199 | 200 | new_rects = [] 201 | for r in a.rects: 202 | if r.x1 <= r.x2 and r.y1 <= r.y2: 203 | new_rects.append(r) 204 | else: 205 | pass 206 | 207 | a.rects = new_rects 208 | 209 | return I2, a 210 | -------------------------------------------------------------------------------- /vendor/tensorboxresnet/tensorboxresnet/utils/googlenet_load.py: -------------------------------------------------------------------------------- 1 | from tensorboxresnet.utils.slim_nets import inception_v1 as inception 2 | from tensorboxresnet.utils.slim_nets import resnet_v1 as resnet 3 | import tensorflow.contrib.slim as slim 4 | 5 | 6 | def model(x, H, reuse, is_training=True): 7 | if H['slim_basename'] == 'resnet_v1_101': 8 | with slim.arg_scope(resnet.resnet_arg_scope()): 9 | _, T = resnet.resnet_v1_101( 10 | x, is_training=is_training, num_classes=1000, reuse=reuse 11 | ) 12 | elif H['slim_basename'] == 'InceptionV1': 13 | with slim.arg_scope(inception.inception_v1_arg_scope()): 14 | _, T = inception.inception_v1( 15 | x, 16 | is_training=is_training, 17 | num_classes=1001, 18 | spatial_squeeze=False, 19 | reuse=reuse 20 | ) 21 | #print '\n'.join(map(str, [(k, v.op.outputs[0].get_shape()) for k, v in T.iteritems()])) 22 | 23 | coarse_feat = 
T[H['slim_top_lname']][:, :, :, :H['later_feat_channels']] 24 | assert coarse_feat.op.outputs[0].get_shape()[3] == H['later_feat_channels'] 25 | 26 | # fine feat can be used to reinspect the input 27 | attention_lname = H.get('slim_attention_lname', 'Mixed_3b') 28 | early_feat = T[attention_lname] 29 | 30 | return coarse_feat, early_feat 31 | -------------------------------------------------------------------------------- /vendor/tensorboxresnet/tensorboxresnet/utils/hungarian/hungarian.hpp: -------------------------------------------------------------------------------- 1 | /******************************************************************** 2 | ******************************************************************** 3 | ** 4 | ** libhungarian by Cyrill Stachniss, 2004 5 | ** 6 | ** 7 | ** Solving the Minimum Assignment Problem using the 8 | ** Hungarian Method. 9 | ** 10 | ** ** This file may be freely copied and distributed! ** 11 | ** 12 | ** Parts of the code used here were originally provided by the 13 | ** "Stanford GraphBase", but I made changes to this code. 14 | ** As asked by the copyright note of the "Stanford GraphBase", 15 | ** I hereby proclaim that this file is *NOT* part of the 16 | ** "Stanford GraphBase" distribution! 17 | ** 18 | ** This file is distributed in the hope that it will be useful, 19 | ** but WITHOUT ANY WARRANTY; without even the implied 20 | ** warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR 21 | ** PURPOSE. 22 | ** 23 | ******************************************************************** 24 | ********************************************************************/ 25 | 26 | #ifndef HUNGARIAN_H 27 | #define HUNGARIAN_H 28 | 29 | #ifdef __cplusplus 30 | extern "C" { 31 | #endif 32 | 33 | #define HUNGARIAN_NOT_ASSIGNED 0 34 | #define HUNGARIAN_ASSIGNED 1 35 | 36 | #define HUNGARIAN_MODE_MINIMIZE_COST 0 37 | #define HUNGARIAN_MODE_MAXIMIZE_UTIL 1 38 | 39 | 40 | typedef struct { 41 | int num_rows; 42 | int num_cols; 43 | int** cost; 44 | int** assignment; 45 | } hungarian_problem_t; 46 | 47 | /** This method initializes the hungarian_problem structure and 48 | * the cost matrices (missing rows or columns are filled with 0). 49 | * It returns the size of the quadratic(!) assignment matrix. **/ 50 | int hungarian_init(hungarian_problem_t* p, 51 | int** cost_matrix, 52 | int rows, 53 | int cols, 54 | int mode); 55 | 56 | /** Free the memory allocated by init. **/ 57 | void hungarian_free(hungarian_problem_t* p); 58 | 59 | /** This method computes the optimal assignment. **/ 60 | void hungarian_solve(hungarian_problem_t* p); 61 | 62 | /** Print the computed optimal assignment. **/ 63 | void hungarian_print_assignment(hungarian_problem_t* p); 64 | 65 | /** Print the cost matrix. **/ 66 | void hungarian_print_costmatrix(hungarian_problem_t* p); 67 | 68 | /** Print cost matrix and assignment matrix.
**/ 69 | void hungarian_print_status(hungarian_problem_t* p); 70 | 71 | int** array_to_matrix(int* m, int rows, int cols); 72 | 73 | #ifdef __cplusplus 74 | } 75 | #endif 76 | 77 | #endif 78 | -------------------------------------------------------------------------------- /vendor/tensorboxresnet/tensorboxresnet/utils/rect.py: -------------------------------------------------------------------------------- 1 | class Rect(object): 2 | def __init__(self, cx, cy, width, height, confidence): 3 | self.cx = cx 4 | self.cy = cy 5 | self.width = width 6 | self.height = height 7 | self.confidence = confidence 8 | self.true_confidence = confidence 9 | 10 | def overlaps(self, other): 11 | if abs(self.cx - other.cx) > (self.width + other.width) / 1.5: 12 | return False 13 | elif abs(self.cy - other.cy) > (self.height + other.height) / 2.0: 14 | return False 15 | else: 16 | return True 17 | 18 | def distance(self, other): 19 | return sum( 20 | map( 21 | abs, [ 22 | self.cx - other.cx, self.cy - other.cy, self.width - 23 | other.width, self.height - other.height 24 | ] 25 | ) 26 | ) 27 | 28 | def intersection(self, other): 29 | left = max(self.cx - self.width / 2., other.cx - other.width / 2.) 30 | right = min(self.cx + self.width / 2., other.cx + other.width / 2.) 31 | width = max(right - left, 0) 32 | top = max(self.cy - self.height / 2., other.cy - other.height / 2.) 33 | bottom = min(self.cy + self.height / 2., other.cy + other.height / 2.) 34 | height = max(bottom - top, 0) 35 | return width * height 36 | 37 | def area(self): 38 | return self.height * self.width 39 | 40 | def union(self, other): 41 | return self.area() + other.area() - self.intersection(other) 42 | 43 | def iou(self, other): 44 | return self.intersection(other) / self.union(other) 45 | 46 | def __eq__(self, other): 47 | return ( 48 | self.cx == other.cx and self.cy == other.cy and 49 | self.width == other.width and self.height == other.height and 50 | self.confidence == other.confidence 51 | ) 52 | -------------------------------------------------------------------------------- /vendor/tensorboxresnet/tensorboxresnet/utils/slim_nets/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/deepfigures-open/a22287846a919fb3e42ec4e6571021c7d4420208/vendor/tensorboxresnet/tensorboxresnet/utils/slim_nets/__init__.py -------------------------------------------------------------------------------- /vendor/tensorboxresnet/tensorboxresnet/utils/slim_nets/resnet_utils.py: -------------------------------------------------------------------------------- 1 | # Copyright 2016 The TensorFlow Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | """Contains building blocks for various versions of Residual Networks. 
16 | 17 | Residual networks (ResNets) were proposed in: 18 | Kaiming He, Xiangyu Zhang, Shaoqing Ren, Jian Sun 19 | Deep Residual Learning for Image Recognition. arXiv:1512.03385, 2015 20 | 21 | More variants were introduced in: 22 | Kaiming He, Xiangyu Zhang, Shaoqing Ren, Jian Sun 23 | Identity Mappings in Deep Residual Networks. arXiv: 1603.05027, 2016 24 | 25 | We can obtain different ResNet variants by changing the network depth, width, 26 | and form of residual unit. This module implements the infrastructure for 27 | building them. Concrete ResNet units and full ResNet networks are implemented in 28 | the accompanying resnet_v1.py and resnet_v2.py modules. 29 | 30 | Compared to https://github.com/KaimingHe/deep-residual-networks, in the current 31 | implementation we subsample the output activations in the last residual unit of 32 | each block, instead of subsampling the input activations in the first residual 33 | unit of each block. The two implementations give identical results but our 34 | implementation is more memory efficient. 35 | """ 36 | 37 | from __future__ import absolute_import 38 | from __future__ import division 39 | from __future__ import print_function 40 | 41 | import collections 42 | 43 | from tensorflow.contrib import layers as layers_lib 44 | from tensorflow.contrib.framework.python.ops import add_arg_scope 45 | from tensorflow.contrib.framework.python.ops import arg_scope 46 | from tensorflow.contrib.layers.python.layers import initializers 47 | from tensorflow.contrib.layers.python.layers import layers 48 | from tensorflow.contrib.layers.python.layers import regularizers 49 | from tensorflow.contrib.layers.python.layers import utils 50 | from tensorflow.python.framework import ops 51 | from tensorflow.python.ops import array_ops 52 | from tensorflow.python.ops import nn_ops 53 | from tensorflow.python.ops import variable_scope 54 | 55 | 56 | class Block(collections.namedtuple('Block', ['scope', 'unit_fn', 'args'])): 57 | """A named tuple describing a ResNet block. 58 | 59 | Its parts are: 60 | scope: The scope of the `Block`. 61 | unit_fn: The ResNet unit function which takes as input a `Tensor` and 62 | returns another `Tensor` with the output of the ResNet unit. 63 | args: A list of length equal to the number of units in the `Block`. The list 64 | contains one (depth, depth_bottleneck, stride) tuple for each unit in the 65 | block to serve as argument to unit_fn. 66 | """ 67 | 68 | 69 | def subsample(inputs, factor, scope=None): 70 | """Subsamples the input along the spatial dimensions. 71 | 72 | Args: 73 | inputs: A `Tensor` of size [batch, height_in, width_in, channels]. 74 | factor: The subsampling factor. 75 | scope: Optional variable_scope. 76 | 77 | Returns: 78 | output: A `Tensor` of size [batch, height_out, width_out, channels] with the 79 | input, either intact (if factor == 1) or subsampled (if factor > 1). 80 | """ 81 | if factor == 1: 82 | return inputs 83 | else: 84 | return layers.max_pool2d(inputs, [1, 1], stride=factor, scope=scope) 85 | 86 | 87 | def conv2d_same(inputs, num_outputs, kernel_size, stride, rate=1, scope=None): 88 | """Strided 2-D convolution with 'SAME' padding. 89 | 90 | When stride > 1, then we do explicit zero-padding, followed by conv2d with 91 | 'VALID' padding. 
92 | 93 | Note that 94 | 95 | net = conv2d_same(inputs, num_outputs, 3, stride=stride) 96 | 97 | is equivalent to 98 | 99 | net = tf.contrib.layers.conv2d(inputs, num_outputs, 3, stride=1, 100 | padding='SAME') 101 | net = subsample(net, factor=stride) 102 | 103 | whereas 104 | 105 | net = tf.contrib.layers.conv2d(inputs, num_outputs, 3, stride=stride, 106 | padding='SAME') 107 | 108 | is different when the input's height or width is even, which is why we add the 109 | current function. For more details, see ResnetUtilsTest.testConv2DSameEven(). 110 | 111 | Args: 112 | inputs: A 4-D tensor of size [batch, height_in, width_in, channels]. 113 | num_outputs: An integer, the number of output filters. 114 | kernel_size: An int with the kernel_size of the filters. 115 | stride: An integer, the output stride. 116 | rate: An integer, rate for atrous convolution. 117 | scope: Scope. 118 | 119 | Returns: 120 | output: A 4-D tensor of size [batch, height_out, width_out, channels] with 121 | the convolution output. 122 | """ 123 | if stride == 1: 124 | return layers_lib.conv2d( 125 | inputs, 126 | num_outputs, 127 | kernel_size, 128 | stride=1, 129 | rate=rate, 130 | padding='SAME', 131 | scope=scope 132 | ) 133 | else: 134 | kernel_size_effective = kernel_size + (kernel_size - 1) * (rate - 1) 135 | pad_total = kernel_size_effective - 1 136 | pad_beg = pad_total // 2 137 | pad_end = pad_total - pad_beg 138 | inputs = array_ops.pad( 139 | inputs, [[0, 0], [pad_beg, pad_end], [pad_beg, pad_end], [0, 0]] 140 | ) 141 | return layers_lib.conv2d( 142 | inputs, 143 | num_outputs, 144 | kernel_size, 145 | stride=stride, 146 | rate=rate, 147 | padding='VALID', 148 | scope=scope 149 | ) 150 | 151 | 152 | @add_arg_scope 153 | def stack_blocks_dense( 154 | net, blocks, output_stride=None, outputs_collections=None 155 | ): 156 | """Stacks ResNet `Blocks` and controls output feature density. 157 | 158 | First, this function creates scopes for the ResNet in the form of 159 | 'block_name/unit_1', 'block_name/unit_2', etc. 160 | 161 | Second, this function allows the user to explicitly control the ResNet 162 | output_stride, which is the ratio of the input to output spatial resolution. 163 | This is useful for dense prediction tasks such as semantic segmentation or 164 | object detection. 165 | 166 | Most ResNets consist of 4 ResNet blocks and subsample the activations by a 167 | factor of 2 when transitioning between consecutive ResNet blocks. This results 168 | in a nominal ResNet output_stride equal to 8. If we set the output_stride to 169 | half the nominal network stride (e.g., output_stride=4), then we compute 170 | responses twice. 171 | 172 | Control of the output feature density is implemented by atrous convolution. 173 | 174 | Args: 175 | net: A `Tensor` of size [batch, height, width, channels]. 176 | blocks: A list of length equal to the number of ResNet `Blocks`. Each 177 | element is a ResNet `Block` object describing the units in the `Block`. 178 | output_stride: If `None`, then the output will be computed at the nominal 179 | network stride. If output_stride is not `None`, it specifies the requested 180 | ratio of input to output spatial resolution, which needs to be equal to 181 | the product of unit strides from the start up to some level of the ResNet. 182 | For example, if the ResNet employs units with strides 1, 2, 1, 3, 4, 1, 183 | then valid values for the output_stride are 1, 2, 6, 24 or None (which 184 | is equivalent to output_stride=24).
185 | outputs_collections: Collection to add the ResNet block outputs. 186 | 187 | Returns: 188 | net: Output tensor with stride equal to the specified output_stride. 189 | 190 | Raises: 191 | ValueError: If the target output_stride is not valid. 192 | """ 193 | # The current_stride variable keeps track of the effective stride of the 194 | # activations. This allows us to invoke atrous convolution whenever applying 195 | # the next residual unit would result in the activations having stride larger 196 | # than the target output_stride. 197 | current_stride = 1 198 | 199 | # The atrous convolution rate parameter. 200 | rate = 1 201 | 202 | for block in blocks: 203 | with variable_scope.variable_scope(block.scope, 'block', [net]) as sc: 204 | for i, unit in enumerate(block.args): 205 | if output_stride is not None and current_stride > output_stride: 206 | raise ValueError( 207 | 'The target output_stride cannot be reached.' 208 | ) 209 | 210 | with variable_scope.variable_scope( 211 | 'unit_%d' % (i + 1), values=[net] 212 | ): 213 | unit_depth, unit_depth_bottleneck, unit_stride = unit 214 | 215 | # If we have reached the target output_stride, then we need to employ 216 | # atrous convolution with stride=1 and multiply the atrous rate by the 217 | # current unit's stride for use in subsequent layers. 218 | if output_stride is not None and current_stride == output_stride: 219 | net = block.unit_fn( 220 | net, 221 | depth=unit_depth, 222 | depth_bottleneck=unit_depth_bottleneck, 223 | stride=1, 224 | rate=rate 225 | ) 226 | rate *= unit_stride 227 | 228 | else: 229 | net = block.unit_fn( 230 | net, 231 | depth=unit_depth, 232 | depth_bottleneck=unit_depth_bottleneck, 233 | stride=unit_stride, 234 | rate=1 235 | ) 236 | current_stride *= unit_stride 237 | net = utils.collect_named_outputs( 238 | outputs_collections, sc.name, net 239 | ) 240 | 241 | if output_stride is not None and current_stride != output_stride: 242 | raise ValueError('The target output_stride cannot be reached.') 243 | 244 | return net 245 | 246 | 247 | def resnet_arg_scope( 248 | is_training=True, 249 | weight_decay=0.0001, 250 | batch_norm_decay=0.997, 251 | batch_norm_epsilon=1e-5, 252 | batch_norm_scale=True 253 | ): 254 | """Defines the default ResNet arg scope. 255 | 256 | TODO(gpapan): The batch-normalization related default values above are 257 | appropriate for use in conjunction with the reference ResNet models 258 | released at https://github.com/KaimingHe/deep-residual-networks. When 259 | training ResNets from scratch, they might need to be tuned. 260 | 261 | Args: 262 | is_training: Whether or not we are training the parameters in the batch 263 | normalization layers of the model. 264 | weight_decay: The weight decay to use for regularizing the model. 265 | batch_norm_decay: The moving average decay when estimating layer activation 266 | statistics in batch normalization. 267 | batch_norm_epsilon: Small constant to prevent division by zero when 268 | normalizing activations by their variance in batch normalization. 269 | batch_norm_scale: If True, uses an explicit `gamma` multiplier to scale the 270 | activations in the batch normalization layer. 271 | 272 | Returns: 273 | An `arg_scope` to use for the resnet models. 
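Typical usage, as in googlenet_load.py earlier in this tree:
`with slim.arg_scope(resnet_arg_scope()): _, end_points = resnet.resnet_v1_101(x, ...)`.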
274 | """ 275 | batch_norm_params = { 276 | 'is_training': is_training, 277 | 'decay': batch_norm_decay, 278 | 'epsilon': batch_norm_epsilon, 279 | 'scale': batch_norm_scale, 280 | 'updates_collections': ops.GraphKeys.UPDATE_OPS, 281 | } 282 | 283 | with arg_scope( 284 | [layers_lib.conv2d], 285 | weights_regularizer=regularizers.l2_regularizer(weight_decay), 286 | weights_initializer=initializers.variance_scaling_initializer(), 287 | activation_fn=nn_ops.relu, 288 | normalizer_fn=layers.batch_norm, 289 | normalizer_params=batch_norm_params 290 | ): 291 | with arg_scope([layers.batch_norm], **batch_norm_params): 292 | # The following implies padding='SAME' for pool1, which makes feature 293 | # alignment easier for dense prediction tasks. This is also used in 294 | # https://github.com/facebook/fb.resnet.torch. However the accompanying 295 | # code of 'Deep Residual Learning for Image Recognition' uses 296 | # padding='VALID' for pool1. You can switch to that choice by setting 297 | # tf.contrib.framework.arg_scope([tf.contrib.layers.max_pool2d], padding='VALID'). 298 | with arg_scope([layers.max_pool2d], padding='SAME') as arg_sc: 299 | return arg_sc 300 | -------------------------------------------------------------------------------- /vendor/tensorboxresnet/tensorboxresnet/utils/stitch_rects.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | 8 | #include "./hungarian/hungarian.hpp" 9 | #include "./stitch_rects.hpp" 10 | 11 | using std::vector; 12 | 13 | void filter_rects(const vector > >& all_rects, 14 | vector* stitched_rects, 15 | float threshold, 16 | float max_threshold, 17 | float tau, 18 | float conf_alpha) { 19 | const vector& accepted_rects = *stitched_rects; 20 | for (int i = 0; i < (int)all_rects.size(); ++i) { 21 | for (int j = 0; j < (int)all_rects[0].size(); ++j) { 22 | vector current_rects; 23 | for (int k = 0; k < (int)all_rects[i][j].size(); ++k) { 24 | if (all_rects[i][j][k].confidence_ * conf_alpha > threshold) { 25 | Rect r = Rect(all_rects[i][j][k]); 26 | r.confidence_ *= conf_alpha; 27 | r.true_confidence_ *= conf_alpha; 28 | current_rects.push_back(r); 29 | } 30 | } 31 | 32 | vector relevant_rects; 33 | for (int k = 0; k < (int)accepted_rects.size(); ++k) { 34 | for (int l = 0; l < (int)current_rects.size(); ++l) { 35 | if (accepted_rects[k].overlaps(current_rects[l], tau)) { 36 | relevant_rects.push_back(Rect(accepted_rects[k])); 37 | break; 38 | } 39 | } 40 | } 41 | 42 | if (relevant_rects.size() == 0 || current_rects.size() == 0) { 43 | for (int k = 0; k < (int)current_rects.size(); ++k) { 44 | stitched_rects->push_back(Rect(current_rects[k])); 45 | } 46 | continue; 47 | } 48 | 49 | int num_pred = MAX(current_rects.size(), relevant_rects.size()); 50 | 51 | int int_cost[num_pred * num_pred]; 52 | for (int k = 0; k < num_pred * num_pred; ++k) { int_cost[k] = 0; } 53 | for (int k = 0; k < (int)current_rects.size(); ++k) { 54 | for (int l = 0; l < (int)relevant_rects.size(); ++l) { 55 | int idx = k * num_pred + l; 56 | int cost = 10000; 57 | if (current_rects[k].overlaps(relevant_rects[l], tau)) { 58 | cost -= 1000; 59 | } 60 | cost += (int)(current_rects[k].distance(relevant_rects[l]) / 10.); 61 | int_cost[idx] = cost; 62 | } 63 | } 64 | 65 | std::vector assignment; 66 | 67 | hungarian_problem_t p; 68 | int** m = array_to_matrix(int_cost, num_pred, num_pred); 69 | hungarian_init(&p, m, num_pred, num_pred, HUNGARIAN_MODE_MINIMIZE_COST); 70 | 
hungarian_solve(&p); 71 | for (int i = 0; i < num_pred; ++i) { 72 | for (int j = 0; j < num_pred; ++j) { 73 | if (p.assignment[i][j] == HUNGARIAN_ASSIGNED) { 74 | assignment.push_back(j); 75 | } 76 | } 77 | } 78 | assert((int)assignment.size() == num_pred); 79 | hungarian_free(&p); 80 | 81 | for (int i = 0; i < num_pred; ++i) { 82 | free(m[i]); 83 | } 84 | free(m); 85 | 86 | vector<int> bad; 87 | for (int k = 0; k < (int)assignment.size(); ++k) { 88 | if (k < (int)current_rects.size() && assignment[k] < (int)relevant_rects.size()) { 89 | Rect& c = current_rects[k]; 90 | Rect& a = relevant_rects[assignment[k]]; 91 | if (c.confidence_ > max_threshold) { 92 | bad.push_back(k); 93 | continue; 94 | } 95 | if (c.overlaps(a, tau)) { 96 | if (c.confidence_ > a.confidence_ && c.iou(a) > 0.7) { 97 | c.true_confidence_ = a.confidence_; 98 | stitched_rects->erase(std::find(stitched_rects->begin(), stitched_rects->end(), a)); 99 | } else { 100 | bad.push_back(k); 101 | } 102 | } 103 | } 104 | } 105 | 106 | for (int k = 0; k < (int)current_rects.size(); ++k) { 107 | bool bad_contains_k = false; 108 | for (int l = 0; l < (int)bad.size(); ++l) { 109 | if (k == bad[l]) { 110 | bad_contains_k = true; 111 | break; 112 | } 113 | } 114 | if (!bad_contains_k) { 115 | stitched_rects->push_back(Rect(current_rects[k])); 116 | } 117 | } 118 | } 119 | } 120 | } 121 | -------------------------------------------------------------------------------- /vendor/tensorboxresnet/tensorboxresnet/utils/stitch_rects.hpp: -------------------------------------------------------------------------------- 1 | #ifndef STITCH_RECTS_HPP 2 | #define STITCH_RECTS_HPP 3 | 4 | #include <math.h> 5 | #include <stdio.h> 6 | #include <vector> 7 | 8 | #include "./hungarian/hungarian.hpp" 9 | 10 | #define MIN(a,b) (((a)<(b))?(a):(b)) 11 | #define MAX(a,b) (((a)>(b))?(a):(b)) 12 | 13 | using std::vector; 14 | 15 | class Rect { 16 | public: 17 | int cx_; 18 | int cy_; 19 | int width_; 20 | int height_; 21 | float confidence_; 22 | float true_confidence_; 23 | 24 | explicit Rect(int cx, int cy, int width, int height, float confidence) { 25 | cx_ = cx; 26 | cy_ = cy; 27 | width_ = width; 28 | height_ = height; 29 | confidence_ = confidence; 30 | true_confidence_ = confidence; 31 | } 32 | 33 | Rect(const Rect& other) { 34 | cx_ = other.cx_; 35 | cy_ = other.cy_; 36 | width_ = other.width_; 37 | height_ = other.height_; 38 | confidence_ = other.confidence_; 39 | true_confidence_ = other.true_confidence_; 40 | } 41 | 42 | bool overlaps(const Rect& other, float tau) const { 43 | if (fabs(cx_ - other.cx_) > (width_ + other.width_) / 1.5) { 44 | return false; 45 | } else if (fabs(cy_ - other.cy_) > (height_ + other.height_) / 2.0) { 46 | return false; 47 | } else { 48 | return iou(other) > tau; 49 | } 50 | } 51 | 52 | int distance(const Rect& other) const { 53 | return (fabs(cx_ - other.cx_) + fabs(cy_ - other.cy_) + 54 | fabs(width_ - other.width_) + fabs(height_ - other.height_)); 55 | } 56 | 57 | float intersection(const Rect& other) const { 58 | int left = MAX(cx_ - width_ / 2., other.cx_ - other.width_ / 2.); 59 | int right = MIN(cx_ + width_ / 2., other.cx_ + other.width_ / 2.); 60 | int width = MAX(right - left, 0); 61 | 62 | int top = MAX(cy_ - height_ / 2., other.cy_ - other.height_ / 2.); 63 | int bottom = MIN(cy_ + height_ / 2., other.cy_ + other.height_ / 2.); 64 | int height = MAX(bottom - top, 0); 65 | return width * height; 66 | } 67 | 68 | int area() const { 69 | return height_ * width_; 70 | } 71 | 72 | float union_area(const Rect& other) const { 73 | return this->area()
+ other.area() - this->intersection(other); 74 | } 75 | 76 | float iou(const Rect& other) const { 77 | return this->intersection(other) / this->union_area(other); 78 | } 79 | 80 | bool operator==(const Rect& other) const { 81 | return (cx_ == other.cx_ && 82 | cy_ == other.cy_ && 83 | width_ == other.width_ && 84 | height_ == other.height_ && 85 | confidence_ == other.confidence_); 86 | } 87 | }; 88 | 89 | void filter_rects(const vector<vector<vector<Rect> > >& all_rects, 90 | vector<Rect>* stitched_rects, 91 | float threshold, 92 | float max_threshold, 93 | float tau, 94 | float conf_alpha); 95 | 96 | #endif // STITCH_RECTS_HPP 97 | -------------------------------------------------------------------------------- /vendor/tensorboxresnet/tensorboxresnet/utils/stitch_wrapper.py: -------------------------------------------------------------------------------- 1 | print( 2 | 'ERROR: stitch_wrapper not yet compiled. Please run `cd /path/to/tensorbox/utils && make`' 3 | ) 4 | -------------------------------------------------------------------------------- /vendor/tensorboxresnet/tensorboxresnet/utils/stitch_wrapper.pyx: -------------------------------------------------------------------------------- 1 | from libcpp.vector cimport vector 2 | from libcpp.set cimport set 3 | from rect import Rect as PyRect 4 | cdef extern from "stitch_rects.hpp": 5 | cdef cppclass Rect: 6 | Rect(int cx, int cy, int width, int height, float confidence) 7 | int cx_ 8 | int cy_ 9 | int width_ 10 | int height_ 11 | float confidence_ 12 | float true_confidence_ 13 | 14 | cdef void filter_rects(vector[vector[vector[Rect] ] ]& all_rects, 15 | vector[Rect]* stitched_rects, 16 | float threshold, 17 | float max_threshold, 18 | float tau, 19 | float conf_alpha); 20 | 21 | def stitch_rects(all_rects, tau=0.25): 22 | """ 23 | Implements the stitching procedure discussed in the paper. 24 | Complicated, but we find that it does better than simpler versions 25 | and generalizes well across widely varying box sizes.
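Proposals are swept from high to low confidence; at each threshold,
new rects are matched against already-accepted ones with the
Hungarian solver and then merged or discarded.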
26 | 27 | Input: 28 | all_rects : 2d grid with each cell containing a vector of PyRects 29 | """ 30 | for row in all_rects: 31 | assert len(row) == len(all_rects[0]) 32 | 33 | cdef vector[vector[vector[Rect]]] c_rects 34 | cdef vector[vector[Rect]] c_row 35 | cdef vector[Rect] c_column 36 | for i, row in enumerate(all_rects): 37 | c_rects.push_back(c_row) 38 | for j, column in enumerate(row): 39 | c_rects[i].push_back(c_column) 40 | for py_rect in column: 41 | c_rects[i][j].push_back( 42 | Rect( 43 | py_rect.cx, 44 | py_rect.cy, 45 | py_rect.width, 46 | py_rect.height, 47 | py_rect.confidence) 48 | ) 49 | 50 | cdef vector[Rect] acc_rects; 51 | 52 | thresholds = [(.80, 1.0), 53 | (.70, 0.9), 54 | (.60, 0.8), 55 | (.50, 0.7), 56 | (.40, 0.6), 57 | (.30, 0.5), 58 | (.20, 0.4), 59 | (.10, 0.3), 60 | (.05, 0.2), 61 | (.02, 0.1), 62 | (.005, 0.04), 63 | (.001, 0.01), 64 | ] 65 | t_conf_alphas = [(tau, 1.0), 66 | #(1 - (1 - tau) * 0.75, 0.5), 67 | #(1 - (1 - tau) * 0.5, 0.1), 68 | #(1 - (1 - tau) * 0.25, 0.005), 69 | ] 70 | for t, conf_alpha in t_conf_alphas: 71 | for lower_t, upper_t in thresholds: 72 | if lower_t * conf_alpha > 0.0001: 73 | filter_rects(c_rects, &acc_rects, lower_t * conf_alpha, 74 | upper_t * conf_alpha, t, conf_alpha) 75 | 76 | py_acc_rects = [] 77 | for i in range(acc_rects.size()): 78 | acc_rect = PyRect( 79 | acc_rects[i].cx_, 80 | acc_rects[i].cy_, 81 | acc_rects[i].width_, 82 | acc_rects[i].height_, 83 | acc_rects[i].confidence_) 84 | acc_rect.true_confidence = acc_rects[i].true_confidence_ 85 | py_acc_rects.append(acc_rect) 86 | return py_acc_rects 87 | --------------------------------------------------------------------------------
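For reference, a minimal sketch of how the vendored Rect helpers behave (assuming the package is importable as tensorboxresnet.utils.rect; the numbers below are illustrative only):

    from tensorboxresnet.utils.rect import Rect

    # Two detections of the same region, slightly offset.
    a = Rect(100, 100, 80, 60, 0.9)
    b = Rect(105, 102, 80, 60, 0.7)

    print(a.intersection(b))  # overlap area in pixels: 75.0 * 58.0 = 4350.0
    print(a.iou(b))           # intersection over union, roughly 0.83
    print(a.overlaps(b))      # True: both centre-distance checks pass

stitch_rects applies the same geometry through its compiled C++ twin, additionally requiring iou > tau before two rects are treated as duplicates of one detection.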