├── .github ├── dependabot.yml └── workflows │ ├── tests-windows.yml │ └── tests.yml ├── .gitignore ├── LICENSE ├── README.md ├── jbang-catalog.json ├── pom.xml └── src ├── main └── java │ └── technology │ └── tabula │ ├── Cell.java │ ├── CohenSutherlandClipping.java │ ├── CommandLineApp.java │ ├── HasText.java │ ├── Line.java │ ├── ObjectExtractor.java │ ├── ObjectExtractorStreamEngine.java │ ├── Page.java │ ├── PageDims.java │ ├── PageIterator.java │ ├── Pair.java │ ├── ProjectionProfile.java │ ├── QuickSort.java │ ├── Rectangle.java │ ├── RectangleSpatialIndex.java │ ├── RectangularTextContainer.java │ ├── Ruling.java │ ├── Table.java │ ├── TableWithRulingLines.java │ ├── TextChunk.java │ ├── TextElement.java │ ├── TextStripper.java │ ├── Utils.java │ ├── debug │ └── Debug.java │ ├── detectors │ ├── DetectionAlgorithm.java │ ├── NurminenDetectionAlgorithm.java │ └── SpreadsheetDetectionAlgorithm.java │ ├── extractors │ ├── BasicExtractionAlgorithm.java │ ├── ExtractionAlgorithm.java │ └── SpreadsheetExtractionAlgorithm.java │ ├── json │ ├── RectangularTextContainerSerializer.java │ └── TableSerializer.java │ └── writers │ ├── CSVWriter.java │ ├── JSONWriter.java │ ├── TSVWriter.java │ └── Writer.java └── test ├── java └── technology │ └── tabula │ ├── TableTest.java │ ├── TestBasicExtractor.java │ ├── TestCell.java │ ├── TestCohenSutherland.java │ ├── TestCommandLineApp.java │ ├── TestDebug.java │ ├── TestLine.java │ ├── TestObjectExtractor.java │ ├── TestProjectionProfile.java │ ├── TestRectangle.java │ ├── TestRectangleSpatialIndex.java │ ├── TestRuling.java │ ├── TestSpreadsheetExtractor.java │ ├── TestTableDetection.java │ ├── TestTextElement.java │ ├── TestUtils.java │ ├── TestWriters.java │ └── UtilsForTesting.java └── resources └── technology └── tabula ├── 12s0324.pdf ├── 20.pdf ├── AnimalSounds.pdf ├── AnimalSounds1.pdf ├── MultiColumn.pdf ├── Publication_of_award_of_Bids_for_Transport_Sector__August_2016.pdf ├── S2MNCEbirdisland.pdf ├── arabic.pdf ├── 
argentina_diputados_voting_record.pdf ├── campaign_donors.pdf ├── china.pdf ├── cs-en-us-pbms.pdf ├── csv ├── AnimalSounds.csv ├── MultiColumn.csv ├── Publication_of_award_of_Bids_for_Transport_Sector__August_2016.csv ├── TestBasicExtractor-RECTANGLE_TEST_NATURAL_ORDER.csv ├── TestCommandLineApp_testGuessOption_no_guessing.csv ├── TestCommandLineApp_testGuessOption_with_guessing.csv ├── TestSpreadsheetExtractor-CELLS.csv ├── argentina_diputados_voting_record.csv ├── frx_2012_disclosure.csv ├── indictb1h_14.csv ├── schools.csv ├── spanning_cells.csv ├── spreadsheet_no_bounding_frame.csv ├── twotables.csv └── us-020.csv ├── encrypted.pdf ├── eu-002.pdf ├── eu-017.pdf ├── failing_sort.pdf ├── frx_2012_disclosure.pdf ├── icdar2013-dataset ├── competition-dataset-eu │ ├── eu-001-reg.xml │ ├── eu-001-str.xml │ ├── eu-001.json │ ├── eu-001.pdf │ ├── eu-002-reg.xml │ ├── eu-002-str.xml │ ├── eu-002.json │ ├── eu-002.pdf │ ├── eu-003-reg.xml │ ├── eu-003-str.xml │ ├── eu-003.json │ ├── eu-003.pdf │ ├── eu-004-reg.xml │ ├── eu-004-str.xml │ ├── eu-004.json │ ├── eu-004.pdf │ ├── eu-005-reg.xml │ ├── eu-005-str.xml │ ├── eu-005.json │ ├── eu-005.pdf │ ├── eu-006-reg.xml │ ├── eu-006-str.xml │ ├── eu-006.json │ ├── eu-006.pdf │ ├── eu-007-reg.xml │ ├── eu-007-str.xml │ ├── eu-007.json │ ├── eu-007.pdf │ ├── eu-008-reg.xml │ ├── eu-008-str.xml │ ├── eu-008.json │ ├── eu-008.pdf │ ├── eu-009a-reg.xml │ ├── eu-009a-str.xml │ ├── eu-009a.json │ ├── eu-009a.pdf │ ├── eu-009b-reg.xml │ ├── eu-009b-str.xml │ ├── eu-010-reg.xml │ ├── eu-010-str.xml │ ├── eu-010.json │ ├── eu-010.pdf │ ├── eu-011-reg.xml │ ├── eu-011-str.xml │ ├── eu-011.json │ ├── eu-011.pdf │ ├── eu-012-reg.xml │ ├── eu-012-str.xml │ ├── eu-012.json │ ├── eu-012.pdf │ ├── eu-013-reg.xml │ ├── eu-013-str.xml │ ├── eu-013.json │ ├── eu-013.pdf │ ├── eu-014-reg.xml │ ├── eu-014-str.xml │ ├── eu-014.json │ ├── eu-014.pdf │ ├── eu-015-reg.xml │ ├── eu-015-str.xml │ ├── eu-015.json │ ├── eu-015.pdf │ ├── eu-016-reg.xml │ 
├── eu-016-str.xml │ ├── eu-016.json │ ├── eu-016.pdf │ ├── eu-017-reg.xml │ ├── eu-017-str.xml │ ├── eu-017.json │ ├── eu-017.pdf │ ├── eu-018-reg.xml │ ├── eu-018-str.xml │ ├── eu-018.json │ ├── eu-018.pdf │ ├── eu-019-reg.xml │ ├── eu-019-str.xml │ ├── eu-019.json │ ├── eu-019.pdf │ ├── eu-020-reg.xml │ ├── eu-020-str.xml │ ├── eu-020.json │ ├── eu-020.pdf │ ├── eu-021-reg.xml │ ├── eu-021-str.xml │ ├── eu-021.json │ ├── eu-021.pdf │ ├── eu-022-reg.xml │ ├── eu-022-str.xml │ ├── eu-022.json │ ├── eu-022.pdf │ ├── eu-023-reg.xml │ ├── eu-023-str.xml │ ├── eu-023.json │ ├── eu-023.pdf │ ├── eu-024-reg.xml │ ├── eu-024-str.xml │ ├── eu-024.json │ ├── eu-024.pdf │ ├── eu-025-reg.xml │ ├── eu-025-str.xml │ ├── eu-025.json │ ├── eu-025.pdf │ ├── eu-026-reg.xml │ ├── eu-026-str.xml │ ├── eu-026.json │ ├── eu-026.pdf │ ├── eu-027-reg.xml │ ├── eu-027-str.xml │ ├── eu-027.json │ └── eu-027.pdf └── competition-dataset-us │ ├── us-001-reg.xml │ ├── us-001-str.xml │ ├── us-001.json │ ├── us-001.pdf │ ├── us-002-reg.xml │ ├── us-002-str.xml │ ├── us-002.json │ ├── us-002.pdf │ ├── us-003-reg.xml │ ├── us-003-str.xml │ ├── us-003.json │ ├── us-003.pdf │ ├── us-004-reg.xml │ ├── us-004-str.xml │ ├── us-004.json │ ├── us-004.pdf │ ├── us-005-reg.xml │ ├── us-005-str.xml │ ├── us-005.json │ ├── us-005.pdf │ ├── us-006-reg.xml │ ├── us-006-str.xml │ ├── us-006.json │ ├── us-006.pdf │ ├── us-007-reg.xml │ ├── us-007-str.xml │ ├── us-007.json │ ├── us-007.pdf │ ├── us-008-reg.xml │ ├── us-008-str.xml │ ├── us-008.json │ ├── us-008.pdf │ ├── us-009-reg.xml │ ├── us-009-str.xml │ ├── us-009.json │ ├── us-009.pdf │ ├── us-010-reg.xml │ ├── us-010-str.xml │ ├── us-010.json │ ├── us-010.pdf │ ├── us-011a-reg.xml │ ├── us-011a-str.xml │ ├── us-011a.json │ ├── us-011a.pdf │ ├── us-011b-reg.xml │ ├── us-011b-str.xml │ ├── us-012-reg.xml │ ├── us-012-str.xml │ ├── us-012.json │ ├── us-012.pdf │ ├── us-013-reg.xml │ ├── us-013-str.xml │ ├── us-013.json │ ├── us-013.pdf │ ├── us-014-reg.xml │ 
├── us-014-str.xml │ ├── us-014.json │ ├── us-014.pdf │ ├── us-015-reg.xml │ ├── us-015-str.xml │ ├── us-015.json │ ├── us-015.pdf │ ├── us-016-reg.xml │ ├── us-016-str.xml │ ├── us-016.json │ ├── us-016.pdf │ ├── us-017-reg.xml │ ├── us-017-str.xml │ ├── us-017.json │ ├── us-017.pdf │ ├── us-018-reg.xml │ ├── us-018-str.xml │ ├── us-018.json │ ├── us-018.pdf │ ├── us-019-reg.xml │ ├── us-019-str.xml │ ├── us-019.json │ ├── us-019.pdf │ ├── us-020-reg.xml │ ├── us-020-str.xml │ ├── us-020.json │ ├── us-020.pdf │ ├── us-021-reg.xml │ ├── us-021-str.xml │ ├── us-021.json │ ├── us-021.pdf │ ├── us-022-reg.xml │ ├── us-022-str.xml │ ├── us-022.json │ ├── us-022.pdf │ ├── us-023-reg.xml │ ├── us-023-str.xml │ ├── us-023.json │ ├── us-023.pdf │ ├── us-024-reg.xml │ ├── us-024-str.xml │ ├── us-024.json │ ├── us-024.pdf │ ├── us-025-reg.xml │ ├── us-025-str.xml │ ├── us-025.json │ ├── us-025.pdf │ ├── us-026-reg.xml │ ├── us-026-str.xml │ ├── us-026.json │ ├── us-026.pdf │ ├── us-027-reg.xml │ ├── us-027-str.xml │ ├── us-027.json │ ├── us-027.pdf │ ├── us-028-reg.xml │ ├── us-028-str.xml │ ├── us-028.json │ ├── us-028.pdf │ ├── us-029-reg.xml │ ├── us-029-str.xml │ ├── us-029.json │ ├── us-029.pdf │ ├── us-030-reg.xml │ ├── us-030-str.xml │ ├── us-030.json │ ├── us-030.pdf │ ├── us-031a-reg.xml │ ├── us-031a-str.xml │ ├── us-031a.json │ ├── us-031a.pdf │ ├── us-031b-reg.xml │ ├── us-031b-str.xml │ ├── us-032-reg.xml │ ├── us-032-str.xml │ ├── us-032.json │ ├── us-032.pdf │ ├── us-033-reg.xml │ ├── us-033-str.xml │ ├── us-033.json │ ├── us-033.pdf │ ├── us-034-reg.xml │ ├── us-034-str.xml │ ├── us-034.json │ ├── us-034.pdf │ ├── us-035a-reg.xml │ ├── us-035a-str.xml │ ├── us-035a.json │ ├── us-035a.pdf │ ├── us-035b-reg.xml │ ├── us-035b-str.xml │ ├── us-036-reg.xml │ ├── us-036-str.xml │ ├── us-036.json │ ├── us-036.pdf │ ├── us-037-reg.xml │ ├── us-037-str.xml │ ├── us-037.json │ ├── us-037.pdf │ ├── us-038-reg.xml │ ├── us-038-str.xml │ ├── us-038.json │ ├── us-038.pdf │ 
├── us-039-reg.xml │ ├── us-039-str.xml │ ├── us-039.json │ ├── us-039.pdf │ ├── us-040-reg.xml │ ├── us-040-str.xml │ ├── us-040.json │ └── us-040.pdf ├── indictb1h_14.pdf ├── jpeg2000.pdf ├── json ├── AnimalSounds1.json ├── argentina_diputados_voting_record.json ├── schools.json ├── spanning_cells.json ├── spanning_cells_basic.json └── twotables.json ├── labor.pdf ├── m27.pdf ├── mednine.pdf ├── npe_issue_206.pdf ├── offense.pdf ├── puertos1.pdf ├── rotated_page.pdf ├── schools.pdf ├── should_detect_rulings.pdf ├── sort_exception.pdf ├── spanning_cells.pdf ├── spreadsheet_no_bounding_frame.pdf ├── sydney_disclosure_contract.pdf ├── twotables.pdf ├── us-007.pdf ├── us-017.pdf ├── us-020.pdf └── us-024.pdf /.github/dependabot.yml: -------------------------------------------------------------------------------- 1 | version: 2 2 | updates: 3 | - package-ecosystem: maven 4 | directory: "/" 5 | schedule: 6 | interval: daily 7 | open-pull-requests-limit: 10 8 | -------------------------------------------------------------------------------- /.github/workflows/tests-windows.yml: -------------------------------------------------------------------------------- 1 | name: Java CI (Windows) 2 | 3 | on: [push] 4 | 5 | jobs: 6 | build: 7 | runs-on: windows-latest 8 | 9 | steps: 10 | # https://github.com/actions/checkout/issues/135#issuecomment-602171132 11 | - name: Set git to use LF 12 | run: | 13 | git config --global core.autocrlf false 14 | git config --global core.eol lf 15 | - uses: actions/checkout@v3 16 | - name: Set up JDK 11 17 | uses: actions/setup-java@v3 18 | with: 19 | java-version: '11' 20 | distribution: 'adopt' 21 | cache: maven 22 | - name: Build with Maven 23 | run: mvn --batch-mode test 24 | -------------------------------------------------------------------------------- /.github/workflows/tests.yml: -------------------------------------------------------------------------------- 1 | name: Java CI 2 | 3 | on: [push, pull_request] 4 | 5 | jobs: 6 | build: 7 | 
runs-on: ubuntu-latest 8 | 9 | steps: 10 | - uses: actions/checkout@v3 11 | - name: Set up JDK 11 12 | uses: actions/setup-java@v3 13 | with: 14 | java-version: '11' 15 | distribution: 'adopt' 16 | cache: maven 17 | - name: Build with Maven 18 | run: mvn --batch-mode test 19 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .settings/ 2 | .idea/ 3 | .project 4 | .classpath 5 | /bin/ 6 | /src/test/**/*.jpg 7 | /src/test/resources/technology/tabula/icdar2013-dataset/test-statistics.json 8 | /target/ 9 | *.iml 10 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | Copyright (c) 2014-2016 Manuel Aristarán 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /jbang-catalog.json: -------------------------------------------------------------------------------- 1 | { 2 | "catalogs": {}, 3 | "aliases": { 4 | "tabula": { 5 | "script-ref": "https://github.com/tabulapdf/tabula-java/releases/download/v1.0.4/tabula-1.0.4-jar-with-dependencies.jar" 6 | } 7 | } 8 | } -------------------------------------------------------------------------------- /src/main/java/technology/tabula/Cell.java: -------------------------------------------------------------------------------- 1 | package technology.tabula; 2 | 3 | import java.awt.geom.Point2D; 4 | import java.util.Collections; 5 | 6 | @SuppressWarnings("serial") 7 | public class Cell extends RectangularTextContainer { 8 | 9 | public Cell(float top, float left, float width, float height) { 10 | super(top, left, width, height); 11 | this.setPlaceholder(false); 12 | this.setSpanning(false); 13 | } 14 | 15 | public Cell(Point2D topLeft, Point2D bottomRight) { 16 | super((float) topLeft.getY(), (float) topLeft.getX(), (float) (bottomRight.getX() - topLeft.getX()), (float) (bottomRight.getY() - topLeft.getY())); 17 | this.setPlaceholder(false); 18 | this.setSpanning(false); 19 | } 20 | 21 | private boolean spanning; 22 | private boolean placeholder; 23 | 24 | @Override 25 | public String getText(boolean useLineReturns) { 26 | if (this.textElements.size() == 0) { 27 | return ""; 28 | } 29 | StringBuilder sb = new StringBuilder(); 30 | this.textElements.sort(Rectangle.ILL_DEFINED_ORDER); 31 | double curTop = this.textElements.get(0).getTop(); 32 | for (TextChunk tc : this.textElements) { 33 | if (useLineReturns && tc.getTop() > curTop) 
{ 34 | sb.append('\r'); 35 | } 36 | sb.append(tc.getText()); 37 | curTop = tc.getTop(); 38 | } 39 | return sb.toString().trim(); 40 | } 41 | 42 | @Override 43 | public String getText() { 44 | return getText(true); 45 | } 46 | 47 | public boolean isSpanning() { 48 | return spanning; 49 | } 50 | 51 | public void setSpanning(boolean spanning) { 52 | this.spanning = spanning; 53 | } 54 | 55 | public boolean isPlaceholder() { 56 | return placeholder; 57 | } 58 | 59 | public void setPlaceholder(boolean placeholder) { 60 | this.placeholder = placeholder; 61 | } 62 | } 63 | -------------------------------------------------------------------------------- /src/main/java/technology/tabula/CohenSutherlandClipping.java: -------------------------------------------------------------------------------- 1 | /* 2 | * CohenSutherland.java 3 | * -------------------- 4 | * (c) 2007 by Intevation GmbH 5 | * 6 | * @author Sascha L. Teichmann (teichmann@intevation.de) 7 | * @author Ludwig Reiter (ludwig@intevation.de) 8 | * 9 | * This program is free software under the LGPL (>=v2.1) 10 | * Read the file LICENSE.txt coming with the sources for details. 11 | */ 12 | package technology.tabula; 13 | 14 | import java.awt.geom.Rectangle2D; 15 | import java.awt.geom.Line2D; 16 | 17 | /** 18 | * Implements the well known Cohen Sutherland line 19 | * clipping algorithm (line against clip rectangle). 20 | */ 21 | public final class CohenSutherlandClipping { 22 | 23 | private double xMin; 24 | private double yMin; 25 | private double xMax; 26 | private double yMax; 27 | 28 | private static final int INSIDE = 0; 29 | private static final int LEFT = 1; 30 | private static final int RIGHT = 2; 31 | private static final int BOTTOM = 4; 32 | private static final int TOP = 8; 33 | 34 | private final static float MINIMUM_DELTA = 0.01f; 35 | 36 | /** 37 | * Creates a Cohen Sutherland clipper with clip window (0, 0, 0, 0). 
38 | */ 39 | public CohenSutherlandClipping() {} 40 | 41 | /** 42 | * Creates a Cohen Sutherland clipper with the given clip window. 43 | * @param clipWindow the clip window to use. 44 | */ 45 | public CohenSutherlandClipping(Rectangle2D clipWindow) { 46 | setClip(clipWindow); 47 | } 48 | 49 | /** 50 | * Sets the clip rectangle. 51 | * @param clipWindow the clip window. 52 | */ 53 | public void setClip(Rectangle2D clipWindow) { 54 | xMin = clipWindow.getX(); 55 | xMax = xMin + clipWindow.getWidth(); 56 | yMin = clipWindow.getY(); 57 | yMax = yMin + clipWindow.getHeight(); 58 | } 59 | 60 | // - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - // 61 | /** 62 | * Clips a given line against the clip window. 63 | * The modification (if needed) is done in place. 64 | * @param line the line to clip. 65 | * @return true if line is clipped, false if line is 66 | * totally outside the clip window. 67 | */ 68 | public boolean clip(Line2D.Float line) { 69 | Point point1 = new Point(line.getX1(), line.getY1()); 70 | Point point2 = new Point(line.getX2(), line.getY2()); 71 | Point outsidePoint = new Point(0d, 0d); 72 | 73 | boolean lineIsVertical = (point1.x == point2.x); 74 | double lineSlope = lineIsVertical ? 0d : (point2.y-point1.y)/(point2.x-point1.x); 75 | 76 | while (point1.region != INSIDE || point2.region != INSIDE) { 77 | if ((point1.region & point2.region) != 0) return false; 78 | 79 | outsidePoint.region = (point1.region == INSIDE) ? point2.region : point1.region; 80 | 81 | if ((outsidePoint.region & LEFT) != 0) { 82 | outsidePoint.x = xMin; 83 | outsidePoint.y = delta(outsidePoint.x, point1.x)*lineSlope + point1.y; 84 | } 85 | else if ((outsidePoint.region & RIGHT) != 0) { 86 | outsidePoint.x = xMax; 87 | outsidePoint.y = delta(outsidePoint.x, point1.x)*lineSlope + point1.y; 88 | } 89 | else if ((outsidePoint.region & BOTTOM) != 0) { 90 | outsidePoint.y = yMin; 91 | outsidePoint.x = lineIsVertical 92 | ? 
point1.x 93 | : delta(outsidePoint.y, point1.y)/lineSlope + point1.x; 94 | } 95 | else if ((outsidePoint.region & TOP) != 0) { 96 | outsidePoint.y = yMax; 97 | outsidePoint.x = lineIsVertical 98 | ? point1.x 99 | : delta(outsidePoint.y, point1.y)/lineSlope + point1.x; 100 | } 101 | 102 | if (outsidePoint.isInTheSameRegionAs(point1)) { 103 | point1.setPositionAndRegion(outsidePoint.x, outsidePoint.y); 104 | } 105 | else { 106 | point2.setPositionAndRegion(outsidePoint.x, outsidePoint.y); 107 | } 108 | } 109 | line.setLine(point1.x, point1.y, point2.x, point2.y); 110 | return true; 111 | } 112 | 113 | // - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - // 114 | private static double delta(double value1, double value2) { 115 | return (Math.abs(value1 - value2) < MINIMUM_DELTA) ? 0 : (value1 - value2); 116 | } 117 | 118 | // - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - // 119 | class Point { 120 | double x, y; 121 | int region; 122 | 123 | Point(double x, double y) { 124 | setPositionAndRegion(x, y); 125 | } 126 | 127 | void setPositionAndRegion(double x, double y) { 128 | this.x = x; this.y = y; 129 | region = (x < xMin) ? LEFT : (x > xMax) ? 
RIGHT : INSIDE; 130 | if (y < yMin) 131 | region |= BOTTOM; 132 | else if (y > yMax) 133 | region |= TOP; 134 | } 135 | 136 | boolean isInTheSameRegionAs(Point otherPoint) { 137 | return this.region == otherPoint.region; 138 | } 139 | } 140 | 141 | } 142 | -------------------------------------------------------------------------------- /src/main/java/technology/tabula/HasText.java: -------------------------------------------------------------------------------- 1 | package technology.tabula; 2 | 3 | public interface HasText { 4 | 5 | String getText(); 6 | String getText(boolean useLineReturns); 7 | 8 | } 9 | -------------------------------------------------------------------------------- /src/main/java/technology/tabula/Line.java: -------------------------------------------------------------------------------- 1 | package technology.tabula; 2 | 3 | import java.util.ArrayList; 4 | import java.util.List; 5 | 6 | // TODO this class seems superfluous - get rid of it 7 | 8 | @SuppressWarnings("serial") 9 | public class Line extends Rectangle { 10 | 11 | List textChunks = new ArrayList<>(); 12 | public static final Character[] WHITE_SPACE_CHARS = { ' ', '\t', '\r', '\n', '\f' }; 13 | 14 | 15 | public List getTextElements() { 16 | return textChunks; 17 | } 18 | 19 | public void setTextElements(List textChunks) { 20 | this.textChunks = textChunks; 21 | } 22 | 23 | public void addTextChunk(int i, TextChunk textChunk) { 24 | if (i < 0) { 25 | throw new IllegalArgumentException("i can't be less than 0"); 26 | } 27 | 28 | int s = this.textChunks.size(); 29 | if (s < i + 1) { 30 | for (; s <= i; s++) { 31 | this.textChunks.add(null); 32 | } 33 | this.textChunks.set(i, textChunk); 34 | } 35 | else { 36 | this.textChunks.set(i, this.textChunks.get(i).merge(textChunk)); 37 | } 38 | this.merge(textChunk); 39 | } 40 | 41 | public void addTextChunk(TextChunk textChunk) { 42 | if (this.textChunks.isEmpty()) { 43 | this.setRect(textChunk); 44 | } 45 | else { 46 | this.merge(textChunk); 
47 | } 48 | this.textChunks.add(textChunk); 49 | } 50 | 51 | @Override 52 | public String toString() { 53 | StringBuilder sb = new StringBuilder(); 54 | String s = super.toString(); 55 | sb.append(s, 0, s.length() - 1); 56 | sb.append(",chunks="); 57 | for (TextChunk te: this.textChunks) { 58 | sb.append("'" + te.getText() + "', "); 59 | } 60 | sb.append(']'); 61 | return sb.toString(); 62 | } 63 | 64 | static Line removeRepeatedCharacters(Line line, Character c, int minRunLength) { 65 | 66 | Line rv = new Line(); 67 | 68 | for(TextChunk t: line.getTextElements()) { 69 | for (TextChunk r: t.squeeze(c, minRunLength)) { 70 | rv.addTextChunk(r); 71 | } 72 | } 73 | 74 | return rv; 75 | } 76 | } 77 | -------------------------------------------------------------------------------- /src/main/java/technology/tabula/ObjectExtractor.java: -------------------------------------------------------------------------------- 1 | package technology.tabula; 2 | 3 | import java.io.IOException; 4 | 5 | import org.apache.pdfbox.pdmodel.PDDocument; 6 | import org.apache.pdfbox.pdmodel.PDPage; 7 | 8 | public class ObjectExtractor implements java.io.Closeable { 9 | 10 | private final PDDocument pdfDocument; 11 | 12 | public ObjectExtractor(PDDocument pdfDocument) { 13 | this.pdfDocument = pdfDocument; 14 | } 15 | 16 | // - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - // 17 | protected Page extractPage(Integer pageNumber) throws IOException { 18 | if (pageNumber > pdfDocument.getNumberOfPages() || pageNumber < 1) { 19 | throw new java.lang.IndexOutOfBoundsException("Page number does not exist."); 20 | } 21 | PDPage page = pdfDocument.getPage(pageNumber - 1); 22 | 23 | ObjectExtractorStreamEngine streamEngine = new ObjectExtractorStreamEngine(page); 24 | streamEngine.processPage(page); 25 | 26 | TextStripper textStripper = new TextStripper(pdfDocument, pageNumber); 27 | textStripper.process(); 28 | 29 | Utils.sort(textStripper.getTextElements(), 
Rectangle.ILL_DEFINED_ORDER); 30 | 31 | float width, height; 32 | int rotation = page.getRotation(); 33 | if (Math.abs(rotation) == 90 || Math.abs(rotation) == 270) { 34 | width = page.getCropBox().getHeight(); 35 | height = page.getCropBox().getWidth(); 36 | } else { 37 | width = page.getCropBox().getWidth(); 38 | height = page.getCropBox().getHeight(); 39 | } 40 | 41 | return Page.Builder.newInstance() 42 | .withPageDims(PageDims.of(0, 0, width, height)) 43 | .withRotation(rotation) 44 | .withNumber(pageNumber) 45 | .withPdPage(page) 46 | .withPdDocument(pdfDocument) 47 | .withRulings(streamEngine.rulings) 48 | .withTextElements(textStripper.getTextElements()) 49 | .withMinCharWidth(textStripper.getMinCharWidth()) 50 | .withMinCharHeight(textStripper.getMinCharHeight()) 51 | .withIndex(textStripper.getSpatialIndex()) 52 | .build(); 53 | } 54 | 55 | // - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - // 56 | public PageIterator extract(Iterable pages) { 57 | return new PageIterator(this, pages); 58 | } 59 | 60 | public PageIterator extract() { 61 | return extract(Utils.range(1, pdfDocument.getNumberOfPages() + 1)); 62 | } 63 | 64 | public Page extract(int pageNumber) { 65 | return extract(Utils.range(pageNumber, pageNumber + 1)).next(); 66 | } 67 | 68 | // - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - // 69 | public void close() throws IOException { 70 | pdfDocument.close(); 71 | } 72 | 73 | } 74 | -------------------------------------------------------------------------------- /src/main/java/technology/tabula/PageDims.java: -------------------------------------------------------------------------------- 1 | package technology.tabula; 2 | 3 | public class PageDims { 4 | private final float top; 5 | private final float left; 6 | private final float width; 7 | private final float height; 8 | 9 | private PageDims(final float top, final float left, final float width, final float height) { 10 | this.top = top; 11 | 
this.left = left; 12 | this.width = width; 13 | this.height = height; 14 | } 15 | 16 | public static PageDims of(final float top, final float left, final float width, final float height) { 17 | return new PageDims(top, left, width, height); 18 | } 19 | 20 | public float getTop() { 21 | return top; 22 | } 23 | 24 | public float getLeft() { 25 | return left; 26 | } 27 | 28 | public float getWidth() { 29 | return width; 30 | } 31 | 32 | public float getHeight() { 33 | return height; 34 | } 35 | } 36 | -------------------------------------------------------------------------------- /src/main/java/technology/tabula/PageIterator.java: -------------------------------------------------------------------------------- 1 | package technology.tabula; 2 | 3 | import java.io.IOException; 4 | import java.util.Iterator; 5 | 6 | public class PageIterator implements Iterator { 7 | 8 | private ObjectExtractor objectExtractor; 9 | private Iterator pageIndexIterator; 10 | 11 | public PageIterator(ObjectExtractor objectExtractor, Iterable pages) { 12 | super(); 13 | this.objectExtractor = objectExtractor; 14 | this.pageIndexIterator = pages.iterator(); 15 | } 16 | 17 | // - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - // 18 | @Override 19 | public boolean hasNext() { 20 | return pageIndexIterator.hasNext(); 21 | } 22 | 23 | @Override 24 | public Page next() { 25 | Page nextPage = null; 26 | if (!this.hasNext()) { 27 | throw new IllegalStateException(); 28 | } 29 | try { 30 | nextPage = objectExtractor.extractPage(pageIndexIterator.next()); 31 | } catch (IOException e) { 32 | e.printStackTrace(); 33 | } 34 | return nextPage; 35 | } 36 | 37 | // - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - // 38 | @Override 39 | public void remove() { 40 | throw new UnsupportedOperationException(); 41 | } 42 | 43 | } 44 | -------------------------------------------------------------------------------- /src/main/java/technology/tabula/Pair.java: 
-------------------------------------------------------------------------------- 1 | package technology.tabula; 2 | 3 | public class Pair { 4 | private final L left; 5 | private final R right; 6 | 7 | public Pair(L left, R right) { 8 | this.left = left; 9 | this.right = right; 10 | } 11 | 12 | public L getLeft() { 13 | return this.left; 14 | } 15 | 16 | public R getRight() { 17 | return this.right; 18 | } 19 | } 20 | -------------------------------------------------------------------------------- /src/main/java/technology/tabula/QuickSort.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one or more 3 | * contributor license agreements. See the NOTICE file distributed with 4 | * this work for additional information regarding copyright ownership. 5 | * The ASF licenses this file to You under the Apache License, Version 2.0 6 | * (the "License"); you may not use this file except in compliance with 7 | * the License. You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 16 | */ 17 | package technology.tabula; 18 | 19 | import java.util.ArrayList; 20 | import java.util.Comparator; 21 | import java.util.List; 22 | import java.util.RandomAccess; 23 | import java.util.Stack; 24 | 25 | /** 26 | * An implementation of Quicksort. 27 | * 28 | * @see <a href="https://en.wikipedia.org/wiki/Quicksort">Quicksort (Wikipedia)</a> 29 | * 30 | * @author Uwe Pachler 31 | */ 32 | public final class QuickSort { 33 | 34 | private QuickSort() { 35 | // utility 36 | } 37 | 38 | /** 39 | * Sorts the given list according to natural order. 
40 | */ 41 | public static > void sort(List list) { 42 | sort(list, QuickSort.naturalOrder()); // JAVA_8 replace with Comparator.naturalOrder() (and cleanup) 43 | } 44 | 45 | /** 46 | * Sorts the given list using the given comparator. 47 | */ 48 | public static void sort(List list, Comparator comparator) { 49 | if (list instanceof RandomAccess) { 50 | quicksort(list, comparator); 51 | } else { 52 | List copy = new ArrayList<>(list); 53 | quicksort(copy, comparator); 54 | list.clear(); 55 | list.addAll(copy); 56 | } 57 | } 58 | 59 | private static void quicksort(List list, Comparator cmp) { 60 | Stack stack = new Stack<>(); 61 | stack.push(0); 62 | stack.push(list.size()); 63 | while (!stack.isEmpty()) { 64 | int right = stack.pop(); 65 | int left = stack.pop(); 66 | 67 | if (right - left < 2) continue; 68 | int p = left + ((right - left) / 2); 69 | p = partition(list, cmp, p, left, right); 70 | 71 | stack.push(p + 1); 72 | stack.push(right); 73 | 74 | stack.push(left); 75 | stack.push(p); 76 | } 77 | } 78 | 79 | private static int partition(List list, Comparator cmp, int p, int start, int end) { 80 | int l = start; 81 | int h = end - 2; 82 | T piv = list.get(p); 83 | swap(list, p, end - 1); 84 | 85 | while (l < h) { 86 | if (cmp.compare(list.get(l), piv) <= 0) l++; 87 | else if (cmp.compare(piv, list.get(h)) <= 0) h--; 88 | else swap(list, l, h); 89 | } 90 | int idx = h; 91 | if (cmp.compare(list.get(h), piv) < 0) idx++; 92 | swap(list, end - 1, idx); 93 | return idx; 94 | } 95 | 96 | private static void swap(List list, int i, int j) { 97 | T tmp = list.get(i); 98 | list.set(i, list.get(j)); 99 | list.set(j, tmp); 100 | } 101 | 102 | @SuppressWarnings({ "rawtypes", "unchecked" }) 103 | private static final Comparator NATURAL_ORDER = new Comparator() { 104 | @Override public int compare(Object l, Object r) { return ((Comparable) l).compareTo(r); } 105 | }; 106 | 107 | @SuppressWarnings("unchecked") 108 | private static > Comparator naturalOrder() { 109 | return 
NATURAL_ORDER; 110 | } 111 | 112 | } 113 | -------------------------------------------------------------------------------- /src/main/java/technology/tabula/RectangleSpatialIndex.java: -------------------------------------------------------------------------------- 1 | package technology.tabula; 2 | 3 | import java.util.ArrayList; 4 | import java.util.List; 5 | 6 | import org.locationtech.jts.geom.Envelope; 7 | import org.locationtech.jts.index.strtree.STRtree; 8 | 9 | public class RectangleSpatialIndex { 10 | 11 | 12 | private final STRtree si = new STRtree(); 13 | private final List rectangles = new ArrayList<>(); 14 | 15 | public void add(T te) { 16 | rectangles.add(te); 17 | si.insert(new Envelope(te.getLeft(), te.getRight(), te.getBottom(), te.getTop()), te); 18 | } 19 | 20 | public List contains(Rectangle r) { 21 | List intersection = si.query(new Envelope(r.getLeft(), r.getRight(), r.getTop(), r.getBottom())); 22 | List rv = new ArrayList(); 23 | 24 | for (T ir: intersection) { 25 | if (r.contains(ir)) { 26 | rv.add(ir); 27 | } 28 | } 29 | 30 | Utils.sort(rv, Rectangle.ILL_DEFINED_ORDER); 31 | return rv; 32 | } 33 | 34 | public List intersects(Rectangle r) { 35 | return si.query(new Envelope(r.getLeft(), r.getRight(), r.getTop(), r.getBottom())); 36 | } 37 | 38 | /** 39 | * Minimum bounding box of all the Rectangles contained in this RectangleSpatialIndex. 40 | * 41 | * @return a Rectangle 42 | */ 43 | public Rectangle getBounds() { 44 | return Rectangle.boundingBoxOf(rectangles); 45 | } 46 | 47 | } 48 | -------------------------------------------------------------------------------- /src/main/java/technology/tabula/RectangularTextContainer.java: -------------------------------------------------------------------------------- 1 | package technology.tabula; 2 | 3 | import java.util.ArrayList; 4 | import java.util.List; 5 | 6 | @SuppressWarnings("serial") 7 | public class RectangularTextContainer extends Rectangle implements HasText { 8 | 9 | protected List 
textElements = new ArrayList<>(); 10 | 11 | protected RectangularTextContainer(float top, float left, float width, float height) { 12 | super(top, left, width, height); 13 | } 14 | 15 | public RectangularTextContainer merge(RectangularTextContainer other) { 16 | if (compareTo(other) < 0) { 17 | this.getTextElements().addAll(other.getTextElements()); 18 | } else { 19 | this.getTextElements().addAll(0, other.getTextElements()); 20 | } 21 | super.merge(other); 22 | return this; 23 | } 24 | 25 | public List getTextElements() { 26 | return textElements; 27 | } 28 | 29 | public void setTextElements(List textElements) { 30 | this.textElements = textElements; 31 | } 32 | 33 | @Override 34 | public String getText() { 35 | throw new UnsupportedOperationException(); 36 | } 37 | 38 | @Override 39 | public String getText(boolean useLineReturns) { 40 | throw new UnsupportedOperationException(); 41 | } 42 | 43 | @Override public String toString() { 44 | StringBuilder sb = new StringBuilder(); 45 | String s = super.toString(); 46 | sb.append(s.substring(0, s.length() - 1)); 47 | sb.append(String.format(",text=%s]", this.getText() == null ? 
"null" : "\"" + this.getText() + "\"")); 48 | return sb.toString(); 49 | } 50 | 51 | } 52 | -------------------------------------------------------------------------------- /src/main/java/technology/tabula/Table.java: -------------------------------------------------------------------------------- 1 | package technology.tabula; 2 | 3 | import java.util.ArrayList; 4 | import java.util.List; 5 | import java.util.TreeMap; 6 | 7 | import technology.tabula.extractors.ExtractionAlgorithm; 8 | 9 | @SuppressWarnings("serial") 10 | public class Table extends Rectangle { 11 | 12 | public static final Table empty() { return new Table(""); } 13 | 14 | private Table(String extractionMethod) { 15 | this.extractionMethod = extractionMethod; 16 | } 17 | 18 | public Table(ExtractionAlgorithm extractionAlgorithm) { 19 | this(extractionAlgorithm.toString()); 20 | } 21 | 22 | private final String extractionMethod; 23 | 24 | private int rowCount = 0; 25 | private int colCount = 0; 26 | private int pageNumber = 0; 27 | 28 | /* visible for testing */ final TreeMap cells = new TreeMap<>(); 29 | 30 | public int getRowCount() { return rowCount; } 31 | public int getColCount() { return colCount; } 32 | public int getPageNumber() { return pageNumber; } 33 | public void setPageNumber(int pageNumber) { this.pageNumber = pageNumber; } 34 | 35 | public String getExtractionMethod() { return extractionMethod; } 36 | 37 | public void add(RectangularTextContainer chunk, int row, int col) { 38 | this.merge(chunk); 39 | 40 | rowCount = Math.max(rowCount, row + 1); 41 | colCount = Math.max(colCount, col + 1); 42 | 43 | CellPosition cp = new CellPosition(row, col); 44 | 45 | RectangularTextContainer old = cells.get(cp); 46 | if (old != null) chunk.merge(old); 47 | cells.put(cp, chunk); 48 | 49 | this.memoizedRows = null; 50 | } 51 | 52 | private List> memoizedRows = null; 53 | 54 | public List> getRows() { 55 | if (this.memoizedRows == null) this.memoizedRows = computeRows(); 56 | return 
this.memoizedRows; 57 | } 58 | 59 | private List> computeRows() { 60 | List> rows = new ArrayList<>(); 61 | for (int i = 0; i < rowCount; i++) { 62 | List lastRow = new ArrayList<>(); 63 | rows.add(lastRow); 64 | for (int j = 0; j < colCount; j++) { 65 | RectangularTextContainer cell = cells.get(new CellPosition(i,j)); // JAVA_8 use getOrDefault() 66 | lastRow.add(cell != null ? cell : TextChunk.EMPTY); 67 | } 68 | } 69 | return rows; 70 | } 71 | 72 | public RectangularTextContainer getCell(int i, int j) { 73 | RectangularTextContainer cell = cells.get(new CellPosition(i,j)); // JAVA_8 use getOrDefault() 74 | return cell != null ? cell : TextChunk.EMPTY; 75 | } 76 | 77 | } 78 | 79 | class CellPosition implements Comparable { 80 | 81 | CellPosition(int row, int col) { 82 | this.row = row; 83 | this.col = col; 84 | } 85 | 86 | final int row, col; 87 | 88 | @Override public int hashCode() { 89 | return row + 101 * col; 90 | } 91 | 92 | @Override public boolean equals(Object obj) { 93 | if (this == obj) return true; 94 | if (obj == null) return false; 95 | if (getClass() != obj.getClass()) return false; 96 | CellPosition other = (CellPosition) obj; 97 | return row == other.row && col == other.col; 98 | } 99 | 100 | @Override public int compareTo(CellPosition other) { 101 | int rowdiff = row - other.row; 102 | return rowdiff != 0 ? 
rowdiff : col - other.col; 103 | } 104 | 105 | } 106 | -------------------------------------------------------------------------------- /src/main/java/technology/tabula/TableWithRulingLines.java: -------------------------------------------------------------------------------- 1 | package technology.tabula; 2 | 3 | import java.util.ArrayList; 4 | import java.util.Collections; 5 | import java.util.Comparator; 6 | import java.util.Iterator; 7 | import java.util.List; 8 | 9 | import technology.tabula.extractors.ExtractionAlgorithm; 10 | 11 | @SuppressWarnings("serial") 12 | public class TableWithRulingLines extends Table { 13 | 14 | List verticalRulings, horizontalRulings; 15 | RectangleSpatialIndex si = new RectangleSpatialIndex<>(); 16 | 17 | public TableWithRulingLines(Rectangle area, List cells, List horizontalRulings, List verticalRulings, ExtractionAlgorithm extractionAlgorithm, int pageNumber) { 18 | super(extractionAlgorithm); 19 | this.setRect(area); 20 | this.verticalRulings = verticalRulings; 21 | this.horizontalRulings = horizontalRulings; 22 | this.addCells(cells); 23 | this.setPageNumber(pageNumber); 24 | } 25 | 26 | private void addCells(List cells) { 27 | 28 | if (cells.isEmpty()) { 29 | return; 30 | } 31 | 32 | for (Cell ce: cells) { 33 | si.add(ce); 34 | } 35 | 36 | List> rowsOfCells = rowsOfCells(cells); 37 | for (int i = 0; i < rowsOfCells.size(); i++) { 38 | List row = rowsOfCells.get(i); 39 | Iterator rowCells = row.iterator(); 40 | Cell cell = rowCells.next(); 41 | List> others = rowsOfCells( 42 | si.contains( 43 | new Rectangle(cell.getBottom(), si.getBounds().getLeft(), cell.getLeft() - si.getBounds().getLeft(), 44 | si.getBounds().getBottom() - cell.getBottom()) 45 | )); 46 | int startColumn = 0; 47 | for (List r: others) { 48 | startColumn = Math.max(startColumn, r.size()); 49 | } 50 | this.add(cell, i, startColumn++); 51 | while (rowCells.hasNext()) { 52 | this.add(rowCells.next(), i, startColumn++); 53 | } 54 | } 55 | } 56 | 57 | private 
static List> rowsOfCells(List cells) { 58 | Cell c; 59 | float lastTop; 60 | List> rv = new ArrayList<>(); 61 | List lastRow; 62 | 63 | if (cells.isEmpty()) { 64 | return rv; 65 | } 66 | 67 | Collections.sort(cells, new Comparator() { 68 | @Override 69 | public int compare(Cell arg0, Cell arg1) { 70 | return java.lang.Double.compare(arg0.getTop(), arg1.getTop()); 71 | } 72 | }); 73 | 74 | 75 | Iterator iter = cells.iterator(); 76 | c = iter.next(); 77 | lastTop = c.getTop(); 78 | lastRow = new ArrayList<>(); 79 | lastRow.add(c); 80 | rv.add(lastRow); 81 | 82 | while (iter.hasNext()) { 83 | c = iter.next(); 84 | if (!Utils.feq(c.getTop(), lastTop)) { 85 | lastRow = new ArrayList<>(); 86 | rv.add(lastRow); 87 | } 88 | lastRow.add(c); 89 | lastTop = c.getTop(); 90 | } 91 | return rv; 92 | } 93 | 94 | } 95 | -------------------------------------------------------------------------------- /src/main/java/technology/tabula/detectors/DetectionAlgorithm.java: -------------------------------------------------------------------------------- 1 | package technology.tabula.detectors; 2 | 3 | import technology.tabula.Page; 4 | import technology.tabula.Rectangle; 5 | 6 | import java.util.List; 7 | 8 | /** 9 | * Created by matt on 2015-12-14. 10 | */ 11 | public interface DetectionAlgorithm { 12 | List detect(Page page); 13 | } 14 | -------------------------------------------------------------------------------- /src/main/java/technology/tabula/detectors/SpreadsheetDetectionAlgorithm.java: -------------------------------------------------------------------------------- 1 | package technology.tabula.detectors; 2 | 3 | import technology.tabula.Cell; 4 | import technology.tabula.Page; 5 | import technology.tabula.Rectangle; 6 | import technology.tabula.extractors.SpreadsheetExtractionAlgorithm; 7 | 8 | import java.util.Collections; 9 | import java.util.List; 10 | 11 | /** 12 | * Created by matt on 2015-12-14. 
13 | * 14 | * This is the basic spreadsheet table detection algorithm currently implemented in tabula (web). 15 | * 16 | * It uses intersecting ruling lines to find tables. 17 | */ 18 | public class SpreadsheetDetectionAlgorithm implements DetectionAlgorithm { 19 | @Override 20 | public List detect(Page page) { 21 | List cells = SpreadsheetExtractionAlgorithm.findCells(page.getHorizontalRulings(), page.getVerticalRulings()); 22 | 23 | List tables = SpreadsheetExtractionAlgorithm.findSpreadsheetsFromCells(cells); 24 | 25 | // we want tables to be returned from top to bottom on the page 26 | Collections.sort(tables, Rectangle.ILL_DEFINED_ORDER); 27 | 28 | return tables; 29 | } 30 | } 31 | -------------------------------------------------------------------------------- /src/main/java/technology/tabula/extractors/ExtractionAlgorithm.java: -------------------------------------------------------------------------------- 1 | package technology.tabula.extractors; 2 | 3 | import java.util.List; 4 | 5 | import technology.tabula.Page; 6 | import technology.tabula.Table; 7 | 8 | public interface ExtractionAlgorithm { 9 | 10 | List extract(Page page); 11 | String toString(); 12 | 13 | } 14 | -------------------------------------------------------------------------------- /src/main/java/technology/tabula/json/RectangularTextContainerSerializer.java: -------------------------------------------------------------------------------- 1 | package technology.tabula.json; 2 | 3 | import java.lang.reflect.Type; 4 | 5 | import com.google.gson.JsonElement; 6 | import com.google.gson.JsonObject; 7 | import com.google.gson.JsonSerializationContext; 8 | import com.google.gson.JsonSerializer; 9 | 10 | import technology.tabula.RectangularTextContainer; 11 | 12 | public final class RectangularTextContainerSerializer implements JsonSerializer> { 13 | 14 | public static final RectangularTextContainerSerializer INSTANCE = new RectangularTextContainerSerializer(); 15 | 16 | private 
RectangularTextContainerSerializer() {} 17 | 18 | @Override 19 | public JsonElement serialize(RectangularTextContainer textContainer, Type type, JsonSerializationContext context) { 20 | JsonObject json = new JsonObject(); 21 | json.addProperty("top", textContainer.getTop()); 22 | json.addProperty("left", textContainer.getLeft()); 23 | json.addProperty("width", textContainer.getWidth()); 24 | json.addProperty("height", textContainer.getHeight()); 25 | json.addProperty("text", textContainer.getText()); 26 | return json; 27 | } 28 | 29 | } -------------------------------------------------------------------------------- /src/main/java/technology/tabula/json/TableSerializer.java: -------------------------------------------------------------------------------- 1 | package technology.tabula.json; 2 | 3 | import java.lang.reflect.Type; 4 | import java.util.List; 5 | 6 | import technology.tabula.RectangularTextContainer; 7 | import technology.tabula.Table; 8 | 9 | import com.google.gson.JsonArray; 10 | import com.google.gson.JsonElement; 11 | import com.google.gson.JsonObject; 12 | import com.google.gson.JsonSerializationContext; 13 | import com.google.gson.JsonSerializer; 14 | 15 | public final class TableSerializer implements JsonSerializer { 16 | 17 | public static final TableSerializer INSTANCE = new TableSerializer(); 18 | 19 | private TableSerializer() {} 20 | 21 | @Override 22 | public JsonElement serialize(Table table, Type type, JsonSerializationContext context) { 23 | JsonObject json = new JsonObject(); 24 | JsonArray data = new JsonArray(); 25 | 26 | json.addProperty("extraction_method", table.getExtractionMethod()); 27 | json.addProperty("page_number", table.getPageNumber()); 28 | json.addProperty("top", table.getTop()); 29 | json.addProperty("left", table.getLeft()); 30 | json.addProperty("width", table.getWidth()); 31 | json.addProperty("height", table.getHeight()); 32 | json.addProperty("right", table.getRight()); 33 | json.addProperty("bottom", 
table.getBottom()); 34 | json.add("data", data); 35 | 36 | for (List tableRow : table.getRows()) { 37 | JsonArray jsonRow = new JsonArray(); 38 | for (RectangularTextContainer textChunk : tableRow) 39 | jsonRow.add(context.serialize(textChunk)); 40 | data.add(jsonRow); 41 | } 42 | 43 | return json; 44 | } 45 | 46 | } 47 | -------------------------------------------------------------------------------- /src/main/java/technology/tabula/writers/CSVWriter.java: -------------------------------------------------------------------------------- 1 | package technology.tabula.writers; 2 | 3 | import java.io.IOException; 4 | import java.util.ArrayList; 5 | import java.util.Collections; 6 | import java.util.List; 7 | 8 | import org.apache.commons.csv.CSVPrinter; 9 | import org.apache.commons.csv.CSVFormat; 10 | 11 | import technology.tabula.RectangularTextContainer; 12 | import technology.tabula.Table; 13 | 14 | public class CSVWriter implements Writer { 15 | 16 | private final CSVFormat format; 17 | 18 | public CSVWriter() { 19 | this(CSVFormat.EXCEL); 20 | } 21 | 22 | protected CSVWriter(CSVFormat format) { 23 | this.format = format; 24 | } 25 | 26 | // - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - // 27 | @Override 28 | public void write(Appendable out, Table table) throws IOException { 29 | write(out, Collections.singletonList(table)); 30 | } 31 | 32 | @Override 33 | public void write(Appendable out, List
tables) throws IOException { 34 | try (CSVPrinter printer = new CSVPrinter(out, format)) { 35 | for (Table table : tables) { 36 | for (List row : table.getRows()) { 37 | List cells = new ArrayList<>(row.size()); 38 | for (RectangularTextContainer cell : row) 39 | cells.add(cell.getText()); 40 | printer.printRecord(cells); 41 | } 42 | } 43 | printer.flush(); 44 | } 45 | } 46 | 47 | } 48 | -------------------------------------------------------------------------------- /src/main/java/technology/tabula/writers/JSONWriter.java: -------------------------------------------------------------------------------- 1 | package technology.tabula.writers; 2 | 3 | import com.google.gson.ExclusionStrategy; 4 | import com.google.gson.FieldAttributes; 5 | import com.google.gson.Gson; 6 | import com.google.gson.GsonBuilder; 7 | import com.google.gson.JsonArray; 8 | 9 | import technology.tabula.Cell; 10 | import technology.tabula.RectangularTextContainer; 11 | import technology.tabula.Table; 12 | import technology.tabula.TextChunk; 13 | import technology.tabula.json.RectangularTextContainerSerializer; 14 | import technology.tabula.json.TableSerializer; 15 | 16 | import java.io.IOException; 17 | import java.util.List; 18 | 19 | import static java.lang.reflect.Modifier.PUBLIC; 20 | 21 | public class JSONWriter implements Writer { 22 | 23 | private static final ExclusionStrategy ALL_CLASSES_SKIPPING_NON_PUBLIC_FIELDS = new ExclusionStrategy() { 24 | @Override 25 | public boolean shouldSkipClass(Class c) { 26 | return false; 27 | } 28 | 29 | @Override 30 | public boolean shouldSkipField(FieldAttributes fieldAttributes) { 31 | return !fieldAttributes.hasModifier(PUBLIC); 32 | } 33 | }; 34 | 35 | // - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - // 36 | @Override 37 | public void write(Appendable out, Table table) throws IOException { 38 | out.append(gson().toJson(table, Table.class)); 39 | } 40 | 41 | @Override 42 | public void write(Appendable out, List
tables) throws IOException { 43 | Gson gson = gson(); 44 | JsonArray jsonElements = new JsonArray(); 45 | for (Table table : tables) 46 | jsonElements.add(gson.toJsonTree(table, Table.class)); 47 | out.append(gson.toJson(jsonElements)); 48 | } 49 | 50 | // - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - // 51 | private static Gson gson() { 52 | return new GsonBuilder() 53 | .addSerializationExclusionStrategy(ALL_CLASSES_SKIPPING_NON_PUBLIC_FIELDS) 54 | .registerTypeAdapter(Table.class, TableSerializer.INSTANCE) 55 | .registerTypeAdapter(RectangularTextContainer.class, RectangularTextContainerSerializer.INSTANCE) 56 | .registerTypeAdapter(Cell.class, RectangularTextContainerSerializer.INSTANCE) 57 | .registerTypeAdapter(TextChunk.class, RectangularTextContainerSerializer.INSTANCE) 58 | .create(); 59 | } 60 | 61 | } 62 | -------------------------------------------------------------------------------- /src/main/java/technology/tabula/writers/TSVWriter.java: -------------------------------------------------------------------------------- 1 | package technology.tabula.writers; 2 | 3 | import org.apache.commons.csv.CSVFormat; 4 | 5 | public class TSVWriter extends CSVWriter { 6 | 7 | public TSVWriter() { 8 | super(CSVFormat.TDF); 9 | } 10 | 11 | } 12 | -------------------------------------------------------------------------------- /src/main/java/technology/tabula/writers/Writer.java: -------------------------------------------------------------------------------- 1 | package technology.tabula.writers; 2 | 3 | import java.io.IOException; 4 | import java.util.List; 5 | 6 | import technology.tabula.Table; 7 | 8 | public interface Writer { 9 | 10 | void write(Appendable out, Table table) throws IOException; 11 | 12 | void write(Appendable out, List
tables) throws IOException; 13 | 14 | } 15 | -------------------------------------------------------------------------------- /src/test/java/technology/tabula/TableTest.java: -------------------------------------------------------------------------------- 1 | package technology.tabula; 2 | 3 | import static org.junit.Assert.*; 4 | 5 | import org.junit.Test; 6 | 7 | public class TableTest { 8 | 9 | @Test public void testEmpty() { 10 | Table empty = Table.empty(); 11 | 12 | assertEquals(TextChunk.EMPTY, empty.getCell(0, 0)); 13 | assertEquals(TextChunk.EMPTY, empty.getCell(1, 1)); 14 | 15 | assertEquals(0, empty.getRowCount()); 16 | assertEquals(0, empty.getColCount()); 17 | 18 | assertEquals("", empty.getExtractionMethod()); 19 | 20 | assertEquals(0, empty.getTop(), 0); 21 | assertEquals(0, empty.getRight(), 0); 22 | assertEquals(0, empty.getBottom(), 0); 23 | assertEquals(0, empty.getLeft(), 0); 24 | 25 | assertEquals(0, empty.getArea(), 0); 26 | } 27 | 28 | @Test public void testRowColCounts() { 29 | Table table = Table.empty(); 30 | 31 | assertEquals(0, table.getRowCount()); 32 | assertEquals(0, table.getColCount()); 33 | 34 | table.add(TextChunk.EMPTY, 0, 0); 35 | 36 | assertEquals(1, table.getRowCount()); 37 | assertEquals(1, table.getColCount()); 38 | 39 | table.add(TextChunk.EMPTY, 9, 9); 40 | 41 | assertEquals(10, table.getRowCount()); 42 | assertEquals(10, table.getColCount()); 43 | } 44 | 45 | } 46 | -------------------------------------------------------------------------------- /src/test/java/technology/tabula/TestCell.java: -------------------------------------------------------------------------------- 1 | package technology.tabula; 2 | 3 | import static org.junit.Assert.*; 4 | 5 | import java.util.List; 6 | import java.util.ArrayList; 7 | 8 | import org.apache.pdfbox.pdmodel.font.PDType1Font; 9 | import org.apache.pdfbox.pdmodel.font.Standard14Fonts; 10 | import org.junit.Test; 11 | 12 | public class TestCell { 13 | 14 | @Test 15 | public void 
testIsSpanning() { 16 | Cell cell = new Cell(0, 0, 0, 0); 17 | assertFalse(cell.isSpanning()); 18 | cell.setSpanning(true); 19 | assertTrue(cell.isSpanning()); 20 | } 21 | 22 | @Test 23 | public void testIsPlaceholder() { 24 | Cell cell = new Cell(0, 0, 0, 0); 25 | assertFalse(cell.isPlaceholder()); 26 | cell.setPlaceholder(true); 27 | assertTrue(cell.isPlaceholder()); 28 | } 29 | 30 | @Test 31 | public void testGetTextElements() { 32 | Cell cell = new Cell(0, 0, 0, 0); 33 | assertTrue(cell.getTextElements().isEmpty()); 34 | 35 | TextElement tElement = new TextElement(0, 0, 0, 0, new PDType1Font(Standard14Fonts.FontName.HELVETICA_BOLD), 10, "test", 5); 36 | TextChunk tChunk = new TextChunk(tElement); 37 | List tList = new ArrayList<>(); 38 | tList.add(tChunk); 39 | cell.setTextElements(tList); 40 | 41 | assertEquals("test", cell.getTextElements().get(0).getText()); 42 | 43 | 44 | } 45 | 46 | } 47 | -------------------------------------------------------------------------------- /src/test/java/technology/tabula/TestCohenSutherland.java: -------------------------------------------------------------------------------- 1 | package technology.tabula; 2 | 3 | import org.junit.Before; 4 | import org.junit.Test; 5 | 6 | import java.awt.geom.Line2D; 7 | import java.awt.geom.Rectangle2D; 8 | 9 | import static org.junit.Assert.*; 10 | 11 | public class TestCohenSutherland { 12 | 13 | private Rectangle2D clipWindow; 14 | private CohenSutherlandClipping algorithm; 15 | private static final double DELTA = 0.001; 16 | 17 | @Before 18 | public void set() { 19 | clipWindow = new Rectangle(10, 10, 50, 50); 20 | algorithm = new CohenSutherlandClipping(clipWindow); 21 | } 22 | 23 | // - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - // 24 | // TODO: How to parameterize the tests? 
25 | @Test 26 | public void theLineIsCompletelyInside() { 27 | Line2D.Float line = new Line2D.Float(20, 20, 30, 30); 28 | assertTrue(algorithm.clip(line)); 29 | assertEquals(20, line.x1, DELTA); 30 | assertEquals(20, line.y1, DELTA); 31 | assertEquals(30, line.x2, DELTA); 32 | assertEquals(30, line.y2, DELTA); 33 | } 34 | 35 | @Test 36 | public void theLineIsCompletelyOnTheLeft() { 37 | float x1 = 3, y1 = 13, x2 = 6, y2 = 16; 38 | Line2D.Float line = new Line2D.Float(x1, y1, x2, y2); 39 | assertFalse(algorithm.clip(line)); 40 | assertEquals(x1, line.x1, DELTA); 41 | assertEquals(y1, line.y1, DELTA); 42 | assertEquals(x2, line.x2, DELTA); 43 | assertEquals(y2, line.y2, DELTA); 44 | } 45 | 46 | @Test 47 | public void theLineIsCompletelyOnTheUp() { 48 | float x1 = 15, y1 = 5, x2 = 25, y2 = 2; 49 | Line2D.Float line = new Line2D.Float(x1, y1, x2, y2); 50 | assertFalse(algorithm.clip(line)); 51 | assertEquals(x1, line.x1, DELTA); 52 | assertEquals(y1, line.y1, DELTA); 53 | assertEquals(x2, line.x2, DELTA); 54 | assertEquals(y2, line.y2, DELTA); 55 | } 56 | 57 | @Test 58 | public void theLineIsCompletelyOnTheRight() { 59 | float x1 = 65, y1 = 15, x2 = 70, y2 = 20; 60 | Line2D.Float line = new Line2D.Float(x1, y1, x2, y2); 61 | assertFalse(algorithm.clip(line)); 62 | assertEquals(x1, line.x1, DELTA); 63 | assertEquals(y1, line.y1, DELTA); 64 | assertEquals(x2, line.x2, DELTA); 65 | assertEquals(y2, line.y2, DELTA); 66 | } 67 | 68 | @Test 69 | public void theLineIsCompletelyOnTheBottom() { 70 | float x1 = 15, y1 = 65, x2 = 25, y2 = 70; 71 | Line2D.Float line = new Line2D.Float(x1, y1, x2, y2); 72 | assertFalse(algorithm.clip(line)); 73 | assertEquals(x1, line.x1, DELTA); 74 | assertEquals(y1, line.y1, DELTA); 75 | assertEquals(x2, line.x2, DELTA); 76 | assertEquals(y2, line.y2, DELTA); 77 | } 78 | 79 | // - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - // 80 | @Test 81 | public void lineCrossesTopLeftCorner() { 82 | float x1 = 5, y1 = 25, x2 = 25, 
y2 = 5; 83 | Line2D.Float line = new Line2D.Float(x1, y1, x2, y2); 84 | assertTrue(algorithm.clip(line)); 85 | assertEquals(10, line.x1, DELTA); 86 | assertEquals(20, line.y1, DELTA); 87 | assertEquals(20, line.x2, DELTA); 88 | assertEquals(10, line.y2, DELTA); 89 | } 90 | 91 | @Test 92 | public void lineCrossesPartiallyTopLeftCorner() { 93 | float x1 = 15, y1 = 15, x2 = 25, y2 = 5; 94 | Line2D.Float line = new Line2D.Float(x1, y1, x2, y2); 95 | assertTrue(algorithm.clip(line)); 96 | assertEquals(x1, line.x1, DELTA); 97 | assertEquals(y1, line.y1, DELTA); 98 | assertEquals(20, line.x2, DELTA); 99 | assertEquals(10, line.y2, DELTA); 100 | } 101 | 102 | } 103 | -------------------------------------------------------------------------------- /src/test/java/technology/tabula/TestDebug.java: -------------------------------------------------------------------------------- 1 | package technology.tabula; 2 | 3 | public class TestDebug { 4 | 5 | private final static String PATH = "src/test/resources/technology/tabula/spanning_cells.pdf"; 6 | 7 | // @Test 8 | // public void test() throws IOException { 9 | // File outFile = new File(new File(System.getProperty("java.io.tmpdir")), "/rendered_page.jpg"); 10 | // Debug.renderPage(PATH, outFile.getAbsolutePath(), 0, null, true, false, false, false, false, false, false, false, false, false); 11 | // assertTrue(outFile.exists()); 12 | // System.out.println(outFile.getAbsolutePath()); 13 | // } 14 | 15 | } 16 | -------------------------------------------------------------------------------- /src/test/java/technology/tabula/TestLine.java: -------------------------------------------------------------------------------- 1 | package technology.tabula; 2 | 3 | import static org.junit.Assert.*; 4 | 5 | import java.util.ArrayList; 6 | import java.util.List; 7 | 8 | import org.apache.pdfbox.pdmodel.font.PDType1Font; 9 | import org.apache.pdfbox.pdmodel.font.Standard14Fonts; 10 | import org.junit.Test; 11 | 12 | public class TestLine { 13 | 
14 | @Test 15 | public void testSetTextElements() { 16 | Line line = new Line(); 17 | 18 | TextElement tElement = new TextElement(0, 0, 0, 0, new PDType1Font(Standard14Fonts.FontName.HELVETICA_BOLD), 10, "test", 5); 19 | TextChunk tChunk = new TextChunk(tElement); 20 | List tList = new ArrayList<>(); 21 | tList.add(tChunk); 22 | line.setTextElements(tList); 23 | 24 | assertEquals("test", line.getTextElements().get(0).getText()); 25 | 26 | } 27 | 28 | @Test 29 | public void testAddTextChunkIntTextChunk() { 30 | Line line = new Line(); 31 | 32 | TextElement tElement = new TextElement(0, 0, 0, 0, new PDType1Font(Standard14Fonts.FontName.HELVETICA_BOLD), 10, "test", 5); 33 | TextChunk tChunk = new TextChunk(tElement); 34 | line.addTextChunk(3, tChunk); 35 | 36 | assertEquals("test", line.getTextElements().get(3).getText()); 37 | } 38 | 39 | @Test 40 | public void testLessThanAddTextChunkIntTextChunk() { 41 | Line line = new Line(); 42 | 43 | TextElement tElement = new TextElement(0, 0, 0, 0, new PDType1Font(Standard14Fonts.FontName.HELVETICA_BOLD), 10, "test", 5); 44 | TextChunk tChunk = new TextChunk(tElement); 45 | line.addTextChunk(0, tChunk); 46 | line.addTextChunk(0, tChunk); 47 | 48 | assertEquals("testtest", line.getTextElements().get(0).getText()); 49 | } 50 | 51 | @Test(expected = IllegalArgumentException.class) 52 | public void testErrorAddTextChunkIntTextChunk() { 53 | Line line = new Line(); 54 | 55 | TextElement tElement = new TextElement(0, 0, 0, 0,new PDType1Font(Standard14Fonts.FontName.HELVETICA_BOLD), 10, "test", 5); 56 | TextChunk tChunk = new TextChunk(tElement); 57 | line.addTextChunk(-1, tChunk); 58 | } 59 | 60 | @Test 61 | public void testToString() { 62 | Line line = new Line(); 63 | 64 | TextElement tElement = new TextElement(0, 0, 0, 0, new PDType1Font(Standard14Fonts.FontName.HELVETICA_BOLD), 10, "test", 5); 65 | TextChunk tChunk = new TextChunk(tElement); 66 | line.addTextChunk(0, tChunk); 67 | line.addTextChunk(0, tChunk); 68 | 69 | 
assertEquals("technology.tabula.Line[x=0.0,y=0.0,w=0.0,h=0.0,bottom=0.000000,right=0.000000,chunks='testtest', ]", line.toString()); 70 | } 71 | 72 | } 73 | -------------------------------------------------------------------------------- /src/test/java/technology/tabula/TestProjectionProfile.java: -------------------------------------------------------------------------------- 1 | package technology.tabula; 2 | 3 | import static org.junit.Assert.*; 4 | 5 | import java.util.ArrayList; 6 | import java.util.List; 7 | 8 | import org.apache.pdfbox.pdmodel.PDDocument; 9 | import org.apache.pdfbox.pdmodel.PDPage; 10 | import org.apache.pdfbox.pdmodel.font.PDType1Font; 11 | import org.apache.pdfbox.pdmodel.font.Standard14Fonts; 12 | import org.junit.Before; 13 | import org.junit.Test; 14 | 15 | public class TestProjectionProfile { 16 | 17 | ProjectionProfile pProfile; 18 | Page page; 19 | 20 | @Before 21 | public void setUpProjectionProfile() { 22 | PDPage pdPage = new PDPage(); 23 | PDDocument pdDocument = new PDDocument(); 24 | 25 | PDType1Font font = new PDType1Font(Standard14Fonts.FontName.HELVETICA); 26 | TextElement textElement = new TextElement(5f, 15f, 10f, 20f, font, 1f, "test", 1f); 27 | TextElement textElement2 = new TextElement(5f, 15f, 10f, 20f, font, 1f, "test", 1f); 28 | List textList = new ArrayList<>(); 29 | textList.add(textElement); 30 | textList.add(textElement2); 31 | 32 | Ruling ruling = new Ruling(0, 0, 10, 10); 33 | List rulingList = new ArrayList<>(); 34 | rulingList.add(ruling); 35 | 36 | page = Page.Builder.newInstance() 37 | .withPageDims(PageDims.of(0, 0, 1, 1)) 38 | .withRotation(0) 39 | .withNumber(1) 40 | .withPdPage(pdPage) 41 | .withPdDocument(pdDocument) 42 | .withTextElements(textList) 43 | .withRulings(rulingList) 44 | .build(); 45 | 46 | List rectangles = new ArrayList<>(); 47 | rectangles.add(new Rectangle(0f, 0f, 500f, 5f)); 48 | 49 | pProfile = new ProjectionProfile(page, rectangles, 5, 5); 50 | } 51 | 52 | @Test 53 | public void 
testGetVerticalProjection() { 54 | float[] projection = pProfile.getVerticalProjection(); 55 | assertTrue(projection.length == 10); 56 | } 57 | 58 | @Test 59 | public void testGetHorizontalProjection() { 60 | float[] projection = pProfile.getHorizontalProjection(); 61 | assertTrue(projection.length == 10); 62 | } 63 | 64 | @Test 65 | public void testFindVerticalSeparators() { 66 | float[] seperators = pProfile.findVerticalSeparators(page.getText().size() * 2.5f); 67 | assertTrue(seperators.length == 0); 68 | } 69 | 70 | @Test 71 | public void testFindHorizontalSeparators() { 72 | float[] seperators = pProfile.findHorizontalSeparators(page.getText().size() * 2.5f); 73 | assertTrue(seperators.length == 0); 74 | } 75 | 76 | @Test 77 | public void testSmooth() { 78 | float[] data = {0, 1, 2}; 79 | float[] rv = ProjectionProfile.smooth(data, 3); 80 | 81 | assertEquals(1f, rv[2], 1e-5); 82 | } 83 | 84 | @Test 85 | public void testFilter() { 86 | float[] data = {0, 1, 2}; 87 | float[] rv = ProjectionProfile.filter(data, 3); 88 | 89 | assertEquals(3f, rv[1], 1e-5); 90 | } 91 | 92 | @Test 93 | public void testGetAutocorrelation() { 94 | float[] projection = {0, 1, 2}; 95 | float[] rv = ProjectionProfile.getAutocorrelation(projection); 96 | 97 | assertEquals(0f, rv[0], 1e-5); 98 | assertTrue(rv.length == 2); 99 | 100 | } 101 | 102 | @Test 103 | public void testGetFirstDeriv() { 104 | // float[] 105 | // float[] projection = pProfile.getFirstDeriv(new float[]{0.0, 0.0) 106 | // System.out.println(Arrays.toString(projection)); 107 | // assertEquals(10, projection[0], 1e-15); 108 | } 109 | 110 | } 111 | -------------------------------------------------------------------------------- /src/test/java/technology/tabula/TestRectangleSpatialIndex.java: -------------------------------------------------------------------------------- 1 | package technology.tabula; 2 | 3 | import static org.junit.Assert.*; 4 | 5 | import org.junit.Test; 6 | 7 | public class TestRectangleSpatialIndex { 8 
| 9 | @Test 10 | public void testIntersects() { 11 | 12 | Rectangle r = new Rectangle(0, 0, 0, 0); 13 | 14 | RectangleSpatialIndex rSpatialIndex = new RectangleSpatialIndex<>(); 15 | rSpatialIndex.add(r); 16 | 17 | assertTrue(rSpatialIndex.intersects(r).size() > 0); 18 | 19 | } 20 | 21 | } 22 | -------------------------------------------------------------------------------- /src/test/java/technology/tabula/TestRuling.java: -------------------------------------------------------------------------------- 1 | package technology.tabula; 2 | 3 | import static org.junit.Assert.*; 4 | 5 | import org.junit.Before; 6 | import org.junit.Test; 7 | 8 | public class TestRuling { 9 | 10 | Ruling ruling; 11 | 12 | @Before 13 | public void setUpRuling() { 14 | ruling = new Ruling(0, 0, 10, 10); 15 | } 16 | 17 | @Test 18 | public void testGetWidth() { 19 | assertEquals(10f, ruling.getWidth(), 1e-5); 20 | } 21 | 22 | @Test 23 | public void testGetHeight() { 24 | assertEquals(10f, ruling.getHeight(), 1e-5); 25 | } 26 | 27 | @Test 28 | public void testToString() { 29 | assertEquals("class technology.tabula.Ruling[x1=0.000000 y1=0.000000 x2=10.000000 y2=10.000000]",ruling.toString()); 30 | } 31 | 32 | @Test 33 | public void testEqualsOther() { 34 | Ruling other = new Ruling(0, 0, 11, 10); 35 | assertTrue(ruling.equals(ruling)); 36 | } 37 | 38 | @Test 39 | public void testEqualsDifferentInstance() { 40 | assertFalse(ruling.equals("test")); 41 | } 42 | 43 | @Test 44 | public void testNearlyIntersects(){ 45 | Ruling another = new Ruling(0, 0, 11, 10); 46 | 47 | assertTrue(ruling.nearlyIntersects(another)); 48 | } 49 | 50 | @Test(expected = UnsupportedOperationException.class) 51 | public void testGetPositionError(){ 52 | Ruling other = new Ruling(0, 0, 1, 1); 53 | other.getPosition(); 54 | fail(); 55 | } 56 | 57 | @Test(expected = UnsupportedOperationException.class) 58 | public void testSetPositionError(){ 59 | Ruling other = new Ruling(0, 0, 1, 1); 60 | other.setPosition(5f); 61 | 
fail(); 62 | } 63 | 64 | @Test(expected = UnsupportedOperationException.class) 65 | public void testsetPosition(){ 66 | ruling.setPosition(0); 67 | } 68 | 69 | @Test(expected = UnsupportedOperationException.class) 70 | public void testGetStartError(){ 71 | Ruling other = new Ruling(0, 0, 1, 1); 72 | other.getStart(); 73 | fail(); 74 | } 75 | 76 | @Test(expected = UnsupportedOperationException.class) 77 | public void testGetEndError(){ 78 | Ruling other = new Ruling(0, 0, 1, 1); 79 | other.getEnd(); 80 | fail(); 81 | } 82 | 83 | @Test(expected = UnsupportedOperationException.class) 84 | public void testSetEndError(){ 85 | Ruling other = new Ruling(0, 0, 1, 1); 86 | other.setEnd(5f); 87 | fail(); 88 | } 89 | 90 | 91 | @Test 92 | public void testColinear(){ 93 | // Ruling another = new Ruling(0, 0, 500, 5); 94 | java.awt.geom.Point2D.Float float1 = new java.awt.geom.Point2D.Float(20, 20); 95 | java.awt.geom.Point2D.Float float2 = new java.awt.geom.Point2D.Float(0, 0); 96 | java.awt.geom.Point2D.Float float3 = new java.awt.geom.Point2D.Float(20, 0); 97 | java.awt.geom.Point2D.Float float4 = new java.awt.geom.Point2D.Float(0, 20); 98 | 99 | assertFalse(ruling.colinear(float1)); 100 | assertTrue(ruling.colinear(float2)); 101 | assertFalse(ruling.colinear(float3)); 102 | assertFalse(ruling.colinear(float4)); 103 | 104 | 105 | } 106 | 107 | } 108 | -------------------------------------------------------------------------------- /src/test/java/technology/tabula/TestUtils.java: -------------------------------------------------------------------------------- 1 | package technology.tabula; 2 | 3 | import static org.junit.Assert.assertArrayEquals; 4 | import static org.junit.Assert.assertEquals; 5 | import static org.junit.Assert.assertNull; 6 | 7 | import java.awt.geom.Point2D; 8 | import java.io.File; 9 | import java.io.IOException; 10 | import java.util.ArrayList; 11 | import java.util.Arrays; 12 | import java.util.Collections; 13 | import java.util.List; 14 | 15 | import 
org.apache.pdfbox.Loader; 16 | import org.apache.pdfbox.rendering.ImageType; 17 | import org.apache.commons.cli.ParseException; 18 | import org.apache.pdfbox.pdmodel.PDDocument; 19 | import org.apache.pdfbox.pdmodel.PDPage; 20 | import org.junit.Test; 21 | 22 | public class TestUtils { 23 | 24 | public static final Ruling[] RULINGS = { 25 | new Ruling(new Point2D.Float(0, 0), new Point2D.Float(1,1)), 26 | new Ruling(new Point2D.Float(2, 2), new Point2D.Float(3,3)) 27 | }; 28 | 29 | public static final Rectangle[] RECTANGLES = { 30 | new Rectangle(), 31 | new Rectangle(0, 0, 2, 4) 32 | }; 33 | 34 | 35 | @Test 36 | public void testBoundsOfTwoRulings() { 37 | Rectangle r = Utils.bounds(Arrays.asList(RULINGS)); 38 | assertEquals(0, r.getMinX(), 0); 39 | assertEquals(0, r.getMinY(), 0); 40 | assertEquals(3, r.getWidth(), 0); 41 | assertEquals(3, r.getHeight(), 0); 42 | } 43 | 44 | @Test 45 | public void testBoundsOfOneEmptyRectangleAndAnotherNonEmpty() { 46 | Rectangle r = Utils.bounds(Arrays.asList(RECTANGLES)); 47 | assertEquals(r, RECTANGLES[1]); 48 | } 49 | 50 | @Test 51 | public void testBoundsOfOneRectangle() { 52 | ArrayList shapes = new ArrayList<>(); 53 | shapes.add(new Rectangle(0, 0, 20, 40)); 54 | Rectangle r = Utils.bounds(shapes); 55 | assertEquals(r, shapes.get(0)); 56 | } 57 | 58 | @Test 59 | public void testParsePagesOption() throws ParseException { 60 | 61 | List rv = Utils.parsePagesOption("1"); 62 | assertArrayEquals(new Integer[] { 1 }, rv.toArray()); 63 | 64 | rv = Utils.parsePagesOption("1-4"); 65 | assertArrayEquals(new Integer[] { 1,2,3,4 }, rv.toArray()); 66 | 67 | rv = Utils.parsePagesOption("1-4,20-24"); 68 | assertArrayEquals(new Integer[] { 1,2,3,4,20,21,22,23,24 }, rv.toArray()); 69 | 70 | rv = Utils.parsePagesOption("all"); 71 | assertNull(rv); 72 | } 73 | 74 | @Test(expected=ParseException.class) 75 | public void testExceptionInParsePages() throws ParseException { 76 | Utils.parsePagesOption("1-4,24-22"); 77 | } 78 | 79 | 
@Test(expected=ParseException.class) 80 | public void testAnotherExceptionInParsePages() throws ParseException { 81 | Utils.parsePagesOption("quuxor"); 82 | } 83 | 84 | @Test 85 | public void testQuickSortEmptyList() { 86 | List numbers = new ArrayList<>(); 87 | QuickSort.sort(numbers); 88 | 89 | assertEquals(Collections.emptyList(), numbers); 90 | } 91 | 92 | @Test 93 | public void testQuickSortOneElementList() { 94 | List numbers = Arrays.asList(5); 95 | QuickSort.sort(numbers); 96 | 97 | assertEquals(Arrays.asList(5), numbers); 98 | } 99 | 100 | @Test 101 | public void testQuickSortShortList() { 102 | List numbers = Arrays.asList(4, 5, 6, 8, 7, 1, 2, 3); 103 | QuickSort.sort(numbers); 104 | 105 | assertEquals(Arrays.asList(1, 2, 3, 4, 5, 6, 7, 8), numbers); 106 | } 107 | 108 | @Test 109 | public void testQuickSortLongList() { 110 | 111 | List numbers = new ArrayList<>(); 112 | List expectedNumbers = new ArrayList<>(); 113 | 114 | for(int i = 0; i <= 12000; i++){ 115 | numbers.add(12000 - i); 116 | expectedNumbers.add(i); 117 | } 118 | 119 | QuickSort.sort(numbers); 120 | 121 | assertEquals(expectedNumbers, numbers); 122 | } 123 | 124 | @Test 125 | public void testJPEG2000DoesNotRaise() throws IOException { 126 | PDDocument pdf_document = Loader.loadPDF(new File("src/test/resources/technology/tabula/jpeg2000.pdf")); 127 | PDPage page = pdf_document.getPage(0); 128 | Utils.pageConvertToImage(pdf_document, page, 360, ImageType.RGB); 129 | } 130 | 131 | } 132 | -------------------------------------------------------------------------------- /src/test/java/technology/tabula/UtilsForTesting.java: -------------------------------------------------------------------------------- 1 | package technology.tabula; 2 | 3 | import java.io.*; 4 | import java.nio.charset.Charset; 5 | import java.util.List; 6 | 7 | import org.apache.commons.csv.CSVFormat; 8 | import org.apache.commons.csv.CSVParser; 9 | import org.apache.commons.csv.CSVPrinter; 10 | import 
org.apache.pdfbox.Loader; 11 | import org.apache.pdfbox.pdmodel.PDDocument; 12 | import org.junit.Assert; 13 | 14 | public class UtilsForTesting { 15 | 16 | public static Page getAreaFromFirstPage(String path, float top, float left, float bottom, float right) throws IOException { 17 | return getAreaFromPage(path, 1, top, left, bottom, right); 18 | } 19 | 20 | public static Page getAreaFromPage(String path, int page, float top, float left, float bottom, float right) throws IOException { 21 | return getPage(path, page).getArea(top, left, bottom, right); 22 | } 23 | 24 | public static Page getPage(String path, int pageNumber) throws IOException { 25 | ObjectExtractor oe = null; 26 | try { 27 | PDDocument document = Loader.loadPDF(new File(path)); 28 | oe = new ObjectExtractor(document); 29 | return oe.extract(pageNumber); 30 | } finally { 31 | if (oe != null) 32 | oe.close(); 33 | } 34 | } 35 | 36 | public static String[][] tableToArrayOfRows(Table table) { 37 | List> tableRows = table.getRows(); 38 | 39 | int maxColCount = 0; 40 | 41 | for (int i = 0; i < tableRows.size(); i++) { 42 | List row = tableRows.get(i); 43 | if (maxColCount < row.size()) { 44 | maxColCount = row.size(); 45 | } 46 | } 47 | 48 | Assert.assertEquals(maxColCount, table.getColCount()); 49 | 50 | String[][] rv = new String[tableRows.size()][maxColCount]; 51 | 52 | for (int i = 0; i < tableRows.size(); i++) { 53 | List row = tableRows.get(i); 54 | for (int j = 0; j < row.size(); j++) { 55 | rv[i][j] = table.getCell(i, j).getText(); 56 | } 57 | } 58 | 59 | return rv; 60 | } 61 | 62 | public static String loadJson(String path) throws IOException { 63 | 64 | StringBuilder stringBuilder = new StringBuilder(); 65 | try (BufferedReader reader = new BufferedReader(new InputStreamReader(new FileInputStream(path), "UTF-8"))) { 66 | String line = null; 67 | while ((line = reader.readLine()) != null) { 68 | stringBuilder.append(line); 69 | } 70 | } 71 | 72 | return stringBuilder.toString(); 73 | 74 | } 75 | 
76 | public static String loadCsv(String path) throws IOException { 77 | 78 | StringBuilder out = new StringBuilder(); 79 | CSVParser parse = org.apache.commons.csv.CSVParser.parse(new File(path), Charset.forName("utf-8"), CSVFormat.EXCEL); 80 | 81 | CSVPrinter printer = new CSVPrinter(out, CSVFormat.EXCEL); 82 | printer.printRecords(parse); 83 | printer.close(); 84 | 85 | String csv = out.toString().replaceAll("(? 2 | 4 |
5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 | 53 | 54 | 55 | 56 | 57 | 58 | 59 | 60 | 61 | 62 | 63 | 64 | 65 | 66 | 67 | 68 | 69 | 70 | 71 | 72 | 73 | 74 | 75 | 76 | 77 | 78 | 79 | 80 | 81 | 82 | 83 | 84 | 85 | 86 | 87 | 88 | 89 | 90 | 91 | 92 | 93 | 94 | 95 | 96 | 97 | 98 | 99 | 100 |
101 | 102 | -------------------------------------------------------------------------------- /src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-eu/eu-002.json: -------------------------------------------------------------------------------- 1 | {"numExpectedTables":1,"numCorrectlyDetectedTables":1,"numErroneouslyDetectedTables":0,"expectedFailure":false} -------------------------------------------------------------------------------- /src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-eu/eu-002.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tabulapdf/tabula-java/2cdf3b4fd3f7e921dca8cc6814cdd9316be40f0f/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-eu/eu-002.pdf -------------------------------------------------------------------------------- /src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-eu/eu-003.json: -------------------------------------------------------------------------------- 1 | {"numExpectedTables":3,"numCorrectlyDetectedTables":3,"numErroneouslyDetectedTables":0,"expectedFailure":false} -------------------------------------------------------------------------------- /src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-eu/eu-003.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tabulapdf/tabula-java/2cdf3b4fd3f7e921dca8cc6814cdd9316be40f0f/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-eu/eu-003.pdf -------------------------------------------------------------------------------- /src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-eu/eu-004.json: -------------------------------------------------------------------------------- 1 | 
{"numExpectedTables":12,"numCorrectlyDetectedTables":12,"numErroneouslyDetectedTables":0,"expectedFailure":false} -------------------------------------------------------------------------------- /src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-eu/eu-004.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tabulapdf/tabula-java/2cdf3b4fd3f7e921dca8cc6814cdd9316be40f0f/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-eu/eu-004.pdf -------------------------------------------------------------------------------- /src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-eu/eu-005.json: -------------------------------------------------------------------------------- 1 | {"numExpectedTables":2,"numCorrectlyDetectedTables":2,"numErroneouslyDetectedTables":0,"expectedFailure":false} -------------------------------------------------------------------------------- /src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-eu/eu-005.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tabulapdf/tabula-java/2cdf3b4fd3f7e921dca8cc6814cdd9316be40f0f/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-eu/eu-005.pdf -------------------------------------------------------------------------------- /src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-eu/eu-006.json: -------------------------------------------------------------------------------- 1 | {"numExpectedTables":4,"numCorrectlyDetectedTables":4,"numErroneouslyDetectedTables":0,"expectedFailure":false} -------------------------------------------------------------------------------- /src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-eu/eu-006.pdf: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/tabulapdf/tabula-java/2cdf3b4fd3f7e921dca8cc6814cdd9316be40f0f/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-eu/eu-006.pdf -------------------------------------------------------------------------------- /src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-eu/eu-007.json: -------------------------------------------------------------------------------- 1 | {"numExpectedTables":6,"numCorrectlyDetectedTables":6,"numErroneouslyDetectedTables":0,"expectedFailure":false} -------------------------------------------------------------------------------- /src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-eu/eu-007.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tabulapdf/tabula-java/2cdf3b4fd3f7e921dca8cc6814cdd9316be40f0f/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-eu/eu-007.pdf -------------------------------------------------------------------------------- /src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-eu/eu-008-reg.xml: -------------------------------------------------------------------------------- 1 | 2 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 | 53 | 54 | 55 | 56 | 57 | 58 | 59 | 60 | 61 | 62 | 63 | 64 | 65 | 66 | 67 | 68 | 69 | 70 | 71 | 72 | 73 | 74 | 75 | 76 | 77 | 78 | 79 | 80 | 81 | 82 | 83 | 84 | 85 | 86 | 87 | 88 | 89 | 90 | 91 | 92 | 93 |
94 |
95 | -------------------------------------------------------------------------------- /src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-eu/eu-008.json: -------------------------------------------------------------------------------- 1 | {"numExpectedTables":1,"numCorrectlyDetectedTables":1,"numErroneouslyDetectedTables":0,"expectedFailure":false} -------------------------------------------------------------------------------- /src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-eu/eu-008.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tabulapdf/tabula-java/2cdf3b4fd3f7e921dca8cc6814cdd9316be40f0f/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-eu/eu-008.pdf -------------------------------------------------------------------------------- /src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-eu/eu-009a.json: -------------------------------------------------------------------------------- 1 | {"numExpectedTables":1,"numCorrectlyDetectedTables":1,"numErroneouslyDetectedTables":0,"expectedFailure":false} -------------------------------------------------------------------------------- /src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-eu/eu-009a.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tabulapdf/tabula-java/2cdf3b4fd3f7e921dca8cc6814cdd9316be40f0f/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-eu/eu-009a.pdf -------------------------------------------------------------------------------- /src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-eu/eu-010-reg.xml: -------------------------------------------------------------------------------- 1 | 2 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 
27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 |
42 |
43 | -------------------------------------------------------------------------------- /src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-eu/eu-010.json: -------------------------------------------------------------------------------- 1 | {"numExpectedTables":1,"numCorrectlyDetectedTables":1,"numErroneouslyDetectedTables":0,"expectedFailure":false} -------------------------------------------------------------------------------- /src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-eu/eu-010.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tabulapdf/tabula-java/2cdf3b4fd3f7e921dca8cc6814cdd9316be40f0f/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-eu/eu-010.pdf -------------------------------------------------------------------------------- /src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-eu/eu-011-reg.xml: -------------------------------------------------------------------------------- 1 | 2 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 | 53 | 54 | 55 | 56 | 57 | 58 | 59 |
60 |
61 | -------------------------------------------------------------------------------- /src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-eu/eu-011.json: -------------------------------------------------------------------------------- 1 | {"numExpectedTables":1,"numCorrectlyDetectedTables":1,"numErroneouslyDetectedTables":3,"expectedFailure":true} -------------------------------------------------------------------------------- /src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-eu/eu-011.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tabulapdf/tabula-java/2cdf3b4fd3f7e921dca8cc6814cdd9316be40f0f/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-eu/eu-011.pdf -------------------------------------------------------------------------------- /src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-eu/eu-012.json: -------------------------------------------------------------------------------- 1 | {"numExpectedTables":5,"numCorrectlyDetectedTables":5,"numErroneouslyDetectedTables":1,"expectedFailure":true} -------------------------------------------------------------------------------- /src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-eu/eu-012.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tabulapdf/tabula-java/2cdf3b4fd3f7e921dca8cc6814cdd9316be40f0f/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-eu/eu-012.pdf -------------------------------------------------------------------------------- /src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-eu/eu-013.json: -------------------------------------------------------------------------------- 1 | {"numExpectedTables":4,"numCorrectlyDetectedTables":4,"numErroneouslyDetectedTables":1,"expectedFailure":true} 
-------------------------------------------------------------------------------- /src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-eu/eu-013.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tabulapdf/tabula-java/2cdf3b4fd3f7e921dca8cc6814cdd9316be40f0f/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-eu/eu-013.pdf -------------------------------------------------------------------------------- /src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-eu/eu-014-reg.xml: -------------------------------------------------------------------------------- 1 | 2 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 44 | 45 |
46 |
47 | -------------------------------------------------------------------------------- /src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-eu/eu-014.json: -------------------------------------------------------------------------------- 1 | {"numExpectedTables":1,"numCorrectlyDetectedTables":0,"numErroneouslyDetectedTables":1,"expectedFailure":true} -------------------------------------------------------------------------------- /src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-eu/eu-014.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tabulapdf/tabula-java/2cdf3b4fd3f7e921dca8cc6814cdd9316be40f0f/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-eu/eu-014.pdf -------------------------------------------------------------------------------- /src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-eu/eu-015.json: -------------------------------------------------------------------------------- 1 | {"numExpectedTables":5,"numCorrectlyDetectedTables":2,"numErroneouslyDetectedTables":2,"expectedFailure":true} -------------------------------------------------------------------------------- /src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-eu/eu-015.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tabulapdf/tabula-java/2cdf3b4fd3f7e921dca8cc6814cdd9316be40f0f/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-eu/eu-015.pdf -------------------------------------------------------------------------------- /src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-eu/eu-016.json: -------------------------------------------------------------------------------- 1 | {"numExpectedTables":1,"numCorrectlyDetectedTables":1,"numErroneouslyDetectedTables":0,"expectedFailure":false} 
-------------------------------------------------------------------------------- /src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-eu/eu-016.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tabulapdf/tabula-java/2cdf3b4fd3f7e921dca8cc6814cdd9316be40f0f/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-eu/eu-016.pdf -------------------------------------------------------------------------------- /src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-eu/eu-017.json: -------------------------------------------------------------------------------- 1 | {"numExpectedTables":1,"numCorrectlyDetectedTables":1,"numErroneouslyDetectedTables":1,"expectedFailure":true} -------------------------------------------------------------------------------- /src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-eu/eu-017.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tabulapdf/tabula-java/2cdf3b4fd3f7e921dca8cc6814cdd9316be40f0f/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-eu/eu-017.pdf -------------------------------------------------------------------------------- /src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-eu/eu-018.json: -------------------------------------------------------------------------------- 1 | {"numExpectedTables":2,"numCorrectlyDetectedTables":2,"numErroneouslyDetectedTables":0,"expectedFailure":false} -------------------------------------------------------------------------------- /src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-eu/eu-018.pdf: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/tabulapdf/tabula-java/2cdf3b4fd3f7e921dca8cc6814cdd9316be40f0f/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-eu/eu-018.pdf -------------------------------------------------------------------------------- /src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-eu/eu-019.json: -------------------------------------------------------------------------------- 1 | {"numExpectedTables":1,"numCorrectlyDetectedTables":1,"numErroneouslyDetectedTables":0,"expectedFailure":false} -------------------------------------------------------------------------------- /src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-eu/eu-019.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tabulapdf/tabula-java/2cdf3b4fd3f7e921dca8cc6814cdd9316be40f0f/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-eu/eu-019.pdf -------------------------------------------------------------------------------- /src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-eu/eu-020.json: -------------------------------------------------------------------------------- 1 | {"numExpectedTables":3,"numCorrectlyDetectedTables":3,"numErroneouslyDetectedTables":0,"expectedFailure":false} -------------------------------------------------------------------------------- /src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-eu/eu-020.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tabulapdf/tabula-java/2cdf3b4fd3f7e921dca8cc6814cdd9316be40f0f/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-eu/eu-020.pdf -------------------------------------------------------------------------------- /src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-eu/eu-021.json: 
-------------------------------------------------------------------------------- 1 | {"numExpectedTables":2,"numCorrectlyDetectedTables":2,"numErroneouslyDetectedTables":0,"expectedFailure":false} -------------------------------------------------------------------------------- /src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-eu/eu-021.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tabulapdf/tabula-java/2cdf3b4fd3f7e921dca8cc6814cdd9316be40f0f/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-eu/eu-021.pdf -------------------------------------------------------------------------------- /src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-eu/eu-022.json: -------------------------------------------------------------------------------- 1 | {"numExpectedTables":1,"numCorrectlyDetectedTables":1,"numErroneouslyDetectedTables":0,"expectedFailure":false} -------------------------------------------------------------------------------- /src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-eu/eu-022.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tabulapdf/tabula-java/2cdf3b4fd3f7e921dca8cc6814cdd9316be40f0f/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-eu/eu-022.pdf -------------------------------------------------------------------------------- /src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-eu/eu-023.json: -------------------------------------------------------------------------------- 1 | {"numExpectedTables":1,"numCorrectlyDetectedTables":1,"numErroneouslyDetectedTables":0,"expectedFailure":false} -------------------------------------------------------------------------------- /src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-eu/eu-023.pdf: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/tabulapdf/tabula-java/2cdf3b4fd3f7e921dca8cc6814cdd9316be40f0f/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-eu/eu-023.pdf -------------------------------------------------------------------------------- /src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-eu/eu-024.json: -------------------------------------------------------------------------------- 1 | {"numExpectedTables":1,"numCorrectlyDetectedTables":1,"numErroneouslyDetectedTables":0,"expectedFailure":false} -------------------------------------------------------------------------------- /src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-eu/eu-024.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tabulapdf/tabula-java/2cdf3b4fd3f7e921dca8cc6814cdd9316be40f0f/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-eu/eu-024.pdf -------------------------------------------------------------------------------- /src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-eu/eu-025.json: -------------------------------------------------------------------------------- 1 | {"numExpectedTables":5,"numCorrectlyDetectedTables":5,"numErroneouslyDetectedTables":0,"expectedFailure":false} -------------------------------------------------------------------------------- /src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-eu/eu-025.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tabulapdf/tabula-java/2cdf3b4fd3f7e921dca8cc6814cdd9316be40f0f/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-eu/eu-025.pdf -------------------------------------------------------------------------------- 
/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-eu/eu-026-reg.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 | 53 | 54 | 55 | 56 | 57 | 58 | 59 | 60 |
61 | 62 | 63 | 64 | 65 | 66 | 67 | 68 | 69 | 70 | 71 | 72 | 73 | 74 | 75 | 76 | 77 | 78 | 79 | 80 | 81 | 82 | 83 | 84 | 85 | 86 | 87 | 88 | 89 | 90 | 91 | 92 |
93 | 94 | 95 | 96 | 97 | 98 | 99 | 100 | 101 | 102 | 103 | 104 | 105 | 106 | 107 | 108 | 109 | 110 | 111 | 112 | 113 | 114 | 115 | 116 | 117 | 118 | 119 | 120 | 121 | 122 | 123 | 124 |
125 |
126 | -------------------------------------------------------------------------------- /src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-eu/eu-026.json: -------------------------------------------------------------------------------- 1 | {"numExpectedTables":3,"numCorrectlyDetectedTables":3,"numErroneouslyDetectedTables":0,"expectedFailure":false} -------------------------------------------------------------------------------- /src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-eu/eu-026.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tabulapdf/tabula-java/2cdf3b4fd3f7e921dca8cc6814cdd9316be40f0f/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-eu/eu-026.pdf -------------------------------------------------------------------------------- /src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-eu/eu-027.json: -------------------------------------------------------------------------------- 1 | {"numExpectedTables":1,"numCorrectlyDetectedTables":1,"numErroneouslyDetectedTables":0,"expectedFailure":false} -------------------------------------------------------------------------------- /src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-eu/eu-027.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tabulapdf/tabula-java/2cdf3b4fd3f7e921dca8cc6814cdd9316be40f0f/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-eu/eu-027.pdf -------------------------------------------------------------------------------- /src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-001.json: -------------------------------------------------------------------------------- 1 | {"numExpectedTables":2,"numCorrectlyDetectedTables":2,"numErroneouslyDetectedTables":2,"expectedFailure":true} 
-------------------------------------------------------------------------------- /src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-001.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tabulapdf/tabula-java/2cdf3b4fd3f7e921dca8cc6814cdd9316be40f0f/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-001.pdf -------------------------------------------------------------------------------- /src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-002.json: -------------------------------------------------------------------------------- 1 | {"numExpectedTables":2,"numCorrectlyDetectedTables":2,"numErroneouslyDetectedTables":0,"expectedFailure":false} -------------------------------------------------------------------------------- /src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-002.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tabulapdf/tabula-java/2cdf3b4fd3f7e921dca8cc6814cdd9316be40f0f/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-002.pdf -------------------------------------------------------------------------------- /src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-003-reg.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 | 53 | 54 | 55 | 56 | 57 | 58 | 59 | 60 | 61 | 62 | 63 | 64 | 65 | 66 | 67 | 68 | 69 | 70 | 71 | 72 | 73 | 74 | 75 |
76 |
77 | -------------------------------------------------------------------------------- /src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-003.json: -------------------------------------------------------------------------------- 1 | {"numExpectedTables":1,"numCorrectlyDetectedTables":1,"numErroneouslyDetectedTables":0,"expectedFailure":false} -------------------------------------------------------------------------------- /src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-003.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tabulapdf/tabula-java/2cdf3b4fd3f7e921dca8cc6814cdd9316be40f0f/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-003.pdf -------------------------------------------------------------------------------- /src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-004.json: -------------------------------------------------------------------------------- 1 | {"numExpectedTables":1,"numCorrectlyDetectedTables":1,"numErroneouslyDetectedTables":0,"expectedFailure":false} -------------------------------------------------------------------------------- /src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-004.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tabulapdf/tabula-java/2cdf3b4fd3f7e921dca8cc6814cdd9316be40f0f/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-004.pdf -------------------------------------------------------------------------------- /src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-005-reg.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 |
24 |
25 | -------------------------------------------------------------------------------- /src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-005-str.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | Income level of individual or geography 8 | 9 | 10 | 11 | 12 | 13 | 14 | % of the area median income 15 | 16 | 17 | 18 | 19 | 20 | Low-income 21 | 22 | 23 | 24 | 25 | 26 | Less than 50 27 | 28 | 29 | 30 | 31 | Moderate-income 32 | 33 | 34 | 35 | 36 | At least 50 and less than 80 37 | 38 | 39 | 40 | 41 | Middle-income 42 | 43 | 44 | 45 | 46 | 47 | 48 | At least 80 and less than 120 49 | 50 | 51 | 52 | 53 | Upper-income 54 | 55 | 56 | 57 | 58 | 120 or more 59 | 60 | 61 | 62 |
63 |
64 | -------------------------------------------------------------------------------- /src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-005.json: -------------------------------------------------------------------------------- 1 | {"numExpectedTables":1,"numCorrectlyDetectedTables":1,"numErroneouslyDetectedTables":0,"expectedFailure":false} -------------------------------------------------------------------------------- /src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-005.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tabulapdf/tabula-java/2cdf3b4fd3f7e921dca8cc6814cdd9316be40f0f/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-005.pdf -------------------------------------------------------------------------------- /src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-006-reg.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 | 53 | 54 | 55 | 56 | 57 | 58 | 59 | 60 | 61 | 62 | 63 | 64 | 65 | 66 | 67 | 68 | 69 | 70 |
71 |
72 | -------------------------------------------------------------------------------- /src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-006.json: -------------------------------------------------------------------------------- 1 | {"numExpectedTables":1,"numCorrectlyDetectedTables":1,"numErroneouslyDetectedTables":0,"expectedFailure":false} -------------------------------------------------------------------------------- /src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-006.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tabulapdf/tabula-java/2cdf3b4fd3f7e921dca8cc6814cdd9316be40f0f/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-006.pdf -------------------------------------------------------------------------------- /src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-007.json: -------------------------------------------------------------------------------- 1 | {"numExpectedTables":2,"numCorrectlyDetectedTables":2,"numErroneouslyDetectedTables":0,"expectedFailure":false} -------------------------------------------------------------------------------- /src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-007.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tabulapdf/tabula-java/2cdf3b4fd3f7e921dca8cc6814cdd9316be40f0f/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-007.pdf -------------------------------------------------------------------------------- /src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-008.json: -------------------------------------------------------------------------------- 1 | {"numExpectedTables":2,"numCorrectlyDetectedTables":2,"numErroneouslyDetectedTables":0,"expectedFailure":false} 
-------------------------------------------------------------------------------- /src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-008.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tabulapdf/tabula-java/2cdf3b4fd3f7e921dca8cc6814cdd9316be40f0f/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-008.pdf -------------------------------------------------------------------------------- /src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-009.json: -------------------------------------------------------------------------------- 1 | {"numExpectedTables":1,"numCorrectlyDetectedTables":1,"numErroneouslyDetectedTables":0,"expectedFailure":false} -------------------------------------------------------------------------------- /src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-009.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tabulapdf/tabula-java/2cdf3b4fd3f7e921dca8cc6814cdd9316be40f0f/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-009.pdf -------------------------------------------------------------------------------- /src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-010-reg.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | 48 | 49 |
50 |
51 | -------------------------------------------------------------------------------- /src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-010.json: -------------------------------------------------------------------------------- 1 | {"numExpectedTables":1,"numCorrectlyDetectedTables":1,"numErroneouslyDetectedTables":0,"expectedFailure":false} -------------------------------------------------------------------------------- /src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-010.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tabulapdf/tabula-java/2cdf3b4fd3f7e921dca8cc6814cdd9316be40f0f/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-010.pdf -------------------------------------------------------------------------------- /src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-011a-reg.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 44 | 45 |
46 | 47 | 48 | 49 | 50 | 51 | 52 | 53 | 54 | 55 | 56 | 57 | 58 | 59 | 60 | 61 | 62 | 63 | 64 | 65 | 66 | 67 | 68 | 69 |
70 |
71 | -------------------------------------------------------------------------------- /src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-011a.json: -------------------------------------------------------------------------------- 1 | {"numExpectedTables":2,"numCorrectlyDetectedTables":2,"numErroneouslyDetectedTables":0,"expectedFailure":false} -------------------------------------------------------------------------------- /src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-011a.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tabulapdf/tabula-java/2cdf3b4fd3f7e921dca8cc6814cdd9316be40f0f/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-011a.pdf -------------------------------------------------------------------------------- /src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-011b-reg.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 |
41 | 42 | 43 | 44 | 45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 | 53 | 54 | 55 | 56 | 57 | 58 | 59 | 60 | 61 | 62 | 63 | 64 |
65 |
66 | -------------------------------------------------------------------------------- /src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-012.json: -------------------------------------------------------------------------------- 1 | {"numExpectedTables":1,"numCorrectlyDetectedTables":1,"numErroneouslyDetectedTables":0,"expectedFailure":false} -------------------------------------------------------------------------------- /src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-012.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tabulapdf/tabula-java/2cdf3b4fd3f7e921dca8cc6814cdd9316be40f0f/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-012.pdf -------------------------------------------------------------------------------- /src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-013.json: -------------------------------------------------------------------------------- 1 | {"numExpectedTables":1,"numCorrectlyDetectedTables":1,"numErroneouslyDetectedTables":0,"expectedFailure":false} -------------------------------------------------------------------------------- /src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-013.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tabulapdf/tabula-java/2cdf3b4fd3f7e921dca8cc6814cdd9316be40f0f/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-013.pdf -------------------------------------------------------------------------------- /src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-014.json: -------------------------------------------------------------------------------- 1 | {"numExpectedTables":2,"numCorrectlyDetectedTables":2,"numErroneouslyDetectedTables":0,"expectedFailure":false} 
-------------------------------------------------------------------------------- /src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-014.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tabulapdf/tabula-java/2cdf3b4fd3f7e921dca8cc6814cdd9316be40f0f/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-014.pdf -------------------------------------------------------------------------------- /src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-015.json: -------------------------------------------------------------------------------- 1 | {"numExpectedTables":2,"numCorrectlyDetectedTables":2,"numErroneouslyDetectedTables":0,"expectedFailure":false} -------------------------------------------------------------------------------- /src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-015.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tabulapdf/tabula-java/2cdf3b4fd3f7e921dca8cc6814cdd9316be40f0f/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-015.pdf -------------------------------------------------------------------------------- /src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-016.json: -------------------------------------------------------------------------------- 1 | {"numExpectedTables":1,"numCorrectlyDetectedTables":1,"numErroneouslyDetectedTables":0,"expectedFailure":false} -------------------------------------------------------------------------------- /src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-016.pdf: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/tabulapdf/tabula-java/2cdf3b4fd3f7e921dca8cc6814cdd9316be40f0f/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-016.pdf -------------------------------------------------------------------------------- /src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-017.json: -------------------------------------------------------------------------------- 1 | {"numExpectedTables":6,"numCorrectlyDetectedTables":6,"numErroneouslyDetectedTables":0,"expectedFailure":false} -------------------------------------------------------------------------------- /src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-017.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tabulapdf/tabula-java/2cdf3b4fd3f7e921dca8cc6814cdd9316be40f0f/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-017.pdf -------------------------------------------------------------------------------- /src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-018.json: -------------------------------------------------------------------------------- 1 | {"numExpectedTables":7,"numCorrectlyDetectedTables":6,"numErroneouslyDetectedTables":1,"expectedFailure":true} -------------------------------------------------------------------------------- /src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-018.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tabulapdf/tabula-java/2cdf3b4fd3f7e921dca8cc6814cdd9316be40f0f/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-018.pdf -------------------------------------------------------------------------------- /src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-019.json: 
-------------------------------------------------------------------------------- 1 | {"numExpectedTables":4,"numCorrectlyDetectedTables":2,"numErroneouslyDetectedTables":2,"expectedFailure":true} -------------------------------------------------------------------------------- /src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-019.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tabulapdf/tabula-java/2cdf3b4fd3f7e921dca8cc6814cdd9316be40f0f/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-019.pdf -------------------------------------------------------------------------------- /src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-020.json: -------------------------------------------------------------------------------- 1 | {"numExpectedTables":4,"numCorrectlyDetectedTables":4,"numErroneouslyDetectedTables":0,"expectedFailure":false} -------------------------------------------------------------------------------- /src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-020.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tabulapdf/tabula-java/2cdf3b4fd3f7e921dca8cc6814cdd9316be40f0f/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-020.pdf -------------------------------------------------------------------------------- /src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-021.json: -------------------------------------------------------------------------------- 1 | {"numExpectedTables":2,"numCorrectlyDetectedTables":2,"numErroneouslyDetectedTables":0,"expectedFailure":false} -------------------------------------------------------------------------------- /src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-021.pdf: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/tabulapdf/tabula-java/2cdf3b4fd3f7e921dca8cc6814cdd9316be40f0f/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-021.pdf -------------------------------------------------------------------------------- /src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-022.json: -------------------------------------------------------------------------------- 1 | {"numExpectedTables":1,"numCorrectlyDetectedTables":1,"numErroneouslyDetectedTables":0,"expectedFailure":false} -------------------------------------------------------------------------------- /src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-022.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tabulapdf/tabula-java/2cdf3b4fd3f7e921dca8cc6814cdd9316be40f0f/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-022.pdf -------------------------------------------------------------------------------- /src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-023.json: -------------------------------------------------------------------------------- 1 | {"numExpectedTables":1,"numCorrectlyDetectedTables":1,"numErroneouslyDetectedTables":4,"expectedFailure":true} -------------------------------------------------------------------------------- /src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-023.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tabulapdf/tabula-java/2cdf3b4fd3f7e921dca8cc6814cdd9316be40f0f/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-023.pdf -------------------------------------------------------------------------------- 
/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-024.json: -------------------------------------------------------------------------------- 1 | {"numExpectedTables":4,"numCorrectlyDetectedTables":4,"numErroneouslyDetectedTables":2,"expectedFailure":true} -------------------------------------------------------------------------------- /src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-024.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tabulapdf/tabula-java/2cdf3b4fd3f7e921dca8cc6814cdd9316be40f0f/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-024.pdf -------------------------------------------------------------------------------- /src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-025.json: -------------------------------------------------------------------------------- 1 | {"numExpectedTables":6,"numCorrectlyDetectedTables":2,"numErroneouslyDetectedTables":3,"expectedFailure":true} -------------------------------------------------------------------------------- /src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-025.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tabulapdf/tabula-java/2cdf3b4fd3f7e921dca8cc6814cdd9316be40f0f/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-025.pdf -------------------------------------------------------------------------------- /src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-026.json: -------------------------------------------------------------------------------- 1 | {"numExpectedTables":1,"numCorrectlyDetectedTables":1,"numErroneouslyDetectedTables":0,"expectedFailure":false} -------------------------------------------------------------------------------- 
/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-026.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tabulapdf/tabula-java/2cdf3b4fd3f7e921dca8cc6814cdd9316be40f0f/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-026.pdf -------------------------------------------------------------------------------- /src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-027.json: -------------------------------------------------------------------------------- 1 | {"numExpectedTables":2,"numCorrectlyDetectedTables":2,"numErroneouslyDetectedTables":0,"expectedFailure":false} -------------------------------------------------------------------------------- /src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-027.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tabulapdf/tabula-java/2cdf3b4fd3f7e921dca8cc6814cdd9316be40f0f/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-027.pdf -------------------------------------------------------------------------------- /src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-028.json: -------------------------------------------------------------------------------- 1 | {"numExpectedTables":2,"numCorrectlyDetectedTables":2,"numErroneouslyDetectedTables":2,"expectedFailure":true} -------------------------------------------------------------------------------- /src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-028.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tabulapdf/tabula-java/2cdf3b4fd3f7e921dca8cc6814cdd9316be40f0f/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-028.pdf 
-------------------------------------------------------------------------------- /src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-029.json: -------------------------------------------------------------------------------- 1 | {"numExpectedTables":1,"numCorrectlyDetectedTables":1,"numErroneouslyDetectedTables":0,"expectedFailure":false} -------------------------------------------------------------------------------- /src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-029.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tabulapdf/tabula-java/2cdf3b4fd3f7e921dca8cc6814cdd9316be40f0f/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-029.pdf -------------------------------------------------------------------------------- /src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-030.json: -------------------------------------------------------------------------------- 1 | {"numExpectedTables":1,"numCorrectlyDetectedTables":1,"numErroneouslyDetectedTables":0,"expectedFailure":false} -------------------------------------------------------------------------------- /src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-030.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tabulapdf/tabula-java/2cdf3b4fd3f7e921dca8cc6814cdd9316be40f0f/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-030.pdf -------------------------------------------------------------------------------- /src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-031a.json: -------------------------------------------------------------------------------- 1 | {"numExpectedTables":1,"numCorrectlyDetectedTables":1,"numErroneouslyDetectedTables":0,"expectedFailure":false} 
-------------------------------------------------------------------------------- /src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-031a.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tabulapdf/tabula-java/2cdf3b4fd3f7e921dca8cc6814cdd9316be40f0f/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-031a.pdf -------------------------------------------------------------------------------- /src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-032.json: -------------------------------------------------------------------------------- 1 | {"numExpectedTables":1,"numCorrectlyDetectedTables":1,"numErroneouslyDetectedTables":0,"expectedFailure":false} -------------------------------------------------------------------------------- /src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-032.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tabulapdf/tabula-java/2cdf3b4fd3f7e921dca8cc6814cdd9316be40f0f/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-032.pdf -------------------------------------------------------------------------------- /src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-033.json: -------------------------------------------------------------------------------- 1 | {"numExpectedTables":3,"numCorrectlyDetectedTables":1,"numErroneouslyDetectedTables":2,"expectedFailure":true} -------------------------------------------------------------------------------- /src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-033.pdf: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/tabulapdf/tabula-java/2cdf3b4fd3f7e921dca8cc6814cdd9316be40f0f/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-033.pdf -------------------------------------------------------------------------------- /src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-034-reg.xml: -------------------------------------------------------------------------------- 1 | 2 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 |
29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 | 53 |
54 |
55 | -------------------------------------------------------------------------------- /src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-034.json: -------------------------------------------------------------------------------- 1 | {"numExpectedTables":2,"numCorrectlyDetectedTables":0,"numErroneouslyDetectedTables":1,"expectedFailure":true} -------------------------------------------------------------------------------- /src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-034.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tabulapdf/tabula-java/2cdf3b4fd3f7e921dca8cc6814cdd9316be40f0f/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-034.pdf -------------------------------------------------------------------------------- /src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-035a.json: -------------------------------------------------------------------------------- 1 | {"numExpectedTables":3,"numCorrectlyDetectedTables":3,"numErroneouslyDetectedTables":0,"expectedFailure":false} -------------------------------------------------------------------------------- /src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-035a.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tabulapdf/tabula-java/2cdf3b4fd3f7e921dca8cc6814cdd9316be40f0f/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-035a.pdf -------------------------------------------------------------------------------- /src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-036.json: -------------------------------------------------------------------------------- 1 | {"numExpectedTables":1,"numCorrectlyDetectedTables":1,"numErroneouslyDetectedTables":0,"expectedFailure":false} 
-------------------------------------------------------------------------------- /src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-036.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tabulapdf/tabula-java/2cdf3b4fd3f7e921dca8cc6814cdd9316be40f0f/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-036.pdf -------------------------------------------------------------------------------- /src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-037.json: -------------------------------------------------------------------------------- 1 | {"numExpectedTables":1,"numCorrectlyDetectedTables":0,"numErroneouslyDetectedTables":3,"expectedFailure":true} -------------------------------------------------------------------------------- /src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-037.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tabulapdf/tabula-java/2cdf3b4fd3f7e921dca8cc6814cdd9316be40f0f/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-037.pdf -------------------------------------------------------------------------------- /src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-038-reg.xml: -------------------------------------------------------------------------------- 1 | 2 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 |
26 |
27 | -------------------------------------------------------------------------------- /src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-038-str.xml: -------------------------------------------------------------------------------- 1 | 2 | 4 | 5 | 6 | 7 | 8 | Species 9 | 10 | 11 | 12 | 13 | Percent of Range 14 | Impacted 15 | 16 | 17 | 18 | 19 | 20 | Kingfisher 21 | 22 | 23 | 24 | 25 | 29% 26 | 27 | 28 | 29 | 30 | Bald Eagle 31 | 32 | 33 | 34 | 35 | 34% 36 | 37 | 38 | 39 | 40 | Osprey 41 | 42 | 43 | 44 | 45 | 20% 46 | 47 | 48 | 49 | 50 | Common Loon 51 | 52 | 53 | 54 | 55 | 40% 56 | 57 | 58 | 59 | 60 | Florida Panther 61 | 62 | 63 | 64 | 65 | 100% 66 | 67 | 68 | 69 | 70 | Mink 71 | 72 | 73 | 74 | 75 | 35% 76 | 77 | 78 | 79 | 80 | River Otter 81 | 82 | 83 | 84 | 85 | 38% 86 | 87 | 88 | 89 |
90 |
91 | -------------------------------------------------------------------------------- /src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-038.json: -------------------------------------------------------------------------------- 1 | {"numExpectedTables":1,"numCorrectlyDetectedTables":1,"numErroneouslyDetectedTables":0,"expectedFailure":false} -------------------------------------------------------------------------------- /src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-038.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tabulapdf/tabula-java/2cdf3b4fd3f7e921dca8cc6814cdd9316be40f0f/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-038.pdf -------------------------------------------------------------------------------- /src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-039-reg.xml: -------------------------------------------------------------------------------- 1 | 2 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 |
23 |
24 | -------------------------------------------------------------------------------- /src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-039-str.xml: -------------------------------------------------------------------------------- 1 | 2 | 4 | 5 | 6 | 7 | 8 | Organism 9 | 10 | 11 | 12 | 13 | Wildlife Criterion (pg/L) 14 | 15 | 16 | 17 | 18 | Mink 19 | 20 | 21 | 22 | 23 | 57 24 | 25 | 26 | 27 | 28 | River otter 29 | 30 | 31 | 32 | 33 | 42 34 | 35 | 36 | 37 | 38 | Kingfisher 39 | 40 | 41 | 42 | 43 | 33 44 | 45 | 46 | 47 | 48 | Loon 49 | 50 | 51 | 52 | 53 | 82 54 | 55 | 56 | 57 | 58 | Osprey 59 | 60 | 61 | 62 | 63 | 82 64 | 65 | 66 | 67 | 68 | Bald eagle 69 | 70 | 71 | 72 | 73 | 100 74 | 75 | 76 | 77 |
78 |
79 | -------------------------------------------------------------------------------- /src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-039.json: -------------------------------------------------------------------------------- 1 | {"numExpectedTables":1,"numCorrectlyDetectedTables":1,"numErroneouslyDetectedTables":0,"expectedFailure":false} -------------------------------------------------------------------------------- /src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-039.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tabulapdf/tabula-java/2cdf3b4fd3f7e921dca8cc6814cdd9316be40f0f/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-039.pdf -------------------------------------------------------------------------------- /src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-040-reg.xml: -------------------------------------------------------------------------------- 1 | 2 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | 48 |
49 |
50 | -------------------------------------------------------------------------------- /src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-040.json: -------------------------------------------------------------------------------- 1 | {"numExpectedTables":1,"numCorrectlyDetectedTables":1,"numErroneouslyDetectedTables":0,"expectedFailure":false} -------------------------------------------------------------------------------- /src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-040.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tabulapdf/tabula-java/2cdf3b4fd3f7e921dca8cc6814cdd9316be40f0f/src/test/resources/technology/tabula/icdar2013-dataset/competition-dataset-us/us-040.pdf -------------------------------------------------------------------------------- /src/test/resources/technology/tabula/indictb1h_14.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tabulapdf/tabula-java/2cdf3b4fd3f7e921dca8cc6814cdd9316be40f0f/src/test/resources/technology/tabula/indictb1h_14.pdf -------------------------------------------------------------------------------- /src/test/resources/technology/tabula/jpeg2000.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tabulapdf/tabula-java/2cdf3b4fd3f7e921dca8cc6814cdd9316be40f0f/src/test/resources/technology/tabula/jpeg2000.pdf -------------------------------------------------------------------------------- /src/test/resources/technology/tabula/json/AnimalSounds1.json: -------------------------------------------------------------------------------- 1 | 
[{"extraction_method":"lattice","page_number":1,"top":0.006499578,"left":56.8,"width":241.1999969482422,"height":315.36407470703125,"right":298.0,"bottom":315.37057,"data":[[{"top":0.006499578,"left":56.8,"width":79.19999694824219,"height":95.31405639648438,"text":"Animal"},{"top":0.006499578,"left":136.0,"width":61.0,"height":95.31405639648438,"text":"Action"},{"top":0.006499578,"left":197.0,"width":101.0,"height":95.31405639648438,"text":"Result"}],[{"top":95.32056,"left":56.8,"width":79.19999694824219,"height":23.050010681152344,"text":"Cat"},{"top":95.32056,"left":136.0,"width":61.0,"height":23.050010681152344,"text":"Says"},{"top":95.32056,"left":197.0,"width":101.0,"height":23.050010681152344,"text":"Meow"}],[{"top":118.37057,"left":56.8,"width":79.19999694824219,"height":63.99999237060547,"text":"Parastratiosph\recomyiastratio\rsphecomyioid\res"},{"top":118.37057,"left":136.0,"width":61.0,"height":63.99999237060547,"text":"Says"},{"top":118.37057,"left":197.0,"width":101.0,"height":63.99999237060547,"text":"bzzzzzzz"}],[{"top":182.37056,"left":56.8,"width":79.19999694824219,"height":133.00001525878906,"text":"Fox"},{"top":182.37056,"left":136.0,"width":61.0,"height":133.00001525878906,"text":"Says"},{"top":182.37056,"left":197.0,"width":101.0,"height":133.00001525878906,"text":"Ring-\rdingdingdingd\ringeringeding\rGering-\rdingdingdingd\ringeringeding\rGering-\rdingdingdingd\ringeringeding"}]]},{"extraction_method":"lattice","page_number":1,"top":0.006499578,"left":313.35715,"width":241.55941772460938,"height":259.2640380859375,"right":554.91656,"bottom":259.27054,"data":[[{"top":0.006499578,"left":313.35715,"width":77.64285278320312,"height":72.26405334472656,"text":""},{"top":0.006499578,"left":391.0,"width":66.0,"height":72.26405334472656,"text":""},{"top":0.006499578,"left":457.0,"width":97.91656494140625,"height":72.26405334472656,"text":""}],[{"top":72.27055,"left":313.35715,"width":77.64285278320312,"height":23.050003051757812,"text":"Animal"},{"top":7
2.27055,"left":391.0,"width":66.0,"height":23.050003051757812,"text":"Action"},{"top":72.27055,"left":457.0,"width":97.91656494140625,"height":23.050003051757812,"text":"Result"}],[{"top":95.32056,"left":313.35715,"width":77.64285278320312,"height":35.94999694824219,"text":"Dogs/wolves/\rMore dogs"},{"top":95.32056,"left":391.0,"width":66.0,"height":35.94999694824219,"text":"Says"},{"top":95.32056,"left":457.0,"width":97.91656494140625,"height":35.94999694824219,"text":"Bow-wow/\rruff-ruff"}],[{"top":131.27055,"left":313.35715,"width":77.64285278320312,"height":36.40000915527344,"text":"Donkey"},{"top":131.27055,"left":391.0,"width":66.0,"height":36.40000915527344,"text":"Says"},{"top":131.27055,"left":457.0,"width":97.91656494140625,"height":36.40000915527344,"text":"Hee-Haw Hee-\rHaw"}],[{"top":167.67056,"left":313.35715,"width":77.64285278320312,"height":91.5999755859375,"text":"Fox"},{"top":167.67056,"left":391.0,"width":66.0,"height":91.5999755859375,"text":"Says"},{"top":167.67056,"left":457.0,"width":97.91656494140625,"height":91.5999755859375,"text":"Wa-pa-pa-pa-\rpa-pa-pow\rWa-pa-pa-pa-\rpa-pow\rWa-pa-pa-pa-\rpa-pa-pow"}]]}] 2 | -------------------------------------------------------------------------------- /src/test/resources/technology/tabula/labor.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tabulapdf/tabula-java/2cdf3b4fd3f7e921dca8cc6814cdd9316be40f0f/src/test/resources/technology/tabula/labor.pdf -------------------------------------------------------------------------------- /src/test/resources/technology/tabula/m27.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tabulapdf/tabula-java/2cdf3b4fd3f7e921dca8cc6814cdd9316be40f0f/src/test/resources/technology/tabula/m27.pdf -------------------------------------------------------------------------------- /src/test/resources/technology/tabula/mednine.pdf: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/tabulapdf/tabula-java/2cdf3b4fd3f7e921dca8cc6814cdd9316be40f0f/src/test/resources/technology/tabula/mednine.pdf -------------------------------------------------------------------------------- /src/test/resources/technology/tabula/npe_issue_206.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tabulapdf/tabula-java/2cdf3b4fd3f7e921dca8cc6814cdd9316be40f0f/src/test/resources/technology/tabula/npe_issue_206.pdf -------------------------------------------------------------------------------- /src/test/resources/technology/tabula/offense.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tabulapdf/tabula-java/2cdf3b4fd3f7e921dca8cc6814cdd9316be40f0f/src/test/resources/technology/tabula/offense.pdf -------------------------------------------------------------------------------- /src/test/resources/technology/tabula/puertos1.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tabulapdf/tabula-java/2cdf3b4fd3f7e921dca8cc6814cdd9316be40f0f/src/test/resources/technology/tabula/puertos1.pdf -------------------------------------------------------------------------------- /src/test/resources/technology/tabula/rotated_page.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tabulapdf/tabula-java/2cdf3b4fd3f7e921dca8cc6814cdd9316be40f0f/src/test/resources/technology/tabula/rotated_page.pdf -------------------------------------------------------------------------------- /src/test/resources/technology/tabula/schools.pdf: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/tabulapdf/tabula-java/2cdf3b4fd3f7e921dca8cc6814cdd9316be40f0f/src/test/resources/technology/tabula/schools.pdf -------------------------------------------------------------------------------- /src/test/resources/technology/tabula/should_detect_rulings.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tabulapdf/tabula-java/2cdf3b4fd3f7e921dca8cc6814cdd9316be40f0f/src/test/resources/technology/tabula/should_detect_rulings.pdf -------------------------------------------------------------------------------- /src/test/resources/technology/tabula/sort_exception.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tabulapdf/tabula-java/2cdf3b4fd3f7e921dca8cc6814cdd9316be40f0f/src/test/resources/technology/tabula/sort_exception.pdf -------------------------------------------------------------------------------- /src/test/resources/technology/tabula/spanning_cells.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tabulapdf/tabula-java/2cdf3b4fd3f7e921dca8cc6814cdd9316be40f0f/src/test/resources/technology/tabula/spanning_cells.pdf -------------------------------------------------------------------------------- /src/test/resources/technology/tabula/spreadsheet_no_bounding_frame.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tabulapdf/tabula-java/2cdf3b4fd3f7e921dca8cc6814cdd9316be40f0f/src/test/resources/technology/tabula/spreadsheet_no_bounding_frame.pdf -------------------------------------------------------------------------------- /src/test/resources/technology/tabula/sydney_disclosure_contract.pdf: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/tabulapdf/tabula-java/2cdf3b4fd3f7e921dca8cc6814cdd9316be40f0f/src/test/resources/technology/tabula/sydney_disclosure_contract.pdf -------------------------------------------------------------------------------- /src/test/resources/technology/tabula/twotables.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tabulapdf/tabula-java/2cdf3b4fd3f7e921dca8cc6814cdd9316be40f0f/src/test/resources/technology/tabula/twotables.pdf -------------------------------------------------------------------------------- /src/test/resources/technology/tabula/us-007.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tabulapdf/tabula-java/2cdf3b4fd3f7e921dca8cc6814cdd9316be40f0f/src/test/resources/technology/tabula/us-007.pdf -------------------------------------------------------------------------------- /src/test/resources/technology/tabula/us-017.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tabulapdf/tabula-java/2cdf3b4fd3f7e921dca8cc6814cdd9316be40f0f/src/test/resources/technology/tabula/us-017.pdf -------------------------------------------------------------------------------- /src/test/resources/technology/tabula/us-020.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tabulapdf/tabula-java/2cdf3b4fd3f7e921dca8cc6814cdd9316be40f0f/src/test/resources/technology/tabula/us-020.pdf -------------------------------------------------------------------------------- /src/test/resources/technology/tabula/us-024.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tabulapdf/tabula-java/2cdf3b4fd3f7e921dca8cc6814cdd9316be40f0f/src/test/resources/technology/tabula/us-024.pdf 
--------------------------------------------------------------------------------