├── .gitattributes ├── .github └── workflows │ ├── cmake.yml │ └── create-release-binaries.yml ├── .gitignore ├── .vscode ├── launch.json └── settings.json ├── CMakeLists.txt ├── Config.cmake.in ├── LICENSE ├── README.md ├── TextExtraction ├── CMakeLists.txt ├── ErrorsAndWarnings.h ├── TableExtraction.cpp ├── TableExtraction.h ├── TextExtraction.cpp ├── TextExtraction.h └── lib │ ├── bidi │ ├── BidiConversion.cpp │ ├── BidiConversion.h │ └── ICUInclude.h │ ├── font-translation │ ├── Encoding.cpp │ ├── Encoding.h │ ├── EncodingAdobeGlyphList.cpp │ ├── EncodingAdobeGlyphList.h │ ├── EncodingMacExpert.cpp │ ├── EncodingMacExpert.h │ ├── EncodingMacRoman.cpp │ ├── EncodingMacRoman.h │ ├── EncodingStandard.cpp │ ├── EncodingStandard.h │ ├── EncodingSymbol.cpp │ ├── EncodingSymbol.h │ ├── EncodingWinAnsi.cpp │ ├── EncodingWinAnsi.h │ ├── FontDecoder.cpp │ ├── FontDecoder.h │ ├── StandardFontsDimensions.cpp │ ├── StandardFontsDimensions.h │ └── Translation.h │ ├── graphic-content-parsing │ ├── ContentGraphicState.h │ ├── GraphicContentInterpreter.cpp │ ├── GraphicContentInterpreter.h │ ├── IGraphicContentInterpreterHandler.h │ ├── Path.h │ ├── PathElement.h │ ├── Resources.h │ ├── TextElement.h │ └── TextGraphicState.h │ ├── graphs │ ├── Graph.h │ ├── Queue.h │ └── Result.h │ ├── interpreter │ ├── IPDFInterpreterHandler.h │ ├── IPDFRecursiveInterpreterHandler.h │ ├── PDFInterpreter.cpp │ ├── PDFInterpreter.h │ ├── PDFRecursiveInterpreter.cpp │ └── PDFRecursiveInterpreter.h │ ├── math │ ├── Transformations.cpp │ └── Transformations.h │ ├── pdf-writer-enhancers │ ├── Bytes.cpp │ └── Bytes.h │ ├── table-composition │ ├── Lines.h │ ├── Table.cpp │ ├── Table.h │ ├── TableComposer.cpp │ └── TableComposer.h │ ├── table-csv-export │ ├── TableCSVExport.cpp │ └── TableCSVExport.h │ ├── table-line-parsing │ ├── ITableLineInterpreterHandler.h │ ├── ParsedLinePlacement.h │ ├── TableLineInterpreter.cpp │ └── TableLineInterpreter.h │ ├── text-composition │ ├── TextComposer.cpp │ └── TextComposer.h │ └── text-parsing │ ├── ITextInterpreterHandler.h │ ├── ParsedTextPlacement.h │ ├── TextInterpreter.cpp │ └── TextInterpreter.h └── TextExtractionCLI ├── CMakeLists.txt └── extract-text-cli.cpp /.gitattributes: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/galkahana/pdf-text-extraction/HEAD/.gitattributes -------------------------------------------------------------------------------- /.github/workflows/cmake.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/galkahana/pdf-text-extraction/HEAD/.github/workflows/cmake.yml -------------------------------------------------------------------------------- /.github/workflows/create-release-binaries.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/galkahana/pdf-text-extraction/HEAD/.github/workflows/create-release-binaries.yml -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/galkahana/pdf-text-extraction/HEAD/.gitignore -------------------------------------------------------------------------------- /.vscode/launch.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/galkahana/pdf-text-extraction/HEAD/.vscode/launch.json -------------------------------------------------------------------------------- /.vscode/settings.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/galkahana/pdf-text-extraction/HEAD/.vscode/settings.json -------------------------------------------------------------------------------- /CMakeLists.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/galkahana/pdf-text-extraction/HEAD/CMakeLists.txt -------------------------------------------------------------------------------- /Config.cmake.in: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/galkahana/pdf-text-extraction/HEAD/Config.cmake.in -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/galkahana/pdf-text-extraction/HEAD/LICENSE -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/galkahana/pdf-text-extraction/HEAD/README.md -------------------------------------------------------------------------------- /TextExtraction/CMakeLists.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/galkahana/pdf-text-extraction/HEAD/TextExtraction/CMakeLists.txt -------------------------------------------------------------------------------- /TextExtraction/ErrorsAndWarnings.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/galkahana/pdf-text-extraction/HEAD/TextExtraction/ErrorsAndWarnings.h -------------------------------------------------------------------------------- /TextExtraction/TableExtraction.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/galkahana/pdf-text-extraction/HEAD/TextExtraction/TableExtraction.cpp -------------------------------------------------------------------------------- /TextExtraction/TableExtraction.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/galkahana/pdf-text-extraction/HEAD/TextExtraction/TableExtraction.h -------------------------------------------------------------------------------- /TextExtraction/TextExtraction.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/galkahana/pdf-text-extraction/HEAD/TextExtraction/TextExtraction.cpp -------------------------------------------------------------------------------- /TextExtraction/TextExtraction.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/galkahana/pdf-text-extraction/HEAD/TextExtraction/TextExtraction.h -------------------------------------------------------------------------------- /TextExtraction/lib/bidi/BidiConversion.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/galkahana/pdf-text-extraction/HEAD/TextExtraction/lib/bidi/BidiConversion.cpp -------------------------------------------------------------------------------- /TextExtraction/lib/bidi/BidiConversion.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/galkahana/pdf-text-extraction/HEAD/TextExtraction/lib/bidi/BidiConversion.h -------------------------------------------------------------------------------- /TextExtraction/lib/bidi/ICUInclude.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/galkahana/pdf-text-extraction/HEAD/TextExtraction/lib/bidi/ICUInclude.h -------------------------------------------------------------------------------- /TextExtraction/lib/font-translation/Encoding.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/galkahana/pdf-text-extraction/HEAD/TextExtraction/lib/font-translation/Encoding.cpp -------------------------------------------------------------------------------- /TextExtraction/lib/font-translation/Encoding.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/galkahana/pdf-text-extraction/HEAD/TextExtraction/lib/font-translation/Encoding.h -------------------------------------------------------------------------------- /TextExtraction/lib/font-translation/EncodingAdobeGlyphList.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/galkahana/pdf-text-extraction/HEAD/TextExtraction/lib/font-translation/EncodingAdobeGlyphList.cpp -------------------------------------------------------------------------------- /TextExtraction/lib/font-translation/EncodingAdobeGlyphList.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/galkahana/pdf-text-extraction/HEAD/TextExtraction/lib/font-translation/EncodingAdobeGlyphList.h -------------------------------------------------------------------------------- /TextExtraction/lib/font-translation/EncodingMacExpert.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/galkahana/pdf-text-extraction/HEAD/TextExtraction/lib/font-translation/EncodingMacExpert.cpp -------------------------------------------------------------------------------- /TextExtraction/lib/font-translation/EncodingMacExpert.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/galkahana/pdf-text-extraction/HEAD/TextExtraction/lib/font-translation/EncodingMacExpert.h -------------------------------------------------------------------------------- /TextExtraction/lib/font-translation/EncodingMacRoman.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/galkahana/pdf-text-extraction/HEAD/TextExtraction/lib/font-translation/EncodingMacRoman.cpp -------------------------------------------------------------------------------- /TextExtraction/lib/font-translation/EncodingMacRoman.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/galkahana/pdf-text-extraction/HEAD/TextExtraction/lib/font-translation/EncodingMacRoman.h -------------------------------------------------------------------------------- /TextExtraction/lib/font-translation/EncodingStandard.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/galkahana/pdf-text-extraction/HEAD/TextExtraction/lib/font-translation/EncodingStandard.cpp -------------------------------------------------------------------------------- /TextExtraction/lib/font-translation/EncodingStandard.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/galkahana/pdf-text-extraction/HEAD/TextExtraction/lib/font-translation/EncodingStandard.h -------------------------------------------------------------------------------- /TextExtraction/lib/font-translation/EncodingSymbol.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/galkahana/pdf-text-extraction/HEAD/TextExtraction/lib/font-translation/EncodingSymbol.cpp -------------------------------------------------------------------------------- /TextExtraction/lib/font-translation/EncodingSymbol.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/galkahana/pdf-text-extraction/HEAD/TextExtraction/lib/font-translation/EncodingSymbol.h -------------------------------------------------------------------------------- /TextExtraction/lib/font-translation/EncodingWinAnsi.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/galkahana/pdf-text-extraction/HEAD/TextExtraction/lib/font-translation/EncodingWinAnsi.cpp -------------------------------------------------------------------------------- /TextExtraction/lib/font-translation/EncodingWinAnsi.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/galkahana/pdf-text-extraction/HEAD/TextExtraction/lib/font-translation/EncodingWinAnsi.h -------------------------------------------------------------------------------- /TextExtraction/lib/font-translation/FontDecoder.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/galkahana/pdf-text-extraction/HEAD/TextExtraction/lib/font-translation/FontDecoder.cpp -------------------------------------------------------------------------------- /TextExtraction/lib/font-translation/FontDecoder.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/galkahana/pdf-text-extraction/HEAD/TextExtraction/lib/font-translation/FontDecoder.h -------------------------------------------------------------------------------- /TextExtraction/lib/font-translation/StandardFontsDimensions.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/galkahana/pdf-text-extraction/HEAD/TextExtraction/lib/font-translation/StandardFontsDimensions.cpp -------------------------------------------------------------------------------- /TextExtraction/lib/font-translation/StandardFontsDimensions.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/galkahana/pdf-text-extraction/HEAD/TextExtraction/lib/font-translation/StandardFontsDimensions.h -------------------------------------------------------------------------------- /TextExtraction/lib/font-translation/Translation.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/galkahana/pdf-text-extraction/HEAD/TextExtraction/lib/font-translation/Translation.h -------------------------------------------------------------------------------- /TextExtraction/lib/graphic-content-parsing/ContentGraphicState.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/galkahana/pdf-text-extraction/HEAD/TextExtraction/lib/graphic-content-parsing/ContentGraphicState.h -------------------------------------------------------------------------------- /TextExtraction/lib/graphic-content-parsing/GraphicContentInterpreter.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/galkahana/pdf-text-extraction/HEAD/TextExtraction/lib/graphic-content-parsing/GraphicContentInterpreter.cpp -------------------------------------------------------------------------------- /TextExtraction/lib/graphic-content-parsing/GraphicContentInterpreter.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/galkahana/pdf-text-extraction/HEAD/TextExtraction/lib/graphic-content-parsing/GraphicContentInterpreter.h -------------------------------------------------------------------------------- /TextExtraction/lib/graphic-content-parsing/IGraphicContentInterpreterHandler.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/galkahana/pdf-text-extraction/HEAD/TextExtraction/lib/graphic-content-parsing/IGraphicContentInterpreterHandler.h -------------------------------------------------------------------------------- /TextExtraction/lib/graphic-content-parsing/Path.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/galkahana/pdf-text-extraction/HEAD/TextExtraction/lib/graphic-content-parsing/Path.h -------------------------------------------------------------------------------- /TextExtraction/lib/graphic-content-parsing/PathElement.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/galkahana/pdf-text-extraction/HEAD/TextExtraction/lib/graphic-content-parsing/PathElement.h -------------------------------------------------------------------------------- /TextExtraction/lib/graphic-content-parsing/Resources.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/galkahana/pdf-text-extraction/HEAD/TextExtraction/lib/graphic-content-parsing/Resources.h -------------------------------------------------------------------------------- /TextExtraction/lib/graphic-content-parsing/TextElement.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/galkahana/pdf-text-extraction/HEAD/TextExtraction/lib/graphic-content-parsing/TextElement.h -------------------------------------------------------------------------------- /TextExtraction/lib/graphic-content-parsing/TextGraphicState.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/galkahana/pdf-text-extraction/HEAD/TextExtraction/lib/graphic-content-parsing/TextGraphicState.h -------------------------------------------------------------------------------- /TextExtraction/lib/graphs/Graph.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/galkahana/pdf-text-extraction/HEAD/TextExtraction/lib/graphs/Graph.h -------------------------------------------------------------------------------- /TextExtraction/lib/graphs/Queue.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/galkahana/pdf-text-extraction/HEAD/TextExtraction/lib/graphs/Queue.h -------------------------------------------------------------------------------- /TextExtraction/lib/graphs/Result.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/galkahana/pdf-text-extraction/HEAD/TextExtraction/lib/graphs/Result.h -------------------------------------------------------------------------------- /TextExtraction/lib/interpreter/IPDFInterpreterHandler.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/galkahana/pdf-text-extraction/HEAD/TextExtraction/lib/interpreter/IPDFInterpreterHandler.h -------------------------------------------------------------------------------- /TextExtraction/lib/interpreter/IPDFRecursiveInterpreterHandler.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/galkahana/pdf-text-extraction/HEAD/TextExtraction/lib/interpreter/IPDFRecursiveInterpreterHandler.h -------------------------------------------------------------------------------- /TextExtraction/lib/interpreter/PDFInterpreter.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/galkahana/pdf-text-extraction/HEAD/TextExtraction/lib/interpreter/PDFInterpreter.cpp -------------------------------------------------------------------------------- /TextExtraction/lib/interpreter/PDFInterpreter.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/galkahana/pdf-text-extraction/HEAD/TextExtraction/lib/interpreter/PDFInterpreter.h -------------------------------------------------------------------------------- /TextExtraction/lib/interpreter/PDFRecursiveInterpreter.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/galkahana/pdf-text-extraction/HEAD/TextExtraction/lib/interpreter/PDFRecursiveInterpreter.cpp -------------------------------------------------------------------------------- /TextExtraction/lib/interpreter/PDFRecursiveInterpreter.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/galkahana/pdf-text-extraction/HEAD/TextExtraction/lib/interpreter/PDFRecursiveInterpreter.h -------------------------------------------------------------------------------- /TextExtraction/lib/math/Transformations.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/galkahana/pdf-text-extraction/HEAD/TextExtraction/lib/math/Transformations.cpp -------------------------------------------------------------------------------- /TextExtraction/lib/math/Transformations.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/galkahana/pdf-text-extraction/HEAD/TextExtraction/lib/math/Transformations.h -------------------------------------------------------------------------------- /TextExtraction/lib/pdf-writer-enhancers/Bytes.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/galkahana/pdf-text-extraction/HEAD/TextExtraction/lib/pdf-writer-enhancers/Bytes.cpp -------------------------------------------------------------------------------- /TextExtraction/lib/pdf-writer-enhancers/Bytes.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/galkahana/pdf-text-extraction/HEAD/TextExtraction/lib/pdf-writer-enhancers/Bytes.h -------------------------------------------------------------------------------- /TextExtraction/lib/table-composition/Lines.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/galkahana/pdf-text-extraction/HEAD/TextExtraction/lib/table-composition/Lines.h -------------------------------------------------------------------------------- /TextExtraction/lib/table-composition/Table.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/galkahana/pdf-text-extraction/HEAD/TextExtraction/lib/table-composition/Table.cpp -------------------------------------------------------------------------------- /TextExtraction/lib/table-composition/Table.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/galkahana/pdf-text-extraction/HEAD/TextExtraction/lib/table-composition/Table.h -------------------------------------------------------------------------------- /TextExtraction/lib/table-composition/TableComposer.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/galkahana/pdf-text-extraction/HEAD/TextExtraction/lib/table-composition/TableComposer.cpp -------------------------------------------------------------------------------- /TextExtraction/lib/table-composition/TableComposer.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/galkahana/pdf-text-extraction/HEAD/TextExtraction/lib/table-composition/TableComposer.h -------------------------------------------------------------------------------- /TextExtraction/lib/table-csv-export/TableCSVExport.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/galkahana/pdf-text-extraction/HEAD/TextExtraction/lib/table-csv-export/TableCSVExport.cpp -------------------------------------------------------------------------------- /TextExtraction/lib/table-csv-export/TableCSVExport.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/galkahana/pdf-text-extraction/HEAD/TextExtraction/lib/table-csv-export/TableCSVExport.h -------------------------------------------------------------------------------- /TextExtraction/lib/table-line-parsing/ITableLineInterpreterHandler.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/galkahana/pdf-text-extraction/HEAD/TextExtraction/lib/table-line-parsing/ITableLineInterpreterHandler.h -------------------------------------------------------------------------------- /TextExtraction/lib/table-line-parsing/ParsedLinePlacement.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/galkahana/pdf-text-extraction/HEAD/TextExtraction/lib/table-line-parsing/ParsedLinePlacement.h -------------------------------------------------------------------------------- /TextExtraction/lib/table-line-parsing/TableLineInterpreter.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/galkahana/pdf-text-extraction/HEAD/TextExtraction/lib/table-line-parsing/TableLineInterpreter.cpp -------------------------------------------------------------------------------- /TextExtraction/lib/table-line-parsing/TableLineInterpreter.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/galkahana/pdf-text-extraction/HEAD/TextExtraction/lib/table-line-parsing/TableLineInterpreter.h -------------------------------------------------------------------------------- /TextExtraction/lib/text-composition/TextComposer.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/galkahana/pdf-text-extraction/HEAD/TextExtraction/lib/text-composition/TextComposer.cpp -------------------------------------------------------------------------------- /TextExtraction/lib/text-composition/TextComposer.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/galkahana/pdf-text-extraction/HEAD/TextExtraction/lib/text-composition/TextComposer.h -------------------------------------------------------------------------------- /TextExtraction/lib/text-parsing/ITextInterpreterHandler.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/galkahana/pdf-text-extraction/HEAD/TextExtraction/lib/text-parsing/ITextInterpreterHandler.h -------------------------------------------------------------------------------- /TextExtraction/lib/text-parsing/ParsedTextPlacement.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/galkahana/pdf-text-extraction/HEAD/TextExtraction/lib/text-parsing/ParsedTextPlacement.h -------------------------------------------------------------------------------- /TextExtraction/lib/text-parsing/TextInterpreter.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/galkahana/pdf-text-extraction/HEAD/TextExtraction/lib/text-parsing/TextInterpreter.cpp -------------------------------------------------------------------------------- /TextExtraction/lib/text-parsing/TextInterpreter.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/galkahana/pdf-text-extraction/HEAD/TextExtraction/lib/text-parsing/TextInterpreter.h -------------------------------------------------------------------------------- /TextExtractionCLI/CMakeLists.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/galkahana/pdf-text-extraction/HEAD/TextExtractionCLI/CMakeLists.txt -------------------------------------------------------------------------------- /TextExtractionCLI/extract-text-cli.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/galkahana/pdf-text-extraction/HEAD/TextExtractionCLI/extract-text-cli.cpp --------------------------------------------------------------------------------