├── .github ├── dependabot.yml └── workflows │ └── ci.yml ├── .gitignore ├── .rspec ├── CHANGELOG.md ├── CONTRIBUTING.md ├── Gemfile ├── LICENSE.txt ├── README.md ├── Rakefile ├── docs ├── ALIGNMENT_PLAN.md ├── DEPENDENCIES.md ├── FEATURE_COMPARISON.md ├── PPTX_PLAN.md ├── REFACTOR_PLAN.md └── assets │ ├── parsekit-wide.png │ └── parsekit.png ├── examples ├── advanced_parsing.rb ├── basic_parsing.rb ├── parse_documents.rb └── parsekit_demo.rb ├── ext └── parsekit │ ├── Cargo.toml │ ├── extconf.rb │ └── src │ ├── error.rs │ ├── format_detector.rs │ ├── lib.rs │ └── parser.rs ├── lib ├── parsekit.rb └── parsekit │ ├── error.rb │ ├── parser.rb │ └── version.rb ├── parsekit.gemspec └── spec ├── fixtures ├── .gitkeep ├── auto_detect.png ├── corrupted.docx ├── corrupted.pdf ├── corrupted.png ├── grayscale.tiff ├── large_image.png ├── latin1.txt ├── magic_detect.jpg ├── magic_detect.png ├── multiline.png ├── ocr_test.bmp ├── ocr_test.jpg ├── ocr_test.png ├── palette_lzw.tiff ├── palette_uncompressed.tiff ├── palette_zip.tiff ├── rgb_lzw.tiff ├── rgb_zip.tiff ├── rgba.tiff ├── sample.docx ├── sample.htm ├── sample.html ├── sample.md ├── sample.pdf ├── sample.png ├── sample.pptx ├── sample.txt ├── sample.xls ├── sample.xlsx ├── shift_jis.txt ├── special_chars.png ├── static_test.png └── utf16.txt ├── parsekit ├── dispatch_spec.rb ├── encoding_spec.rb ├── error_consistency_spec.rb ├── error_handling_spec.rb ├── format_detection_spec.rb ├── integration_spec.rb ├── ocr_spec.rb ├── parser_spec.rb ├── pdf_parser_spec.rb ├── simple_parsing_spec.rb └── validation_helpers_spec.rb ├── parsekit_spec.rb ├── spec_helper.rb └── support └── shared_examples.rb /.github/dependabot.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scientist-labs/parsekit/HEAD/.github/dependabot.yml -------------------------------------------------------------------------------- /.github/workflows/ci.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scientist-labs/parsekit/HEAD/.github/workflows/ci.yml -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scientist-labs/parsekit/HEAD/.gitignore -------------------------------------------------------------------------------- /.rspec: -------------------------------------------------------------------------------- 1 | --format documentation 2 | --color 3 | --require spec_helper -------------------------------------------------------------------------------- /CHANGELOG.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scientist-labs/parsekit/HEAD/CHANGELOG.md -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scientist-labs/parsekit/HEAD/CONTRIBUTING.md -------------------------------------------------------------------------------- /Gemfile: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scientist-labs/parsekit/HEAD/Gemfile -------------------------------------------------------------------------------- /LICENSE.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scientist-labs/parsekit/HEAD/LICENSE.txt -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scientist-labs/parsekit/HEAD/README.md -------------------------------------------------------------------------------- /Rakefile: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scientist-labs/parsekit/HEAD/Rakefile -------------------------------------------------------------------------------- /docs/ALIGNMENT_PLAN.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scientist-labs/parsekit/HEAD/docs/ALIGNMENT_PLAN.md -------------------------------------------------------------------------------- /docs/DEPENDENCIES.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scientist-labs/parsekit/HEAD/docs/DEPENDENCIES.md -------------------------------------------------------------------------------- /docs/FEATURE_COMPARISON.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scientist-labs/parsekit/HEAD/docs/FEATURE_COMPARISON.md -------------------------------------------------------------------------------- /docs/PPTX_PLAN.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scientist-labs/parsekit/HEAD/docs/PPTX_PLAN.md -------------------------------------------------------------------------------- /docs/REFACTOR_PLAN.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scientist-labs/parsekit/HEAD/docs/REFACTOR_PLAN.md -------------------------------------------------------------------------------- /docs/assets/parsekit-wide.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scientist-labs/parsekit/HEAD/docs/assets/parsekit-wide.png -------------------------------------------------------------------------------- /docs/assets/parsekit.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scientist-labs/parsekit/HEAD/docs/assets/parsekit.png -------------------------------------------------------------------------------- /examples/advanced_parsing.rb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scientist-labs/parsekit/HEAD/examples/advanced_parsing.rb -------------------------------------------------------------------------------- /examples/basic_parsing.rb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scientist-labs/parsekit/HEAD/examples/basic_parsing.rb -------------------------------------------------------------------------------- /examples/parse_documents.rb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scientist-labs/parsekit/HEAD/examples/parse_documents.rb -------------------------------------------------------------------------------- /examples/parsekit_demo.rb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scientist-labs/parsekit/HEAD/examples/parsekit_demo.rb -------------------------------------------------------------------------------- /ext/parsekit/Cargo.toml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scientist-labs/parsekit/HEAD/ext/parsekit/Cargo.toml -------------------------------------------------------------------------------- /ext/parsekit/extconf.rb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scientist-labs/parsekit/HEAD/ext/parsekit/extconf.rb -------------------------------------------------------------------------------- /ext/parsekit/src/error.rs: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scientist-labs/parsekit/HEAD/ext/parsekit/src/error.rs -------------------------------------------------------------------------------- /ext/parsekit/src/format_detector.rs: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scientist-labs/parsekit/HEAD/ext/parsekit/src/format_detector.rs -------------------------------------------------------------------------------- /ext/parsekit/src/lib.rs: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scientist-labs/parsekit/HEAD/ext/parsekit/src/lib.rs -------------------------------------------------------------------------------- /ext/parsekit/src/parser.rs: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scientist-labs/parsekit/HEAD/ext/parsekit/src/parser.rs -------------------------------------------------------------------------------- /lib/parsekit.rb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scientist-labs/parsekit/HEAD/lib/parsekit.rb -------------------------------------------------------------------------------- /lib/parsekit/error.rb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scientist-labs/parsekit/HEAD/lib/parsekit/error.rb -------------------------------------------------------------------------------- /lib/parsekit/parser.rb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scientist-labs/parsekit/HEAD/lib/parsekit/parser.rb -------------------------------------------------------------------------------- /lib/parsekit/version.rb: -------------------------------------------------------------------------------- 1 | # frozen_string_literal: true 2 | 3 | module ParseKit 4 | VERSION = "0.1.2" 5 | end 6 | -------------------------------------------------------------------------------- /parsekit.gemspec: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scientist-labs/parsekit/HEAD/parsekit.gemspec -------------------------------------------------------------------------------- /spec/fixtures/.gitkeep: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /spec/fixtures/auto_detect.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scientist-labs/parsekit/HEAD/spec/fixtures/auto_detect.png -------------------------------------------------------------------------------- /spec/fixtures/corrupted.docx: -------------------------------------------------------------------------------- 1 | PK fake docx content that will fail 2 | -------------------------------------------------------------------------------- /spec/fixtures/corrupted.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scientist-labs/parsekit/HEAD/spec/fixtures/corrupted.pdf -------------------------------------------------------------------------------- /spec/fixtures/corrupted.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scientist-labs/parsekit/HEAD/spec/fixtures/corrupted.png -------------------------------------------------------------------------------- /spec/fixtures/grayscale.tiff: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scientist-labs/parsekit/HEAD/spec/fixtures/grayscale.tiff -------------------------------------------------------------------------------- /spec/fixtures/large_image.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scientist-labs/parsekit/HEAD/spec/fixtures/large_image.png -------------------------------------------------------------------------------- /spec/fixtures/latin1.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scientist-labs/parsekit/HEAD/spec/fixtures/latin1.txt -------------------------------------------------------------------------------- /spec/fixtures/magic_detect.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scientist-labs/parsekit/HEAD/spec/fixtures/magic_detect.jpg -------------------------------------------------------------------------------- /spec/fixtures/magic_detect.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scientist-labs/parsekit/HEAD/spec/fixtures/magic_detect.png -------------------------------------------------------------------------------- /spec/fixtures/multiline.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scientist-labs/parsekit/HEAD/spec/fixtures/multiline.png -------------------------------------------------------------------------------- /spec/fixtures/ocr_test.bmp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scientist-labs/parsekit/HEAD/spec/fixtures/ocr_test.bmp -------------------------------------------------------------------------------- /spec/fixtures/ocr_test.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scientist-labs/parsekit/HEAD/spec/fixtures/ocr_test.jpg -------------------------------------------------------------------------------- /spec/fixtures/ocr_test.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scientist-labs/parsekit/HEAD/spec/fixtures/ocr_test.png -------------------------------------------------------------------------------- /spec/fixtures/palette_lzw.tiff: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scientist-labs/parsekit/HEAD/spec/fixtures/palette_lzw.tiff -------------------------------------------------------------------------------- /spec/fixtures/palette_uncompressed.tiff: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scientist-labs/parsekit/HEAD/spec/fixtures/palette_uncompressed.tiff -------------------------------------------------------------------------------- /spec/fixtures/palette_zip.tiff: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scientist-labs/parsekit/HEAD/spec/fixtures/palette_zip.tiff -------------------------------------------------------------------------------- /spec/fixtures/rgb_lzw.tiff: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scientist-labs/parsekit/HEAD/spec/fixtures/rgb_lzw.tiff -------------------------------------------------------------------------------- /spec/fixtures/rgb_zip.tiff: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scientist-labs/parsekit/HEAD/spec/fixtures/rgb_zip.tiff -------------------------------------------------------------------------------- /spec/fixtures/rgba.tiff: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scientist-labs/parsekit/HEAD/spec/fixtures/rgba.tiff -------------------------------------------------------------------------------- /spec/fixtures/sample.docx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scientist-labs/parsekit/HEAD/spec/fixtures/sample.docx -------------------------------------------------------------------------------- /spec/fixtures/sample.htm: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scientist-labs/parsekit/HEAD/spec/fixtures/sample.htm -------------------------------------------------------------------------------- /spec/fixtures/sample.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scientist-labs/parsekit/HEAD/spec/fixtures/sample.html -------------------------------------------------------------------------------- /spec/fixtures/sample.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scientist-labs/parsekit/HEAD/spec/fixtures/sample.md -------------------------------------------------------------------------------- /spec/fixtures/sample.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scientist-labs/parsekit/HEAD/spec/fixtures/sample.pdf -------------------------------------------------------------------------------- /spec/fixtures/sample.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scientist-labs/parsekit/HEAD/spec/fixtures/sample.png -------------------------------------------------------------------------------- /spec/fixtures/sample.pptx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scientist-labs/parsekit/HEAD/spec/fixtures/sample.pptx -------------------------------------------------------------------------------- /spec/fixtures/sample.txt: -------------------------------------------------------------------------------- 1 | Test content 2 | -------------------------------------------------------------------------------- /spec/fixtures/sample.xls: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scientist-labs/parsekit/HEAD/spec/fixtures/sample.xls -------------------------------------------------------------------------------- /spec/fixtures/sample.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scientist-labs/parsekit/HEAD/spec/fixtures/sample.xlsx -------------------------------------------------------------------------------- /spec/fixtures/shift_jis.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scientist-labs/parsekit/HEAD/spec/fixtures/shift_jis.txt -------------------------------------------------------------------------------- /spec/fixtures/special_chars.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scientist-labs/parsekit/HEAD/spec/fixtures/special_chars.png -------------------------------------------------------------------------------- /spec/fixtures/static_test.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scientist-labs/parsekit/HEAD/spec/fixtures/static_test.png -------------------------------------------------------------------------------- /spec/fixtures/utf16.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scientist-labs/parsekit/HEAD/spec/fixtures/utf16.txt -------------------------------------------------------------------------------- /spec/parsekit/dispatch_spec.rb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scientist-labs/parsekit/HEAD/spec/parsekit/dispatch_spec.rb -------------------------------------------------------------------------------- /spec/parsekit/encoding_spec.rb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scientist-labs/parsekit/HEAD/spec/parsekit/encoding_spec.rb -------------------------------------------------------------------------------- /spec/parsekit/error_consistency_spec.rb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scientist-labs/parsekit/HEAD/spec/parsekit/error_consistency_spec.rb -------------------------------------------------------------------------------- /spec/parsekit/error_handling_spec.rb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scientist-labs/parsekit/HEAD/spec/parsekit/error_handling_spec.rb -------------------------------------------------------------------------------- /spec/parsekit/format_detection_spec.rb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scientist-labs/parsekit/HEAD/spec/parsekit/format_detection_spec.rb -------------------------------------------------------------------------------- /spec/parsekit/integration_spec.rb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scientist-labs/parsekit/HEAD/spec/parsekit/integration_spec.rb -------------------------------------------------------------------------------- /spec/parsekit/ocr_spec.rb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scientist-labs/parsekit/HEAD/spec/parsekit/ocr_spec.rb -------------------------------------------------------------------------------- /spec/parsekit/parser_spec.rb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scientist-labs/parsekit/HEAD/spec/parsekit/parser_spec.rb -------------------------------------------------------------------------------- /spec/parsekit/pdf_parser_spec.rb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scientist-labs/parsekit/HEAD/spec/parsekit/pdf_parser_spec.rb -------------------------------------------------------------------------------- /spec/parsekit/simple_parsing_spec.rb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scientist-labs/parsekit/HEAD/spec/parsekit/simple_parsing_spec.rb -------------------------------------------------------------------------------- /spec/parsekit/validation_helpers_spec.rb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scientist-labs/parsekit/HEAD/spec/parsekit/validation_helpers_spec.rb -------------------------------------------------------------------------------- /spec/parsekit_spec.rb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scientist-labs/parsekit/HEAD/spec/parsekit_spec.rb -------------------------------------------------------------------------------- /spec/spec_helper.rb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scientist-labs/parsekit/HEAD/spec/spec_helper.rb -------------------------------------------------------------------------------- /spec/support/shared_examples.rb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scientist-labs/parsekit/HEAD/spec/support/shared_examples.rb --------------------------------------------------------------------------------