├── .github └── FUNDING.yml ├── .gitignore ├── .gitmodules ├── DEBIAN ├── conffiles ├── control ├── postinst └── prerm ├── Dockerfile ├── LICENSE ├── docker-compose.test.yml ├── docker-compose.ubuntu.test.yml ├── docker-entrypoint.sh ├── etc ├── opensemanticsearch │ ├── blacklist │ │ ├── blacklist-url │ │ ├── blacklist-url-prefix │ │ ├── blacklist-url-regex │ │ ├── blacklist-url-suffix │ │ ├── enhance_extract_law │ │ │ └── blacklist-lawcode-if-no-clause │ │ ├── enhance_zip │ │ │ ├── blacklist-contenttype │ │ │ ├── blacklist-contenttype-prefix │ │ │ ├── blacklist-contenttype-regex │ │ │ ├── blacklist-contenttype-suffix │ │ │ ├── whitelist-contenttype │ │ │ ├── whitelist-contenttype-prefix │ │ │ ├── whitelist-contenttype-regex │ │ │ └── whitelist-contenttype-suffix │ │ ├── textanalysis │ │ │ ├── blacklist-fieldname │ │ │ ├── blacklist-fieldname-prefix │ │ │ └── blacklist-fieldname-suffix │ │ ├── whitelist-url │ │ ├── whitelist-url-prefix │ │ ├── whitelist-url-regex │ │ └── whitelist-url-suffix │ ├── connector-files │ ├── connector-web │ ├── enhancer-rdf │ ├── etl │ ├── facets │ ├── filemonitoring │ │ └── files │ ├── ocr │ │ └── dictionary.txt │ ├── regex │ │ ├── email.tsv │ │ ├── iban.tsv │ │ └── phone.tsv │ └── task_priorities └── systemd │ └── system │ ├── opensemanticetl-filemonitoring.service │ └── opensemanticetl.service ├── src └── opensemanticetl │ ├── __init__.py │ ├── clean_title.py │ ├── enhance_annotations.py │ ├── enhance_contenttype_group.py │ ├── enhance_csv.py │ ├── enhance_detect_language_tika_server.py │ ├── enhance_entity_linking.py │ ├── enhance_extract_email.py │ ├── enhance_extract_hashtags.py │ ├── enhance_extract_law.py │ ├── enhance_extract_money.py │ ├── enhance_extract_phone.py │ ├── enhance_extract_text_tika_server.py │ ├── enhance_file_mtime.py │ ├── enhance_file_size.py │ ├── enhance_html.py │ ├── enhance_mapping_id.py │ ├── enhance_mimetype.py │ ├── enhance_multilingual.py │ ├── enhance_ner_spacy.py │ ├── enhance_ner_stanford.py │ ├── enhance_ocr.py │ ├── enhance_path.py │ ├── enhance_pdf_ocr.py │ ├── enhance_pdf_page.py │ ├── enhance_pdf_page_preview.py │ ├── enhance_pst.py │ ├── enhance_rdf.py │ ├── enhance_rdf_annotations_by_http_request.py │ ├── enhance_regex.py │ ├── enhance_sentence_segmentation.py │ ├── enhance_warc.py │ ├── enhance_xml.py │ ├── enhance_xmp.py │ ├── enhance_zip.py │ ├── etl.py │ ├── etl_delete.py │ ├── etl_enrich.py │ ├── etl_file.py │ ├── etl_filedirectory.py │ ├── etl_filemonitoring.py │ ├── etl_hypothesis.py │ ├── etl_plugin_core.py │ ├── etl_rss.py │ ├── etl_sitemap.py │ ├── etl_sparql.py │ ├── etl_twitter_scraper.py │ ├── etl_web.py │ ├── etl_web_crawl.py │ ├── export_elasticsearch.py │ ├── export_json.py │ ├── export_neo4j.py │ ├── export_print.py │ ├── export_queue_files.py │ ├── export_solr.py │ ├── filter_blacklist.py │ ├── filter_file_not_modified.py │ ├── move_indexed_file.py │ ├── requirements.txt │ ├── tasks.py │ ├── test_enhance_detect_language_tika_server.py │ ├── test_enhance_extract_email.py │ ├── test_enhance_extract_law.py │ ├── test_enhance_extract_money.py │ ├── test_enhance_extract_text_tika_server.py │ ├── test_enhance_mapping_id.py │ ├── test_enhance_ner_spacy.py │ ├── test_enhance_path.py │ ├── test_enhance_pdf_ocr.py │ ├── test_enhance_regex.py │ ├── test_enhance_warc.py │ ├── test_etl_file.py │ ├── test_move_indexed_files.py │ └── testdata │ ├── README.md │ ├── Test_OCR_Image1.png │ ├── Test_OCR_Image2.jpg │ ├── example.warc │ ├── run_integrationtests.sh │ ├── run_tests.sh │ └── test.pdf └── usr └── bin ├── etl-delete ├── etl-enrich ├── etl-file ├── etl-filedirectory ├── etl-filemonitoring ├── etl-rss ├── etl-sitemap ├── etl-sparql ├── etl-twitter-scraper ├── etl-web ├── etl-web-crawl ├── etl_delete.py ├── etl_enrich.py ├── etl_file.py ├── etl_filedirectory.py ├── etl_filemonitoring.py ├── etl_rss.py ├── etl_sitemap.py ├── etl_sparql.py ├── etl_tasks ├── etl_twitter_scraper.py ├── etl_web.py ├── etl_web_crawl.py ├── opensemanticsearch-delete ├── opensemanticsearch-enrich ├── opensemanticsearch-filemonitoring ├── opensemanticsearch-index-dir ├── opensemanticsearch-index-file ├── opensemanticsearch-index-rss ├── opensemanticsearch-index-sitemap ├── opensemanticsearch-index-sparql ├── opensemanticsearch-index-twitter-scraper ├── opensemanticsearch-index-web ├── opensemanticsearch-index-web-crawl ├── solr-enrich ├── solr-index-dir ├── solr-index-file ├── solr-index-rss ├── solr-index-sitemap └── solr-index-web /.github/FUNDING.yml: -------------------------------------------------------------------------------- 1 | custom: ['https://www.paypal.me/MMandalka'] 2 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opensemanticsearch/open-semantic-etl/HEAD/.gitignore -------------------------------------------------------------------------------- /.gitmodules: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opensemanticsearch/open-semantic-etl/HEAD/.gitmodules -------------------------------------------------------------------------------- /DEBIAN/conffiles: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opensemanticsearch/open-semantic-etl/HEAD/DEBIAN/conffiles -------------------------------------------------------------------------------- /DEBIAN/control: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opensemanticsearch/open-semantic-etl/HEAD/DEBIAN/control -------------------------------------------------------------------------------- /DEBIAN/postinst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opensemanticsearch/open-semantic-etl/HEAD/DEBIAN/postinst -------------------------------------------------------------------------------- /DEBIAN/prerm: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opensemanticsearch/open-semantic-etl/HEAD/DEBIAN/prerm -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opensemanticsearch/open-semantic-etl/HEAD/Dockerfile -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opensemanticsearch/open-semantic-etl/HEAD/LICENSE -------------------------------------------------------------------------------- /docker-compose.test.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opensemanticsearch/open-semantic-etl/HEAD/docker-compose.test.yml -------------------------------------------------------------------------------- /docker-compose.ubuntu.test.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opensemanticsearch/open-semantic-etl/HEAD/docker-compose.ubuntu.test.yml -------------------------------------------------------------------------------- /docker-entrypoint.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opensemanticsearch/open-semantic-etl/HEAD/docker-entrypoint.sh -------------------------------------------------------------------------------- /etc/opensemanticsearch/blacklist/blacklist-url: -------------------------------------------------------------------------------- 1 | # Blacklist of URLs 2 | -------------------------------------------------------------------------------- /etc/opensemanticsearch/blacklist/blacklist-url-prefix: -------------------------------------------------------------------------------- 1 | # Blacklist of URL Prefixes like domains or paths 2 | -------------------------------------------------------------------------------- /etc/opensemanticsearch/blacklist/blacklist-url-regex: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opensemanticsearch/open-semantic-etl/HEAD/etc/opensemanticsearch/blacklist/blacklist-url-regex -------------------------------------------------------------------------------- /etc/opensemanticsearch/blacklist/blacklist-url-suffix: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opensemanticsearch/open-semantic-etl/HEAD/etc/opensemanticsearch/blacklist/blacklist-url-suffix -------------------------------------------------------------------------------- /etc/opensemanticsearch/blacklist/enhance_extract_law/blacklist-lawcode-if-no-clause: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opensemanticsearch/open-semantic-etl/HEAD/etc/opensemanticsearch/blacklist/enhance_extract_law/blacklist-lawcode-if-no-clause -------------------------------------------------------------------------------- /etc/opensemanticsearch/blacklist/enhance_zip/blacklist-contenttype: -------------------------------------------------------------------------------- 1 | # Blacklist of contenttypes 2 | -------------------------------------------------------------------------------- /etc/opensemanticsearch/blacklist/enhance_zip/blacklist-contenttype-prefix: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opensemanticsearch/open-semantic-etl/HEAD/etc/opensemanticsearch/blacklist/enhance_zip/blacklist-contenttype-prefix -------------------------------------------------------------------------------- /etc/opensemanticsearch/blacklist/enhance_zip/blacklist-contenttype-regex: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opensemanticsearch/open-semantic-etl/HEAD/etc/opensemanticsearch/blacklist/enhance_zip/blacklist-contenttype-regex -------------------------------------------------------------------------------- /etc/opensemanticsearch/blacklist/enhance_zip/blacklist-contenttype-suffix: -------------------------------------------------------------------------------- 1 | # Blacklist of contenttype suffixes 2 | -------------------------------------------------------------------------------- /etc/opensemanticsearch/blacklist/enhance_zip/whitelist-contenttype: -------------------------------------------------------------------------------- 1 | # Whitelist of contenttypes 2 | -------------------------------------------------------------------------------- /etc/opensemanticsearch/blacklist/enhance_zip/whitelist-contenttype-prefix: -------------------------------------------------------------------------------- 1 | # Whitelist of contenttype prefixes 2 | -------------------------------------------------------------------------------- /etc/opensemanticsearch/blacklist/enhance_zip/whitelist-contenttype-regex: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opensemanticsearch/open-semantic-etl/HEAD/etc/opensemanticsearch/blacklist/enhance_zip/whitelist-contenttype-regex -------------------------------------------------------------------------------- /etc/opensemanticsearch/blacklist/enhance_zip/whitelist-contenttype-suffix: -------------------------------------------------------------------------------- 1 | # Whitelist of contenttype suffixes 2 | -------------------------------------------------------------------------------- /etc/opensemanticsearch/blacklist/textanalysis/blacklist-fieldname: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opensemanticsearch/open-semantic-etl/HEAD/etc/opensemanticsearch/blacklist/textanalysis/blacklist-fieldname -------------------------------------------------------------------------------- /etc/opensemanticsearch/blacklist/textanalysis/blacklist-fieldname-prefix: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opensemanticsearch/open-semantic-etl/HEAD/etc/opensemanticsearch/blacklist/textanalysis/blacklist-fieldname-prefix -------------------------------------------------------------------------------- /etc/opensemanticsearch/blacklist/textanalysis/blacklist-fieldname-suffix: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opensemanticsearch/open-semantic-etl/HEAD/etc/opensemanticsearch/blacklist/textanalysis/blacklist-fieldname-suffix -------------------------------------------------------------------------------- /etc/opensemanticsearch/blacklist/whitelist-url: -------------------------------------------------------------------------------- 1 | # Whitelist of URLs 2 | -------------------------------------------------------------------------------- /etc/opensemanticsearch/blacklist/whitelist-url-prefix: -------------------------------------------------------------------------------- 1 | # Whitelist of URL Prefixes like domains or paths 2 | -------------------------------------------------------------------------------- /etc/opensemanticsearch/blacklist/whitelist-url-regex: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opensemanticsearch/open-semantic-etl/HEAD/etc/opensemanticsearch/blacklist/whitelist-url-regex -------------------------------------------------------------------------------- /etc/opensemanticsearch/blacklist/whitelist-url-suffix: -------------------------------------------------------------------------------- 1 | # Whitelist of URL Suffixes like file endings 2 | -------------------------------------------------------------------------------- /etc/opensemanticsearch/connector-files: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opensemanticsearch/open-semantic-etl/HEAD/etc/opensemanticsearch/connector-files -------------------------------------------------------------------------------- /etc/opensemanticsearch/connector-web: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opensemanticsearch/open-semantic-etl/HEAD/etc/opensemanticsearch/connector-web -------------------------------------------------------------------------------- /etc/opensemanticsearch/enhancer-rdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opensemanticsearch/open-semantic-etl/HEAD/etc/opensemanticsearch/enhancer-rdf -------------------------------------------------------------------------------- /etc/opensemanticsearch/etl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opensemanticsearch/open-semantic-etl/HEAD/etc/opensemanticsearch/etl -------------------------------------------------------------------------------- /etc/opensemanticsearch/facets: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opensemanticsearch/open-semantic-etl/HEAD/etc/opensemanticsearch/facets -------------------------------------------------------------------------------- /etc/opensemanticsearch/filemonitoring/files: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /etc/opensemanticsearch/ocr/dictionary.txt: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /etc/opensemanticsearch/regex/email.tsv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opensemanticsearch/open-semantic-etl/HEAD/etc/opensemanticsearch/regex/email.tsv -------------------------------------------------------------------------------- /etc/opensemanticsearch/regex/iban.tsv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opensemanticsearch/open-semantic-etl/HEAD/etc/opensemanticsearch/regex/iban.tsv -------------------------------------------------------------------------------- /etc/opensemanticsearch/regex/phone.tsv: -------------------------------------------------------------------------------- 1 | [\+\(]?[1-9][0-9 .\-\(\)]{8,}[0-9] phone_ss 2 | -------------------------------------------------------------------------------- /etc/opensemanticsearch/task_priorities: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opensemanticsearch/open-semantic-etl/HEAD/etc/opensemanticsearch/task_priorities -------------------------------------------------------------------------------- /etc/systemd/system/opensemanticetl-filemonitoring.service: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opensemanticsearch/open-semantic-etl/HEAD/etc/systemd/system/opensemanticetl-filemonitoring.service -------------------------------------------------------------------------------- /etc/systemd/system/opensemanticetl.service: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opensemanticsearch/open-semantic-etl/HEAD/etc/systemd/system/opensemanticetl.service -------------------------------------------------------------------------------- /src/opensemanticetl/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /src/opensemanticetl/clean_title.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opensemanticsearch/open-semantic-etl/HEAD/src/opensemanticetl/clean_title.py -------------------------------------------------------------------------------- /src/opensemanticetl/enhance_annotations.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opensemanticsearch/open-semantic-etl/HEAD/src/opensemanticetl/enhance_annotations.py -------------------------------------------------------------------------------- /src/opensemanticetl/enhance_contenttype_group.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opensemanticsearch/open-semantic-etl/HEAD/src/opensemanticetl/enhance_contenttype_group.py -------------------------------------------------------------------------------- /src/opensemanticetl/enhance_csv.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opensemanticsearch/open-semantic-etl/HEAD/src/opensemanticetl/enhance_csv.py -------------------------------------------------------------------------------- /src/opensemanticetl/enhance_detect_language_tika_server.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opensemanticsearch/open-semantic-etl/HEAD/src/opensemanticetl/enhance_detect_language_tika_server.py -------------------------------------------------------------------------------- /src/opensemanticetl/enhance_entity_linking.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opensemanticsearch/open-semantic-etl/HEAD/src/opensemanticetl/enhance_entity_linking.py -------------------------------------------------------------------------------- /src/opensemanticetl/enhance_extract_email.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opensemanticsearch/open-semantic-etl/HEAD/src/opensemanticetl/enhance_extract_email.py -------------------------------------------------------------------------------- /src/opensemanticetl/enhance_extract_hashtags.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opensemanticsearch/open-semantic-etl/HEAD/src/opensemanticetl/enhance_extract_hashtags.py -------------------------------------------------------------------------------- /src/opensemanticetl/enhance_extract_law.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opensemanticsearch/open-semantic-etl/HEAD/src/opensemanticetl/enhance_extract_law.py -------------------------------------------------------------------------------- /src/opensemanticetl/enhance_extract_money.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opensemanticsearch/open-semantic-etl/HEAD/src/opensemanticetl/enhance_extract_money.py -------------------------------------------------------------------------------- /src/opensemanticetl/enhance_extract_phone.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opensemanticsearch/open-semantic-etl/HEAD/src/opensemanticetl/enhance_extract_phone.py -------------------------------------------------------------------------------- /src/opensemanticetl/enhance_extract_text_tika_server.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opensemanticsearch/open-semantic-etl/HEAD/src/opensemanticetl/enhance_extract_text_tika_server.py -------------------------------------------------------------------------------- /src/opensemanticetl/enhance_file_mtime.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opensemanticsearch/open-semantic-etl/HEAD/src/opensemanticetl/enhance_file_mtime.py -------------------------------------------------------------------------------- /src/opensemanticetl/enhance_file_size.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opensemanticsearch/open-semantic-etl/HEAD/src/opensemanticetl/enhance_file_size.py -------------------------------------------------------------------------------- /src/opensemanticetl/enhance_html.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opensemanticsearch/open-semantic-etl/HEAD/src/opensemanticetl/enhance_html.py -------------------------------------------------------------------------------- /src/opensemanticetl/enhance_mapping_id.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opensemanticsearch/open-semantic-etl/HEAD/src/opensemanticetl/enhance_mapping_id.py -------------------------------------------------------------------------------- /src/opensemanticetl/enhance_mimetype.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opensemanticsearch/open-semantic-etl/HEAD/src/opensemanticetl/enhance_mimetype.py -------------------------------------------------------------------------------- /src/opensemanticetl/enhance_multilingual.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opensemanticsearch/open-semantic-etl/HEAD/src/opensemanticetl/enhance_multilingual.py -------------------------------------------------------------------------------- /src/opensemanticetl/enhance_ner_spacy.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opensemanticsearch/open-semantic-etl/HEAD/src/opensemanticetl/enhance_ner_spacy.py -------------------------------------------------------------------------------- /src/opensemanticetl/enhance_ner_stanford.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opensemanticsearch/open-semantic-etl/HEAD/src/opensemanticetl/enhance_ner_stanford.py -------------------------------------------------------------------------------- /src/opensemanticetl/enhance_ocr.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opensemanticsearch/open-semantic-etl/HEAD/src/opensemanticetl/enhance_ocr.py -------------------------------------------------------------------------------- /src/opensemanticetl/enhance_path.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opensemanticsearch/open-semantic-etl/HEAD/src/opensemanticetl/enhance_path.py -------------------------------------------------------------------------------- /src/opensemanticetl/enhance_pdf_ocr.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opensemanticsearch/open-semantic-etl/HEAD/src/opensemanticetl/enhance_pdf_ocr.py -------------------------------------------------------------------------------- /src/opensemanticetl/enhance_pdf_page.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opensemanticsearch/open-semantic-etl/HEAD/src/opensemanticetl/enhance_pdf_page.py -------------------------------------------------------------------------------- /src/opensemanticetl/enhance_pdf_page_preview.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opensemanticsearch/open-semantic-etl/HEAD/src/opensemanticetl/enhance_pdf_page_preview.py -------------------------------------------------------------------------------- /src/opensemanticetl/enhance_pst.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opensemanticsearch/open-semantic-etl/HEAD/src/opensemanticetl/enhance_pst.py -------------------------------------------------------------------------------- /src/opensemanticetl/enhance_rdf.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opensemanticsearch/open-semantic-etl/HEAD/src/opensemanticetl/enhance_rdf.py -------------------------------------------------------------------------------- /src/opensemanticetl/enhance_rdf_annotations_by_http_request.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opensemanticsearch/open-semantic-etl/HEAD/src/opensemanticetl/enhance_rdf_annotations_by_http_request.py -------------------------------------------------------------------------------- /src/opensemanticetl/enhance_regex.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opensemanticsearch/open-semantic-etl/HEAD/src/opensemanticetl/enhance_regex.py -------------------------------------------------------------------------------- /src/opensemanticetl/enhance_sentence_segmentation.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opensemanticsearch/open-semantic-etl/HEAD/src/opensemanticetl/enhance_sentence_segmentation.py -------------------------------------------------------------------------------- /src/opensemanticetl/enhance_warc.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opensemanticsearch/open-semantic-etl/HEAD/src/opensemanticetl/enhance_warc.py -------------------------------------------------------------------------------- /src/opensemanticetl/enhance_xml.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opensemanticsearch/open-semantic-etl/HEAD/src/opensemanticetl/enhance_xml.py -------------------------------------------------------------------------------- /src/opensemanticetl/enhance_xmp.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opensemanticsearch/open-semantic-etl/HEAD/src/opensemanticetl/enhance_xmp.py -------------------------------------------------------------------------------- /src/opensemanticetl/enhance_zip.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opensemanticsearch/open-semantic-etl/HEAD/src/opensemanticetl/enhance_zip.py -------------------------------------------------------------------------------- /src/opensemanticetl/etl.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opensemanticsearch/open-semantic-etl/HEAD/src/opensemanticetl/etl.py -------------------------------------------------------------------------------- /src/opensemanticetl/etl_delete.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opensemanticsearch/open-semantic-etl/HEAD/src/opensemanticetl/etl_delete.py -------------------------------------------------------------------------------- /src/opensemanticetl/etl_enrich.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opensemanticsearch/open-semantic-etl/HEAD/src/opensemanticetl/etl_enrich.py -------------------------------------------------------------------------------- /src/opensemanticetl/etl_file.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opensemanticsearch/open-semantic-etl/HEAD/src/opensemanticetl/etl_file.py -------------------------------------------------------------------------------- /src/opensemanticetl/etl_filedirectory.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opensemanticsearch/open-semantic-etl/HEAD/src/opensemanticetl/etl_filedirectory.py -------------------------------------------------------------------------------- /src/opensemanticetl/etl_filemonitoring.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opensemanticsearch/open-semantic-etl/HEAD/src/opensemanticetl/etl_filemonitoring.py -------------------------------------------------------------------------------- /src/opensemanticetl/etl_hypothesis.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opensemanticsearch/open-semantic-etl/HEAD/src/opensemanticetl/etl_hypothesis.py -------------------------------------------------------------------------------- /src/opensemanticetl/etl_plugin_core.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opensemanticsearch/open-semantic-etl/HEAD/src/opensemanticetl/etl_plugin_core.py -------------------------------------------------------------------------------- /src/opensemanticetl/etl_rss.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opensemanticsearch/open-semantic-etl/HEAD/src/opensemanticetl/etl_rss.py -------------------------------------------------------------------------------- /src/opensemanticetl/etl_sitemap.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opensemanticsearch/open-semantic-etl/HEAD/src/opensemanticetl/etl_sitemap.py -------------------------------------------------------------------------------- /src/opensemanticetl/etl_sparql.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opensemanticsearch/open-semantic-etl/HEAD/src/opensemanticetl/etl_sparql.py -------------------------------------------------------------------------------- /src/opensemanticetl/etl_twitter_scraper.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opensemanticsearch/open-semantic-etl/HEAD/src/opensemanticetl/etl_twitter_scraper.py -------------------------------------------------------------------------------- /src/opensemanticetl/etl_web.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opensemanticsearch/open-semantic-etl/HEAD/src/opensemanticetl/etl_web.py -------------------------------------------------------------------------------- /src/opensemanticetl/etl_web_crawl.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opensemanticsearch/open-semantic-etl/HEAD/src/opensemanticetl/etl_web_crawl.py -------------------------------------------------------------------------------- /src/opensemanticetl/export_elasticsearch.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opensemanticsearch/open-semantic-etl/HEAD/src/opensemanticetl/export_elasticsearch.py -------------------------------------------------------------------------------- /src/opensemanticetl/export_json.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opensemanticsearch/open-semantic-etl/HEAD/src/opensemanticetl/export_json.py -------------------------------------------------------------------------------- /src/opensemanticetl/export_neo4j.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opensemanticsearch/open-semantic-etl/HEAD/src/opensemanticetl/export_neo4j.py -------------------------------------------------------------------------------- /src/opensemanticetl/export_print.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opensemanticsearch/open-semantic-etl/HEAD/src/opensemanticetl/export_print.py -------------------------------------------------------------------------------- /src/opensemanticetl/export_queue_files.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opensemanticsearch/open-semantic-etl/HEAD/src/opensemanticetl/export_queue_files.py -------------------------------------------------------------------------------- /src/opensemanticetl/export_solr.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opensemanticsearch/open-semantic-etl/HEAD/src/opensemanticetl/export_solr.py -------------------------------------------------------------------------------- /src/opensemanticetl/filter_blacklist.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opensemanticsearch/open-semantic-etl/HEAD/src/opensemanticetl/filter_blacklist.py -------------------------------------------------------------------------------- /src/opensemanticetl/filter_file_not_modified.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opensemanticsearch/open-semantic-etl/HEAD/src/opensemanticetl/filter_file_not_modified.py -------------------------------------------------------------------------------- /src/opensemanticetl/move_indexed_file.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opensemanticsearch/open-semantic-etl/HEAD/src/opensemanticetl/move_indexed_file.py -------------------------------------------------------------------------------- /src/opensemanticetl/requirements.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opensemanticsearch/open-semantic-etl/HEAD/src/opensemanticetl/requirements.txt -------------------------------------------------------------------------------- /src/opensemanticetl/tasks.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opensemanticsearch/open-semantic-etl/HEAD/src/opensemanticetl/tasks.py -------------------------------------------------------------------------------- /src/opensemanticetl/test_enhance_detect_language_tika_server.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opensemanticsearch/open-semantic-etl/HEAD/src/opensemanticetl/test_enhance_detect_language_tika_server.py -------------------------------------------------------------------------------- /src/opensemanticetl/test_enhance_extract_email.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opensemanticsearch/open-semantic-etl/HEAD/src/opensemanticetl/test_enhance_extract_email.py -------------------------------------------------------------------------------- /src/opensemanticetl/test_enhance_extract_law.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opensemanticsearch/open-semantic-etl/HEAD/src/opensemanticetl/test_enhance_extract_law.py -------------------------------------------------------------------------------- /src/opensemanticetl/test_enhance_extract_money.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opensemanticsearch/open-semantic-etl/HEAD/src/opensemanticetl/test_enhance_extract_money.py -------------------------------------------------------------------------------- /src/opensemanticetl/test_enhance_extract_text_tika_server.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opensemanticsearch/open-semantic-etl/HEAD/src/opensemanticetl/test_enhance_extract_text_tika_server.py -------------------------------------------------------------------------------- /src/opensemanticetl/test_enhance_mapping_id.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opensemanticsearch/open-semantic-etl/HEAD/src/opensemanticetl/test_enhance_mapping_id.py -------------------------------------------------------------------------------- /src/opensemanticetl/test_enhance_ner_spacy.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opensemanticsearch/open-semantic-etl/HEAD/src/opensemanticetl/test_enhance_ner_spacy.py -------------------------------------------------------------------------------- /src/opensemanticetl/test_enhance_path.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opensemanticsearch/open-semantic-etl/HEAD/src/opensemanticetl/test_enhance_path.py -------------------------------------------------------------------------------- /src/opensemanticetl/test_enhance_pdf_ocr.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opensemanticsearch/open-semantic-etl/HEAD/src/opensemanticetl/test_enhance_pdf_ocr.py -------------------------------------------------------------------------------- /src/opensemanticetl/test_enhance_regex.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opensemanticsearch/open-semantic-etl/HEAD/src/opensemanticetl/test_enhance_regex.py -------------------------------------------------------------------------------- /src/opensemanticetl/test_enhance_warc.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opensemanticsearch/open-semantic-etl/HEAD/src/opensemanticetl/test_enhance_warc.py -------------------------------------------------------------------------------- /src/opensemanticetl/test_etl_file.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opensemanticsearch/open-semantic-etl/HEAD/src/opensemanticetl/test_etl_file.py -------------------------------------------------------------------------------- /src/opensemanticetl/test_move_indexed_files.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opensemanticsearch/open-semantic-etl/HEAD/src/opensemanticetl/test_move_indexed_files.py -------------------------------------------------------------------------------- /src/opensemanticetl/testdata/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opensemanticsearch/open-semantic-etl/HEAD/src/opensemanticetl/testdata/README.md -------------------------------------------------------------------------------- /src/opensemanticetl/testdata/Test_OCR_Image1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opensemanticsearch/open-semantic-etl/HEAD/src/opensemanticetl/testdata/Test_OCR_Image1.png -------------------------------------------------------------------------------- /src/opensemanticetl/testdata/Test_OCR_Image2.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opensemanticsearch/open-semantic-etl/HEAD/src/opensemanticetl/testdata/Test_OCR_Image2.jpg -------------------------------------------------------------------------------- /src/opensemanticetl/testdata/example.warc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opensemanticsearch/open-semantic-etl/HEAD/src/opensemanticetl/testdata/example.warc -------------------------------------------------------------------------------- /src/opensemanticetl/testdata/run_integrationtests.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opensemanticsearch/open-semantic-etl/HEAD/src/opensemanticetl/testdata/run_integrationtests.sh -------------------------------------------------------------------------------- /src/opensemanticetl/testdata/run_tests.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opensemanticsearch/open-semantic-etl/HEAD/src/opensemanticetl/testdata/run_tests.sh -------------------------------------------------------------------------------- /src/opensemanticetl/testdata/test.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opensemanticsearch/open-semantic-etl/HEAD/src/opensemanticetl/testdata/test.pdf -------------------------------------------------------------------------------- /usr/bin/etl-delete: -------------------------------------------------------------------------------- 1 | etl_delete.py -------------------------------------------------------------------------------- /usr/bin/etl-enrich: -------------------------------------------------------------------------------- 1 | etl_enrich.py -------------------------------------------------------------------------------- /usr/bin/etl-file: -------------------------------------------------------------------------------- 1 | etl_file.py -------------------------------------------------------------------------------- /usr/bin/etl-filedirectory: -------------------------------------------------------------------------------- 1 | etl_filedirectory.py -------------------------------------------------------------------------------- /usr/bin/etl-filemonitoring: -------------------------------------------------------------------------------- 1 | etl_filemonitoring.py -------------------------------------------------------------------------------- /usr/bin/etl-rss: -------------------------------------------------------------------------------- 1 | etl_rss.py -------------------------------------------------------------------------------- /usr/bin/etl-sitemap: -------------------------------------------------------------------------------- 1 | etl_sitemap.py -------------------------------------------------------------------------------- /usr/bin/etl-sparql: -------------------------------------------------------------------------------- 1 | etl_sparql.py -------------------------------------------------------------------------------- /usr/bin/etl-twitter-scraper: -------------------------------------------------------------------------------- 1 | etl_twitter_scraper.py -------------------------------------------------------------------------------- /usr/bin/etl-web: -------------------------------------------------------------------------------- 1 | etl_web.py -------------------------------------------------------------------------------- /usr/bin/etl-web-crawl: -------------------------------------------------------------------------------- 1 | etl_web_crawl.py -------------------------------------------------------------------------------- /usr/bin/etl_delete.py: -------------------------------------------------------------------------------- 1 | ../lib/python3/dist-packages/opensemanticetl/etl_delete.py -------------------------------------------------------------------------------- /usr/bin/etl_enrich.py: -------------------------------------------------------------------------------- 1 | ../lib/python3/dist-packages/opensemanticetl/etl_enrich.py -------------------------------------------------------------------------------- /usr/bin/etl_file.py: -------------------------------------------------------------------------------- 1 | ../lib/python3/dist-packages/opensemanticetl/etl_file.py -------------------------------------------------------------------------------- /usr/bin/etl_filedirectory.py: -------------------------------------------------------------------------------- 1 | ../lib/python3/dist-packages/opensemanticetl/etl_filedirectory.py -------------------------------------------------------------------------------- /usr/bin/etl_filemonitoring.py: -------------------------------------------------------------------------------- 1 | ../lib/python3/dist-packages/opensemanticetl/etl_filemonitoring.py -------------------------------------------------------------------------------- /usr/bin/etl_rss.py: -------------------------------------------------------------------------------- 1 | ../lib/python3/dist-packages/opensemanticetl/etl_rss.py -------------------------------------------------------------------------------- /usr/bin/etl_sitemap.py: -------------------------------------------------------------------------------- 1 | ../lib/python3/dist-packages/opensemanticetl/etl_sitemap.py -------------------------------------------------------------------------------- /usr/bin/etl_sparql.py: -------------------------------------------------------------------------------- 1 | ../lib/python3/dist-packages/opensemanticetl/etl_sparql.py -------------------------------------------------------------------------------- /usr/bin/etl_tasks: -------------------------------------------------------------------------------- 1 | ../lib/python3/dist-packages/opensemanticetl/tasks.py -------------------------------------------------------------------------------- /usr/bin/etl_twitter_scraper.py: -------------------------------------------------------------------------------- 1 | ../lib/python3/dist-packages/opensemanticetl/etl_twitter_scraper.py -------------------------------------------------------------------------------- /usr/bin/etl_web.py: -------------------------------------------------------------------------------- 1 | ../lib/python3/dist-packages/opensemanticetl/etl_web.py -------------------------------------------------------------------------------- /usr/bin/etl_web_crawl.py: -------------------------------------------------------------------------------- 1 | ../lib/python3/dist-packages/opensemanticetl/etl_web_crawl.py -------------------------------------------------------------------------------- /usr/bin/opensemanticsearch-delete: -------------------------------------------------------------------------------- 1 | etl-delete -------------------------------------------------------------------------------- /usr/bin/opensemanticsearch-enrich: -------------------------------------------------------------------------------- 1 | etl-enrich -------------------------------------------------------------------------------- /usr/bin/opensemanticsearch-filemonitoring: -------------------------------------------------------------------------------- 1 | etl_filemonitoring.py -------------------------------------------------------------------------------- /usr/bin/opensemanticsearch-index-dir: -------------------------------------------------------------------------------- 1 | etl-filedirectory -------------------------------------------------------------------------------- /usr/bin/opensemanticsearch-index-file: -------------------------------------------------------------------------------- 1 | etl-file -------------------------------------------------------------------------------- /usr/bin/opensemanticsearch-index-rss: -------------------------------------------------------------------------------- 1 | etl-rss -------------------------------------------------------------------------------- /usr/bin/opensemanticsearch-index-sitemap: -------------------------------------------------------------------------------- 1 | etl-sitemap -------------------------------------------------------------------------------- /usr/bin/opensemanticsearch-index-sparql: -------------------------------------------------------------------------------- 1 | etl-sparql -------------------------------------------------------------------------------- /usr/bin/opensemanticsearch-index-twitter-scraper: -------------------------------------------------------------------------------- 1 | etl-twitter-scraper -------------------------------------------------------------------------------- /usr/bin/opensemanticsearch-index-web: -------------------------------------------------------------------------------- 1 | etl-web -------------------------------------------------------------------------------- /usr/bin/opensemanticsearch-index-web-crawl: -------------------------------------------------------------------------------- 1 | etl-web-crawl -------------------------------------------------------------------------------- /usr/bin/solr-enrich: -------------------------------------------------------------------------------- 1 | opensemanticsearch-enrich -------------------------------------------------------------------------------- /usr/bin/solr-index-dir: -------------------------------------------------------------------------------- 1 | opensemanticsearch-index-dir -------------------------------------------------------------------------------- /usr/bin/solr-index-file: -------------------------------------------------------------------------------- 1 | opensemanticsearch-index-file -------------------------------------------------------------------------------- /usr/bin/solr-index-rss: -------------------------------------------------------------------------------- 1 | opensemanticsearch-index-rss -------------------------------------------------------------------------------- /usr/bin/solr-index-sitemap: -------------------------------------------------------------------------------- 1 | opensemanticsearch-index-sitemap -------------------------------------------------------------------------------- /usr/bin/solr-index-web: -------------------------------------------------------------------------------- 1 | opensemanticsearch-index-web --------------------------------------------------------------------------------