├── .gitignore ├── LICENSE.md ├── README.md ├── application ├── extraction │ ├── Dockerfile │ ├── models │ │ └── models.py │ ├── requirements.txt │ ├── service │ │ ├── extraction_handler.py │ │ ├── extraction_worker.py │ │ └── processing_handler.py │ └── start_extraction.py ├── pipeline │ ├── Dockerfile │ ├── models │ │ └── models.py │ ├── requirements.txt │ ├── routes │ │ └── pipeline_routes.py │ ├── service │ │ └── pipeline_service.py │ └── start_pipeline.py └── transformation │ ├── Dockerfile │ ├── models │ └── models.py │ ├── requirements.txt │ ├── service │ ├── transformation_handler.py │ └── transformation_worker.py │ └── start_transformation.py ├── common ├── agents │ ├── agent_prompt_enums.py │ └── prs_agent.py ├── api │ └── 3.1.0-marly-spec.yml ├── destinations │ ├── base │ │ └── base_destination.py │ ├── destination_factory.py │ ├── enums │ │ └── destination_enums.py │ └── sqlite_destination.py ├── models │ ├── azure_model.py │ ├── base │ │ └── base_model.py │ ├── cerebras_model.py │ ├── enums │ │ └── model_enums.py │ ├── groq_model.py │ ├── mistral_model.py │ ├── model_factory.py │ └── openai_model.py ├── prompts │ └── prompt_enums.py ├── redis │ └── redis_config.py ├── sources │ ├── base │ │ └── base_source.py │ ├── enums │ │ └── source_enums.py │ ├── local_fs_source.py │ ├── s3_source.py │ └── source_factory.py └── text_extraction │ └── text_extractor.py ├── docker-compose.yml ├── examples ├── ai-workers │ └── ai-sdr │ │ ├── auth │ │ └── anon_helper.py │ │ ├── contacts.db │ │ ├── main.py │ │ ├── output_source │ │ └── sql_helper.py │ │ └── transformation │ │ └── marly_helper.py ├── example_files │ ├── lacers.pdf │ └── lacers_reduced.pdf ├── notebooks │ ├── autogen_example │ │ ├── OAI_CONFIG_LIST.json │ │ ├── autogen.ipynb │ │ ├── lacers_reduced.pdf │ │ └── plot.png │ └── langgraph_example │ │ ├── data_loading_workflow.ipynb │ │ ├── diagram.jpeg │ │ ├── lacers_reduced.pdf │ │ ├── notebooklm_workflow.ipynb │ │ ├── notebooklm_workflow.jpeg │ │ ├── pdf_table_to_chart_workflow.ipynb │ │ └── wkflow.jpeg └── scripts │ ├── api_example.py │ ├── azure_example.py │ ├── cerebras_example.py │ ├── data_loading_example.py │ ├── groq_example.py │ ├── markdown_example.py │ ├── mistral_example.py │ ├── non_marly_examples │ ├── lacers_reduced.pdf │ └── llamaindex_pinecone.py │ ├── requirements.txt │ ├── s3_example.py │ └── web_and_document_example.py ├── requirements.txt └── start-oe.sh /.gitignore: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/velocitybolt/open-extract/HEAD/.gitignore -------------------------------------------------------------------------------- /LICENSE.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/velocitybolt/open-extract/HEAD/LICENSE.md -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/velocitybolt/open-extract/HEAD/README.md -------------------------------------------------------------------------------- /application/extraction/Dockerfile: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/velocitybolt/open-extract/HEAD/application/extraction/Dockerfile -------------------------------------------------------------------------------- /application/extraction/models/models.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/velocitybolt/open-extract/HEAD/application/extraction/models/models.py -------------------------------------------------------------------------------- /application/extraction/requirements.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/velocitybolt/open-extract/HEAD/application/extraction/requirements.txt -------------------------------------------------------------------------------- /application/extraction/service/extraction_handler.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/velocitybolt/open-extract/HEAD/application/extraction/service/extraction_handler.py -------------------------------------------------------------------------------- /application/extraction/service/extraction_worker.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/velocitybolt/open-extract/HEAD/application/extraction/service/extraction_worker.py -------------------------------------------------------------------------------- /application/extraction/service/processing_handler.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/velocitybolt/open-extract/HEAD/application/extraction/service/processing_handler.py -------------------------------------------------------------------------------- /application/extraction/start_extraction.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/velocitybolt/open-extract/HEAD/application/extraction/start_extraction.py -------------------------------------------------------------------------------- /application/pipeline/Dockerfile: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/velocitybolt/open-extract/HEAD/application/pipeline/Dockerfile -------------------------------------------------------------------------------- /application/pipeline/models/models.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/velocitybolt/open-extract/HEAD/application/pipeline/models/models.py -------------------------------------------------------------------------------- /application/pipeline/requirements.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/velocitybolt/open-extract/HEAD/application/pipeline/requirements.txt -------------------------------------------------------------------------------- /application/pipeline/routes/pipeline_routes.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/velocitybolt/open-extract/HEAD/application/pipeline/routes/pipeline_routes.py -------------------------------------------------------------------------------- /application/pipeline/service/pipeline_service.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/velocitybolt/open-extract/HEAD/application/pipeline/service/pipeline_service.py -------------------------------------------------------------------------------- /application/pipeline/start_pipeline.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/velocitybolt/open-extract/HEAD/application/pipeline/start_pipeline.py -------------------------------------------------------------------------------- /application/transformation/Dockerfile: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/velocitybolt/open-extract/HEAD/application/transformation/Dockerfile -------------------------------------------------------------------------------- /application/transformation/models/models.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/velocitybolt/open-extract/HEAD/application/transformation/models/models.py -------------------------------------------------------------------------------- /application/transformation/requirements.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/velocitybolt/open-extract/HEAD/application/transformation/requirements.txt -------------------------------------------------------------------------------- /application/transformation/service/transformation_handler.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/velocitybolt/open-extract/HEAD/application/transformation/service/transformation_handler.py -------------------------------------------------------------------------------- /application/transformation/service/transformation_worker.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/velocitybolt/open-extract/HEAD/application/transformation/service/transformation_worker.py -------------------------------------------------------------------------------- /application/transformation/start_transformation.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/velocitybolt/open-extract/HEAD/application/transformation/start_transformation.py -------------------------------------------------------------------------------- /common/agents/agent_prompt_enums.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/velocitybolt/open-extract/HEAD/common/agents/agent_prompt_enums.py -------------------------------------------------------------------------------- /common/agents/prs_agent.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/velocitybolt/open-extract/HEAD/common/agents/prs_agent.py -------------------------------------------------------------------------------- /common/api/3.1.0-marly-spec.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/velocitybolt/open-extract/HEAD/common/api/3.1.0-marly-spec.yml -------------------------------------------------------------------------------- /common/destinations/base/base_destination.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/velocitybolt/open-extract/HEAD/common/destinations/base/base_destination.py -------------------------------------------------------------------------------- /common/destinations/destination_factory.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/velocitybolt/open-extract/HEAD/common/destinations/destination_factory.py -------------------------------------------------------------------------------- /common/destinations/enums/destination_enums.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/velocitybolt/open-extract/HEAD/common/destinations/enums/destination_enums.py -------------------------------------------------------------------------------- /common/destinations/sqlite_destination.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/velocitybolt/open-extract/HEAD/common/destinations/sqlite_destination.py -------------------------------------------------------------------------------- /common/models/azure_model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/velocitybolt/open-extract/HEAD/common/models/azure_model.py -------------------------------------------------------------------------------- /common/models/base/base_model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/velocitybolt/open-extract/HEAD/common/models/base/base_model.py -------------------------------------------------------------------------------- /common/models/cerebras_model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/velocitybolt/open-extract/HEAD/common/models/cerebras_model.py -------------------------------------------------------------------------------- /common/models/enums/model_enums.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/velocitybolt/open-extract/HEAD/common/models/enums/model_enums.py -------------------------------------------------------------------------------- /common/models/groq_model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/velocitybolt/open-extract/HEAD/common/models/groq_model.py -------------------------------------------------------------------------------- /common/models/mistral_model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/velocitybolt/open-extract/HEAD/common/models/mistral_model.py -------------------------------------------------------------------------------- /common/models/model_factory.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/velocitybolt/open-extract/HEAD/common/models/model_factory.py -------------------------------------------------------------------------------- /common/models/openai_model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/velocitybolt/open-extract/HEAD/common/models/openai_model.py -------------------------------------------------------------------------------- /common/prompts/prompt_enums.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/velocitybolt/open-extract/HEAD/common/prompts/prompt_enums.py -------------------------------------------------------------------------------- /common/redis/redis_config.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/velocitybolt/open-extract/HEAD/common/redis/redis_config.py -------------------------------------------------------------------------------- /common/sources/base/base_source.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/velocitybolt/open-extract/HEAD/common/sources/base/base_source.py -------------------------------------------------------------------------------- /common/sources/enums/source_enums.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/velocitybolt/open-extract/HEAD/common/sources/enums/source_enums.py -------------------------------------------------------------------------------- /common/sources/local_fs_source.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/velocitybolt/open-extract/HEAD/common/sources/local_fs_source.py -------------------------------------------------------------------------------- /common/sources/s3_source.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/velocitybolt/open-extract/HEAD/common/sources/s3_source.py -------------------------------------------------------------------------------- /common/sources/source_factory.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/velocitybolt/open-extract/HEAD/common/sources/source_factory.py -------------------------------------------------------------------------------- /common/text_extraction/text_extractor.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/velocitybolt/open-extract/HEAD/common/text_extraction/text_extractor.py -------------------------------------------------------------------------------- /docker-compose.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/velocitybolt/open-extract/HEAD/docker-compose.yml -------------------------------------------------------------------------------- /examples/ai-workers/ai-sdr/auth/anon_helper.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/velocitybolt/open-extract/HEAD/examples/ai-workers/ai-sdr/auth/anon_helper.py -------------------------------------------------------------------------------- /examples/ai-workers/ai-sdr/contacts.db: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/velocitybolt/open-extract/HEAD/examples/ai-workers/ai-sdr/contacts.db -------------------------------------------------------------------------------- /examples/ai-workers/ai-sdr/main.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/velocitybolt/open-extract/HEAD/examples/ai-workers/ai-sdr/main.py -------------------------------------------------------------------------------- /examples/ai-workers/ai-sdr/output_source/sql_helper.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/velocitybolt/open-extract/HEAD/examples/ai-workers/ai-sdr/output_source/sql_helper.py -------------------------------------------------------------------------------- /examples/ai-workers/ai-sdr/transformation/marly_helper.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/velocitybolt/open-extract/HEAD/examples/ai-workers/ai-sdr/transformation/marly_helper.py -------------------------------------------------------------------------------- /examples/example_files/lacers.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/velocitybolt/open-extract/HEAD/examples/example_files/lacers.pdf -------------------------------------------------------------------------------- /examples/example_files/lacers_reduced.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/velocitybolt/open-extract/HEAD/examples/example_files/lacers_reduced.pdf -------------------------------------------------------------------------------- /examples/notebooks/autogen_example/OAI_CONFIG_LIST.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/velocitybolt/open-extract/HEAD/examples/notebooks/autogen_example/OAI_CONFIG_LIST.json -------------------------------------------------------------------------------- /examples/notebooks/autogen_example/autogen.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/velocitybolt/open-extract/HEAD/examples/notebooks/autogen_example/autogen.ipynb -------------------------------------------------------------------------------- /examples/notebooks/autogen_example/lacers_reduced.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/velocitybolt/open-extract/HEAD/examples/notebooks/autogen_example/lacers_reduced.pdf -------------------------------------------------------------------------------- /examples/notebooks/autogen_example/plot.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/velocitybolt/open-extract/HEAD/examples/notebooks/autogen_example/plot.png -------------------------------------------------------------------------------- /examples/notebooks/langgraph_example/data_loading_workflow.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/velocitybolt/open-extract/HEAD/examples/notebooks/langgraph_example/data_loading_workflow.ipynb -------------------------------------------------------------------------------- /examples/notebooks/langgraph_example/diagram.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/velocitybolt/open-extract/HEAD/examples/notebooks/langgraph_example/diagram.jpeg -------------------------------------------------------------------------------- /examples/notebooks/langgraph_example/lacers_reduced.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/velocitybolt/open-extract/HEAD/examples/notebooks/langgraph_example/lacers_reduced.pdf -------------------------------------------------------------------------------- /examples/notebooks/langgraph_example/notebooklm_workflow.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/velocitybolt/open-extract/HEAD/examples/notebooks/langgraph_example/notebooklm_workflow.ipynb -------------------------------------------------------------------------------- /examples/notebooks/langgraph_example/notebooklm_workflow.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/velocitybolt/open-extract/HEAD/examples/notebooks/langgraph_example/notebooklm_workflow.jpeg -------------------------------------------------------------------------------- /examples/notebooks/langgraph_example/pdf_table_to_chart_workflow.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/velocitybolt/open-extract/HEAD/examples/notebooks/langgraph_example/pdf_table_to_chart_workflow.ipynb -------------------------------------------------------------------------------- /examples/notebooks/langgraph_example/wkflow.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/velocitybolt/open-extract/HEAD/examples/notebooks/langgraph_example/wkflow.jpeg -------------------------------------------------------------------------------- /examples/scripts/api_example.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/velocitybolt/open-extract/HEAD/examples/scripts/api_example.py -------------------------------------------------------------------------------- /examples/scripts/azure_example.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/velocitybolt/open-extract/HEAD/examples/scripts/azure_example.py -------------------------------------------------------------------------------- /examples/scripts/cerebras_example.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/velocitybolt/open-extract/HEAD/examples/scripts/cerebras_example.py -------------------------------------------------------------------------------- /examples/scripts/data_loading_example.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/velocitybolt/open-extract/HEAD/examples/scripts/data_loading_example.py -------------------------------------------------------------------------------- /examples/scripts/groq_example.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/velocitybolt/open-extract/HEAD/examples/scripts/groq_example.py -------------------------------------------------------------------------------- /examples/scripts/markdown_example.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/velocitybolt/open-extract/HEAD/examples/scripts/markdown_example.py -------------------------------------------------------------------------------- /examples/scripts/mistral_example.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/velocitybolt/open-extract/HEAD/examples/scripts/mistral_example.py -------------------------------------------------------------------------------- /examples/scripts/non_marly_examples/lacers_reduced.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/velocitybolt/open-extract/HEAD/examples/scripts/non_marly_examples/lacers_reduced.pdf -------------------------------------------------------------------------------- /examples/scripts/non_marly_examples/llamaindex_pinecone.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/velocitybolt/open-extract/HEAD/examples/scripts/non_marly_examples/llamaindex_pinecone.py -------------------------------------------------------------------------------- /examples/scripts/requirements.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/velocitybolt/open-extract/HEAD/examples/scripts/requirements.txt -------------------------------------------------------------------------------- /examples/scripts/s3_example.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/velocitybolt/open-extract/HEAD/examples/scripts/s3_example.py -------------------------------------------------------------------------------- /examples/scripts/web_and_document_example.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/velocitybolt/open-extract/HEAD/examples/scripts/web_and_document_example.py -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/velocitybolt/open-extract/HEAD/requirements.txt -------------------------------------------------------------------------------- /start-oe.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/velocitybolt/open-extract/HEAD/start-oe.sh --------------------------------------------------------------------------------