├── .cursor └── rules │ └── repository-setup.mdc ├── .editorconfig ├── .github ├── ISSUE_TEMPLATE │ ├── 01_feature_request.yml │ └── 02_bug_report.yml ├── PULL_REQUEST_TEMPLATE │ └── pull_request_template.md ├── scripts │ ├── check_changelog_update.sh │ ├── check_source_changes.sh │ └── deploy_docs.sh └── workflows │ ├── main-checks.yml │ ├── prepare-release.yml │ ├── publish-docs.yaml │ ├── publish-pypi.yml │ ├── pull-request-checks.yml │ ├── shared-packages.yml │ └── shared-ui.yml ├── .gitignore ├── .libraries-whitelist.txt ├── .license-whitelist.txt ├── .pre-commit-config.yaml ├── CONTRIBUTING.md ├── LICENSE ├── README.md ├── check_licenses.sh ├── docs ├── api_reference │ ├── chat │ │ ├── compressors │ │ │ ├── base.md │ │ │ └── llm.md │ │ └── interface │ │ │ └── chat_interface.md │ ├── core │ │ ├── audit │ │ │ ├── metrics.md │ │ │ └── traces.md │ │ ├── embeddings.md │ │ ├── hybrid.md │ │ ├── llms.md │ │ ├── prompt.md │ │ ├── sources.md │ │ └── vector-stores.md │ ├── document_search │ │ ├── documents │ │ │ ├── documents.md │ │ │ └── elements.md │ │ ├── index.md │ │ ├── ingest │ │ │ ├── enrichers.md │ │ │ ├── parsers.md │ │ │ └── strategies.md │ │ └── retrieval │ │ │ ├── rephrasers.md │ │ │ └── rerankers.md │ └── guardrails │ │ └── index.md ├── cli │ └── main.md ├── how-to │ ├── audit │ │ ├── use_metrics.md │ │ └── use_tracing.md │ ├── chatbots │ │ └── api.md │ ├── document_search │ │ ├── ingest-documents.md │ │ └── search-documents.md │ ├── evaluate │ │ ├── custom_dataloader.md │ │ ├── custom_evaluation_pipeline.md │ │ ├── custom_metric.md │ │ ├── evaluate.md │ │ ├── generate_dataset.md │ │ └── optimize.md │ ├── guardrails │ │ └── use_guardrails.md │ ├── llms │ │ ├── use_llms.md │ │ └── use_local_llms.md │ ├── project │ │ ├── component_preferences.md │ │ └── custom_components.md │ ├── prompts │ │ ├── promptfoo.md │ │ ├── use_images_in_prompts.md │ │ └── use_prompting.md │ ├── sources │ │ └── load-dataset.md │ └── vector_stores │ │ ├── hybrid.md │ │ ├── 
sparse_vectors.md │ │ └── use_pgVector_store.md ├── index.md ├── quickstart │ ├── quickstart1_prompts.md │ └── quickstart2_rag.md └── stylesheets │ └── extra.css ├── examples ├── api │ ├── chat.py │ └── offline_chat.py ├── apps │ └── documents_chat.py ├── conversations │ └── recontextualize_message.py ├── core │ ├── audit │ │ ├── config │ │ │ └── grafana │ │ │ │ ├── grafana-dashboards.yaml │ │ │ │ └── ragbits-dashboard.json │ │ └── otel.py │ └── prompt │ │ ├── multimodal.py │ │ ├── multimodal_with_few_shots.py │ │ ├── text.py │ │ └── text_with_few_shots.py ├── document-search │ ├── basic.py │ ├── chroma.py │ ├── configurable.py │ ├── distributed.py │ ├── images │ │ ├── bear.jpg │ │ ├── game.jpg │ │ └── tree.jpg │ ├── multimodal.py │ ├── pgvector.py │ └── qdrant.py ├── evaluation │ ├── dataset-generator │ │ ├── config │ │ │ └── generate.yaml │ │ └── generate.py │ └── document-search │ │ ├── advanced │ │ ├── README.md │ │ ├── config │ │ │ ├── dataloader │ │ │ │ └── hf.yaml │ │ │ ├── experiments │ │ │ │ ├── chunking-1000.yaml │ │ │ │ ├── chunking-250.yaml │ │ │ │ └── chunking-500.yaml │ │ │ ├── metrics │ │ │ │ ├── precision_recall_f1.yaml │ │ │ │ └── ranked_retrieval.yaml │ │ │ ├── optimization.yaml │ │ │ ├── pipeline │ │ │ │ ├── document_search.yaml │ │ │ │ ├── document_search_optimization.yaml │ │ │ │ ├── parser_router │ │ │ │ │ ├── unstructured.yaml │ │ │ │ │ └── unstructured_optimization.yaml │ │ │ │ ├── rephraser │ │ │ │ │ └── noop.yaml │ │ │ │ ├── reranker │ │ │ │ │ └── noop.yaml │ │ │ │ ├── source │ │ │ │ │ └── hf.yaml │ │ │ │ └── vector_store │ │ │ │ │ ├── chroma.yaml │ │ │ │ │ └── chroma_optimization.yaml │ │ │ └── retrieval.yaml │ │ ├── evaluate.py │ │ └── optimize.py │ │ └── basic │ │ ├── evaluate.py │ │ └── optimize.py └── guardrails │ └── openai_moderation.py ├── mkdocs.yml ├── mkdocs_hooks.py ├── packages ├── ragbits-agents │ ├── CHANGELOG.md │ ├── README.md │ ├── pyproject.toml │ └── src │ │ └── ragbits │ │ └── agents │ │ ├── __init__.py │ │ ├── 
_main.py │ │ ├── py.typed │ │ └── types.py ├── ragbits-chat │ ├── CHANGELOG.md │ ├── README.md │ ├── pyproject.toml │ ├── src │ │ └── ragbits │ │ │ └── chat │ │ │ ├── __init__.py │ │ │ ├── api.py │ │ │ ├── cli.py │ │ │ ├── history │ │ │ ├── __init__.py │ │ │ └── compressors │ │ │ │ ├── __init__.py │ │ │ │ ├── base.py │ │ │ │ └── llm.py │ │ │ ├── interface │ │ │ ├── __init__.py │ │ │ ├── _interface.py │ │ │ ├── forms.py │ │ │ └── types.py │ │ │ ├── persistence │ │ │ ├── __init__.py │ │ │ ├── base.py │ │ │ ├── file.py │ │ │ └── sql.py │ │ │ ├── py.typed │ │ │ └── ui-build │ │ │ ├── assets │ │ │ ├── ExamplePluginComponent-CkxrO9jk.js │ │ │ ├── FeedbackFormPluginComponent-Bmct8_5y.js │ │ │ ├── index-B86z3tbJ.css │ │ │ ├── index-ByuhG0Hl.js │ │ │ ├── index-CMvp94wz.js │ │ │ └── ragbits-9U4hpuUb.svg │ │ │ └── index.html │ └── tests │ │ └── unit │ │ ├── history │ │ └── test_llm_compressor.py │ │ ├── persistence │ │ └── test_sql.py │ │ └── test_api.py ├── ragbits-cli │ ├── CHANGELOG.md │ ├── README.md │ ├── pyproject.toml │ ├── src │ │ └── ragbits │ │ │ └── cli │ │ │ ├── __init__.py │ │ │ ├── _utils.py │ │ │ ├── py.typed │ │ │ └── state.py │ └── tests │ │ └── unit │ │ └── test_state.py ├── ragbits-core │ ├── CHANGELOG.md │ ├── README.md │ ├── pyproject.toml │ ├── src │ │ └── ragbits │ │ │ └── core │ │ │ ├── __init__.py │ │ │ ├── audit │ │ │ ├── __init__.py │ │ │ ├── metrics │ │ │ │ ├── __init__.py │ │ │ │ ├── base.py │ │ │ │ └── otel.py │ │ │ └── traces │ │ │ │ ├── __init__.py │ │ │ │ ├── base.py │ │ │ │ ├── cli.py │ │ │ │ └── otel.py │ │ │ ├── cli.py │ │ │ ├── config.py │ │ │ ├── embeddings │ │ │ ├── __init__.py │ │ │ ├── base.py │ │ │ ├── dense │ │ │ │ ├── __init__.py │ │ │ │ ├── base.py │ │ │ │ ├── fastembed.py │ │ │ │ ├── litellm.py │ │ │ │ ├── local.py │ │ │ │ ├── noop.py │ │ │ │ └── vertex_multimodal.py │ │ │ ├── exceptions.py │ │ │ └── sparse │ │ │ │ ├── __init__.py │ │ │ │ ├── bag_of_tokens.py │ │ │ │ ├── base.py │ │ │ │ └── fastembed.py │ │ │ ├── llms │ │ │ ├── 
__init__.py │ │ │ ├── base.py │ │ │ ├── exceptions.py │ │ │ ├── factory.py │ │ │ ├── litellm.py │ │ │ ├── local.py │ │ │ └── mock.py │ │ │ ├── options.py │ │ │ ├── prompt │ │ │ ├── __init__.py │ │ │ ├── _cli.py │ │ │ ├── base.py │ │ │ ├── discovery.py │ │ │ ├── exceptions.py │ │ │ ├── parsers.py │ │ │ ├── prompt.py │ │ │ └── promptfoo.py │ │ │ ├── py.typed │ │ │ ├── sources │ │ │ ├── __init__.py │ │ │ ├── azure.py │ │ │ ├── base.py │ │ │ ├── exceptions.py │ │ │ ├── gcs.py │ │ │ ├── git.py │ │ │ ├── hf.py │ │ │ ├── local.py │ │ │ ├── s3.py │ │ │ └── web.py │ │ │ ├── types.py │ │ │ ├── utils │ │ │ ├── __init__.py │ │ │ ├── _pyproject.py │ │ │ ├── config_handling.py │ │ │ ├── decorators.py │ │ │ ├── dict_transformations.py │ │ │ ├── helpers.py │ │ │ ├── pydantic.py │ │ │ └── secrets.py │ │ │ └── vector_stores │ │ │ ├── __init__.py │ │ │ ├── _cli.py │ │ │ ├── base.py │ │ │ ├── chroma.py │ │ │ ├── hybrid.py │ │ │ ├── hybrid_strategies.py │ │ │ ├── in_memory.py │ │ │ ├── pgvector.py │ │ │ └── qdrant.py │ └── tests │ │ ├── assets │ │ ├── img │ │ │ ├── test.png │ │ │ └── test2.jpg │ │ └── md │ │ │ ├── bar.md │ │ │ └── foo.md │ │ ├── cli │ │ ├── __init__.py │ │ ├── test_cli_trace_handler.py │ │ └── test_vector_store.py │ │ ├── conftest.py │ │ ├── integration │ │ ├── sources │ │ │ ├── test_git.py │ │ │ ├── test_hf.py │ │ │ └── test_s3.py │ │ └── vector_stores │ │ │ ├── __init__.py │ │ │ ├── test_vector_store.py │ │ │ └── test_vector_store_sparse.py │ │ └── unit │ │ ├── __init__.py │ │ ├── audit │ │ ├── test_cli.py │ │ ├── test_metrics.py │ │ └── test_trace.py │ │ ├── embeddings │ │ ├── test_bag_of_tokens.py │ │ ├── test_fastembed.py │ │ ├── test_from_config.py │ │ ├── test_litellm.py │ │ ├── test_local.py │ │ ├── test_noop.py │ │ ├── test_vector_size.py │ │ └── test_vertex_multimodal.py │ │ ├── llms │ │ ├── __init__.py │ │ ├── factory │ │ │ ├── __init__.py │ │ │ └── test_get_preferred_llm.py │ │ ├── test_base.py │ │ ├── test_from_config.py │ │ └── test_litellm.py │ │ ├── 
prompts │ │ ├── __init__.py │ │ ├── discovery │ │ │ ├── __init__.py │ │ │ ├── prompt_classes_for_tests.py │ │ │ ├── ragbits_tests_pkg_with_prompts │ │ │ │ ├── __init__.py │ │ │ │ └── prompts │ │ │ │ │ ├── __init__.py │ │ │ │ │ ├── temp_prompt1.py │ │ │ │ │ └── temp_prompt2.py │ │ │ └── test_prompt_discovery.py │ │ ├── test_parsers.py │ │ └── test_prompt.py │ │ ├── sources │ │ ├── test_aws.py │ │ ├── test_azure.py │ │ ├── test_exceptions.py │ │ ├── test_gcs.py │ │ ├── test_git.py │ │ ├── test_hf.py │ │ ├── test_local.py │ │ ├── test_source_discriminator.py │ │ └── test_web.py │ │ ├── test_options.py │ │ ├── utils │ │ ├── __init__.py │ │ ├── pyproject │ │ │ ├── test_find.py │ │ │ ├── test_get_config.py │ │ │ └── test_get_instace.py │ │ ├── test_config_handling.py │ │ ├── test_decorators.py │ │ ├── test_dict_transformations.py │ │ ├── test_helpers.py │ │ ├── test_secrets.py │ │ └── testprojects │ │ │ ├── bad_factory_project │ │ │ └── pyproject.toml │ │ │ ├── factory_project │ │ │ └── pyproject.toml │ │ │ ├── happy_project │ │ │ └── pyproject.toml │ │ │ ├── project_with_instance_factory │ │ │ └── pyproject.toml │ │ │ └── project_with_instances_yaml │ │ │ ├── instances.yaml │ │ │ └── pyproject.toml │ │ └── vector_stores │ │ ├── test_chroma.py │ │ ├── test_from_config.py │ │ ├── test_hybrid.py │ │ ├── test_hybrid_strategies.py │ │ ├── test_in_memory.py │ │ ├── test_pgvector.py │ │ └── test_qdrant.py ├── ragbits-document-search │ ├── CHANGELOG.md │ ├── README.md │ ├── pyproject.toml │ ├── src │ │ └── ragbits │ │ │ └── document_search │ │ │ ├── __init__.py │ │ │ ├── _main.py │ │ │ ├── cli.py │ │ │ ├── documents │ │ │ ├── __init__.py │ │ │ ├── document.py │ │ │ └── element.py │ │ │ ├── ingestion │ │ │ ├── __init__.py │ │ │ ├── enrichers │ │ │ │ ├── __init__.py │ │ │ │ ├── base.py │ │ │ │ ├── exceptions.py │ │ │ │ ├── image.py │ │ │ │ └── router.py │ │ │ ├── parsers │ │ │ │ ├── __init__.py │ │ │ │ ├── base.py │ │ │ │ ├── docling.py │ │ │ │ ├── exceptions.py │ │ │ │ ├── 
router.py │ │ │ │ └── unstructured.py │ │ │ └── strategies │ │ │ │ ├── __init__.py │ │ │ │ ├── base.py │ │ │ │ ├── batched.py │ │ │ │ ├── ray.py │ │ │ │ └── sequential.py │ │ │ ├── py.typed │ │ │ └── retrieval │ │ │ ├── __init__.py │ │ │ ├── rephrasers │ │ │ ├── __init__.py │ │ │ ├── base.py │ │ │ ├── llm.py │ │ │ └── noop.py │ │ │ └── rerankers │ │ │ ├── __init__.py │ │ │ ├── answerai.py │ │ │ ├── base.py │ │ │ ├── litellm.py │ │ │ ├── llm.py │ │ │ ├── noop.py │ │ │ └── rrf.py │ └── tests │ │ ├── assets │ │ ├── img │ │ │ └── transformers_paper_page.png │ │ ├── md │ │ │ ├── bar.md │ │ │ ├── foo.md │ │ │ └── test_file.md │ │ └── pdf │ │ │ └── transformers_paper_page.pdf │ │ ├── cli │ │ ├── custom_cli_source.py │ │ ├── test_ingest.py │ │ └── test_search.py │ │ ├── integration │ │ ├── __init__.py │ │ ├── test_docling.py │ │ ├── test_rerankers.py │ │ └── test_unstructured.py │ │ └── unit │ │ ├── test_config.py │ │ ├── test_document_parser_router.py │ │ ├── test_document_parsers.py │ │ ├── test_document_search.py │ │ ├── test_document_search_ingest_errors.py │ │ ├── test_documents.py │ │ ├── test_element_enricher_router.py │ │ ├── test_element_enrichers.py │ │ ├── test_elements.py │ │ ├── test_ingest_strategies.py │ │ ├── test_llm_reranker.py │ │ ├── test_rephrasers.py │ │ ├── test_rerankers.py │ │ └── testprojects │ │ ├── empty_project │ │ └── pyproject.toml │ │ ├── project_with_instance_factory │ │ ├── __init__.py │ │ ├── factories.py │ │ └── pyproject.toml │ │ ├── project_with_instances_yaml │ │ ├── instances.yaml │ │ └── pyproject.toml │ │ └── project_with_nested_yaml │ │ ├── instances.yaml │ │ └── pyproject.toml ├── ragbits-evaluate │ ├── CHANGELOG.md │ ├── README.md │ ├── pyproject.toml │ ├── src │ │ └── ragbits │ │ │ └── evaluate │ │ │ ├── __init__.py │ │ │ ├── cli.py │ │ │ ├── config.py │ │ │ ├── dataloaders │ │ │ ├── __init__.py │ │ │ ├── base.py │ │ │ ├── document_search.py │ │ │ ├── exceptions.py │ │ │ └── question_answer.py │ │ │ ├── dataset_generator │ │ │ 
├── __init__.py │ │ │ ├── pipeline.py │ │ │ ├── prompts │ │ │ │ ├── __init__.py │ │ │ │ ├── corpus_generation.py │ │ │ │ └── qa.py │ │ │ ├── tasks │ │ │ │ ├── __init__.py │ │ │ │ ├── corpus_generation.py │ │ │ │ ├── filter │ │ │ │ │ ├── __init__.py │ │ │ │ │ ├── base.py │ │ │ │ │ └── dont_know.py │ │ │ │ └── text_generation │ │ │ │ │ ├── __init__.py │ │ │ │ │ ├── base.py │ │ │ │ │ └── qa.py │ │ │ └── utils.py │ │ │ ├── evaluator.py │ │ │ ├── factories │ │ │ └── __init__.py │ │ │ ├── metrics │ │ │ ├── __init__.py │ │ │ ├── base.py │ │ │ ├── document_search.py │ │ │ └── question_answer.py │ │ │ ├── optimizer.py │ │ │ ├── pipelines │ │ │ ├── __init__.py │ │ │ ├── base.py │ │ │ ├── document_search.py │ │ │ └── question_answer.py │ │ │ ├── py.typed │ │ │ └── utils.py │ └── tests │ │ ├── cli │ │ └── test_run_evaluation.py │ │ └── unit │ │ ├── test_evaluator.py │ │ ├── test_metrics.py │ │ └── test_optimizer.py ├── ragbits-guardrails │ ├── CHANGELOG.md │ ├── README.md │ ├── pyproject.toml │ ├── src │ │ └── ragbits │ │ │ └── guardrails │ │ │ ├── __init__.py │ │ │ ├── base.py │ │ │ ├── openai_moderation.py │ │ │ └── py.typed │ └── tests │ │ └── unit │ │ └── test_openai_moderation.py └── ragbits │ ├── CHANGELOG.md │ └── pyproject.toml ├── pyproject.toml ├── scripts ├── create_ragbits_package.py ├── create_release_notes.py ├── install_git_hooks.py └── update_ragbits_package.py ├── ui ├── .env.example ├── .gitignore ├── README.md ├── assets │ └── ragbits.svg ├── eslint.config.js ├── index.html ├── package-lock.json ├── package.json ├── postcss.config.js ├── prettier.config.js ├── src │ ├── App.tsx │ ├── contexts │ │ ├── HistoryContext │ │ │ ├── HistoryContext.ts │ │ │ ├── HistoryContextProvider.tsx │ │ │ └── useHistoryContext.ts │ │ └── ThemeContext │ │ │ ├── ThemeContext.ts │ │ │ ├── ThemeContextProvider.tsx │ │ │ └── useThemeContext.ts │ ├── core │ │ ├── components │ │ │ ├── ChatMessage.tsx │ │ │ ├── DelayedTooltip.tsx │ │ │ ├── Layout.tsx │ │ │ └── PromptInput │ │ │ │ ├── 
PromptInput.tsx │ │ │ │ └── PromptInputText.tsx │ │ └── utils │ │ │ ├── api.ts │ │ │ ├── eventSource.ts │ │ │ ├── plugins │ │ │ ├── PluginManager.ts │ │ │ ├── PluginWrapper.tsx │ │ │ ├── usePluginManager.ts │ │ │ └── utils.ts │ │ │ ├── request.ts │ │ │ └── types.ts │ ├── globals.css │ ├── main.tsx │ ├── plugins │ │ ├── ExamplePlugin │ │ │ ├── ExamplePluginComponent.tsx │ │ │ └── index.tsx │ │ └── FeedbackFormPlugin │ │ │ ├── FeedbackFormPluginComponent.tsx │ │ │ ├── index.tsx │ │ │ └── types.ts │ ├── types │ │ ├── api.ts │ │ ├── history.ts │ │ ├── plugins.ts │ │ └── utility.ts │ └── vite-env.d.ts ├── tailwind.config.js ├── tsconfig.app.json ├── tsconfig.json ├── tsconfig.node.json └── vite.config.ts └── uv.lock /.cursor/rules/repository-setup.mdc: -------------------------------------------------------------------------------- 1 | --- 2 | description: 3 | globs: 4 | alwaysApply: true 5 | --- 6 | 7 | # Repository setup and pre-commit checks 8 | 9 | This repository is using `uv` for package management. Rather than using `pip` use `uv pip` to install any packages and `uv run` to run python. 
10 | 11 | Run the following checks after implementing any changes: 12 | 13 | uv run ruff format 14 | uv run ruff check --fix 15 | uv run mypy 16 | pytest -------------------------------------------------------------------------------- /.editorconfig: -------------------------------------------------------------------------------- 1 | # This is a standard to preconfigure editors 2 | # check: https://editorconfig.org/ 3 | root = true 4 | 5 | # 4 space indentation 6 | [*.py] 7 | charset = utf-8 8 | indent_style = space 9 | indent_size = 4 10 | trim_trailing_whitespace = true 11 | insert_final_newline = false 12 | end_of_line = lf 13 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/01_feature_request.yml: -------------------------------------------------------------------------------- 1 | name: 🚀 Feature Request 2 | description: Submit a proposal/request for a new ragbits feature. 3 | title: "feat: " 4 | labels: ["feature"] 5 | body: 6 | - type: markdown 7 | attributes: 8 | value: | 9 | Thanks for contributing to ragbits! 10 | - type: textarea 11 | id: feature-description 12 | attributes: 13 | label: Feature description 14 | description: A clear and concise description of the feature proposal 15 | placeholder: Tell us what you want! 16 | validations: 17 | required: true 18 | - type: textarea 19 | id: feature-motivation 20 | attributes: 21 | label: Motivation 22 | description: A clear and concise description of what the problem is, e.g., I'm always frustrated when [...] 23 | placeholder: Why do you need this feature? 24 | validations: 25 | required: true 26 | - type: textarea 27 | id: feature-context 28 | attributes: 29 | label: Additional context 30 | description: Add any other context or screenshots about the feature request here. 31 | placeholder: Screenshots, code snippets, etc. 
32 | 33 | 34 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/02_bug_report.yml: -------------------------------------------------------------------------------- 1 | name: 🐞 Bug Report 2 | description: File a bug report 3 | title: "bug: " 4 | labels: ["bug"] 5 | body: 6 | - type: markdown 7 | attributes: 8 | value: | 9 | Thanks for taking the time to fill out this bug report! 10 | - type: textarea 11 | id: what-happened 12 | attributes: 13 | label: What happened? 14 | description: Also tell us, what did you expect to happen? 15 | placeholder: Tell us what you see! 16 | value: "A bug happened!" 17 | validations: 18 | required: true 19 | - type: textarea 20 | id : how-to-reproduce 21 | attributes: 22 | label: How can we reproduce it? 23 | description: Please provide a code snippet to reproduce the bug. 24 | placeholder: import ragbits 25 | render: python 26 | - type: textarea 27 | id: logs 28 | attributes: 29 | label: Relevant log output 30 | description: Please copy and paste any relevant log output. This will be automatically formatted into code, so no need for backticks. 31 | render: shell -------------------------------------------------------------------------------- /.github/PULL_REQUEST_TEMPLATE/pull_request_template.md: -------------------------------------------------------------------------------- 1 | ### Checklist 2 | 3 | - [ ] I have updated the documentation accordingly. 4 | - [ ] I have updated the CHANGELOG.md file accordingly. 5 | -------------------------------------------------------------------------------- /.github/scripts/check_changelog_update.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | echo "Fetching main branch..." 4 | git fetch origin main --depth=1 5 | 6 | echo "Identifying changed files between the current branch and main branch..." 
7 | CHANGED_FILES=$(git diff --name-only origin/main | tr '\n' ' ') 8 | 9 | if [ -z "$CHANGED_FILES" ]; then 10 | echo "No files have been changed in this branch." 11 | exit 0 12 | fi 13 | 14 | CHANGED_PACKAGES=$(echo "$CHANGED_FILES" | grep -oE 'packages/[^/]+/src' | cut -d '/' -f2 | sort -u) 15 | 16 | if [ -z "$CHANGED_PACKAGES" ]; then 17 | echo "No package changes detected. Skipping changelog check." 18 | exit 0 19 | fi 20 | 21 | echo "Found changes in the following packages: $CHANGED_PACKAGES" 22 | 23 | # Look for "Changelog-ignore: " in the commit message (possibly multiple entries in separate lines) 24 | IGNORED_PACKAGES=$(git log --pretty=format:%B origin/main..HEAD | grep -oP '^Changelog-ignore: \K[^ ]+' | sort -u) 25 | 26 | for IGNORED_PACKAGE in $IGNORED_PACKAGES; do 27 | if echo "$CHANGED_PACKAGES" | grep -q "^$IGNORED_PACKAGE$"; then 28 | echo "Ignoring changelog check for package: $IGNORED_PACKAGE" 29 | CHANGED_PACKAGES=$(echo "$CHANGED_PACKAGES" | grep -v "^$IGNORED_PACKAGE$") 30 | fi 31 | done 32 | 33 | for PACKAGE in $CHANGED_PACKAGES; do 34 | CHANGELOG="packages/$PACKAGE/CHANGELOG.md" 35 | echo "Checking changelog for package: $PACKAGE" 36 | 37 | if ! diff -u <(git show origin/main:$CHANGELOG | grep -Pzo '(?s)(## Unreleased.*?)(?=\n## |\Z)' | tr -d '\0') <(grep -Pzo '(?s)(## Unreleased.*?)(?=\n## |\Z)' $CHANGELOG | tr -d '\0') | grep -q '^\+'; then 38 | echo "No updates detected in changelog for package $PACKAGE. Please add an entry under '## Unreleased'." 39 | exit 1 40 | fi 41 | done 42 | 43 | echo "All modified packages have their changelog updates." 
44 | -------------------------------------------------------------------------------- /.github/scripts/check_source_changes.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Get the changed files between the specified commits 4 | CHANGED_FILES=$(git diff --name-only "$1" "$2") 5 | 6 | # Check if any non-UI files have changed 7 | PACKAGES_CHANGED=$(echo "$CHANGED_FILES" | grep -qv '^ui/' && echo "true" || echo "false") 8 | 9 | # Check if any UI files have changed 10 | UI_CHANGED=$(echo "$CHANGED_FILES" | grep -q '^ui/' && echo "true" || echo "false") 11 | 12 | # Set the GitHub outputs 13 | echo "packages-changed=$PACKAGES_CHANGED" >> "$GITHUB_OUTPUT" 14 | echo "ui-changed=$UI_CHANGED" >> "$GITHUB_OUTPUT" 15 | -------------------------------------------------------------------------------- /.github/scripts/deploy_docs.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -euxo pipefail 4 | 5 | echo $GCP_KEY | base64 -d >> gcp_creds.json 6 | gcloud auth activate-service-account --key-file gcp_creds.json 7 | gcloud config set project ds-internal-db-ally 8 | 9 | # Build the documentation 10 | uv run mkdocs build 11 | 12 | # Upload built docs to a bucket 13 | gcloud storage cp -r site/* gs://ragbits-documentation 14 | 15 | # Invalidate cached content in the CDN 16 | gcloud compute url-maps invalidate-cdn-cache ragbits-documentation-lb \ 17 | --path "/*" --async -------------------------------------------------------------------------------- /.github/workflows/main-checks.yml: -------------------------------------------------------------------------------- 1 | name: Main branch checks 2 | 3 | on: 4 | push: 5 | branches: 6 | - main 7 | 8 | jobs: 9 | check-changelog-update: 10 | name: Check changelog update 11 | runs-on: ubuntu-latest 12 | steps: 13 | - name: Checkout code 14 | uses: actions/checkout@v4 15 | with: 16 | fetch-depth: 0 17 | 18 | - name: 
Verify changelog updates 19 | run: | 20 | ./.github/scripts/check_changelog_update.sh 21 | 22 | check-source-changes: 23 | name: Check source changes 24 | runs-on: ubuntu-latest 25 | outputs: 26 | packages-changed: ${{ steps.filter.outputs.packages-changed }} 27 | ui-changed: ${{ steps.filter.outputs.ui-changed }} 28 | steps: 29 | - uses: actions/checkout@v4 30 | with: 31 | fetch-depth: 0 32 | 33 | - name: Check for changes 34 | id: filter 35 | run: | 36 | ./.github/scripts/check_source_changes.sh ${{ github.event.before }} ${{ github.sha }} 37 | 38 | packages: 39 | needs: check-source-changes 40 | if: ${{ needs.check-source-changes.outputs.packages-changed == 'true' }} 41 | uses: ./.github/workflows/shared-packages.yml 42 | 43 | ui: 44 | needs: check-source-changes 45 | if: ${{ needs.check-source-changes.outputs.ui-changed == 'true' }} 46 | uses: ./.github/workflows/shared-ui.yml 47 | -------------------------------------------------------------------------------- /.github/workflows/publish-docs.yaml: -------------------------------------------------------------------------------- 1 | name: Publish documentation 2 | 3 | on: 4 | release: 5 | branches: 6 | - main 7 | workflow_dispatch: 8 | 9 | jobs: 10 | deploy: 11 | runs-on: ubuntu-latest 12 | container: gcr.io/google.com/cloudsdktool/google-cloud-cli:latest 13 | environment: documentation 14 | permissions: 15 | contents: write 16 | steps: 17 | - uses: actions/checkout@v4 18 | 19 | - name: Install uv 20 | uses: astral-sh/setup-uv@v2 21 | with: 22 | version: ${{ vars.UV_VERSION || '0.6.9' }} 23 | 24 | - name: Set up Python 3.10 25 | uses: actions/setup-python@v4 26 | with: 27 | python-version: "3.10" 28 | 29 | - name: Cache Dependencies 30 | uses: actions/cache@v3 31 | with: 32 | path: ~/.cache/uv 33 | key: ${{ runner.os }}-pip-${{ hashFiles('**/pyproject.toml') }} 34 | restore-keys: | 35 | ${{ runner.os }}-pip- 36 | 37 | - name: Deploy docs 38 | shell: bash 39 | run: uv run ./.github/scripts/deploy_docs.sh 40 | 
env: 41 | GCP_KEY: ${{ secrets.GCP_KEY }} 42 | 43 | -------------------------------------------------------------------------------- /.github/workflows/publish-pypi.yml: -------------------------------------------------------------------------------- 1 | name: Publish release 2 | 3 | on: 4 | pull_request_target: 5 | types: 6 | - closed 7 | 8 | jobs: 9 | publish-release: 10 | if: startsWith(github.head_ref, 'release/') && github.event.pull_request.merged == true && github.event.pull_request.user.login == 'ds-ragbits-robot' 11 | runs-on: ubuntu-latest 12 | steps: 13 | - uses: actions/checkout@v4 14 | 15 | - name: Install uv 16 | uses: astral-sh/setup-uv@v2 17 | with: 18 | version: ${{ vars.UV_VERSION || '0.6.9' }} 19 | 20 | - name: Set up Python 21 | uses: actions/setup-python@v4 22 | with: 23 | python-version: "3.10" 24 | 25 | - name: Get tag name 26 | id: tag_name 27 | run: | 28 | TAG_NAME=$(echo "${{ github.event.pull_request.head.ref }}" | sed 's/.*-//') 29 | echo "new_tag=$TAG_NAME" >> "$GITHUB_OUTPUT" 30 | 31 | - name: Create release notes 32 | run: | 33 | uv run scripts/create_release_notes.py 34 | 35 | - name: Publish release 36 | run: | 37 | gh release create ${{ steps.tag_name.outputs.new_tag }} \ 38 | --title "${{ steps.tag_name.outputs.new_tag }}" \ 39 | --notes-file RELEASE_NOTES.md 40 | env: 41 | GH_TOKEN: ${{ secrets.GH_TOKEN }} 42 | 43 | - name: Build packages 44 | run: | 45 | for dir in packages/*/; do uv build "$dir" --out-dir dist; done 46 | 47 | - name: Publish packages 48 | run: | 49 | uv tool run twine upload dist/* 50 | env: 51 | TWINE_USERNAME: __token__ 52 | TWINE_PASSWORD: ${{ secrets.PYPI_TOKEN }} 53 | -------------------------------------------------------------------------------- /.github/workflows/pull-request-checks.yml: -------------------------------------------------------------------------------- 1 | name: Pull request checks 2 | 3 | on: 4 | pull_request: 5 | 6 | jobs: 7 | check-changelog-update: 8 | name: Check changelog update 9 | 
runs-on: ubuntu-latest 10 | steps: 11 | - name: Checkout code 12 | uses: actions/checkout@v4 13 | with: 14 | fetch-depth: 0 15 | 16 | - name: Verify changelog updates 17 | run: | 18 | ./.github/scripts/check_changelog_update.sh 19 | 20 | check-pr-title: 21 | name: Check pull request title 22 | runs-on: ubuntu-latest 23 | steps: 24 | - uses: amannn/action-semantic-pull-request@v5 25 | env: 26 | GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} 27 | 28 | check-source-changes: 29 | name: Check source changes 30 | runs-on: ubuntu-latest 31 | outputs: 32 | packages-changed: ${{ steps.filter.outputs.packages-changed }} 33 | ui-changed: ${{ steps.filter.outputs.ui-changed }} 34 | steps: 35 | - uses: actions/checkout@v4 36 | with: 37 | fetch-depth: 0 38 | 39 | - name: Check for changes 40 | id: filter 41 | run: | 42 | ./.github/scripts/check_source_changes.sh ${{ github.event.pull_request.base.sha }} ${{ github.event.pull_request.head.sha }} 43 | 44 | packages: 45 | needs: check-source-changes 46 | if: ${{ needs.check-source-changes.outputs.packages-changed == 'true' }} 47 | uses: ./.github/workflows/shared-packages.yml 48 | 49 | ui: 50 | needs: check-source-changes 51 | if: ${{ needs.check-source-changes.outputs.ui-changed == 'true' }} 52 | uses: ./.github/workflows/shared-ui.yml 53 | -------------------------------------------------------------------------------- /.github/workflows/shared-ui.yml: -------------------------------------------------------------------------------- 1 | name: Shared ui checks 2 | 3 | on: 4 | workflow_call: 5 | 6 | jobs: 7 | lints: 8 | name: Run linters 9 | continue-on-error: false 10 | runs-on: ubuntu-latest 11 | steps: 12 | - uses: actions/checkout@v4 13 | 14 | - name: Set up Node.js 15 | uses: actions/setup-node@v4 16 | with: 17 | node-version: "lts/*" 18 | 19 | - name: Install UI dependencies 20 | run: npm i 21 | working-directory: ui 22 | 23 | - name: Run ESLint 24 | run: npm run lint 25 | working-directory: ui 26 | 27 | - name: Run Prettier 28 | 
run: npm run format:check 29 | working-directory: ui 30 | 31 | - name: Check build 32 | run: npm run build 33 | working-directory: ui 34 | 35 | - name: Check Ragbits Chat UI build sync 36 | run: | 37 | git diff --quiet || { 38 | echo "ragbits-chat package ui build not synced" 39 | exit 1 40 | } 41 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Directories 2 | .vscode/ 3 | .idea/ 4 | .neptune/ 5 | .pytest_cache/ 6 | .mypy_cache/ 7 | venv/ 8 | .venv/ 9 | __pycache__/ 10 | **.egg-info/ 11 | 12 | # Byte-compiled / optimized / DLL files 13 | __pycache__/ 14 | *.py[cod] 15 | *$py.class 16 | 17 | # C extensions 18 | *.so 19 | 20 | # Distribution / packaging 21 | .Python 22 | env/ 23 | build/ 24 | develop-eggs/ 25 | dist/ 26 | downloads/ 27 | eggs/ 28 | .eggs/ 29 | lib/ 30 | lib64/ 31 | parts/ 32 | sdist/ 33 | var/ 34 | *.egg-info/ 35 | .installed.cfg 36 | *.egg 37 | 38 | # Sphinx documentation 39 | docs/_build/ 40 | public/ 41 | # autogenerated package license table 42 | docs/licenses_table.rst 43 | 44 | # license dump file 45 | licenses.txt 46 | 47 | # File formats 48 | *.onnx 49 | *.pyc 50 | *.pt 51 | *.pth 52 | *.pkl 53 | *.mar 54 | *.torchscript 55 | **/.ipynb_checkpoints 56 | **/dist/ 57 | **/checkpoints/ 58 | **/outputs/ 59 | **/multirun/ 60 | 61 | # Other env files 62 | .python-version 63 | pyvenv.cfg 64 | pip-selfcheck.json 65 | 66 | # Unit test / coverage reports 67 | htmlcov/ 68 | .tox/ 69 | .coverage 70 | .coverage.* 71 | .cache 72 | nosetests.xml 73 | coverage.xml 74 | *,cover 75 | .hypothesis/ 76 | 77 | # dotenv 78 | .env 79 | 80 | # coverage and pytest reports 81 | coverage.xml 82 | report.xml 83 | 84 | # CMake 85 | cmake-build-*/ 86 | 87 | # Terraform 88 | **/.terraform.lock.hcl 89 | **/.terraform 90 | 91 | # mkdocs generated files 92 | site/ 93 | 94 | # build artifacts 95 | dist/ 96 | 97 | # examples 98 | chroma/ 99 | 
qdrant/ 100 | 101 | .aider* 102 | 103 | .DS_Store 104 | -------------------------------------------------------------------------------- /.libraries-whitelist.txt: -------------------------------------------------------------------------------- 1 | pkg_resources 2 | tiktoken 3 | chardet 4 | chroma-hnswlib 5 | rouge 6 | distilabel 7 | rerankers 8 | py_rust_stemmers 9 | mirakuru 10 | psycopg 11 | pytest-postgresql 12 | python-bidi 13 | -------------------------------------------------------------------------------- /.license-whitelist.txt: -------------------------------------------------------------------------------- 1 | 3-Clause BSD License 2 | Apache 2 3 | Apache License 2 4 | Apache Software License 5 | Apache Software License, BSD License 6 | Apache Software License, MIT License 7 | Apache-2 8 | Apache License, Version 2 9 | Apache License v2.0 10 | BSD 11 | BSD License 12 | BSD License, Apache Software License 13 | CC0 1.0 Universal (CC0 1.0) Public Domain Dedication 14 | Freely Distributable 15 | ISC License (ISCL) 16 | MIT 17 | MIT License 18 | MIT License, Mozilla Public License 2.0 (MPL 2.0) 19 | Mozilla Public License 2.0 (MPL 2.0) 20 | Public Domain 21 | Python Software Foundation License 22 | Python Software Foundation License, MIT License 23 | Unlicense 24 | Proprietary License 25 | Historical Permission Notice and Disclaimer (HPND) 26 | ISC 27 | -------------------------------------------------------------------------------- /.pre-commit-config.yaml: -------------------------------------------------------------------------------- 1 | default_language_version: 2 | python: python3.10 3 | repos: 4 | - repo: https://github.com/pre-commit/pre-commit-hooks 5 | rev: v5.0.0 6 | hooks: 7 | - id: check-case-conflict 8 | - id: check-merge-conflict 9 | - id: trailing-whitespace 10 | exclude: .cursor/|ui-build/* 11 | - id: check-ast 12 | - id: check-added-large-files 13 | - id: check-toml 14 | - id: check-json 15 | - id: check-yaml 16 | 
-------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # Installation 2 | 3 | ## Build from source 4 | 5 | Dependencies needed to build and run Ragbits from the source code: 6 | 7 | 1. [**uv**](https://docs.astral.sh/uv/getting-started/installation/) 8 | 2. [**python**](https://docs.astral.sh/uv/guides/install-python/) 3.10 9 | 10 | 11 | ## Linting and formatting 12 | We use `ruff` for linting and formatting our code. To format your code, run: 13 | 14 | ```bash 15 | $ uv run ruff format 16 | ``` 17 | 18 | To lint the code, run: 19 | ```bash 20 | $ uv run ruff check --fix 21 | ``` 22 | 23 | ## Type checking 24 | We use `mypy` for type checking. To perform type checking, simply run: 25 | 26 | ```bash 27 | $ uv run mypy . 28 | ``` 29 | 30 | ## Testing 31 | We use `pytest` for testing. To run the tests, simply run: 32 | 33 | ```bash 34 | $ uv run pytest 35 | ``` 36 | 37 | Running integration tests requires PostgreSQL with the pgvector extension installed. 38 | The minimum required version of pgvector is 0.7.0, which added support for sparse vectors. 39 | 40 | On Ubuntu Linux you can get it by installing the `postgresql-17-pgvector` package. 41 | 42 | If it is not in your system's default repositories, you can install it from the official PostgreSQL Apt Repository: 43 | 44 | ```bash 45 | sudo apt install postgresql-common 46 | sudo /usr/share/postgresql-common/pgdg/apt.postgresql.org.sh 47 | sudo apt install postgresql-17-pgvector 48 | ``` 49 | 50 | ## Install pre-commit or pre-push hooks 51 | 52 | We also recommend running the checkers in a pre-commit or pre-push hook. To set it up, follow these steps: 53 | 54 | ```bash 55 | $ uv run scripts/install_git_hooks.py 56 | ``` 57 | 58 | Then decide whether you want to run the checks before each commit or before each push.
59 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | Copyright (c) 2024 deepsense.ai sp. z o.o. 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in 13 | all copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 21 | THE SOFTWARE. -------------------------------------------------------------------------------- /check_licenses.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | set -e 3 | 4 | uv run pip-licenses --from=mixed --ignore-packages `cat .libraries-whitelist.txt`> licenses.txt 5 | cat licenses.txt 6 | 7 | FOUND=$(tail -n +2 licenses.txt | grep -v -f .license-whitelist.txt | wc -l) 8 | 9 | if [[ $FOUND -gt 0 ]]; then 10 | echo "Detected license/s not on the whitelist ($FOUND found)." 11 | tail -n +2 licenses.txt | grep -v -f .license-whitelist.txt 12 | exit 1 13 | else 14 | echo "Okay." 
15 | exit 0 16 | fi 17 | -------------------------------------------------------------------------------- /docs/api_reference/chat/compressors/base.md: -------------------------------------------------------------------------------- 1 | # Conversation History Compressors 2 | 3 | Conversation history compressors take a conversation history and represent it as a single string. What is included in the string depends on the particular compressor. 4 | 5 | ::: ragbits.chat.history.compressors.base.ConversationHistoryCompressor 6 | -------------------------------------------------------------------------------- /docs/api_reference/chat/compressors/llm.md: -------------------------------------------------------------------------------- 1 | # Standalone Message Compressor 2 | 3 | A compressor that uses an LLM to recontextualize the last message in the history, i.e., to create a standalone version of the message that includes the necessary context. 4 | 5 | ::: ragbits.chat.history.compressors.llm.StandaloneMessageCompressor 6 | 7 | ::: ragbits.chat.history.compressors.llm.LastMessageAndHistory 8 | 9 | ::: ragbits.chat.history.compressors.llm.StandaloneMessageCompressorPrompt -------------------------------------------------------------------------------- /docs/api_reference/chat/interface/chat_interface.md: -------------------------------------------------------------------------------- 1 | # Chat Interface 2 | 3 | The `ChatInterface` is the main interface for the chat service.
It defines the core functionality required for a chat service 4 | that can return various types of responses such as: 5 | 6 | * Text: Regular text responses streamed chunk by chunk 7 | * References: Source documents used to generate the answer 8 | 9 | ::: ragbits.chat.interface.ChatInterface -------------------------------------------------------------------------------- /docs/api_reference/core/audit/metrics.md: -------------------------------------------------------------------------------- 1 | # Metrics 2 | 3 | ::: ragbits.core.audit.metrics.set_metric_handlers 4 | 5 | ::: ragbits.core.audit.metrics.clear_metric_handlers 6 | 7 | ::: ragbits.core.audit.metrics.create_histogram 8 | 9 | ::: ragbits.core.audit.metrics.record 10 | 11 | ::: ragbits.core.audit.metrics.base.HistogramMetric 12 | 13 | ::: ragbits.core.audit.metrics.base.MetricHandler 14 | 15 | ::: ragbits.core.audit.metrics.otel.OtelMetricHandler 16 | -------------------------------------------------------------------------------- /docs/api_reference/core/audit/traces.md: -------------------------------------------------------------------------------- 1 | # Traces 2 | 3 | ::: ragbits.core.audit.traces.set_trace_handlers 4 | 5 | ::: ragbits.core.audit.traces.clear_trace_handlers 6 | 7 | ::: ragbits.core.audit.traces.trace 8 | 9 | ::: ragbits.core.audit.traces.traceable 10 | 11 | ::: ragbits.core.audit.traces.base.TraceHandler 12 | 13 | ::: ragbits.core.audit.traces.cli.CLITraceHandler 14 | 15 | ::: ragbits.core.audit.traces.otel.OtelTraceHandler 16 | -------------------------------------------------------------------------------- /docs/api_reference/core/embeddings.md: -------------------------------------------------------------------------------- 1 | # Embedders 2 | 3 | ::: ragbits.core.embeddings.base.Embedder 4 | 5 | ::: ragbits.core.embeddings.dense.DenseEmbedder 6 | 7 | ::: ragbits.core.embeddings.dense.local.LocalEmbedder 8 | 9 | ::: ragbits.core.embeddings.dense.litellm.LiteLLMEmbedder 10 | 11 | 
::: ragbits.core.embeddings.dense.fastembed.FastEmbedEmbedder 12 | 13 | ::: ragbits.core.embeddings.sparse.base.SparseEmbedder 14 | 15 | ::: ragbits.core.embeddings.sparse.fastembed.FastEmbedSparseEmbedder 16 | 17 | ::: ragbits.core.embeddings.sparse.bag_of_tokens.BagOfTokens -------------------------------------------------------------------------------- /docs/api_reference/core/hybrid.md: -------------------------------------------------------------------------------- 1 | # Hybrid Vector Store & Fusion Strategies 2 | 3 | ::: ragbits.core.vector_stores.hybrid.HybridSearchVectorStore 4 | 5 | ::: ragbits.core.vector_stores.hybrid_strategies.OrderedHybridRetrivalStrategy 6 | 7 | ::: ragbits.core.vector_stores.hybrid_strategies.ReciprocalRankFusion 8 | 9 | ::: ragbits.core.vector_stores.hybrid_strategies.DistributionBasedScoreFusion -------------------------------------------------------------------------------- /docs/api_reference/core/llms.md: -------------------------------------------------------------------------------- 1 | # LLMs 2 | 3 | ::: ragbits.core.llms.LLM 4 | 5 | ::: ragbits.core.llms.local.LocalLLM 6 | 7 | ::: ragbits.core.llms.litellm.LiteLLM -------------------------------------------------------------------------------- /docs/api_reference/core/prompt.md: -------------------------------------------------------------------------------- 1 | # Prompt 2 | 3 | ::: ragbits.core.prompt.Prompt -------------------------------------------------------------------------------- /docs/api_reference/core/sources.md: -------------------------------------------------------------------------------- 1 | # Sources 2 | 3 | ::: ragbits.core.sources.base.Source 4 | 5 | ::: ragbits.core.sources.azure.AzureBlobStorageSource 6 | 7 | ::: ragbits.core.sources.gcs.GCSSource 8 | 9 | ::: ragbits.core.sources.git.GitSource 10 | 11 | ::: ragbits.core.sources.hf.HuggingFaceSource 12 | 13 | ::: ragbits.core.sources.local.LocalFileSource 14 | 15 | ::: ragbits.core.sources.s3.S3Source 
16 | 17 | ::: ragbits.core.sources.web.WebSource 18 | -------------------------------------------------------------------------------- /docs/api_reference/core/vector-stores.md: -------------------------------------------------------------------------------- 1 | # Vector Stores 2 | 3 | ::: ragbits.core.vector_stores.base.VectorStoreEntry 4 | 5 | ::: ragbits.core.vector_stores.base.VectorStoreOptions 6 | 7 | ::: ragbits.core.vector_stores.base.VectorStore 8 | 9 | ::: ragbits.core.vector_stores.in_memory.InMemoryVectorStore 10 | 11 | ::: ragbits.core.vector_stores.chroma.ChromaVectorStore 12 | 13 | ::: ragbits.core.vector_stores.qdrant.QdrantVectorStore 14 | 15 | ::: ragbits.core.vector_stores.pgvector.PgVectorStore -------------------------------------------------------------------------------- /docs/api_reference/document_search/documents/documents.md: -------------------------------------------------------------------------------- 1 | # Documents 2 | 3 | ::: ragbits.document_search.documents.document.Document 4 | 5 | ::: ragbits.document_search.documents.document.TextDocument 6 | 7 | ::: ragbits.document_search.documents.document.DocumentMeta 8 | 9 | ::: ragbits.document_search.documents.document.DocumentType 10 | -------------------------------------------------------------------------------- /docs/api_reference/document_search/documents/elements.md: -------------------------------------------------------------------------------- 1 | # Elements 2 | 3 | ::: ragbits.document_search.documents.element.Element 4 | 5 | ::: ragbits.document_search.documents.element.TextElement 6 | 7 | ::: ragbits.document_search.documents.element.ImageElement 8 | -------------------------------------------------------------------------------- /docs/api_reference/document_search/index.md: -------------------------------------------------------------------------------- 1 | # Document Search 2 | 3 | ::: ragbits.document_search.DocumentSearchOptions 4 | 5 | ::: 
ragbits.document_search.DocumentSearch 6 | -------------------------------------------------------------------------------- /docs/api_reference/document_search/ingest/enrichers.md: -------------------------------------------------------------------------------- 1 | # Element Enrichers 2 | 3 | ::: ragbits.document_search.ingestion.enrichers.router.ElementEnricherRouter 4 | 5 | ::: ragbits.document_search.ingestion.enrichers.base.ElementEnricher 6 | 7 | ::: ragbits.document_search.ingestion.enrichers.image.ImageElementEnricher 8 | -------------------------------------------------------------------------------- /docs/api_reference/document_search/ingest/parsers.md: -------------------------------------------------------------------------------- 1 | # Document Parsers 2 | 3 | ::: ragbits.document_search.ingestion.parsers.router.DocumentParserRouter 4 | 5 | ::: ragbits.document_search.ingestion.parsers.base.DocumentParser 6 | 7 | ::: ragbits.document_search.ingestion.parsers.base.TextDocumentParser 8 | 9 | ::: ragbits.document_search.ingestion.parsers.base.ImageDocumentParser 10 | 11 | ::: ragbits.document_search.ingestion.parsers.docling.DoclingDocumentParser 12 | 13 | ::: ragbits.document_search.ingestion.parsers.unstructured.UnstructuredDocumentParser 14 | -------------------------------------------------------------------------------- /docs/api_reference/document_search/ingest/strategies.md: -------------------------------------------------------------------------------- 1 | # Ingest Strategies 2 | 3 | ::: ragbits.document_search.ingestion.strategies.base.IngestStrategy 4 | 5 | ::: ragbits.document_search.ingestion.strategies.sequential.SequentialIngestStrategy 6 | 7 | ::: ragbits.document_search.ingestion.strategies.batched.BatchedIngestStrategy 8 | 9 | ::: ragbits.document_search.ingestion.strategies.ray.RayDistributedIngestStrategy 10 | -------------------------------------------------------------------------------- 
/docs/api_reference/document_search/retrieval/rephrasers.md: -------------------------------------------------------------------------------- 1 | # Query Rephrasers 2 | 3 | ::: ragbits.document_search.retrieval.rephrasers.base.QueryRephraserOptions 4 | 5 | ::: ragbits.document_search.retrieval.rephrasers.llm.LLMQueryRephraserOptions 6 | 7 | ::: ragbits.document_search.retrieval.rephrasers.base.QueryRephraser 8 | 9 | ::: ragbits.document_search.retrieval.rephrasers.llm.LLMQueryRephraser 10 | 11 | ::: ragbits.document_search.retrieval.rephrasers.noop.NoopQueryRephraser 12 | -------------------------------------------------------------------------------- /docs/api_reference/document_search/retrieval/rerankers.md: -------------------------------------------------------------------------------- 1 | # Rerankers 2 | 3 | ::: ragbits.document_search.retrieval.rerankers.base.RerankerOptions 4 | 5 | ::: ragbits.document_search.retrieval.rerankers.litellm.LiteLLMRerankerOptions 6 | 7 | ::: ragbits.document_search.retrieval.rerankers.llm.LLMRerankerOptions 8 | 9 | ::: ragbits.document_search.retrieval.rerankers.base.Reranker 10 | 11 | ::: ragbits.document_search.retrieval.rerankers.answerai.AnswerAIReranker 12 | 13 | ::: ragbits.document_search.retrieval.rerankers.litellm.LiteLLMReranker 14 | 15 | ::: ragbits.document_search.retrieval.rerankers.llm.LLMReranker 16 | 17 | ::: ragbits.document_search.retrieval.rerankers.noop.NoopReranker 18 | 19 | ::: ragbits.document_search.retrieval.rerankers.rrf.ReciprocalRankFusionReranker 20 | -------------------------------------------------------------------------------- /docs/api_reference/guardrails/index.md: -------------------------------------------------------------------------------- 1 | # Guardrails 2 | 3 | ::: ragbits.guardrails.base.Guardrail 4 | ::: ragbits.guardrails.base.GuardrailManager 5 | ::: ragbits.guardrails.base.GuardrailVerificationResult 6 | ::: ragbits.guardrails.openai_moderation.OpenAIModerationGuardrail 
-------------------------------------------------------------------------------- /docs/cli/main.md: -------------------------------------------------------------------------------- 1 | # Ragbits CLI 2 | 3 | Ragbits comes with a command-line interface (CLI) that provides several commands for working with the Ragbits platform. It can be accessed by running the `ragbits` command in your terminal. 4 | 5 | Commands that operate on Ragbits components, such as [`ragbits vector-store`](#ragbits-vector-store), use the project's preferred component implementations if a component configuration is not explicitly provided. To learn how to set component preferences in your project, see the [How-To: Set preferred components in Ragbits project](../how-to/project/component_preferences.md) guide. 6 | 7 | ::: mkdocs-click 8 | :module: ragbits.cli 9 | :command: _click_app 10 | :prog_name: ragbits 11 | :style: table 12 | :list_subcommands: true 13 | :depth: 1 -------------------------------------------------------------------------------- /docs/how-to/evaluate/custom_dataloader.md: -------------------------------------------------------------------------------- 1 | # How-To: Create custom data loader with Ragbits 2 | 3 | Ragbits provides a base interface for data loading, `ragbits.evaluate.dataloaders.base.DataLoader`, designed specifically for evaluation purposes. A ready-to-use implementation, `ragbits.evaluate.dataloaders.hf.HFLoader`, is available for handling datasets in huggingface format. 4 | 5 | To create a custom DataLoader for your specific needs, you need to implement the `load` method in a class that inherits from the `DataLoader` interface. 6 | 7 | Please find the [working example](optimize.md#define-the-data-loader) here. 8 | 9 | **Note:** This interface is not to be confused with PyTorch's `DataLoader`, as it serves a distinct purpose within the Ragbits evaluation framework. 
10 | -------------------------------------------------------------------------------- /docs/how-to/evaluate/custom_evaluation_pipeline.md: -------------------------------------------------------------------------------- 1 | # How-To: Create custom evaluation pipeline in Ragbits 2 | 3 | Ragbits provides a ready-to-use evaluation pipeline for document search, implemented by the `ragbits.evaluate.pipelines.document_search.DocumentSearchPipeline` class. 4 | 5 | To create a custom evaluation pipeline for your specific use case, you need to implement the `__call__` method as part of the `ragbits.evaluate.pipelines.base.EvaluationPipeline` interface. 6 | 7 | 8 | Please find the [working example](optimize.md#define-the-optimized-pipeline-structure) here. -------------------------------------------------------------------------------- /docs/how-to/evaluate/custom_metric.md: -------------------------------------------------------------------------------- 1 | # How-To: Create custom evaluation metric in Ragbits 2 | 3 | The `ragbits.evaluate` package provides metrics that measure the quality of a document search pipeline on your data in `ragbits.evaluate.metrics.document_search`, 4 | but you are not limited to these. To implement a custom metric for your specific use case, inherit from the `ragbits.evaluate.metrics.base.Metric` 5 | abstract class and implement the `compute` method. 6 | 7 | Please find the [working example](optimize.md#define-the-metrics) here. -------------------------------------------------------------------------------- /docs/how-to/project/custom_components.md: -------------------------------------------------------------------------------- 1 | # How-To: Register custom components 2 | 3 | Ragbits allows you to extend its functionality by adding custom implementations of various components, such as [`sources`][ragbits.core.sources.Source] or [`elements`][ragbits.document_search.documents.element.Element].
In most cases, you just need to import them directly in your code and use them, but in some cases, such as source ingest via CLI, you need to import them implicitly to avoid errors. 4 | 5 | To register your component classes, include their module paths in the `modules_to_import` section of your `pyproject.toml` file: 6 | 7 | ```toml 8 | [tool.ragbits.core] 9 | modules_to_import = [ 10 | "python.path.to.custom_source", 11 | "python.path.to.custom_element", 12 | ... 13 | ] 14 | ``` 15 | 16 | And that's it. Ragbits reads `pyproject.toml` every time you run it and imports the modules listed there, so you can be sure that your components will always be available at runtime. 17 | 18 | !!! tip 19 | It is a good practice to put all custom components in the `modules_to_import` section to avoid potential errors in the future. 20 | -------------------------------------------------------------------------------- /docs/how-to/prompts/promptfoo.md: -------------------------------------------------------------------------------- 1 | # How-To: Test prompts with promptfoo and Ragbits 2 | 3 | Ragbits' [`Prompt`][ragbits.core.prompt.Prompt] abstraction can be seamlessly integrated with the `promptfoo` tool. After installing `promptfoo` as 4 | specified in the [promptfoo documentation](https://www.promptfoo.dev/docs/installation/), you can generate promptfoo 5 | configuration files for all the prompts discovered by our autodiscover mechanism by running the following command: 6 | 7 | ```bash 8 | ragbits prompts promptfoo 9 | ``` 10 | 11 | This command will generate YAML files in the directory specified by `--target-path` (`promptfooconfigs` by 12 | default). Each generated file should look like this: 13 | 14 | ```yaml 15 | prompts: 16 | - file:///path/to/your/prompt:PromptClass.to_promptfoo 17 | ``` 18 | 19 | You can then edit the generated file to add your custom `promptfoo` configurations.
Once your `promptfoo` configuration 20 | file is ready, you can run `promptfoo` with the following command: 21 | 22 | ```bash 23 | promptfoo eval -c /path/to/generated/promptfoo-config.yaml 24 | ``` 25 | 26 | **Important: To ensure compatibility, make sure Node.js version 20 is installed.** 27 | -------------------------------------------------------------------------------- /docs/stylesheets/extra.css: -------------------------------------------------------------------------------- 1 | :root { 2 | --md-accent-fg-color: #1B54FF; 3 | } 4 | 5 | .md-header__title { 6 | margin-left: 0.5rem !important; 7 | } 8 | -------------------------------------------------------------------------------- /examples/conversations/recontextualize_message.py: -------------------------------------------------------------------------------- 1 | """ 2 | Ragbits Conversations Example: Recontextualize Last Message 3 | 4 | This example demonstrates how to use the `StandaloneMessageCompressor` compressor to recontextualize 5 | the last message in a conversation history. 
6 | """ 7 | 8 | # /// script 9 | # requires-python = ">=3.10" 10 | # dependencies = [ 11 | # "ragbits-conversations", 12 | # ] 13 | # /// 14 | 15 | import asyncio 16 | 17 | from ragbits.conversations.history.compressors.llm import StandaloneMessageCompressor 18 | from ragbits.core.llms.litellm import LiteLLM 19 | from ragbits.core.prompt import ChatFormat 20 | 21 | # Example conversation history 22 | conversation: ChatFormat = [ 23 | {"role": "user", "content": "Who's working on Friday?"}, 24 | {"role": "assistant", "content": "Jim"}, 25 | {"role": "user", "content": "Where is he based?"}, 26 | {"role": "assistant", "content": "At our California Head Office"}, 27 | {"role": "user", "content": "Is he a senior staff member?"}, 28 | {"role": "assistant", "content": "Yes, he's a senior manager"}, 29 | {"role": "user", "content": "What's his phone number (including the prefix for his state)?"}, 30 | ] 31 | 32 | 33 | async def main() -> None: 34 | """ 35 | Main function to demonstrate the StandaloneMessageCompressor compressor. 
36 | """ 37 | # Initialize the LiteLLM client 38 | llm = LiteLLM("gpt-4o") 39 | 40 | # Initialize the StandaloneMessageCompressor compressor 41 | compressor = StandaloneMessageCompressor(llm, history_len=10) 42 | 43 | # Compress the conversation history 44 | recontextualized_message = await compressor.compress(conversation) 45 | 46 | # Print the recontextualized message 47 | print("Recontextualized Message:") 48 | print(recontextualized_message) 49 | 50 | 51 | if __name__ == "__main__": 52 | asyncio.run(main()) 53 | -------------------------------------------------------------------------------- /examples/core/audit/config/grafana/grafana-dashboards.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: 1 2 | 3 | providers: 4 | - name: "Ragbits" 5 | type: file 6 | options: 7 | path: /otel-lgtm/ragbits-dashboard.json 8 | foldersFromFilesStructure: false 9 | -------------------------------------------------------------------------------- /examples/core/prompt/text.py: -------------------------------------------------------------------------------- 1 | """ 2 | Ragbits Core Example: Text Prompt 3 | 4 | This example shows how to use the `Prompt` class to generate themed text using an LLM. 5 | We define an `AnimalPrompt` that generates names for a given animal type. 6 | 7 | The script performs the following steps: 8 | 9 | 1. Define input and output formats using Pydantic models. 10 | 2. Implement the `AnimalPrompt` class with a structured system prompt. 11 | 3. Initialize the `LiteLLM` class to generate text. 12 | 4. Generate a name based on the specified animal. 13 | 5. Print the generated name. 
14 | 15 | To run the script, execute the following command: 16 | 17 | ```bash 18 | uv run examples/core/prompt/text.py 19 | ``` 20 | """ 21 | 22 | # /// script 23 | # requires-python = ">=3.10" 24 | # dependencies = [ 25 | # "ragbits-core", 26 | # ] 27 | # /// 28 | import asyncio 29 | 30 | from pydantic import BaseModel 31 | 32 | from ragbits.core.llms import LiteLLM 33 | from ragbits.core.prompt import Prompt 34 | 35 | 36 | class AnimalPromptInput(BaseModel): 37 | """ 38 | Input format for the AnimalPrompt. 39 | """ 40 | 41 | animal: str 42 | 43 | 44 | class AnimalPromptOutput(BaseModel): 45 | """ 46 | Output format for the AnimalPrompt. 47 | """ 48 | 49 | name: str 50 | 51 | 52 | class AnimalPrompt(Prompt[AnimalPromptInput, AnimalPromptOutput]): 53 | """ 54 | Prompt that generates animal names. 55 | """ 56 | 57 | system_prompt = """ 58 | You are an animal name generator. Use provided animal kind as a base. 59 | """ 60 | 61 | user_prompt = """ 62 | Animal: {{ animal }} 63 | """ 64 | 65 | 66 | async def main() -> None: 67 | """ 68 | Run the example. 
69 | """ 70 | llm = LiteLLM(model_name="gpt-4o-2024-08-06", use_structured_output=True) 71 | prompt = AnimalPrompt(AnimalPromptInput(animal="cat")) 72 | response = await llm.generate(prompt) 73 | print(response.name) 74 | 75 | 76 | if __name__ == "__main__": 77 | asyncio.run(main()) 78 | -------------------------------------------------------------------------------- /examples/document-search/images/bear.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/deepsense-ai/ragbits/c37428621adc5ab30528f41ccd22d655f421c2e8/examples/document-search/images/bear.jpg -------------------------------------------------------------------------------- /examples/document-search/images/game.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/deepsense-ai/ragbits/c37428621adc5ab30528f41ccd22d655f421c2e8/examples/document-search/images/game.jpg -------------------------------------------------------------------------------- /examples/document-search/images/tree.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/deepsense-ai/ragbits/c37428621adc5ab30528f41ccd22d655f421c2e8/examples/document-search/images/tree.jpg -------------------------------------------------------------------------------- /examples/evaluation/dataset-generator/config/generate.yaml: -------------------------------------------------------------------------------- 1 | input_name: query 2 | name: synthetic-RAG-data 3 | tasks: 4 | - type: ragbits.evaluate.dataset_generator.tasks.corpus_generation:CorpusGenerationStep 5 | llm: 6 | provider_type: ragbits.core.llms.litellm:LiteLLM 7 | kwargs: 8 | model_name: gpt-4o 9 | kwargs: 10 | num_per_topic: 5 11 | prompt_class: ragbits.evaluate.dataset_generator.prompts.corpus_generation:BasicCorpusGenerationPrompt 12 | - type: 
ragbits.evaluate.dataset_generator.tasks.text_generation.qa:QueryGenTask 13 | llm: 14 | provider_type: distilabel.llms:OpenAILLM 15 | kwargs: 16 | model: gpt-4o 17 | kwargs: 18 | prompt_class: ragbits.evaluate.dataset_generator.prompts.qa:QueryGenPrompt 19 | - type: ragbits.evaluate.dataset_generator.tasks.text_generation.qa:AnswerGenTask 20 | llm: 21 | provider_type: distilabel.llms:OpenAILLM 22 | kwargs: 23 | model: gpt-4o 24 | kwargs: 25 | prompt_class: ragbits.evaluate.dataset_generator.prompts.qa:BasicAnswerGenPrompt 26 | - type: ragbits.evaluate.dataset_generator.tasks.text_generation.qa:PassagesGenTask 27 | llm: 28 | provider_type: distilabel.llms:OpenAILLM 29 | kwargs: 30 | model: gpt-4o 31 | kwargs: 32 | prompt_class: ragbits.evaluate.dataset_generator.prompts.qa:PassagesGenPrompt 33 | filters: 34 | - ragbits.evaluate.dataset_generator.tasks.filter.dont_know:DontKnowFilter 35 | -------------------------------------------------------------------------------- /examples/evaluation/dataset-generator/generate.py: -------------------------------------------------------------------------------- 1 | import hydra 2 | from omegaconf import DictConfig 3 | 4 | from ragbits.evaluate.dataset_generator.pipeline import DatasetGenerationPipeline 5 | from ragbits.evaluate.utils import log_dataset_to_file 6 | 7 | 8 | @hydra.main(config_path="config", config_name="generate", version_base="3.2") 9 | def main(config: DictConfig) -> None: 10 | """ 11 | A main function for dataset generation example 12 | Args: 13 | config (DictConfig) - configuration should follow 14 | ragbits.evaluate.dataset_generator.DatasetGenerationPipelineConfig data model 15 | Returns: 16 | None 17 | """ 18 | TOPICS = ["conspiracy theories", "machine learning"] 19 | generation_pipeline = DatasetGenerationPipeline.from_dict_config(dict_config=config) 20 | result_dataset = generation_pipeline(corpus=TOPICS) 21 | log_dataset_to_file(dataset=result_dataset) 22 | 23 | 24 | if __name__ == "__main__": 25 | main() 
26 | -------------------------------------------------------------------------------- /examples/evaluation/document-search/advanced/README.md: -------------------------------------------------------------------------------- 1 | # Document Search Evaluation 2 | 3 | ## Evaluation 4 | 5 | ### Evaluation on ingested data 6 | 7 | ```sh 8 | uv run evaluate.py 9 | ``` 10 | 11 | ```sh 12 | uv run evaluate.py +experiments=chunking-250 13 | ``` 14 | 15 | ```sh 16 | uv run evaluate.py --multirun +experiments=chunking-250,chunking-500,chunking-1000 17 | ``` 18 | 19 | ### Logging 20 | 21 | ```sh 22 | uv run evaluate.py logger.local=True 23 | ``` 24 | 25 | ```sh 26 | uv run evaluate.py logger.neptune=True 27 | ``` 28 | 29 | ## Optimization 30 | 31 | ```sh 32 | uv run optimize.py 33 | ``` 34 | 35 | ### Monitoring 36 | 37 | ```sh 38 | uv run optimize.py neptune_callback=True 39 | ``` 40 | -------------------------------------------------------------------------------- /examples/evaluation/document-search/advanced/config/dataloader/hf.yaml: -------------------------------------------------------------------------------- 1 | type: ragbits.evaluate.dataloaders.document_search:DocumentSearchDataLoader 2 | config: 3 | source: 4 | type: ragbits.core.sources:HuggingFaceSource 5 | config: 6 | path: "deepsense-ai/synthetic-rag-dataset_v1.0" 7 | split: "train" 8 | -------------------------------------------------------------------------------- /examples/evaluation/document-search/advanced/config/experiments/chunking-1000.yaml: -------------------------------------------------------------------------------- 1 | # @package _global_ 2 | 3 | task: 4 | name: chunking-1000 5 | 6 | pipeline: 7 | config: 8 | parser_router: 9 | txt: 10 | config: 11 | chunking_kwargs: 12 | max_characters: 1000 13 | new_after_n_chars: 200 14 | md: 15 | config: 16 | chunking_kwargs: 17 | max_characters: 1000 18 | new_after_n_chars: 200 19 | vector_store: 20 | config: 21 | index_name: chunk-1000 22 | 
-------------------------------------------------------------------------------- /examples/evaluation/document-search/advanced/config/experiments/chunking-250.yaml: -------------------------------------------------------------------------------- 1 | # @package _global_ 2 | 3 | task: 4 | name: chunking-250 5 | 6 | pipeline: 7 | config: 8 | parser_router: 9 | txt: 10 | config: 11 | chunking_kwargs: 12 | max_characters: 250 13 | new_after_n_chars: 50 14 | md: 15 | config: 16 | chunking_kwargs: 17 | max_characters: 250 18 | new_after_n_chars: 50 19 | vector_store: 20 | config: 21 | index_name: chunk-250 22 | -------------------------------------------------------------------------------- /examples/evaluation/document-search/advanced/config/experiments/chunking-500.yaml: -------------------------------------------------------------------------------- 1 | # @package _global_ 2 | 3 | task: 4 | name: chunking-500 5 | 6 | pipeline: 7 | config: 8 | parser_router: 9 | txt: 10 | config: 11 | chunking_kwargs: 12 | max_characters: 500 13 | new_after_n_chars: 100 14 | md: 15 | config: 16 | chunking_kwargs: 17 | max_characters: 500 18 | new_after_n_chars: 100 19 | vector_store: 20 | config: 21 | index_name: chunk-500 22 | -------------------------------------------------------------------------------- /examples/evaluation/document-search/advanced/config/metrics/precision_recall_f1.yaml: -------------------------------------------------------------------------------- 1 | precision_recall_f1: 2 | type: ragbits.evaluate.metrics.document_search:DocumentSearchPrecisionRecallF1 3 | config: 4 | matching_strategy: 5 | type: RougeChunkMatch 6 | config: 7 | threshold: 0.5 8 | -------------------------------------------------------------------------------- /examples/evaluation/document-search/advanced/config/metrics/ranked_retrieval.yaml: -------------------------------------------------------------------------------- 1 | ranked_retrieval: 2 | type: 
ragbits.evaluate.metrics.document_search:DocumentSearchRankedRetrievalMetrics 3 | config: 4 | matching_strategy: 5 | type: RougeChunkMatch 6 | config: 7 | threshold: 0.5 8 | -------------------------------------------------------------------------------- /examples/evaluation/document-search/advanced/config/optimization.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | - dataloader@evaluator.evaluation.dataloader: hf 3 | - pipeline@evaluator.evaluation.pipeline: document_search_optimization 4 | - metrics@evaluator.evaluation.metrics: 5 | - precision_recall_f1 6 | - ranked_retrieval 7 | - _self_ 8 | 9 | optimizer: 10 | direction: maximize 11 | n_trials: 5 12 | max_retries_for_trial: 1 13 | 14 | neptune_callback: False 15 | -------------------------------------------------------------------------------- /examples/evaluation/document-search/advanced/config/pipeline/document_search.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | - vector_store@config.vector_store: chroma 3 | - rephraser@config.rephraser: noop 4 | - reranker@config.reranker: noop 5 | - parser_router@config.parser_router: unstructured 6 | - source@config.source: hf 7 | - _self_ 8 | 9 | type: ragbits.evaluate.pipelines.document_search:DocumentSearchPipeline 10 | -------------------------------------------------------------------------------- /examples/evaluation/document-search/advanced/config/pipeline/document_search_optimization.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | - vector_store@config.vector_store: chroma_optimization 3 | - rephraser@config.rephraser: noop 4 | - reranker@config.reranker: noop 5 | - parser_router@config.parser_router: unstructured_optimization 6 | - source@config.source: hf 7 | - _self_ 8 | 9 | type: ragbits.evaluate.pipelines.document_search:DocumentSearchPipeline 10 | 
-------------------------------------------------------------------------------- /examples/evaluation/document-search/advanced/config/pipeline/parser_router/unstructured.yaml: -------------------------------------------------------------------------------- 1 | txt: 2 | type: ragbits.document_search.ingestion.parsers.unstructured:UnstructuredDocumentParser 3 | config: 4 | use_api: false 5 | partition_kwargs: 6 | strategy: hi_res 7 | chunking_kwargs: 8 | include_orig_elements: true 9 | max_characters: 1000 10 | new_after_n_chars: 1000 11 | overlap: 0 12 | overlap_all: 0 13 | 14 | md: 15 | type: ragbits.document_search.ingestion.parsers.unstructured:UnstructuredDocumentParser 16 | config: 17 | use_api: false 18 | partition_kwargs: 19 | strategy: hi_res 20 | chunking_kwargs: 21 | include_orig_elements: true 22 | max_characters: 1000 23 | new_after_n_chars: 1000 24 | overlap: 0 25 | overlap_all: 0 26 | -------------------------------------------------------------------------------- /examples/evaluation/document-search/advanced/config/pipeline/parser_router/unstructured_optimization.yaml: -------------------------------------------------------------------------------- 1 | txt: 2 | type: ragbits.document_search.ingestion.parsers.unstructured:UnstructuredDocumentParser 3 | config: 4 | use_api: false 5 | partition_kwargs: 6 | strategy: hi_res 7 | chunking_kwargs: 8 | include_orig_elements: true 9 | max_characters: 10 | optimize: true 11 | range: 12 | - 500 13 | - 1000 14 | new_after_n_chars: 1000 15 | overlap: 0 16 | overlap_all: 0 17 | 18 | md: 19 | type: ragbits.document_search.ingestion.parsers.unstructured:UnstructuredDocumentParser 20 | config: 21 | use_api: false 22 | partition_kwargs: 23 | strategy: hi_res 24 | chunking_kwargs: 25 | include_orig_elements: true 26 | max_characters: 1000 27 | new_after_n_chars: 1000 28 | overlap: 0 29 | overlap_all: 0 30 | -------------------------------------------------------------------------------- 
/examples/evaluation/document-search/advanced/config/pipeline/rephraser/noop.yaml: -------------------------------------------------------------------------------- 1 | type: NoopQueryRephraser 2 | -------------------------------------------------------------------------------- /examples/evaluation/document-search/advanced/config/pipeline/reranker/noop.yaml: -------------------------------------------------------------------------------- 1 | type: NoopReranker 2 | -------------------------------------------------------------------------------- /examples/evaluation/document-search/advanced/config/pipeline/source/hf.yaml: -------------------------------------------------------------------------------- 1 | type: ragbits.core.sources.hf:HuggingFaceSource 2 | config: 3 | path: "micpst/hf-docs" 4 | split: "train[:5]" 5 | -------------------------------------------------------------------------------- /examples/evaluation/document-search/advanced/config/pipeline/vector_store/chroma.yaml: -------------------------------------------------------------------------------- 1 | type: ragbits.core.vector_stores.chroma:ChromaVectorStore 2 | config: 3 | client: 4 | type: EphemeralClient 5 | index_name: baseline 6 | distance_method: l2 7 | default_options: 8 | k: 3 9 | score_threshold: -1.2 10 | embedder: 11 | type: ragbits.core.embeddings.dense:LiteLLMEmbedder 12 | config: 13 | model_name: "text-embedding-3-small" 14 | default_options: 15 | dimensions: 768 16 | encoding_format: float 17 | -------------------------------------------------------------------------------- /examples/evaluation/document-search/advanced/config/pipeline/vector_store/chroma_optimization.yaml: -------------------------------------------------------------------------------- 1 | type: ragbits.core.vector_stores.chroma:ChromaVectorStore 2 | config: 3 | client: 4 | type: EphemeralClient 5 | index_name: baseline 6 | distance_method: l2 7 | default_options: 8 | k: 3 9 | score_threshold: -1.2 10 | embedder: 11 | 
type: ragbits.core.embeddings.dense:LiteLLMEmbedder 12 | config: 13 | optimize: true 14 | choices: 15 | - model_name: "text-embedding-3-small" 16 | default_options: 17 | dimensions: 18 | optimize: true 19 | range: 20 | - 32 21 | - 512 22 | encoding_format: float 23 | - model_name: "text-embedding-3-large" 24 | default_options: 25 | dimensions: 26 | optimize: true 27 | range: 28 | - 512 29 | - 1024 30 | encoding_format: float 31 | -------------------------------------------------------------------------------- /examples/evaluation/document-search/advanced/config/retrieval.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | - dataloader@evaluation.dataloader: hf 3 | - pipeline@evaluation.pipeline: document_search 4 | - metrics@evaluation.metrics: 5 | - precision_recall_f1 6 | - ranked_retrieval 7 | - _self_ 8 | 9 | evaluator: 10 | batch_size: 5 11 | num_retries: 1 12 | 13 | logger: 14 | local: True 15 | neptune: False 16 | -------------------------------------------------------------------------------- /examples/evaluation/document-search/advanced/evaluate.py: -------------------------------------------------------------------------------- 1 | # /// script 2 | # requires-python = ">=3.10" 3 | # dependencies = [ 4 | # "ragbits-core[chroma,hf]", 5 | # "ragbits-document-search", 6 | # "ragbits-evaluate[relari]", 7 | # ] 8 | # /// 9 | import asyncio 10 | import logging 11 | from typing import cast 12 | 13 | import hydra 14 | from omegaconf import DictConfig, OmegaConf 15 | 16 | from ragbits.evaluate.evaluator import Evaluator 17 | from ragbits.evaluate.utils import log_evaluation_to_file, log_evaluation_to_neptune 18 | 19 | logging.getLogger("LiteLLM").setLevel(logging.ERROR) 20 | logging.getLogger("httpx").setLevel(logging.ERROR) 21 | 22 | 23 | async def evaluate(config: DictConfig) -> None: 24 | """ 25 | Document search evaluation runner. 26 | 27 | Args: 28 | config: Hydra configuration. 
29 | """ 30 | print("Starting evaluation...") 31 | 32 | evaluator_config = cast(dict, OmegaConf.to_container(config)) 33 | results = await Evaluator.run_from_config(evaluator_config) 34 | 35 | if config.logger.local: 36 | output_dir = log_evaluation_to_file(results) 37 | print(f"Evaluation results saved under directory: {output_dir}") 38 | 39 | if config.logger.neptune: 40 | log_evaluation_to_neptune(results, config) 41 | print("Evaluation results uploaded to Neptune") 42 | 43 | 44 | @hydra.main(config_path="config", config_name="retrieval", version_base="3.2") 45 | def main(config: DictConfig) -> None: 46 | """ 47 | Runs the evaluation process. 48 | 49 | Args: 50 | config: Hydra configuration. 51 | """ 52 | asyncio.run(evaluate(config)) 53 | 54 | 55 | if __name__ == "__main__": 56 | main() 57 | -------------------------------------------------------------------------------- /examples/evaluation/document-search/advanced/optimize.py: -------------------------------------------------------------------------------- 1 | # /// script 2 | # requires-python = ">=3.10" 3 | # dependencies = [ 4 | # "ragbits-core[chroma,hf]", 5 | # "ragbits-document-search", 6 | # "ragbits-evaluate[relari]", 7 | # ] 8 | # /// 9 | import logging 10 | from typing import cast 11 | 12 | import hydra 13 | from omegaconf import DictConfig, OmegaConf 14 | 15 | from ragbits.evaluate.optimizer import Optimizer 16 | from ragbits.evaluate.utils import log_optimization_to_file 17 | 18 | logging.getLogger("LiteLLM").setLevel(logging.ERROR) 19 | logging.getLogger("httpx").setLevel(logging.ERROR) 20 | 21 | 22 | @hydra.main(config_path="config", config_name="optimization", version_base="3.2") 23 | def main(config: DictConfig) -> None: 24 | """ 25 | Runs the optimization process. 26 | 27 | Args: 28 | config: Hydra configuration. 
29 | """ 30 | print("Starting optimization...") 31 | 32 | optimizer_config = cast(dict, OmegaConf.to_container(config)) 33 | configs_with_scores = Optimizer.run_from_config(optimizer_config) 34 | 35 | output_dir = log_optimization_to_file(configs_with_scores) 36 | print(f"Optimization results saved under directory: {output_dir}") 37 | 38 | 39 | if __name__ == "__main__": 40 | main() 41 | -------------------------------------------------------------------------------- /examples/guardrails/openai_moderation.py: -------------------------------------------------------------------------------- 1 | # /// script 2 | # requires-python = ">=3.10" 3 | # dependencies = [ 4 | # "ragbits-core", 5 | # "openai", 6 | # ] 7 | # /// 8 | import asyncio 9 | from argparse import ArgumentParser 10 | 11 | from ragbits.guardrails.base import GuardrailManager 12 | from ragbits.guardrails.openai_moderation import OpenAIModerationGuardrail 13 | 14 | 15 | async def guardrail_run(message: str) -> None: 16 | """ 17 | Example of using the OpenAIModerationGuardrail. Requires the OPENAI_API_KEY environment variable to be set. 18 | """ 19 | manager = GuardrailManager([OpenAIModerationGuardrail()]) 20 | res = await manager.verify(message) 21 | print(res) 22 | 23 | 24 | if __name__ == "__main__": 25 | args = ArgumentParser() 26 | args.add_argument("message", nargs="+", type=str, help="Message to validate") 27 | parsed_args = args.parse_args() 28 | 29 | asyncio.run(guardrail_run("".join(parsed_args.message))) 30 | -------------------------------------------------------------------------------- /mkdocs_hooks.py: -------------------------------------------------------------------------------- 1 | from typing import Literal 2 | 3 | from ragbits import cli 4 | 5 | 6 | def on_startup(command: Literal["build", "gh-deploy", "serve"], dirty: bool) -> None: 7 | """ 8 | Hook that runs during mkdocs startup. 9 | 10 | Args: 11 | command: The command that is being run. 12 | dirty: whether --dirty flag was passed. 
13 | """ 14 | cli._init_for_mkdocs() 15 | -------------------------------------------------------------------------------- /packages/ragbits-agents/CHANGELOG.md: -------------------------------------------------------------------------------- 1 | # CHANGELOG 2 | 3 | ## Unreleased 4 | 5 | ## 1.0.0 (2025-06-04) 6 | 7 | ### Changed 8 | 9 | - ragbits-core updated to version v1.0.0 10 | 11 | ## 0.20.1 (2025-06-04) 12 | 13 | ### Changed 14 | 15 | - ragbits-core updated to version v0.20.1 16 | 17 | ## 0.20.0 (2025-06-03) 18 | 19 | ### Changed 20 | 21 | - ragbits-core updated to version v0.20.0 22 | 23 | ## 0.19.1 (2025-05-27) 24 | 25 | ### Changed 26 | 27 | - ragbits-core updated to version v0.19.1 28 | 29 | ## 0.19.0 (2025-05-27) 30 | 31 | ### Changed 32 | 33 | - ragbits-core updated to version v0.19.0 34 | 35 | - Add Agent interface (#569) 36 | - Initial release of the package (#569) 37 | -------------------------------------------------------------------------------- /packages/ragbits-agents/README.md: -------------------------------------------------------------------------------- 1 | # Ragbits Agents 2 | 3 | Ragbits Agents contains primitives for building agentic systems. 4 | 5 | The package is in the experimental phase, the API may change in the future. 
6 | 7 | ## Installation 8 | 9 | To install the Ragbits Agents package, run: 10 | 11 | ```sh 12 | pip install ragbits-agents 13 | ``` 14 | 15 | 18 | 19 | 24 | -------------------------------------------------------------------------------- /packages/ragbits-agents/pyproject.toml: -------------------------------------------------------------------------------- 1 | [project] 2 | name = "ragbits-agents" 3 | version = "1.0.0" 4 | description = "Building blocks for rapid development of GenAI applications" 5 | readme = "README.md" 6 | requires-python = ">=3.10" 7 | license = "MIT" 8 | authors = [ 9 | { name = "deepsense.ai", email = "ragbits@deepsense.ai"} 10 | ] 11 | keywords = [ 12 | "Retrieval Augmented Generation", 13 | "RAG", 14 | "Large Language Models", 15 | "LLMs", 16 | "Generative AI", 17 | "GenAI", 18 | "Agents", 19 | ] 20 | classifiers = [ 21 | "Development Status :: 4 - Beta", 22 | "Environment :: Console", 23 | "Intended Audience :: Science/Research", 24 | "License :: OSI Approved :: MIT License", 25 | "Natural Language :: English", 26 | "Operating System :: OS Independent", 27 | "Programming Language :: Python :: 3.10", 28 | "Programming Language :: Python :: 3.11", 29 | "Programming Language :: Python :: 3.12", 30 | "Programming Language :: Python :: 3.13", 31 | "Topic :: Scientific/Engineering :: Artificial Intelligence", 32 | "Topic :: Software Development :: Libraries :: Python Modules", 33 | ] 34 | dependencies = ["ragbits-core==1.0.0"] 35 | 36 | [project.urls] 37 | "Homepage" = "https://github.com/deepsense-ai/ragbits" 38 | "Bug Reports" = "https://github.com/deepsense-ai/ragbits/issues" 39 | "Documentation" = "https://ragbits.deepsense.ai/" 40 | "Source" = "https://github.com/deepsense-ai/ragbits" 41 | 42 | [build-system] 43 | requires = ["hatchling"] 44 | build-backend = "hatchling.build" 45 | 46 | [tool.hatch.metadata] 47 | allow-direct-references = true 48 | 49 | [tool.hatch.build.targets.wheel] 50 | packages = ["src/ragbits"] 51 | 
-------------------------------------------------------------------------------- /packages/ragbits-agents/src/ragbits/agents/__init__.py: -------------------------------------------------------------------------------- 1 | from ragbits.agents._main import Agent, AgentOptions 2 | from ragbits.agents.types import QuestionAnswerAgent, QuestionAnswerPromptInput, QuestionAnswerPromptOutput 3 | 4 | __all__ = ["Agent", "AgentOptions", "QuestionAnswerAgent", "QuestionAnswerPromptInput", "QuestionAnswerPromptOutput"] 5 | -------------------------------------------------------------------------------- /packages/ragbits-agents/src/ragbits/agents/py.typed: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/deepsense-ai/ragbits/c37428621adc5ab30528f41ccd22d655f421c2e8/packages/ragbits-agents/src/ragbits/agents/py.typed -------------------------------------------------------------------------------- /packages/ragbits-agents/src/ragbits/agents/types.py: -------------------------------------------------------------------------------- 1 | from typing import Any, TypeVar 2 | 3 | from pydantic import BaseModel 4 | 5 | from ragbits.agents._main import Agent 6 | from ragbits.core.llms.base import LLMClientOptionsT 7 | 8 | QuestionAnswerPromptInputT = TypeVar("QuestionAnswerPromptInputT", bound="QuestionAnswerPromptInput") 9 | QuestionAnswerPromptOutputT = TypeVar("QuestionAnswerPromptOutputT", bound="QuestionAnswerPromptOutput | str") 10 | 11 | QuestionAnswerAgent = Agent[LLMClientOptionsT, QuestionAnswerPromptInputT, QuestionAnswerPromptOutputT] 12 | 13 | 14 | class QuestionAnswerPromptInput(BaseModel): 15 | """ 16 | Input for the question answer agent. 17 | """ 18 | 19 | question: str 20 | """The question to answer.""" 21 | context: Any | None = None 22 | """The context to answer the question.""" 23 | 24 | 25 | class QuestionAnswerPromptOutput(BaseModel): 26 | """ 27 | Output for the question answer agent. 
28 | """ 29 | 30 | answer: str 31 | """The answer to the question.""" 32 | -------------------------------------------------------------------------------- /packages/ragbits-chat/README.md: -------------------------------------------------------------------------------- 1 | # Ragbits Chat 2 | 3 | ragbits-chat is a Python package that provides tools for building conversational AI applications. 4 | 5 | The package includes: 6 | - Framework for building chat experiences 7 | - History management for conversation tracking 8 | - UI components for building chat interfaces 9 | 10 | For detailed information, please refer to the [API documentation](https://ragbits.deepsense.ai/how-to/chatbots/api/). -------------------------------------------------------------------------------- /packages/ragbits-chat/pyproject.toml: -------------------------------------------------------------------------------- 1 | [project] 2 | name = "ragbits-chat" 3 | version = "1.0.0" 4 | description = "Building blocks for rapid development of GenAI applications" 5 | readme = "README.md" 6 | requires-python = ">=3.10" 7 | license = "MIT" 8 | authors = [ 9 | { name = "deepsense.ai", email = "ragbits@deepsense.ai"} 10 | ] 11 | keywords = [ 12 | "Retrieval Augmented Generation", 13 | "RAG", 14 | "Large Language Models", 15 | "LLMs", 16 | "Generative AI", 17 | "GenAI", 18 | "Prompt Management" 19 | ] 20 | classifiers = [ 21 | "Development Status :: 4 - Beta", 22 | "Environment :: Console", 23 | "Intended Audience :: Science/Research", 24 | "License :: OSI Approved :: MIT License", 25 | "Natural Language :: English", 26 | "Operating System :: OS Independent", 27 | "Programming Language :: Python :: 3.10", 28 | "Programming Language :: Python :: 3.11", 29 | "Programming Language :: Python :: 3.12", 30 | "Programming Language :: Python :: 3.13", 31 | "Topic :: Scientific/Engineering :: Artificial Intelligence", 32 | "Topic :: Software Development :: Libraries :: Python Modules", 33 | ] 34 | dependencies = 
["fastapi>=0.115.0,<1.0.0", "uvicorn>=0.31.0,<1.0.0", "ragbits-core==1.0.0"] 35 | 36 | [project.urls] 37 | "Homepage" = "https://github.com/deepsense-ai/ragbits" 38 | "Bug Reports" = "https://github.com/deepsense-ai/ragbits/issues" 39 | "Documentation" = "https://ragbits.deepsense.ai/" 40 | "Source" = "https://github.com/deepsense-ai/ragbits" 41 | 42 | [project.optional-dependencies] 43 | sql = [ 44 | "sqlalchemy>=2.0.39,<3.0.0", 45 | ] 46 | 47 | [tool.uv] 48 | dev-dependencies = [ 49 | "pre-commit~=3.8.0", 50 | "pytest~=8.3.3", 51 | "pytest-cov~=5.0.0", 52 | "pytest-asyncio~=0.24.0", 53 | "pip-licenses>=4.0.0,<5.0.0" 54 | ] 55 | 56 | [build-system] 57 | requires = ["hatchling"] 58 | build-backend = "hatchling.build" 59 | 60 | [tool.hatch.metadata] 61 | allow-direct-references = true 62 | 63 | [tool.hatch.build.targets.wheel] 64 | packages = ["src/ragbits"] 65 | 66 | [tool.pytest.ini_options] 67 | asyncio_mode = "auto" 68 | -------------------------------------------------------------------------------- /packages/ragbits-chat/src/ragbits/chat/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/deepsense-ai/ragbits/c37428621adc5ab30528f41ccd22d655f421c2e8/packages/ragbits-chat/src/ragbits/chat/__init__.py -------------------------------------------------------------------------------- /packages/ragbits-chat/src/ragbits/chat/cli.py: -------------------------------------------------------------------------------- 1 | import typer 2 | 3 | from ragbits.chat.api import RagbitsAPI 4 | 5 | ds_app = typer.Typer(no_args_is_help=True) 6 | 7 | 8 | def register(app: typer.Typer) -> None: 9 | """ 10 | Register the CLI commands for the package. 11 | 12 | Args: 13 | app: The Typer object to register the commands with. 
14 | """ 15 | app.add_typer(ds_app, name="api", help="Commands for running API service") 16 | 17 | 18 | @ds_app.command() 19 | def run( 20 | chat_interface: str = typer.Argument(..., help="Path to a module with chat function"), 21 | host: str = typer.Option("127.0.0.1", "--host", help="Host to bind the API server to"), 22 | port: int = typer.Option(8000, "--port", help="Port to bind the API server to"), 23 | cors_origins: list[str] = typer.Option( # noqa: B008 24 | None, 25 | "--cors-origin", 26 | help="Allowed CORS origins. Can be specified multiple times.", 27 | ), 28 | ui_build_dir: str = typer.Option( 29 | None, 30 | "--ui-build-dir", 31 | help="Path to a custom UI build directory. If not specified, uses the default package UI.", 32 | ), 33 | ) -> None: 34 | """ 35 | Run API service with UI demo 36 | """ 37 | api = RagbitsAPI( 38 | chat_interface=chat_interface, 39 | cors_origins=cors_origins, 40 | ui_build_dir=ui_build_dir, 41 | ) 42 | api.run(host=host, port=port) 43 | -------------------------------------------------------------------------------- /packages/ragbits-chat/src/ragbits/chat/history/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/deepsense-ai/ragbits/c37428621adc5ab30528f41ccd22d655f421c2e8/packages/ragbits-chat/src/ragbits/chat/history/__init__.py -------------------------------------------------------------------------------- /packages/ragbits-chat/src/ragbits/chat/history/compressors/__init__.py: -------------------------------------------------------------------------------- 1 | from .base import ConversationHistoryCompressor 2 | from .llm import StandaloneMessageCompressor 3 | 4 | __all__ = ["ConversationHistoryCompressor", "StandaloneMessageCompressor"] 5 | -------------------------------------------------------------------------------- /packages/ragbits-chat/src/ragbits/chat/history/compressors/base.py: 
-------------------------------------------------------------------------------- 1 | from abc import ABC, abstractmethod 2 | from typing import ClassVar 3 | 4 | from ragbits.chat.history import compressors 5 | from ragbits.core.prompt.base import ChatFormat 6 | from ragbits.core.utils.config_handling import WithConstructionConfig 7 | 8 | 9 | class ConversationHistoryCompressor(WithConstructionConfig, ABC): 10 | """ 11 | An abstract class for conversation history compressors, 12 | i.e. class that takes the entire conversation history 13 | and returns a single string representation of it. 14 | 15 | The exact logic of what the string should include and represent 16 | depends on the specific implementation. 17 | 18 | Usually used to provide LLM additional context from the conversation history. 19 | """ 20 | 21 | default_module: ClassVar = compressors 22 | configuration_key: ClassVar = "history_compressor" 23 | 24 | @abstractmethod 25 | async def compress(self, conversation: ChatFormat) -> str: 26 | """ 27 | Compresses the conversation history to a single string. 28 | 29 | Args: 30 | conversation: List of dicts with "role" and "content" keys, representing the chat history so far. 
31 | """ 32 | -------------------------------------------------------------------------------- /packages/ragbits-chat/src/ragbits/chat/interface/__init__.py: -------------------------------------------------------------------------------- 1 | from ._interface import ChatInterface 2 | 3 | __all__ = ["ChatInterface"] 4 | -------------------------------------------------------------------------------- /packages/ragbits-chat/src/ragbits/chat/interface/forms.py: -------------------------------------------------------------------------------- 1 | from pydantic import BaseModel, Field 2 | 3 | 4 | class FormField(BaseModel): 5 | """Field in a feedback form.""" 6 | 7 | name: str = Field(description="Name of the field") 8 | type: str = Field(description="Type of the field (text, select, etc.)") 9 | required: bool = Field(description="Whether the field is required") 10 | label: str = Field(description="Display label for the field") 11 | options: list[str] | None = Field(None, description="Options for select fields") 12 | 13 | 14 | class FeedbackForm(BaseModel): 15 | """Model for feedback form structure.""" 16 | 17 | title: str = Field(description="Title of the form") 18 | fields: list[FormField] = Field(description="Fields in the form") 19 | 20 | 21 | class FeedbackConfig(BaseModel): 22 | """Configuration for feedback collection.""" 23 | 24 | like_enabled: bool = Field(default=False, description="Whether like feedback is enabled") 25 | like_form: FeedbackForm | None = Field(default=None, description="The form to use for like feedback") 26 | 27 | dislike_enabled: bool = Field(default=False, description="Whether dislike feedback is enabled") 28 | dislike_form: FeedbackForm | None = Field(default=None, description="The form to use for dislike feedback") 29 | -------------------------------------------------------------------------------- /packages/ragbits-chat/src/ragbits/chat/persistence/__init__.py: 
-------------------------------------------------------------------------------- 1 | from ragbits.chat.persistence.base import HistoryPersistenceStrategy 2 | 3 | __all__ = ["HistoryPersistenceStrategy"] 4 | -------------------------------------------------------------------------------- /packages/ragbits-chat/src/ragbits/chat/persistence/base.py: -------------------------------------------------------------------------------- 1 | from abc import ABC, abstractmethod 2 | 3 | from ragbits.chat.interface.types import ChatContext, ChatResponse 4 | 5 | 6 | class HistoryPersistenceStrategy(ABC): 7 | """Base class for history persistence strategies.""" 8 | 9 | @abstractmethod 10 | async def save_interaction( 11 | self, 12 | message: str, 13 | response: str, 14 | extra_responses: list[ChatResponse], 15 | context: ChatContext, 16 | timestamp: float, 17 | ) -> None: 18 | """ 19 | Save a chat interaction including the input message and responses. 20 | 21 | Args: 22 | message: The user's input message 23 | response: The main response text 24 | extra_responses: List of additional responses (references, state updates, etc.) 
25 | context: Optional context dictionary containing metadata 26 | timestamp: Unix timestamp of when the interaction occurred 27 | """ 28 | pass 29 | -------------------------------------------------------------------------------- /packages/ragbits-chat/src/ragbits/chat/persistence/file.py: -------------------------------------------------------------------------------- 1 | import json 2 | from pathlib import Path 3 | 4 | from ..interface.types import ChatContext, ChatResponse 5 | from .base import HistoryPersistenceStrategy 6 | 7 | 8 | class FileHistoryPersistence(HistoryPersistenceStrategy): 9 | """Strategy that saves chat history to per-conversation JSONL files in a directory.""" 10 | 11 | def __init__(self, base_path: str | Path): 12 | self.base_path = Path(base_path) 13 | 14 | def _get_file_path(self, conversation_id: str) -> Path: 15 | """Get the conversation file path based on the conversation ID.""" 16 | return self.base_path / f"{conversation_id}.jsonl" 17 | 18 | async def save_interaction( 19 | self, 20 | message: str, 21 | response: str, 22 | extra_responses: list[ChatResponse], 23 | context: ChatContext, 24 | timestamp: float, 25 | ) -> None: 26 | """ 27 | Append a chat interaction as one JSON line to the conversation's file. 28 | 29 | Args: 30 | message: The user's input message 31 | response: The main response text 32 | extra_responses: List of additional responses (references, state updates, etc.)
33 | context: Optional context dictionary containing metadata 34 | timestamp: Unix timestamp of when the interaction occurred 35 | """ 36 | # Create interaction record 37 | interaction = { 38 | "message": message, 39 | "context": context.model_dump(mode="json"), 40 | "response": response, 41 | "extra_responses": [r.model_dump(mode="json") for r in extra_responses], 42 | "timestamp": timestamp, 43 | } 44 | 45 | # Get current file path and ensure parent directory exists 46 | file_path = self._get_file_path(context.conversation_id or "no_conversation_id") 47 | file_path.parent.mkdir(parents=True, exist_ok=True) 48 | 49 | # Append to file 50 | with open(file_path, "a") as f: 51 | f.write(json.dumps(interaction) + "\n") 52 | -------------------------------------------------------------------------------- /packages/ragbits-chat/src/ragbits/chat/py.typed: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/deepsense-ai/ragbits/c37428621adc5ab30528f41ccd22d655f421c2e8/packages/ragbits-chat/src/ragbits/chat/py.typed -------------------------------------------------------------------------------- /packages/ragbits-chat/src/ragbits/chat/ui-build/assets/ExamplePluginComponent-CkxrO9jk.js: -------------------------------------------------------------------------------- 1 | import{H as e}from"./index-ByuhG0Hl.js";const i=()=>e.jsxs("div",{children:[e.jsx("h1",{children:"Example Plugin"}),e.jsx("p",{children:"This is an example plugin."})]});export{i as default}; 2 | -------------------------------------------------------------------------------- /packages/ragbits-chat/src/ragbits/chat/ui-build/assets/ragbits-9U4hpuUb.svg: -------------------------------------------------------------------------------- 1 | 2 | 🐰 3 | -------------------------------------------------------------------------------- /packages/ragbits-chat/src/ragbits/chat/ui-build/index.html: 
-------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | Ragbits 8 | 9 | 10 | 11 | 12 | 13 |
14 | 15 | 16 | -------------------------------------------------------------------------------- /packages/ragbits-cli/pyproject.toml: -------------------------------------------------------------------------------- 1 | [project] 2 | name = "ragbits-cli" 3 | version = "1.0.0" 4 | description = "A CLI application for ragbits - building blocks for rapid development of GenAI applications" 5 | readme = "README.md" 6 | requires-python = ">=3.10" 7 | license = "MIT" 8 | authors = [ 9 | { name = "deepsense.ai", email = "ragbits@deepsense.ai"} 10 | ] 11 | keywords = [ 12 | "Retrieval Augmented Generation", 13 | "RAG", 14 | "Large Language Models", 15 | "LLMs", 16 | "Generative AI", 17 | "GenAI", 18 | "Prompt Management" 19 | ] 20 | classifiers = [ 21 | "Development Status :: 4 - Beta", 22 | "Environment :: Console", 23 | "Intended Audience :: Science/Research", 24 | "License :: OSI Approved :: MIT License", 25 | "Natural Language :: English", 26 | "Operating System :: OS Independent", 27 | "Programming Language :: Python :: 3.10", 28 | "Programming Language :: Python :: 3.11", 29 | "Programming Language :: Python :: 3.12", 30 | "Programming Language :: Python :: 3.13", 31 | "Topic :: Scientific/Engineering :: Artificial Intelligence", 32 | "Topic :: Software Development :: Libraries :: Python Modules", 33 | ] 34 | dependencies = ["typer>=0.12.5,<1.0.0", "ragbits-core==1.0.0"] 35 | 36 | [project.urls] 37 | "Homepage" = "https://github.com/deepsense-ai/ragbits" 38 | "Bug Reports" = "https://github.com/deepsense-ai/ragbits/issues" 39 | "Documentation" = "https://ragbits.deepsense.ai/" 40 | "Source" = "https://github.com/deepsense-ai/ragbits" 41 | 42 | [project.scripts] 43 | ragbits = "ragbits.cli:main" 44 | rbts = "ragbits.cli:main" 45 | 46 | [build-system] 47 | requires = ["hatchling"] 48 | build-backend = "hatchling.build" 49 | 50 | [tool.hatch.metadata] 51 | allow-direct-references = true 52 | 53 | [tool.hatch.build.targets.wheel] 54 | packages = ["src/ragbits"] 55 | 56 | 
[tool.pytest.ini_options] 57 | asyncio_mode = "auto" 58 | -------------------------------------------------------------------------------- /packages/ragbits-cli/src/ragbits/cli/py.typed: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/deepsense-ai/ragbits/c37428621adc5ab30528f41ccd22d655f421c2e8/packages/ragbits-cli/src/ragbits/cli/py.typed -------------------------------------------------------------------------------- /packages/ragbits-core/README.md: -------------------------------------------------------------------------------- 1 | # Ragbits Core 2 | 3 | Ragbits Core is a collection of utilities and tools that are used across all Ragbits packages. It includes fundamentals, such as utilities for logging, configuration, prompt creation, classes for communicating with LLMs, embedders, vector stores, and more. 4 | 5 | ## Installation 6 | 7 | ```sh 8 | pip install ragbits-core 9 | ``` 10 | 11 | ## Quick Start 12 | 13 | ```python 14 | import asyncio 15 | from pydantic import BaseModel 16 | from ragbits.core.prompt import Prompt 17 | from ragbits.core.llms.litellm import LiteLLM 18 | 19 | class Dog(BaseModel): 20 | breed: str 21 | age: int 22 | temperament: str 23 | 24 | class DogNamePrompt(Prompt[Dog, str]): 25 | system_prompt = """ 26 | You are a dog name generator. You come up with funny names for dogs given the dog details. 27 | """ 28 | 29 | user_prompt = """ 30 | The dog is a {breed} breed, {age} years old, and has a {temperament} temperament. 
31 | """ 32 | 33 | async def main() -> None: 34 | llm = LiteLLM("gpt-4o") 35 | dog = Dog(breed="Golden Retriever", age=3, temperament="friendly") 36 | prompt = DogNamePrompt(dog) 37 | response = await llm.generate(prompt) 38 | print(response) 39 | 40 | 41 | if __name__ == "__main__": 42 | asyncio.run(main()) 43 | ``` 44 | 45 | ## Documentation 46 | * [Quickstart 1: Working with Prompts and LLMs](https://ragbits.deepsense.ai/quickstart/quickstart1_prompts/) 47 | * [How-To Guides - Core](https://ragbits.deepsense.ai/how-to/prompts/use_prompting/) 48 | * [API Reference - Core](https://ragbits.deepsense.ai/api_reference/core/prompt/) 49 | -------------------------------------------------------------------------------- /packages/ragbits-core/src/ragbits/core/__init__.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | import typer 4 | 5 | from ragbits.core.audit.traces import set_trace_handlers 6 | from ragbits.core.config import import_modules_from_config 7 | 8 | if os.getenv("RAGBITS_VERBOSE", "0") == "1": 9 | typer.echo('Verbose mode is enabled with environment variable "RAGBITS_VERBOSE".') 10 | set_trace_handlers("cli") 11 | 12 | import_modules_from_config() 13 | -------------------------------------------------------------------------------- /packages/ragbits-core/src/ragbits/core/audit/__init__.py: -------------------------------------------------------------------------------- 1 | from ragbits.core.audit.metrics import clear_metric_handlers, create_histogram, record, set_metric_handlers 2 | from ragbits.core.audit.metrics.base import HistogramMetric, MetricHandler 3 | from ragbits.core.audit.traces import clear_trace_handlers, set_trace_handlers, trace, traceable 4 | from ragbits.core.audit.traces.base import TraceHandler 5 | 6 | __all__ = [ 7 | "HistogramMetric", 8 | "MetricHandler", 9 | "TraceHandler", 10 | "clear_metric_handlers", 11 | "clear_trace_handlers", 12 | "create_histogram", 13 | "record", 14 | 
"set_metric_handlers", 15 | "set_trace_handlers", 16 | "trace", 17 | "traceable", 18 | ] 19 | -------------------------------------------------------------------------------- /packages/ragbits-core/src/ragbits/core/audit/metrics/otel.py: -------------------------------------------------------------------------------- 1 | from opentelemetry.metrics import Histogram, MeterProvider, get_meter 2 | 3 | from ragbits.core.audit.metrics.base import MetricHandler 4 | 5 | 6 | class OtelMetricHandler(MetricHandler[Histogram]): 7 | """ 8 | OpenTelemetry metric handler. 9 | """ 10 | 11 | def __init__(self, provider: MeterProvider | None = None, metric_prefix: str = "ragbits") -> None: 12 | """ 13 | Initialize the OtelMetricHandler instance. 14 | 15 | Args: 16 | provider: The meter provider to use. 17 | metric_prefix: Prefix for all metric names. 18 | """ 19 | super().__init__(metric_prefix=metric_prefix) 20 | self._meter = get_meter(name=__name__, meter_provider=provider) 21 | 22 | def create_histogram(self, name: str, unit: str = "", description: str = "") -> Histogram: 23 | """ 24 | Create a histogram metric. 25 | 26 | Args: 27 | name: The histogram metric name. 28 | unit: The histogram metric unit. 29 | description: The histogram metric description. 30 | 31 | Returns: 32 | The initialized histogram metric. 33 | """ 34 | return self._meter.create_histogram(name=name, unit=unit, description=description) 35 | 36 | def record(self, metric: Histogram, value: int | float, attributes: dict | None = None) -> None: # noqa: PLR6301 37 | """ 38 | Record the value for a specified histogram metric. 39 | 40 | Args: 41 | metric: The histogram metric to record. 42 | value: The value to record for the metric. 43 | attributes: Additional metadata for the metric. 
44 | """ 45 | metric.record(value, attributes=attributes) 46 | -------------------------------------------------------------------------------- /packages/ragbits-core/src/ragbits/core/cli.py: -------------------------------------------------------------------------------- 1 | import typer 2 | 3 | from ragbits.core.prompt._cli import prompts_app 4 | from ragbits.core.vector_stores._cli import vector_stores_app 5 | 6 | 7 | def register(app: typer.Typer) -> None: 8 | """ 9 | Register the CLI commands for the package. 10 | 11 | Args: 12 | app: The Typer object to register the commands with. 13 | """ 14 | app.add_typer(prompts_app, name="prompts", help="Commands for managing prompts") 15 | app.add_typer(vector_stores_app, name="vector-store", help="Commands for managing vector stores") 16 | -------------------------------------------------------------------------------- /packages/ragbits-core/src/ragbits/core/embeddings/__init__.py: -------------------------------------------------------------------------------- 1 | from .base import Embedder, EmbedderOptionsT, SparseVector, VectorSize 2 | from .dense import DenseEmbedder, LiteLLMEmbedder, NoopEmbedder 3 | from .sparse import BagOfTokens, BagOfTokensOptions, SparseEmbedder, SparseEmbedderOptionsT 4 | 5 | __all__ = [ 6 | "BagOfTokens", 7 | "BagOfTokensOptions", 8 | "DenseEmbedder", 9 | "Embedder", 10 | "EmbedderOptionsT", 11 | "LiteLLMEmbedder", 12 | "NoopEmbedder", 13 | "SparseEmbedder", 14 | "SparseEmbedderOptionsT", 15 | "SparseVector", 16 | "VectorSize", 17 | ] 18 | -------------------------------------------------------------------------------- /packages/ragbits-core/src/ragbits/core/embeddings/dense/__init__.py: -------------------------------------------------------------------------------- 1 | from .base import DenseEmbedder 2 | from .litellm import LiteLLMEmbedder, LiteLLMEmbedderOptions 3 | from .noop import NoopEmbedder 4 | 5 | __all__ = [ 6 | "DenseEmbedder", 7 | "LiteLLMEmbedder", 8 | 
"LiteLLMEmbedderOptions", 9 | "NoopEmbedder", 10 | ] 11 | -------------------------------------------------------------------------------- /packages/ragbits-core/src/ragbits/core/embeddings/dense/base.py: -------------------------------------------------------------------------------- 1 | from abc import ABC, abstractmethod 2 | 3 | from ragbits.core.embeddings.base import Embedder, EmbedderOptionsT, VectorSize 4 | 5 | 6 | class DenseEmbedder(Embedder[EmbedderOptionsT], ABC): # noqa: F821 7 | """ 8 | Abstract client for communication with dense embedding models. 9 | """ 10 | 11 | @abstractmethod 12 | async def embed_text(self, data: list[str], options: EmbedderOptionsT | None = None) -> list[list[float]]: 13 | """ 14 | Creates embeddings for the given strings. 15 | 16 | Args: 17 | data: List of strings to get embeddings for. 18 | options: Additional settings used by the Embedder model. 19 | 20 | Returns: 21 | List of embeddings for the given strings. 22 | """ 23 | 24 | @abstractmethod 25 | async def get_vector_size(self) -> VectorSize: 26 | """ 27 | Get information about the dense vector size/dimensions returned by this embedder. 28 | 29 | Returns: 30 | VectorSize object with is_sparse=False and the embedding dimension. 31 | """ 32 | 33 | async def embed_image(self, images: list[bytes], options: EmbedderOptionsT | None = None) -> list[list[float]]: 34 | """ 35 | Creates embeddings for the given images. 36 | 37 | Args: 38 | images: List of images to get embeddings for. 39 | options: Additional settings used by the Embedder model. 40 | 41 | Returns: 42 | List of embeddings for the given images. 
43 | """ 44 | raise NotImplementedError("Image embeddings are not supported by this model.") 45 | -------------------------------------------------------------------------------- /packages/ragbits-core/src/ragbits/core/embeddings/exceptions.py: -------------------------------------------------------------------------------- 1 | class EmbeddingError(Exception): 2 | """ 3 | Base class for all exceptions raised by the EmbeddingClient. 4 | """ 5 | 6 | def __init__(self, message: str) -> None: 7 | super().__init__(message) 8 | self.message = message 9 | 10 | 11 | class EmbeddingConnectionError(EmbeddingError): 12 | """ 13 | Raised when there is an error connecting to the embedding API. 14 | """ 15 | 16 | def __init__(self, message: str = "Connection error.") -> None: 17 | super().__init__(message) 18 | 19 | 20 | class EmbeddingStatusError(EmbeddingError): 21 | """ 22 | Raised when an API response has a status code of 4xx or 5xx. 23 | """ 24 | 25 | def __init__(self, message: str, status_code: int) -> None: 26 | super().__init__(message) 27 | self.status_code = status_code 28 | 29 | 30 | class EmbeddingResponseError(EmbeddingError): 31 | """ 32 | Raised when an API response has an invalid schema. 33 | """ 34 | 35 | def __init__(self, message: str = "Data returned by API invalid for expected schema.") -> None: 36 | super().__init__(message) 37 | 38 | 39 | class EmbeddingEmptyResponseError(EmbeddingError): 40 | """ 41 | Raised when an API response has an empty response. 
42 | """ 43 | 44 | def __init__(self, message: str = "Empty response returned by API.") -> None: 45 | super().__init__(message) 46 | -------------------------------------------------------------------------------- /packages/ragbits-core/src/ragbits/core/embeddings/sparse/__init__.py: -------------------------------------------------------------------------------- 1 | from ..base import SparseVector 2 | from .bag_of_tokens import BagOfTokens, BagOfTokensOptions 3 | from .base import SparseEmbedder, SparseEmbedderOptionsT 4 | 5 | __all__ = [ 6 | "BagOfTokens", 7 | "BagOfTokensOptions", 8 | "SparseEmbedder", 9 | "SparseEmbedderOptionsT", 10 | "SparseVector", 11 | ] 12 | -------------------------------------------------------------------------------- /packages/ragbits-core/src/ragbits/core/embeddings/sparse/base.py: -------------------------------------------------------------------------------- 1 | from abc import ABC, abstractmethod 2 | from typing import TypeVar 3 | 4 | from ragbits.core.embeddings.base import Embedder, SparseVector, VectorSize 5 | from ragbits.core.options import Options 6 | 7 | SparseEmbedderOptionsT = TypeVar("SparseEmbedderOptionsT", bound=Options) 8 | 9 | 10 | class SparseEmbedder(Embedder[SparseEmbedderOptionsT], ABC): 11 | """Sparse embedding interface""" 12 | 13 | @abstractmethod 14 | async def embed_text(self, texts: list[str], options: SparseEmbedderOptionsT | None = None) -> list[SparseVector]: 15 | """ 16 | Transforms a list of texts into sparse vectors. 17 | 18 | Args: 19 | texts: list of input texts. 20 | options: optional embedding options 21 | 22 | Returns: 23 | list of sparse embeddings. 24 | """ 25 | 26 | @abstractmethod 27 | async def get_vector_size(self) -> VectorSize: 28 | """ 29 | Get information about the sparse vector size/dimensions returned by this embedder. 30 | 31 | Returns: 32 | VectorSize object with is_sparse=True and the vocabulary size. 
33 | """ 34 | 35 | async def embed_image( 36 | self, images: list[bytes], options: SparseEmbedderOptionsT | None = None 37 | ) -> list[SparseVector]: 38 | """ 39 | Creates embeddings for the given images. 40 | 41 | Args: 42 | images: List of images to get embeddings for. 43 | options: Additional settings used by the Embedder model. 44 | 45 | Returns: 46 | List of sparse embeddings for the given images. 47 | """ 48 | raise NotImplementedError("Image embeddings are not supported by this model.") 49 | -------------------------------------------------------------------------------- /packages/ragbits-core/src/ragbits/core/llms/__init__.py: -------------------------------------------------------------------------------- 1 | from .base import LLM 2 | from .litellm import LiteLLM, LiteLLMOptions 3 | from .local import LocalLLMOptions 4 | 5 | __all__ = ["LLM", "LiteLLM", "LiteLLMOptions", "LocalLLMOptions"] 6 | -------------------------------------------------------------------------------- /packages/ragbits-core/src/ragbits/core/llms/exceptions.py: -------------------------------------------------------------------------------- 1 | class LLMError(Exception): 2 | """ 3 | Base class for all exceptions raised by the LLMClient. 4 | """ 5 | 6 | def __init__(self, message: str) -> None: 7 | super().__init__(message) 8 | self.message = message 9 | 10 | 11 | class LLMConnectionError(LLMError): 12 | """ 13 | Raised when there is an error connecting to the LLM API. 14 | """ 15 | 16 | def __init__(self, message: str = "Connection error.") -> None: 17 | super().__init__(message) 18 | 19 | 20 | class LLMStatusError(LLMError): 21 | """ 22 | Raised when an API response has a status code of 4xx or 5xx. 23 | """ 24 | 25 | def __init__(self, message: str, status_code: int) -> None: 26 | super().__init__(message) 27 | self.status_code = status_code 28 | 29 | 30 | class LLMResponseError(LLMError): 31 | """ 32 | Raised when an API response has an invalid schema. 
33 | """ 34 | 35 | def __init__(self, message: str = "Data returned by API invalid for expected schema.") -> None: 36 | super().__init__(message) 37 | 38 | 39 | class LLMEmptyResponseError(LLMError): 40 | """ 41 | Raised when an API response is empty. 42 | """ 43 | 44 | def __init__(self, message: str = "Empty response returned by API.") -> None: 45 | super().__init__(message) 46 | 47 | 48 | class LLMNotSupportingImagesError(LLMError): 49 | """ 50 | Raised when there are images in the prompt, but LLM doesn't support them. 51 | """ 52 | 53 | def __init__(self, message: str = "There are images in the prompt, but given LLM doesn't support them.") -> None: 54 | super().__init__(message) 55 | -------------------------------------------------------------------------------- /packages/ragbits-core/src/ragbits/core/llms/factory.py: -------------------------------------------------------------------------------- 1 | from ragbits.core.config import core_config 2 | from ragbits.core.llms.base import LLM, LLMType 3 | from ragbits.core.llms.litellm import LiteLLM 4 | 5 | 6 | def get_preferred_llm(llm_type: LLMType = LLMType.TEXT) -> LLM: 7 | """ 8 | Get an instance of the preferred LLM using the factory function 9 | specified in the configuration. 10 | 11 | Args: 12 | llm_type: type of the LLM to get, defaults to text 13 | 14 | Returns: 15 | LLM: An instance of the preferred LLM. 16 | 17 | """ 18 | factory = core_config.llm_preference_factories[llm_type] 19 | return LLM.subclass_from_factory(factory) 20 | 21 | 22 | def simple_litellm_factory() -> LLM: 23 | """ 24 | A basic LLM factory that creates an LiteLLM instance with the default model, 25 | default options, and assumes that the API key is set in the environment. 26 | 27 | Returns: 28 | LLM: An instance of the LiteLLM class. 
class Options(BaseModel, ABC):
    """
    Base pydantic model for sets of options passed to ragbits components.

    The model is configured with ``extra="allow"``, so fields that are not declared on the class are
    accepted and kept, and with ``arbitrary_types_allowed=True`` so that fields can hold non-pydantic
    objects such as the ``NotGiven`` sentinel.
    """

    model_config = ConfigDict(extra="allow", arbitrary_types_allowed=True)
    # Value that `dict()` substitutes for options that are None or NotGiven.
    # NOTE(review): presumably overridden by subclasses with a provider-specific sentinel - confirm.
    _not_given: ClassVar[Any] = None

    def __or__(self, other: "Options") -> Self:
        """
        Merges two Options, prioritizing non-NOT_GIVEN values from the 'other' object.

        Args:
            other: The options whose given values take precedence over the values of this instance.

        Returns:
            A new instance of this object's class containing the merged values.
        """
        merged = self.model_dump()

        for key, value in other.model_dump().items():
            # A NotGiven value in `other` means "no preference", so our own value wins when we have one.
            # When the key exists only in `other`, its value is taken as-is (even if it is NotGiven),
            # so a key that is missing from this instance never causes a KeyError.
            if key not in merged or not isinstance(value, NotGiven):
                merged[key] = value

        return self.__class__(**merged)

    def dict(self) -> dict[str, Any]:  # type: ignore # mypy complains about overriding BaseModel.dict
        """
        Creates a dictionary representation of the Options instance.
        If a value is None or NotGiven, it is replaced with the class-level `_not_given` sentinel.

        Returns:
            A dictionary representation of the Options instance.
        """
        options = self.model_dump()

        return {
            key: self._not_given if value is None or isinstance(value, NotGiven) else value
            for key, value in options.items()
        }
def generate_configs(
    file_pattern: str = core_config.prompt_path_pattern,
    root_path: Path | None = None,
    target_path: Path = Path("promptfooconfigs"),
) -> None:
    """
    Generates promptfoo configuration files for all discovered prompts.

    One ``<prompt qualname>.yaml`` file is written per discovered prompt. If PyYAML is not installed,
    an installation hint is printed to stderr and nothing is generated.

    Args:
        file_pattern: The file pattern to search for Prompt objects. Defaults to the pattern configured
            in `core_config.prompt_path_pattern`.
        root_path: The root path to search for Prompt objects. Defaults to the current working directory.
        target_path: The path to save the promptfoo configuration files. Defaults to "promptfooconfigs".
            Missing parent directories are created.
    """
    root_path = root_path or Path.cwd()

    if not HAS_PYYAML:
        Console(stderr=True).print(
            "To generate configs for promptfoo, you need the PyYAML library. Please install it using the following"
            " command:\n[b]pip install ragbits-core\\[promptfoo][/b]"
        )
        return

    prompts = PromptDiscovery(file_pattern=file_pattern, root_path=root_path).discover()
    Console().print(
        f"Discovered {len(prompts)} prompts."
        f" Saving promptfoo configuration files to [bold green]{target_path}[/] folder ..."
    )

    # parents=True supports nested target paths; exist_ok=True avoids the check-then-create race
    # and still raises if the path exists but is not a directory.
    target_path.mkdir(parents=True, exist_ok=True)

    for prompt in prompts:
        # promptfoo reference to the prompt's `to_promptfoo` entry point, addressed by source file path.
        prompt_path = f"file://{prompt.__module__.replace('.', os.sep)}.py:{prompt.__qualname__}.to_promptfoo"
        with open(target_path / f"{prompt.__qualname__}.yaml", "w", encoding="utf-8") as f:
            yaml.dump({"prompts": [prompt_path]}, f)
4 | """ 5 | 6 | def __init__(self, message: str) -> None: 7 | super().__init__(message) 8 | self.message = message 9 | 10 | 11 | class SourceConnectionError(SourceError): 12 | """ 13 | Raised when there is an error connecting to the document source. 14 | """ 15 | 16 | def __init__(self) -> None: 17 | super().__init__("Connection error.") 18 | 19 | 20 | class SourceNotFoundError(SourceError): 21 | """ 22 | Raised when the document is not found. 23 | """ 24 | 25 | def __init__(self, source_id: str) -> None: 26 | super().__init__(f"Source with ID {source_id} not found.") 27 | self.source_id = source_id 28 | 29 | 30 | class SourceDownloadError(SourceError): 31 | """ 32 | Raised when an error occurs during the download of the source. 33 | """ 34 | 35 | def __init__(self, url: str, code: int): 36 | super().__init__(f"Download of {url} failed with code {code}.") 37 | self.url = url 38 | self.code = code 39 | -------------------------------------------------------------------------------- /packages/ragbits-core/src/ragbits/core/types.py: -------------------------------------------------------------------------------- 1 | from typing import Literal 2 | 3 | from typing_extensions import override 4 | 5 | 6 | # Sentinel class used until PEP 0661 is accepted 7 | class NotGiven: 8 | """ 9 | A sentinel singleton class used to distinguish omitted keyword arguments 10 | from those passed in with the value None (which may have different behavior). 11 | 12 | For example: 13 | 14 | ```py 15 | def get(timeout: Union[int, NotGiven, None] = NotGiven()) -> Response: ... 16 | 17 | 18 | get(timeout=1) # 1s timeout 19 | get(timeout=None) # No timeout 20 | get() # Default timeout behavior, which may not be statically known at the method definition. 
def env_vars_not_set(env_vars: list[str]) -> bool:
    """
    Tells whether none of the given environment variables is defined.

    Args:
        env_vars: Names of the environment variables to look up.

    Returns:
        True when every listed variable is absent from the environment (also for an empty list),
        False as soon as one of them is defined.
    """
    for name in env_vars:
        # A variable counts as set whenever it is present, even with an empty value.
        if name in os.environ:
            return False
    return True
logger = logging.getLogger(__name__)

# Environment variable name for the secret key
RAGBITS_KEY_ENV_VAR = "RAGBITS_SECRET_KEY"

# Default key length in bytes (32 bytes = 256 bits)
DEFAULT_KEY_LENGTH = 32


@lru_cache(maxsize=1)
def get_secret_key(env_var: str = RAGBITS_KEY_ENV_VAR, key_length: int = DEFAULT_KEY_LENGTH) -> str:
    """
    Get a secret key from an environment variable, falling back to a randomly generated key.

    The result is cached per argument combination, so repeated calls return the same key and
    changes made to the environment after the first call are not picked up.

    Args:
        env_var: The environment variable name to check for the secret key. An unset or empty
            variable triggers the random-key fallback.
        key_length: Length in bytes of the random key generated when no key is provided.

    Returns:
        The secret key as a string. A generated key is URL-safe base64 encoded.
    """
    # Try to get from environment variable
    secret_key = os.environ.get(env_var)

    if secret_key:
        logger.debug("Using secret key from environment variable: %s", env_var)
        return secret_key

    # Generate a random key
    random_key = base64.urlsafe_b64encode(secrets.token_bytes(key_length)).decode("utf-8")

    # The key itself is deliberately kept out of the message: warnings usually end up in logs
    # and on stderr, which would expose the signing secret.
    warnings.warn(
        f"No secret key found in environment variable {env_var}. "
        "Using an ephemeral randomly generated key. "
        "This key will be regenerated on restart, breaking any existing signatures. "
        f"Set the {env_var} environment variable to use a persistent key.",
        UserWarning,
        stacklevel=2,
    )

    return random_key
/packages/ragbits-core/tests/assets/img/test2.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/deepsense-ai/ragbits/c37428621adc5ab30528f41ccd22d655f421c2e8/packages/ragbits-core/tests/assets/img/test2.jpg -------------------------------------------------------------------------------- /packages/ragbits-core/tests/assets/md/bar.md: -------------------------------------------------------------------------------- 1 | bar -------------------------------------------------------------------------------- /packages/ragbits-core/tests/assets/md/foo.md: -------------------------------------------------------------------------------- 1 | foo -------------------------------------------------------------------------------- /packages/ragbits-core/tests/cli/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/deepsense-ai/ragbits/c37428621adc5ab30528f41ccd22d655f421c2e8/packages/ragbits-core/tests/cli/__init__.py -------------------------------------------------------------------------------- /packages/ragbits-core/tests/conftest.py: -------------------------------------------------------------------------------- 1 | import os 2 | import shutil 3 | import tempfile 4 | from pathlib import Path 5 | 6 | import pytest 7 | 8 | from ragbits.core.sources.base import LOCAL_STORAGE_DIR_ENV 9 | 10 | 11 | @pytest.fixture(scope="module", autouse=True) 12 | def configure_local_storage_dir(): 13 | random_tmp_dir = Path(tempfile.mkdtemp()) 14 | os.environ[LOCAL_STORAGE_DIR_ENV] = random_tmp_dir.as_posix() 15 | yield 16 | shutil.rmtree(random_tmp_dir) 17 | -------------------------------------------------------------------------------- /packages/ragbits-core/tests/integration/sources/test_hf.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | from ragbits.core.sources.exceptions import 
SourceNotFoundError 4 | from ragbits.core.sources.hf import HuggingFaceSource 5 | from ragbits.core.utils.helpers import env_vars_not_set 6 | 7 | HF_TOKEN_ENV = "HF_TOKEN" # noqa: S105 8 | HF_DATASET_PATH = "micpst/hf-docs" 9 | 10 | 11 | @pytest.mark.skipif( 12 | env_vars_not_set([HF_TOKEN_ENV]), # noqa: S105 13 | reason="Hugging Face environment variables not set", 14 | ) 15 | async def test_huggingface_source_fetch() -> None: 16 | source = HuggingFaceSource(path=HF_DATASET_PATH, row=0) 17 | path = await source.fetch() 18 | 19 | assert path.is_file() 20 | assert path.name == "README.md" 21 | assert ( 22 | path.read_text() 23 | == " `tokenizers-linux-x64-musl`\n\nThis is the **x86_64-unknown-linux-musl** binary for `tokenizers`\n" 24 | ) 25 | 26 | 27 | @pytest.mark.skipif( 28 | env_vars_not_set([HF_TOKEN_ENV]), 29 | reason="Hugging Face environment variables not set", 30 | ) 31 | async def test_huggingface_source_fetch_not_found() -> None: 32 | source = HuggingFaceSource(path=HF_DATASET_PATH, row=1000) 33 | 34 | with pytest.raises(SourceNotFoundError) as exc: 35 | await source.fetch() 36 | 37 | assert str(exc.value) == "Source with ID hf:micpst/hf-docs/train/1000 not found." 
38 | -------------------------------------------------------------------------------- /packages/ragbits-core/tests/integration/vector_stores/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/deepsense-ai/ragbits/c37428621adc5ab30528f41ccd22d655f421c2e8/packages/ragbits-core/tests/integration/vector_stores/__init__.py -------------------------------------------------------------------------------- /packages/ragbits-core/tests/unit/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/deepsense-ai/ragbits/c37428621adc5ab30528f41ccd22d655f421c2e8/packages/ragbits-core/tests/unit/__init__.py -------------------------------------------------------------------------------- /packages/ragbits-core/tests/unit/audit/test_metrics.py: -------------------------------------------------------------------------------- 1 | from unittest.mock import MagicMock 2 | 3 | import pytest 4 | 5 | from ragbits.core.audit.metrics import create_histogram, record, set_metric_handlers 6 | from ragbits.core.audit.metrics.base import MetricHandler 7 | 8 | 9 | class MockMetricHandler(MetricHandler[MagicMock]): 10 | def create_histogram(self, name: str, unit: str = "", description: str = "") -> MagicMock: # noqa: PLR6301 11 | return MagicMock() 12 | 13 | def record(self, metric: MagicMock, value: int | float, attributes: dict | None = None) -> None: ... 
14 | 15 | 16 | @pytest.fixture 17 | def mock_handler() -> MockMetricHandler: 18 | handler = MockMetricHandler() 19 | set_metric_handlers(handler) 20 | return handler 21 | 22 | 23 | def test_record_with_default_create_histogram(mock_handler: MockMetricHandler) -> None: 24 | metric = MagicMock() 25 | mock_handler.create_histogram = MagicMock(return_value=metric) # type: ignore 26 | mock_handler.record = MagicMock() # type: ignore 27 | 28 | record("test_metric", 1) 29 | 30 | mock_handler.create_histogram.assert_called_once_with( 31 | name="ragbits_test_metric", 32 | unit="", 33 | description="", 34 | ) 35 | mock_handler.record.assert_called_once_with(metric=metric, value=1, attributes={}) 36 | 37 | 38 | def test_record_with_create_histogram(mock_handler: MockMetricHandler) -> None: 39 | metric = MagicMock() 40 | mock_handler.create_histogram = MagicMock(return_value=metric) # type: ignore 41 | mock_handler.record = MagicMock() # type: ignore 42 | 43 | metric_name = create_histogram(name="test_metric", unit="test_unit", description="test_description") 44 | record(metric_name, 1) 45 | 46 | mock_handler.create_histogram.assert_called_once_with( 47 | name="ragbits_test_metric", 48 | unit="test_unit", 49 | description="test_description", 50 | ) 51 | mock_handler.record.assert_called_once_with(metric=metric, value=1, attributes={}) 52 | -------------------------------------------------------------------------------- /packages/ragbits-core/tests/unit/llms/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/deepsense-ai/ragbits/c37428621adc5ab30528f41ccd22d655f421c2e8/packages/ragbits-core/tests/unit/llms/__init__.py -------------------------------------------------------------------------------- /packages/ragbits-core/tests/unit/llms/factory/__init__.py: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/deepsense-ai/ragbits/c37428621adc5ab30528f41ccd22d655f421c2e8/packages/ragbits-core/tests/unit/llms/factory/__init__.py -------------------------------------------------------------------------------- /packages/ragbits-core/tests/unit/llms/factory/test_get_preferred_llm.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | from ragbits.core.config import core_config 4 | from ragbits.core.llms.base import LLMType 5 | from ragbits.core.llms.factory import get_preferred_llm 6 | from ragbits.core.llms.litellm import LiteLLM 7 | 8 | 9 | def mock_llm_factory() -> LiteLLM: 10 | """ 11 | A mock LLM factory that creates a LiteLLM instance with a mock model name. 12 | 13 | Returns: 14 | LiteLLM: An instance of the LiteLLM. 15 | """ 16 | return LiteLLM(model_name="mock_model") 17 | 18 | 19 | def test_get_preferred_llm(monkeypatch: pytest.MonkeyPatch) -> None: 20 | """ 21 | Test the get_preferred_llm function.
22 | """ 23 | monkeypatch.setattr( 24 | core_config, 25 | "llm_preference_factories", 26 | {LLMType.TEXT: "unit.llms.factory.test_get_preferred_llm:mock_llm_factory"}, 27 | ) 28 | 29 | llm = get_preferred_llm() 30 | assert isinstance(llm, LiteLLM) 31 | assert llm.model_name == "mock_model" 32 | -------------------------------------------------------------------------------- /packages/ragbits-core/tests/unit/prompts/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/deepsense-ai/ragbits/c37428621adc5ab30528f41ccd22d655f421c2e8/packages/ragbits-core/tests/unit/prompts/__init__.py -------------------------------------------------------------------------------- /packages/ragbits-core/tests/unit/prompts/discovery/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/deepsense-ai/ragbits/c37428621adc5ab30528f41ccd22d655f421c2e8/packages/ragbits-core/tests/unit/prompts/discovery/__init__.py -------------------------------------------------------------------------------- /packages/ragbits-core/tests/unit/prompts/discovery/prompt_classes_for_tests.py: -------------------------------------------------------------------------------- 1 | from abc import ABC 2 | 3 | from pydantic import BaseModel 4 | 5 | from ragbits.core.prompt import Prompt 6 | 7 | 8 | class PromptForTestInput(BaseModel): 9 | """ 10 | Input format for the LoremPrompt. 11 | """ 12 | 13 | theme: str 14 | nsfw_allowed: bool = False 15 | var1: str 16 | var2: str 17 | var3: str 18 | var4: str 19 | 20 | 21 | class PromptForTestOutput(BaseModel): 22 | """ 23 | Output format for the LoremPrompt. 
24 | """ 25 | 26 | text: str 27 | 28 | 29 | class PromptForTest(Prompt[PromptForTestInput, PromptForTestOutput]): 30 | system_prompt = "fake system prompt" 31 | user_prompt = "fake user prompt" 32 | 33 | 34 | class PromptForTestInput2(BaseModel): 35 | """ 36 | Input format for the LoremPrompt. 37 | """ 38 | 39 | theme: str 40 | 41 | 42 | class PromptForTestOutput2(BaseModel): 43 | """ 44 | Output format for the LoremPrompt. 45 | """ 46 | 47 | text: str 48 | 49 | 50 | class PromptForTest2(Prompt[PromptForTestInput2, PromptForTestOutput2]): 51 | system_prompt = "fake system prompt2" 52 | user_prompt = "fake user prompt2" 53 | 54 | 55 | class MyBasePrompt(Prompt, ABC): 56 | system_prompt = "my base system prompt" 57 | user_prompt = "temp user prompt" 58 | 59 | 60 | class MyPromptWithBase(MyBasePrompt): 61 | user_prompt = "custom user prompt" 62 | 63 | 64 | class PromptWithoutInput(Prompt): 65 | system_prompt = "fake system prompt without typing" 66 | user_prompt = "fake user prompt without typing" 67 | -------------------------------------------------------------------------------- /packages/ragbits-core/tests/unit/prompts/discovery/ragbits_tests_pkg_with_prompts/__init__.py: -------------------------------------------------------------------------------- 1 | from . 
import prompts 2 | 3 | __all__ = ["prompts"] 4 | -------------------------------------------------------------------------------- /packages/ragbits-core/tests/unit/prompts/discovery/ragbits_tests_pkg_with_prompts/prompts/__init__.py: -------------------------------------------------------------------------------- 1 | from .temp_prompt1 import PromptForTestA 2 | from .temp_prompt2 import PromptForTestB 3 | 4 | __all__ = ["PromptForTestA", "PromptForTestB"] 5 | -------------------------------------------------------------------------------- /packages/ragbits-core/tests/unit/prompts/discovery/ragbits_tests_pkg_with_prompts/prompts/temp_prompt1.py: -------------------------------------------------------------------------------- 1 | from pydantic import BaseModel 2 | 3 | from ragbits.core.prompt import Prompt 4 | 5 | 6 | class PromptForTestInputA(BaseModel): 7 | """ 8 | Input format for the LoremPrompt. 9 | """ 10 | 11 | theme: str 12 | nsfw_allowed: bool = False 13 | var1: str 14 | var2: str 15 | var3: str 16 | var4: str 17 | 18 | 19 | class PromptForTestOutputA(BaseModel): 20 | """ 21 | Output format for the LoremPrompt. 22 | """ 23 | 24 | text: str 25 | 26 | 27 | class PromptForTestA(Prompt[PromptForTestInputA, PromptForTestOutputA]): 28 | system_prompt = "fake system prompt" 29 | user_prompt = "fake user prompt" 30 | -------------------------------------------------------------------------------- /packages/ragbits-core/tests/unit/prompts/discovery/ragbits_tests_pkg_with_prompts/prompts/temp_prompt2.py: -------------------------------------------------------------------------------- 1 | from pydantic import BaseModel 2 | 3 | from ragbits.core.prompt import Prompt 4 | 5 | 6 | class PromptForTestInputB(BaseModel): 7 | """ 8 | Input format for the LoremPrompt. 
9 | """ 10 | 11 | theme: str 12 | nsfw_allowed: bool = False 13 | var1: str 14 | var2: str 15 | var3: str 16 | var4: str 17 | 18 | 19 | class PromptForTestOutputB(BaseModel): 20 | """ 21 | Output format for the LoremPrompt. 22 | """ 23 | 24 | text: str 25 | 26 | 27 | class PromptForTestB(Prompt[PromptForTestInputB, PromptForTestOutputB]): 28 | system_prompt = "fake system prompt" 29 | user_prompt = "fake user prompt" 30 | -------------------------------------------------------------------------------- /packages/ragbits-core/tests/unit/prompts/discovery/test_prompt_discovery.py: -------------------------------------------------------------------------------- 1 | from pathlib import Path 2 | 3 | from ragbits.core.prompt.discovery import PromptDiscovery 4 | 5 | current_dir = Path(__file__).parent 6 | 7 | 8 | def test_prompt_discovery_from_file(): 9 | discovery_results = PromptDiscovery(root_path=current_dir).discover() 10 | print(discovery_results) 11 | 12 | assert len(discovery_results) == 5 13 | 14 | class_names = [cls.__name__ for cls in discovery_results] 15 | assert "PromptForTest" in class_names 16 | assert "PromptForTest2" in class_names 17 | assert "PromptWithoutInput" in class_names 18 | assert "PromptForTestInput" not in class_names 19 | 20 | 21 | def test_prompt_discovery_from_package(): 22 | discovery_results = PromptDiscovery( 23 | root_path=current_dir, file_pattern="ragbits_tests_pkg_with_prompts/**/*.py" 24 | ).discover() 25 | 26 | assert len(discovery_results) == 2 27 | 28 | class_names = [cls.__name__ for cls in discovery_results] 29 | assert "PromptForTestA" in class_names 30 | assert "PromptForTestB" in class_names 31 | -------------------------------------------------------------------------------- /packages/ragbits-core/tests/unit/sources/test_aws.py: -------------------------------------------------------------------------------- 1 | from unittest.mock import patch 2 | 3 | from sympy.testing import pytest 4 | 5 | from ragbits.core.sources.s3 
import S3Source 6 | 7 | 8 | def test_id(): 9 | source = S3Source(bucket_name="AA", key="bb/cc.pdf") 10 | expected_id = "s3:AA/bb/cc.pdf" 11 | assert source.id == expected_id 12 | 13 | 14 | async def test_from_uri_one_file(): 15 | one_file_paths = [ 16 | "s3://bucket/path/to/file", 17 | "https://s3.us-west-2.amazonaws.com/bucket/path/to/file", 18 | "https://bucket.s3-us-west-2.amazonaws.com/path/to/file", 19 | ] 20 | for path in one_file_paths: 21 | result = await S3Source.from_uri(path) 22 | assert result == [S3Source(bucket_name="bucket", key="path/to/file")] 23 | 24 | 25 | async def test_from_uri_with_prefix(): 26 | good_paths = [ 27 | "s3://bucket/path/to/files*", 28 | "https://s3.us-west-2.amazonaws.com/bucket/path/to/files*", 29 | "https://bucket.s3-us-west-2.amazonaws.com/path/to/files*", 30 | ] 31 | with patch("ragbits.core.sources.s3.S3Source.list_sources") as mock_list_sources: 32 | for path in good_paths: 33 | await S3Source.from_uri(path) 34 | mock_list_sources.assert_called_with(bucket_name="bucket", prefix="path/to/files") 35 | 36 | 37 | async def test_from_uri_raises_exception(): 38 | wrong_uris = [ 39 | "some string", 40 | "https://bucket.s3.us-west-2.amazonaws.com/path/to/file**", 41 | "https://bucket.s3.us-west-2.amazonaws.com/path/*/file*", 42 | "https://some/random/address", 43 | "https://s3.us-west-2.amazonaws.pl/path/to/file", 44 | "s3://short_address", 45 | ] 46 | for uri in wrong_uris: 47 | with pytest.raises(ValueError): 48 | await S3Source.from_uri(uri) 49 | -------------------------------------------------------------------------------- /packages/ragbits-core/tests/unit/sources/test_exceptions.py: -------------------------------------------------------------------------------- 1 | from ragbits.core.sources.exceptions import ( 2 | SourceConnectionError, 3 | SourceDownloadError, 4 | SourceError, 5 | SourceNotFoundError, 6 | ) 7 | 8 | 9 | def test_source_error_init(): 10 | error = SourceError("Test error message") 11 | assert error.message == 
"Test error message" 12 | assert str(error) == "Test error message" 13 | 14 | 15 | def test_source_connection_error_init(): 16 | error = SourceConnectionError() 17 | assert error.message == "Connection error." 18 | assert str(error) == "Connection error." 19 | 20 | 21 | def test_source_not_found_error_init(): 22 | error = SourceNotFoundError("test-source-id") 23 | assert error.source_id == "test-source-id" 24 | assert error.message == "Source with ID test-source-id not found." 25 | assert str(error) == "Source with ID test-source-id not found." 26 | 27 | 28 | def test_web_download_error_init(): 29 | url = "https://example.com/file.pdf" 30 | code = 404 31 | error = SourceDownloadError(url, code) 32 | 33 | assert error.url == url 34 | assert error.code == code 35 | assert error.message == f"Download of {url} failed with code {code}." 36 | assert str(error) == f"Download of {url} failed with code {code}." 37 | -------------------------------------------------------------------------------- /packages/ragbits-core/tests/unit/sources/test_hf.py: -------------------------------------------------------------------------------- 1 | from unittest.mock import MagicMock, patch 2 | 3 | from ragbits.core.sources.hf import HuggingFaceSource 4 | 5 | 6 | async def test_huggingface_source_fetch() -> None: 7 | take = MagicMock(return_value=[{"content": "This is the content of the file.", "source": "doc.md"}]) 8 | skip = MagicMock(return_value=MagicMock(take=take)) 9 | data = MagicMock(skip=skip) 10 | source = HuggingFaceSource(path="org/docs", split="train", row=1) 11 | 12 | with patch("ragbits.core.sources.hf.load_dataset", return_value=data): 13 | path = await source.fetch() 14 | 15 | assert source.id == "hf:org/docs/train/1" 16 | assert path.name == "doc.md" 17 | assert path.read_text() == "This is the content of the file." 
18 | -------------------------------------------------------------------------------- /packages/ragbits-core/tests/unit/sources/test_local.py: -------------------------------------------------------------------------------- 1 | from pathlib import Path 2 | 3 | from ragbits.core.sources.local import LocalFileSource 4 | 5 | TEST_FILE_PATH = Path(__file__) 6 | 7 | 8 | async def test_local_source_fetch(): 9 | source = LocalFileSource(path=TEST_FILE_PATH) 10 | 11 | path = await source.fetch() 12 | 13 | assert path == TEST_FILE_PATH 14 | 15 | 16 | async def test_local_source_list_sources(): 17 | example_files = TEST_FILE_PATH.parent.parent.parent / "assets" / "md" 18 | 19 | sources = await LocalFileSource.list_sources(example_files, file_pattern="*.md") 20 | 21 | assert sum(1 for _ in sources) == 2 22 | assert all(isinstance(source, LocalFileSource) for source in sources) 23 | assert all(source.path.suffix == ".md" for source in sources) 24 | -------------------------------------------------------------------------------- /packages/ragbits-core/tests/unit/test_options.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | from ragbits.core.options import Options 4 | from ragbits.core.types import NOT_GIVEN, NotGiven 5 | 6 | 7 | class OptionA(Options): 8 | a: int = 1 9 | d: int | NotGiven = NOT_GIVEN 10 | 11 | 12 | class OptionsB(Options): 13 | b: int = 2 14 | e: int | None = None 15 | 16 | 17 | class OptionsC(Options): 18 | a: int = 2 19 | c: str = "c" 20 | 21 | 22 | @pytest.mark.parametrize( 23 | ("options", "expected"), 24 | [ 25 | (OptionA(), {"a": 1, "d": None}), 26 | (OptionsB(), {"b": 2, "e": None}), 27 | ], 28 | ) 29 | def test_default_options(options: Options, expected: dict) -> None: 30 | assert options.dict() == expected 31 | 32 | 33 | def test_merge_options() -> None: 34 | options_a = OptionA() 35 | options_b = OptionsB() 36 | options_c = OptionsC() 37 | 38 | merged = options_a | options_b | options_c 39 
| 40 | assert merged.dict() == {"a": 2, "b": 2, "c": "c", "d": None, "e": None} 41 | -------------------------------------------------------------------------------- /packages/ragbits-core/tests/unit/utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/deepsense-ai/ragbits/c37428621adc5ab30528f41ccd22d655f421c2e8/packages/ragbits-core/tests/unit/utils/__init__.py -------------------------------------------------------------------------------- /packages/ragbits-core/tests/unit/utils/pyproject/test_find.py: -------------------------------------------------------------------------------- 1 | from pathlib import Path 2 | 3 | import pytest 4 | 5 | from ragbits.core.utils._pyproject import find_pyproject 6 | 7 | projects_dir = Path(__file__).parent.parent / "testprojects" 8 | 9 | 10 | def test_find_in_current_dir(): 11 | """Test finding a pyproject.toml file in the current directory.""" 12 | found = find_pyproject(projects_dir / "happy_project") 13 | assert found == projects_dir / "happy_project" / "pyproject.toml" 14 | 15 | 16 | def test_find_in_parent_dir(): 17 | """Test finding a pyproject.toml file in a parent directory.""" 18 | found = find_pyproject(projects_dir / "happy_project" / "subdirectory") 19 | assert found == projects_dir / "happy_project" / "pyproject.toml" 20 | 21 | 22 | def test_find_not_found(): 23 | """Test that it raises FileNotFoundError if the file is not found.""" 24 | with pytest.raises(FileNotFoundError): 25 | find_pyproject(Path("/")) 26 | -------------------------------------------------------------------------------- /packages/ragbits-core/tests/unit/utils/pyproject/test_get_config.py: -------------------------------------------------------------------------------- 1 | from pathlib import Path 2 | 3 | from ragbits.core.utils._pyproject import get_ragbits_config 4 | 5 | projects_dir = Path(__file__).parent.parent / "testprojects" 6 | 7 | 8 | def test_get_config(): 
9 | """Test getting config from pyproject.toml file.""" 10 | config = get_ragbits_config(projects_dir / "happy_project") 11 | 12 | assert config == { 13 | "lorem": "ipsum", 14 | "happy-project": { 15 | "foo": "bar", 16 | "is_happy": True, 17 | "happiness_level": 100, 18 | }, 19 | "project_base_path": str(projects_dir / "happy_project"), 20 | } 21 | 22 | 23 | def test_get_config_no_file(): 24 | """Test getting config when the pyproject.toml file is not found.""" 25 | config = get_ragbits_config(Path("/")) 26 | 27 | assert config == {} 28 | -------------------------------------------------------------------------------- /packages/ragbits-core/tests/unit/utils/test_decorators.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | from ragbits.core.utils.decorators import requires_dependencies 4 | 5 | 6 | def test_single_dependency_installed() -> None: 7 | @requires_dependencies("pytest") 8 | def some_function() -> str: 9 | return "success" 10 | 11 | assert some_function() == "success" 12 | 13 | 14 | def test_single_dependency_missing() -> None: 15 | @requires_dependencies("nonexistent_dependency") 16 | def some_function() -> str: 17 | return "success" 18 | 19 | with pytest.raises(ImportError) as exc: 20 | some_function() 21 | 22 | assert ( 23 | str(exc.value) == "Following dependencies are missing: nonexistent_dependency." 24 | " Please install them using `pip install nonexistent_dependency`." 
25 | ) 26 | 27 | 28 | def test_multiple_dependencies_installed() -> None: 29 | @requires_dependencies(["pytest", "asyncio"]) 30 | def some_function() -> str: 31 | return "success" 32 | 33 | assert some_function() == "success" 34 | 35 | 36 | def test_multiple_dependencies_some_missing() -> None: 37 | @requires_dependencies(["pytest", "nonexistent_dependency"]) 38 | def some_function() -> str: 39 | return "success" 40 | 41 | with pytest.raises(ImportError) as exc: 42 | some_function() 43 | 44 | assert ( 45 | str(exc.value) == "Following dependencies are missing: nonexistent_dependency." 46 | " Please install them using `pip install nonexistent_dependency`." 47 | ) 48 | -------------------------------------------------------------------------------- /packages/ragbits-core/tests/unit/utils/test_helpers.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | from ragbits.core.utils.helpers import batched 4 | 5 | 6 | @pytest.mark.parametrize( 7 | ("input_data", "batch_size", "expected"), 8 | [ 9 | ([], 3, []), 10 | ([], None, []), 11 | ([1, 2, 3], None, [[1, 2, 3]]), 12 | ([1, 2, 3], 5, [[1, 2, 3]]), 13 | ([1, 2, 3], 3, [[1, 2, 3]]), 14 | ([1, 2, 3, 4, 5, 6], 2, [[1, 2], [3, 4], [5, 6]]), 15 | ([1, 2, 3, 4, 5], 2, [[1, 2], [3, 4], [5]]), 16 | ], 17 | ids=[ 18 | "empty_iterable", 19 | "none_batch_size", 20 | "none_batch_size_with_remainder", 21 | "batch_size_larger_than_data", 22 | "batch_size_equal_to_data", 23 | "batch_size_divides_data_evenly", 24 | "batch_size_with_remainder", 25 | ], 26 | ) 27 | def test_batched(input_data: list[int], batch_size: int, expected: list[list[int]]) -> None: 28 | result = list(batched(input_data, batch_size)) 29 | assert result == expected 30 | -------------------------------------------------------------------------------- /packages/ragbits-core/tests/unit/utils/test_secrets.py: -------------------------------------------------------------------------------- 1 | import os 2 | from 
unittest.mock import patch 3 | 4 | import pytest 5 | 6 | from ragbits.core.utils.secrets import RAGBITS_KEY_ENV_VAR, get_secret_key 7 | 8 | 9 | def test_get_secret_key_from_env(): 10 | """Test getting the secret key from an environment variable.""" 11 | get_secret_key.cache_clear() 12 | test_key = "test-env-secret-key" 13 | with patch.dict(os.environ, {RAGBITS_KEY_ENV_VAR: test_key}, clear=True): 14 | assert get_secret_key() == test_key 15 | 16 | 17 | def test_get_secret_key_generates_random(): 18 | """Test that a random key is generated when neither env var nor default is provided.""" 19 | with patch.dict(os.environ, {}, clear=True): 20 | # The function is cached, so we need to test with different env_var names 21 | key1 = get_secret_key(env_var="TEST_KEY_1") 22 | key2 = get_secret_key(env_var="TEST_KEY_2") 23 | 24 | # Keys should be different and not empty 25 | assert key1 != key2 26 | assert key1 27 | assert key2 28 | 29 | 30 | def test_get_secret_key_warning(): 31 | """Test that a warning is emitted when generating a random key.""" 32 | with ( 33 | patch.dict(os.environ, {}, clear=True), 34 | pytest.warns(UserWarning, match=f"No secret key found in environment variable {RAGBITS_KEY_ENV_VAR}"), 35 | ): 36 | get_secret_key(env_var=RAGBITS_KEY_ENV_VAR) 37 | 38 | 39 | def test_get_secret_key_caching(): 40 | """Test that the secret key function caches results.""" 41 | with patch.dict(os.environ, {}, clear=True): 42 | # The same env_var should produce the same key due to caching 43 | key1 = get_secret_key(env_var="TEST_CACHE_KEY") 44 | key2 = get_secret_key(env_var="TEST_CACHE_KEY") 45 | assert key1 == key2 46 | -------------------------------------------------------------------------------- /packages/ragbits-core/tests/unit/utils/testprojects/bad_factory_project/pyproject.toml: -------------------------------------------------------------------------------- 1 | [project] 2 | name = "bad_factory_project" 3 | 4 | [tool.ragbits.core.llm_preference_factories] 5 | 
non_existing = "ragbits.core.llms.factory:simple_litellm_factory" 6 | vision = "ragbits.core.llms.factory:simple_litellm_vision_factory" 7 | structured_output = "ragbits.core.llms.factory:simple_litellm_vision_factory" 8 | -------------------------------------------------------------------------------- /packages/ragbits-core/tests/unit/utils/testprojects/factory_project/pyproject.toml: -------------------------------------------------------------------------------- 1 | [project] 2 | name = "factory_project" 3 | 4 | [tool.ragbits.core.llm_preference_factories] 5 | text = "ragbits.core.llms.factory:simple_litellm_factory" 6 | vision = "ragbits.core.llms.factory:simple_litellm_vision_factory" 7 | structured_output = "ragbits.core.llms.factory:simple_litellm_vision_factory" 8 | -------------------------------------------------------------------------------- /packages/ragbits-core/tests/unit/utils/testprojects/happy_project/pyproject.toml: -------------------------------------------------------------------------------- 1 | [project] 2 | name = "happy-project" 3 | 4 | [tool.ragbits] 5 | lorem = "ipsum" 6 | 7 | [tool.ragbits.happy-project] 8 | foo = "bar" 9 | is_happy = true 10 | happiness_level = 100 11 | -------------------------------------------------------------------------------- /packages/ragbits-core/tests/unit/utils/testprojects/project_with_instance_factory/pyproject.toml: -------------------------------------------------------------------------------- 1 | [project] 2 | name = "instance_factory_project" 3 | 4 | [tool.ragbits.core.component_preference_factories] 5 | example = "unit.utils.test_config_handling:example_factory" 6 | -------------------------------------------------------------------------------- /packages/ragbits-core/tests/unit/utils/testprojects/project_with_instances_yaml/instances.yaml: -------------------------------------------------------------------------------- 1 | example: 2 | type: unit.utils.test_config_handling:ExampleSubclass 3 | 
config: 4 | foo: I am a foo 5 | bar: 122 6 | -------------------------------------------------------------------------------- /packages/ragbits-core/tests/unit/utils/testprojects/project_with_instances_yaml/pyproject.toml: -------------------------------------------------------------------------------- 1 | [project] 2 | name = "project_with_instances_yaml" 3 | 4 | [tool.ragbits.core] 5 | component_preference_config_path = "instances.yaml" 6 | -------------------------------------------------------------------------------- /packages/ragbits-document-search/README.md: -------------------------------------------------------------------------------- 1 | # Ragbits Document Search 2 | 3 | Ragbits Document Search is a Python package that provides tools for building RAG applications. It helps ingest, index, and search documents to retrieve relevant information for your prompts. 4 | 5 | ## Installation 6 | 7 | You can install the latest version of Ragbits Document Search using pip: 8 | 9 | ```bash 10 | pip install ragbits-document-search 11 | ``` 12 | 13 | ## Quickstart 14 | ```python 15 | import asyncio 16 | 17 | from ragbits.core.embeddings.litellm import LiteLLMEmbedder 18 | from ragbits.core.vector_stores.in_memory import InMemoryVectorStore 19 | from ragbits.document_search import DocumentSearch 20 | 21 | async def main() -> None: 22 | """ 23 | Run the example.
24 | """ 25 | embedder = LiteLLMEmbedder( 26 | model="text-embedding-3-small", 27 | ) 28 | vector_store = InMemoryVectorStore(embedder=embedder) 29 | document_search = DocumentSearch( 30 | vector_store=vector_store, 31 | ) 32 | 33 | # Ingest all .txt files from the "biographies" directory 34 | await document_search.ingest("file://biographies/*.txt") 35 | 36 | # Search the documents for the query 37 | results = await document_search.search("When was Marie Curie-Sklodowska born?") 38 | print(results) 39 | 40 | 41 | if __name__ == "__main__": 42 | asyncio.run(main()) 43 | ``` 44 | 45 | ## Documentation 46 | * [Quickstart 2: Adding RAG Capabilities](https://ragbits.deepsense.ai/quickstart/quickstart2_rag/) 47 | * [How-To Guides - Document Search](https://ragbits.deepsense.ai/how-to/document_search/ingest-documents/) 48 | * [API Reference - Document Search](https://ragbits.deepsense.ai/api_reference/document_search/) 49 | -------------------------------------------------------------------------------- /packages/ragbits-document-search/src/ragbits/document_search/__init__.py: -------------------------------------------------------------------------------- 1 | from ragbits.document_search._main import DocumentSearch, DocumentSearchOptions 2 | 3 | __all__ = ["DocumentSearch", "DocumentSearchOptions"] 4 | -------------------------------------------------------------------------------- /packages/ragbits-document-search/src/ragbits/document_search/documents/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/deepsense-ai/ragbits/c37428621adc5ab30528f41ccd22d655f421c2e8/packages/ragbits-document-search/src/ragbits/document_search/documents/__init__.py -------------------------------------------------------------------------------- /packages/ragbits-document-search/src/ragbits/document_search/ingestion/__init__.py: --------------------------------------------------------------------------------
https://raw.githubusercontent.com/deepsense-ai/ragbits/c37428621adc5ab30528f41ccd22d655f421c2e8/packages/ragbits-document-search/src/ragbits/document_search/ingestion/__init__.py -------------------------------------------------------------------------------- /packages/ragbits-document-search/src/ragbits/document_search/ingestion/enrichers/__init__.py: -------------------------------------------------------------------------------- 1 | from ragbits.document_search.ingestion.enrichers.base import ElementEnricher 2 | from ragbits.document_search.ingestion.enrichers.image import ImageElementEnricher 3 | from ragbits.document_search.ingestion.enrichers.router import ElementEnricherRouter 4 | 5 | __all__ = ["ElementEnricher", "ElementEnricherRouter", "ImageElementEnricher"] 6 | -------------------------------------------------------------------------------- /packages/ragbits-document-search/src/ragbits/document_search/ingestion/enrichers/exceptions.py: -------------------------------------------------------------------------------- 1 | from ragbits.document_search.documents.element import Element 2 | 3 | 4 | class EnricherError(Exception): 5 | """ 6 | Class for all exceptions raised by the element enricher and router. 7 | """ 8 | 9 | def __init__(self, message: str) -> None: 10 | super().__init__(message) 11 | self.message = message 12 | 13 | 14 | class EnricherNotFoundError(EnricherError): 15 | """ 16 | Raised when no enricher was found for the element type. 17 | """ 18 | 19 | def __init__(self, element_type: type[Element]) -> None: 20 | super().__init__(f"No enricher found for the element type {element_type}") 21 | self.element_type = element_type 22 | 23 | 24 | class EnricherElementNotSupportedError(EnricherError): 25 | """ 26 | Raised when the element type is not supported by the enricher. 
27 | """ 28 | 29 | def __init__(self, enricher_name: str, element_type: type[Element]) -> None: 30 | super().__init__(f"Element type {element_type} is not supported by the {enricher_name}") 31 | self.enricher_name = enricher_name 32 | self.element_type = element_type 33 | -------------------------------------------------------------------------------- /packages/ragbits-document-search/src/ragbits/document_search/ingestion/parsers/__init__.py: -------------------------------------------------------------------------------- 1 | from ragbits.document_search.ingestion.parsers.base import DocumentParser, ImageDocumentParser, TextDocumentParser 2 | from ragbits.document_search.ingestion.parsers.router import DocumentParserRouter 3 | 4 | __all__ = ["DocumentParser", "DocumentParserRouter", "ImageDocumentParser", "TextDocumentParser"] 5 | -------------------------------------------------------------------------------- /packages/ragbits-document-search/src/ragbits/document_search/ingestion/parsers/exceptions.py: -------------------------------------------------------------------------------- 1 | from ragbits.document_search.documents.document import DocumentType 2 | 3 | 4 | class ParserError(Exception): 5 | """ 6 | Class for all exceptions raised by the document parser and router. 7 | """ 8 | 9 | def __init__(self, message: str) -> None: 10 | super().__init__(message) 11 | self.message = message 12 | 13 | 14 | class ParserNotFoundError(ParserError): 15 | """ 16 | Raised when no parser was found for the document type. 17 | """ 18 | 19 | def __init__(self, document_type: DocumentType) -> None: 20 | super().__init__(f"No parser found for the document type {document_type}") 21 | self.document_type = document_type 22 | 23 | 24 | class ParserDocumentNotSupportedError(ParserError): 25 | """ 26 | Raised when the document type is not supported by the parser. 
27 | """ 28 | 29 | def __init__(self, parser_name: str, document_type: DocumentType) -> None: 30 | super().__init__(f"Document type {document_type.value} is not supported by the {parser_name}") 31 | self.parser_name = parser_name 32 | self.document_type = document_type 33 | -------------------------------------------------------------------------------- /packages/ragbits-document-search/src/ragbits/document_search/ingestion/strategies/__init__.py: -------------------------------------------------------------------------------- 1 | from ragbits.document_search.ingestion.strategies.base import IngestStrategy 2 | from ragbits.document_search.ingestion.strategies.batched import BatchedIngestStrategy 3 | from ragbits.document_search.ingestion.strategies.ray import RayDistributedIngestStrategy 4 | from ragbits.document_search.ingestion.strategies.sequential import SequentialIngestStrategy 5 | 6 | __all__ = ["BatchedIngestStrategy", "IngestStrategy", "RayDistributedIngestStrategy", "SequentialIngestStrategy"] 7 | -------------------------------------------------------------------------------- /packages/ragbits-document-search/src/ragbits/document_search/ingestion/strategies/sequential.py: -------------------------------------------------------------------------------- 1 | from ragbits.document_search.ingestion.strategies.batched import BatchedIngestStrategy 2 | 3 | 4 | class SequentialIngestStrategy(BatchedIngestStrategy): 5 | """ 6 | Ingest strategy that processes documents in sequence, one at a time. 7 | """ 8 | 9 | def __init__(self, num_retries: int = 3, backoff_multiplier: int = 1, backoff_max: int = 60) -> None: 10 | """ 11 | Initialize the SequentialIngestStrategy instance. 12 | 13 | Args: 14 | num_retries: The number of retries per document ingest task error. 15 | backoff_multiplier: The base delay multiplier for exponential backoff (in seconds). 16 | backoff_max: The maximum allowed delay (in seconds) between retries. 
17 | """ 18 | super().__init__( 19 | batch_size=1, 20 | num_retries=num_retries, 21 | backoff_multiplier=backoff_multiplier, 22 | backoff_max=backoff_max, 23 | ) 24 | -------------------------------------------------------------------------------- /packages/ragbits-document-search/src/ragbits/document_search/py.typed: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/deepsense-ai/ragbits/c37428621adc5ab30528f41ccd22d655f421c2e8/packages/ragbits-document-search/src/ragbits/document_search/py.typed -------------------------------------------------------------------------------- /packages/ragbits-document-search/src/ragbits/document_search/retrieval/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/deepsense-ai/ragbits/c37428621adc5ab30528f41ccd22d655f421c2e8/packages/ragbits-document-search/src/ragbits/document_search/retrieval/__init__.py -------------------------------------------------------------------------------- /packages/ragbits-document-search/src/ragbits/document_search/retrieval/rephrasers/__init__.py: -------------------------------------------------------------------------------- 1 | from ragbits.document_search.retrieval.rephrasers.base import QueryRephraser, QueryRephraserOptions 2 | from ragbits.document_search.retrieval.rephrasers.llm import ( 3 | LLMQueryRephraser, 4 | LLMQueryRephraserOptions, 5 | LLMQueryRephraserPrompt, 6 | LLMQueryRephraserPromptInput, 7 | ) 8 | from ragbits.document_search.retrieval.rephrasers.noop import NoopQueryRephraser 9 | 10 | __all__ = [ 11 | "LLMQueryRephraser", 12 | "LLMQueryRephraserOptions", 13 | "LLMQueryRephraserPrompt", 14 | "LLMQueryRephraserPromptInput", 15 | "NoopQueryRephraser", 16 | "QueryRephraser", 17 | "QueryRephraserOptions", 18 | ] 19 | -------------------------------------------------------------------------------- 
/packages/ragbits-document-search/src/ragbits/document_search/retrieval/rephrasers/base.py: -------------------------------------------------------------------------------- 1 | from abc import ABC, abstractmethod 2 | from collections.abc import Iterable 3 | from typing import ClassVar, TypeVar 4 | 5 | from ragbits.core.options import Options 6 | from ragbits.core.utils.config_handling import ConfigurableComponent 7 | from ragbits.document_search.retrieval import rephrasers 8 | 9 | 10 | class QueryRephraserOptions(Options): 11 | """ 12 | Object representing the options for the rephraser. 13 | """ 14 | 15 | 16 | QueryRephraserOptionsT = TypeVar("QueryRephraserOptionsT", bound=QueryRephraserOptions) 17 | 18 | 19 | class QueryRephraser(ConfigurableComponent[QueryRephraserOptionsT], ABC): 20 | """ 21 | Rephrases a query. Can provide multiple rephrased queries from one sentence / question. 22 | """ 23 | 24 | options_cls: type[QueryRephraserOptionsT] 25 | default_module: ClassVar = rephrasers 26 | configuration_key: ClassVar = "rephraser" 27 | 28 | @abstractmethod 29 | async def rephrase(self, query: str, options: QueryRephraserOptionsT | None = None) -> Iterable[str]: 30 | """ 31 | Rephrase a query. 32 | 33 | Args: 34 | query: The query to rephrase. 35 | options: The options for the rephraser. 36 | 37 | Returns: 38 | The rephrased queries. 39 | """ 40 | -------------------------------------------------------------------------------- /packages/ragbits-document-search/src/ragbits/document_search/retrieval/rephrasers/noop.py: -------------------------------------------------------------------------------- 1 | from collections.abc import Iterable 2 | 3 | from ragbits.core.audit.traces import traceable 4 | from ragbits.document_search.retrieval.rephrasers.base import QueryRephraser, QueryRephraserOptions 5 | 6 | 7 | class NoopQueryRephraser(QueryRephraser[QueryRephraserOptions]): 8 | """ 9 | A no-op query paraphraser that does not change the query. 
10 | """ 11 | 12 | options_cls: type[QueryRephraserOptions] = QueryRephraserOptions 13 | 14 | @traceable 15 | async def rephrase(self, query: str, options: QueryRephraserOptions | None = None) -> Iterable[str]: # noqa: PLR6301 16 | """ 17 | Mock implementation which outputs the same query as in input. 18 | 19 | Args: 20 | query: The query to rephrase. 21 | options: The options for the rephraser. 22 | 23 | Returns: 24 | The list with non-transformed query. 25 | """ 26 | return [query] 27 | -------------------------------------------------------------------------------- /packages/ragbits-document-search/src/ragbits/document_search/retrieval/rerankers/__init__.py: -------------------------------------------------------------------------------- 1 | from ragbits.document_search.retrieval.rerankers.base import Reranker, RerankerOptions 2 | from ragbits.document_search.retrieval.rerankers.noop import NoopReranker 3 | 4 | __all__ = ["NoopReranker", "Reranker", "RerankerOptions"] 5 | -------------------------------------------------------------------------------- /packages/ragbits-document-search/src/ragbits/document_search/retrieval/rerankers/base.py: -------------------------------------------------------------------------------- 1 | from abc import ABC, abstractmethod 2 | from collections.abc import Sequence 3 | from typing import ClassVar, TypeVar 4 | 5 | from ragbits.core.options import Options 6 | from ragbits.core.types import NOT_GIVEN, NotGiven 7 | from ragbits.core.utils.config_handling import ConfigurableComponent 8 | from ragbits.document_search.documents.element import Element 9 | from ragbits.document_search.retrieval import rerankers 10 | 11 | 12 | class RerankerOptions(Options): 13 | """ 14 | Object representing the options for the reranker. 15 | 16 | Attributes: 17 | top_n: The number of entries to return. 18 | score_threshold: The minimum relevance score for an entry to be returned. 19 | override_score: If True reranking will override element score. 
20 | """ 21 | 22 | top_n: int | None | NotGiven = NOT_GIVEN 23 | score_threshold: float | None | NotGiven = NOT_GIVEN 24 | override_score: bool = True 25 | 26 | 27 | RerankerOptionsT = TypeVar("RerankerOptionsT", bound=RerankerOptions) 28 | 29 | 30 | class Reranker(ConfigurableComponent[RerankerOptionsT], ABC): 31 | """ 32 | Reranks elements retrieved from vector store. 33 | """ 34 | 35 | options_cls: type[RerankerOptionsT] 36 | default_module: ClassVar = rerankers 37 | configuration_key: ClassVar = "reranker" 38 | 39 | @abstractmethod 40 | async def rerank( 41 | self, 42 | elements: Sequence[Sequence[Element]], 43 | query: str, 44 | options: RerankerOptionsT | None = None, 45 | ) -> Sequence[Element]: 46 | """ 47 | Rerank elements. 48 | 49 | Args: 50 | elements: The elements to rerank. 51 | query: The query to rerank the elements against. 52 | options: The options for reranking. 53 | 54 | Returns: 55 | The reranked elements. 56 | """ 57 | -------------------------------------------------------------------------------- /packages/ragbits-document-search/src/ragbits/document_search/retrieval/rerankers/noop.py: -------------------------------------------------------------------------------- 1 | from collections.abc import Sequence 2 | from itertools import chain 3 | 4 | from ragbits.core.audit.traces import traceable 5 | from ragbits.document_search.documents.element import Element 6 | from ragbits.document_search.retrieval.rerankers.base import Reranker, RerankerOptions 7 | 8 | 9 | class NoopReranker(Reranker[RerankerOptions]): 10 | """ 11 | A no-op reranker that does not change the order of the elements. 12 | """ 13 | 14 | options_cls: type[RerankerOptions] = RerankerOptions 15 | 16 | @traceable 17 | async def rerank( # noqa: PLR6301 18 | self, 19 | elements: Sequence[Sequence[Element]], 20 | query: str, 21 | options: RerankerOptions | None = None, 22 | ) -> Sequence[Element]: 23 | """ 24 | No reranking, returning the elements in the same order. 
25 | 26 | Args: 27 | elements: The elements to rerank. 28 | query: The query to rerank the elements against. 29 | options: The options for reranking. 30 | 31 | Returns: 32 | The reranked elements. 33 | """ 34 | return [*{element.id: element for element in chain.from_iterable(elements)}.values()] 35 | -------------------------------------------------------------------------------- /packages/ragbits-document-search/tests/assets/img/transformers_paper_page.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/deepsense-ai/ragbits/c37428621adc5ab30528f41ccd22d655f421c2e8/packages/ragbits-document-search/tests/assets/img/transformers_paper_page.png -------------------------------------------------------------------------------- /packages/ragbits-document-search/tests/assets/md/bar.md: -------------------------------------------------------------------------------- 1 | bar -------------------------------------------------------------------------------- /packages/ragbits-document-search/tests/assets/md/foo.md: -------------------------------------------------------------------------------- 1 | foo -------------------------------------------------------------------------------- /packages/ragbits-document-search/tests/assets/md/test_file.md: -------------------------------------------------------------------------------- 1 | # Ragbits 2 | 3 | Repository for internal experiment with our upcoming LLM framework. 
4 | -------------------------------------------------------------------------------- /packages/ragbits-document-search/tests/assets/pdf/transformers_paper_page.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/deepsense-ai/ragbits/c37428621adc5ab30528f41ccd22d655f421c2e8/packages/ragbits-document-search/tests/assets/pdf/transformers_paper_page.pdf -------------------------------------------------------------------------------- /packages/ragbits-document-search/tests/cli/custom_cli_source.py: -------------------------------------------------------------------------------- 1 | from collections.abc import Iterable 2 | from pathlib import Path 3 | from typing import ClassVar 4 | 5 | from typing_extensions import Self 6 | 7 | from ragbits.core.audit.traces import traceable 8 | from ragbits.core.sources.base import Source 9 | 10 | 11 | class CustomCliSource(Source): 12 | """ 13 | An object representing a custom source for CLI testing. 
14 | """ 15 | 16 | path: Path 17 | protocol: ClassVar[str] = "custom_cli_protocol" 18 | 19 | @property 20 | def id(self) -> str: 21 | """Get unique identifier of the object in the custom CLI source.""" 22 | return f"custom_cli_source:{self.path}" 23 | 24 | @traceable 25 | async def fetch(self) -> Path: 26 | """Fetch the custom CLI source.""" 27 | return self.path 28 | 29 | @classmethod 30 | async def list_sources(cls, path: str) -> Iterable[Self]: 31 | """List all sources from the custom CLI source.""" 32 | return [cls(path=Path(path))] 33 | 34 | @classmethod 35 | @traceable 36 | async def from_uri(cls, path: str) -> Iterable[Self]: 37 | """Custom CLI source from URI path.""" 38 | return [cls(path=Path(path))] 39 | -------------------------------------------------------------------------------- /packages/ragbits-document-search/tests/integration/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/deepsense-ai/ragbits/c37428621adc5ab30528f41ccd22d655f421c2e8/packages/ragbits-document-search/tests/integration/__init__.py -------------------------------------------------------------------------------- /packages/ragbits-document-search/tests/integration/test_docling.py: -------------------------------------------------------------------------------- 1 | from pathlib import Path 2 | 3 | import pytest 4 | 5 | from ragbits.document_search.documents.document import DocumentMeta 6 | from ragbits.document_search.ingestion.parsers.docling import DoclingDocumentParser 7 | 8 | 9 | @pytest.mark.parametrize( 10 | ("document_metadata", "expected_num_elements"), 11 | [ 12 | pytest.param( 13 | DocumentMeta.from_literal("Name of Peppa's brother is George."), 14 | 1, 15 | id="TextDocument", 16 | ), 17 | pytest.param( 18 | DocumentMeta.from_local_path(Path(__file__).parent.parent / "assets" / "md" / "test_file.md"), 19 | 1, 20 | id="MarkdownDocument", 21 | ), 22 | pytest.param( 23 | 
DocumentMeta.from_local_path( 24 | Path(__file__).parent.parent / "assets" / "img" / "transformers_paper_page.png" 25 | ), 26 | 6, 27 | id="ImageDocument", 28 | ), 29 | pytest.param( 30 | DocumentMeta.from_local_path( 31 | Path(__file__).parent.parent / "assets" / "pdf" / "transformers_paper_page.pdf" 32 | ), 33 | 7, 34 | id="PDFDocument", 35 | ), 36 | ], 37 | ) 38 | async def test_docling_parser(document_metadata: DocumentMeta, expected_num_elements: int) -> None: 39 | document = await document_metadata.fetch() 40 | parser = DoclingDocumentParser() 41 | 42 | elements = await parser.parse(document) 43 | 44 | assert len(elements) == expected_num_elements 45 | -------------------------------------------------------------------------------- /packages/ragbits-document-search/tests/unit/test_documents.py: -------------------------------------------------------------------------------- 1 | import tempfile 2 | from pathlib import Path 3 | 4 | from ragbits.core.sources.local import LocalFileSource 5 | from ragbits.document_search.documents.document import ( 6 | DocumentMeta, 7 | DocumentType, 8 | TextDocument, 9 | ) 10 | 11 | 12 | async def test_loading_local_file_source(): 13 | with tempfile.NamedTemporaryFile() as f: 14 | f.write(b"test") 15 | f.seek(0) 16 | 17 | source = LocalFileSource(path=Path(f.name)) 18 | 19 | document_meta = DocumentMeta(document_type=DocumentType.TXT, source=source) 20 | 21 | document = await document_meta.fetch() 22 | 23 | assert isinstance(document, TextDocument) 24 | assert document.content == "test" 25 | -------------------------------------------------------------------------------- /packages/ragbits-document-search/tests/unit/test_element_enricher_router.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | from ragbits.core.utils.config_handling import ObjectConstructionConfig 4 | from ragbits.document_search.documents.element import ImageElement, TextElement 5 | from 
ragbits.document_search.ingestion.enrichers.exceptions import EnricherNotFoundError 6 | from ragbits.document_search.ingestion.enrichers.image import ImageElementEnricher 7 | from ragbits.document_search.ingestion.enrichers.router import ElementEnricherRouter 8 | 9 | 10 | def test_enricher_router_from_config() -> None: 11 | config = { 12 | "TextElement": ObjectConstructionConfig.model_validate( 13 | {"type": "ragbits.document_search.ingestion.enrichers.image:ImageElementEnricher"} 14 | ), 15 | "ImageElement": ObjectConstructionConfig.model_validate( 16 | {"type": "ragbits.document_search.ingestion.enrichers.image:ImageElementEnricher"} 17 | ), 18 | } 19 | router = ElementEnricherRouter.from_config(config) 20 | 21 | assert isinstance(router._enrichers[TextElement], ImageElementEnricher) 22 | assert isinstance(router._enrichers[ImageElement], ImageElementEnricher) 23 | 24 | 25 | async def test_enricher_router_get() -> None: 26 | enricher = ImageElementEnricher() 27 | enricher_router = ElementEnricherRouter({ImageElement: enricher}) 28 | 29 | assert enricher_router.get(ImageElement) is enricher 30 | 31 | 32 | async def test_enricher_router_get_raises_when_no_enricher_found() -> None: 33 | enricher = ImageElementEnricher() 34 | enricher_router = ElementEnricherRouter() 35 | enricher_router._enrichers = {ImageElement: enricher} 36 | 37 | with pytest.raises(EnricherNotFoundError) as exc: 38 | enricher_router.get(TextElement) 39 | 40 | assert exc.value.message == f"No enricher found for the element type {TextElement}" 41 | assert exc.value.element_type == TextElement 42 | -------------------------------------------------------------------------------- /packages/ragbits-document-search/tests/unit/test_elements.py: -------------------------------------------------------------------------------- 1 | from uuid import UUID 2 | 3 | from pydantic import computed_field 4 | 5 | from ragbits.core.vector_stores.base import VectorStoreEntry 6 | from 
ragbits.document_search.documents.document import DocumentType 7 | from ragbits.document_search.documents.element import Element 8 | 9 | 10 | def test_resolving_element_type() -> None: 11 | class MyElement(Element): 12 | element_type: str = "custom_element" 13 | foo: str 14 | 15 | @computed_field # type: ignore[prop-decorator] 16 | @property 17 | def text_representation(self) -> str: 18 | return self.foo + self.foo 19 | 20 | element = Element.from_vector_db_entry( 21 | db_entry=VectorStoreEntry( 22 | id=UUID("1c7d6b27-4ef1-537c-ad7c-676edb8bc8a8"), 23 | text="test content", 24 | metadata={ 25 | "element_type": "custom_element", 26 | "foo": "bar", 27 | "document_meta": { 28 | "document_type": "txt", 29 | "source": {"source_type": "local_file_source", "path": "/example/path"}, 30 | }, 31 | }, 32 | ), 33 | score=0.85, 34 | ) 35 | 36 | assert isinstance(element, MyElement) 37 | assert element.foo == "bar" 38 | assert element.key == "barbar" 39 | assert element.text_representation == "barbar" 40 | assert element.document_meta.document_type == DocumentType.TXT 41 | assert element.document_meta.source.source_type == "local_file_source" 42 | assert element.score == 0.85 43 | -------------------------------------------------------------------------------- /packages/ragbits-document-search/tests/unit/testprojects/empty_project/pyproject.toml: -------------------------------------------------------------------------------- 1 | [project] 2 | name = "empty_project" 3 | -------------------------------------------------------------------------------- /packages/ragbits-document-search/tests/unit/testprojects/project_with_instance_factory/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/deepsense-ai/ragbits/c37428621adc5ab30528f41ccd22d655f421c2e8/packages/ragbits-document-search/tests/unit/testprojects/project_with_instance_factory/__init__.py 
-------------------------------------------------------------------------------- /packages/ragbits-document-search/tests/unit/testprojects/project_with_instance_factory/factories.py: -------------------------------------------------------------------------------- 1 | import asyncio 2 | 3 | from ragbits.core.embeddings.dense import NoopEmbedder 4 | from ragbits.core.vector_stores.in_memory import InMemoryVectorStore, VectorStoreOptions 5 | from ragbits.document_search import DocumentSearch 6 | from ragbits.document_search.documents.document import DocumentMeta 7 | from ragbits.document_search.retrieval.rerankers.base import RerankerOptions 8 | from ragbits.document_search.retrieval.rerankers.noop import NoopReranker 9 | 10 | 11 | def create_document_search_instance_223(): 12 | vector_store_options = VectorStoreOptions(k=223) 13 | document_search: DocumentSearch = DocumentSearch( 14 | reranker=NoopReranker(default_options=RerankerOptions(top_n=223)), 15 | vector_store=InMemoryVectorStore(embedder=NoopEmbedder(), default_options=vector_store_options), 16 | ) 17 | return document_search 18 | 19 | 20 | def create_document_search_instance_825(): 21 | vector_store_options = VectorStoreOptions(k=825) 22 | document_search: DocumentSearch = DocumentSearch( 23 | reranker=NoopReranker(default_options=RerankerOptions(top_n=825)), 24 | vector_store=InMemoryVectorStore(embedder=NoopEmbedder(), default_options=vector_store_options), 25 | ) 26 | return document_search 27 | 28 | 29 | async def _add_example_documents(document_search: DocumentSearch) -> None: 30 | documents = [ 31 | DocumentMeta.from_literal("Foo document"), 32 | DocumentMeta.from_literal("Bar document"), 33 | DocumentMeta.from_literal("Baz document"), 34 | ] 35 | await document_search.ingest(documents) 36 | 37 | 38 | def create_document_search_instance_with_documents(): 39 | document_search: DocumentSearch = DocumentSearch(vector_store=InMemoryVectorStore(embedder=NoopEmbedder())) 40 | 
asyncio.run(_add_example_documents(document_search)) 41 | return document_search 42 | -------------------------------------------------------------------------------- /packages/ragbits-document-search/tests/unit/testprojects/project_with_instance_factory/pyproject.toml: -------------------------------------------------------------------------------- 1 | [project] 2 | name = "instance_factory_project" 3 | 4 | [tool.ragbits.core.component_preference_factories] 5 | document_search = "project_with_instance_factory.factories:create_document_search_instance_223" 6 | -------------------------------------------------------------------------------- /packages/ragbits-document-search/tests/unit/testprojects/project_with_instances_yaml/instances.yaml: -------------------------------------------------------------------------------- 1 | reranker: 2 | type: NoopReranker 3 | config: 4 | default_options: 5 | top_n: 17 6 | vector_store: 7 | type: InMemoryVectorStore 8 | config: 9 | embedder: 10 | type: NoopEmbedder 11 | default_options: 12 | k: 147 13 | -------------------------------------------------------------------------------- /packages/ragbits-document-search/tests/unit/testprojects/project_with_instances_yaml/pyproject.toml: -------------------------------------------------------------------------------- 1 | [project] 2 | name = "project_with_instances_yaml" 3 | 4 | [tool.ragbits.core] 5 | component_preference_config_path = "instances.yaml" 6 | -------------------------------------------------------------------------------- /packages/ragbits-document-search/tests/unit/testprojects/project_with_nested_yaml/instances.yaml: -------------------------------------------------------------------------------- 1 | reranker: 2 | type: NoopReranker 3 | config: 4 | default_options: 5 | top_n: 23 6 | vector_store: 7 | type: InMemoryVectorStore 8 | config: 9 | embedder: 10 | type: NoopEmbedder 11 | default_options: 12 | k: 147 13 | document_search: 14 | type: DocumentSearch 15 | config: 16 
| reranker: 17 | type: NoopReranker 18 | config: 19 | default_options: 20 | top_n: 17 21 | vector_store: 22 | type: InMemoryVectorStore 23 | config: 24 | embedder: 25 | type: NoopEmbedder 26 | default_options: 27 | k: 12 28 | -------------------------------------------------------------------------------- /packages/ragbits-document-search/tests/unit/testprojects/project_with_nested_yaml/pyproject.toml: -------------------------------------------------------------------------------- 1 | [project] 2 | name = "project_with_instances_yaml" 3 | 4 | [tool.ragbits.core] 5 | component_preference_config_path = "instances.yaml" 6 | -------------------------------------------------------------------------------- /packages/ragbits-evaluate/README.md: -------------------------------------------------------------------------------- 1 | # Ragbits Evaluate 2 | 3 | Ragbits Evaluate is a package that contains tools for evaluating the performance of AI pipelines defined with Ragbits components. It also helps with automatically finding the best hyperparameter configurations for them. 
4 | 5 | ## Installation 6 | 7 | To install the Ragbits Evaluate package, run: 8 | 9 | ```sh 10 | pip install ragbits-evaluate 11 | ``` 12 | 13 | 16 | 17 | ## Documentation 18 | 23 | * [How-To Guides - Evaluate](https://ragbits.deepsense.ai/how-to/evaluate/optimize/) 24 | -------------------------------------------------------------------------------- /packages/ragbits-evaluate/src/ragbits/evaluate/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/deepsense-ai/ragbits/c37428621adc5ab30528f41ccd22d655f421c2e8/packages/ragbits-evaluate/src/ragbits/evaluate/__init__.py -------------------------------------------------------------------------------- /packages/ragbits-evaluate/src/ragbits/evaluate/config.py: -------------------------------------------------------------------------------- 1 | from ragbits.core.config import CoreConfig 2 | from ragbits.core.utils._pyproject import get_config_instance 3 | 4 | 5 | class EvaluateConfig(CoreConfig): 6 | """ 7 | Configuration for the ragbits-evaluate package, loaded from downstream projects' pyproject.toml files. 8 | """ 9 | 10 | 11 | eval_config = get_config_instance(EvaluateConfig, subproject="evaluate") 12 | -------------------------------------------------------------------------------- /packages/ragbits-evaluate/src/ragbits/evaluate/dataloaders/__init__.py: -------------------------------------------------------------------------------- 1 | from ragbits.evaluate.dataloaders.base import DataLoader 2 | 3 | __all__ = ["DataLoader"] 4 | -------------------------------------------------------------------------------- /packages/ragbits-evaluate/src/ragbits/evaluate/dataloaders/exceptions.py: -------------------------------------------------------------------------------- 1 | from pathlib import Path 2 | 3 | 4 | class DataLoaderError(Exception): 5 | """ 6 | Class for all exceptions raised by the data loader. 
7 | """ 8 | 9 | def __init__(self, message: str, data_path: Path) -> None: 10 | super().__init__(message) 11 | self.message = message 12 | self.data_path = data_path 13 | 14 | 15 | class DataLoaderIncorrectFormatDataError(DataLoaderError): 16 | """ 17 | Raised when the data are incorrectly formatted. 18 | """ 19 | 20 | def __init__(self, required_features: list[str], data_path: Path) -> None: 21 | super().__init__( 22 | message=f"Dataset {data_path} is incorrectly formatted. Required features: {required_features}", 23 | data_path=data_path, 24 | ) 25 | self.required_features = required_features 26 | -------------------------------------------------------------------------------- /packages/ragbits-evaluate/src/ragbits/evaluate/dataset_generator/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/deepsense-ai/ragbits/c37428621adc5ab30528f41ccd22d655f421c2e8/packages/ragbits-evaluate/src/ragbits/evaluate/dataset_generator/__init__.py -------------------------------------------------------------------------------- /packages/ragbits-evaluate/src/ragbits/evaluate/dataset_generator/prompts/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/deepsense-ai/ragbits/c37428621adc5ab30528f41ccd22d655f421c2e8/packages/ragbits-evaluate/src/ragbits/evaluate/dataset_generator/prompts/__init__.py -------------------------------------------------------------------------------- /packages/ragbits-evaluate/src/ragbits/evaluate/dataset_generator/prompts/corpus_generation.py: -------------------------------------------------------------------------------- 1 | from pydantic import BaseModel 2 | 3 | from ragbits.core.prompt import Prompt 4 | 5 | 6 | class BasicCorpusGenerationPromptInput(BaseModel): 7 | """A definition of input for corpus generation task""" 8 | 9 | query: str 10 | 11 | 12 | class 
BasicCorpusGenerationPrompt(Prompt[BasicCorpusGenerationPromptInput]): 13 | """A basic prompt for corpus generation""" 14 | 15 | system_prompt: str = ( 16 | "You are a provider of random factoids on topic requested by a user." 17 | "Do not write a long essays, the response for given query should be a single sentence" 18 | "For each query provide only a single fact about a given topic" 19 | "Use as few tokens as possible" 20 | ) 21 | user_prompt: str = "Provide factoids about {{ query }}" 22 | -------------------------------------------------------------------------------- /packages/ragbits-evaluate/src/ragbits/evaluate/dataset_generator/tasks/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/deepsense-ai/ragbits/c37428621adc5ab30528f41ccd22d655f421c2e8/packages/ragbits-evaluate/src/ragbits/evaluate/dataset_generator/tasks/__init__.py -------------------------------------------------------------------------------- /packages/ragbits-evaluate/src/ragbits/evaluate/dataset_generator/tasks/filter/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/deepsense-ai/ragbits/c37428621adc5ab30528f41ccd22d655f421c2e8/packages/ragbits-evaluate/src/ragbits/evaluate/dataset_generator/tasks/filter/__init__.py -------------------------------------------------------------------------------- /packages/ragbits-evaluate/src/ragbits/evaluate/dataset_generator/tasks/filter/base.py: -------------------------------------------------------------------------------- 1 | from abc import ABC, abstractmethod 2 | 3 | from distilabel.steps import Step, StepInput, StepOutput 4 | 5 | from ..corpus_generation import CorpusGenerationStep 6 | from ..text_generation.base import BaseDistilabelTask 7 | 8 | 9 | class BaseFilter(Step, ABC): 10 | """Base class for filtering the outputs of pipeline steps""" 11 | 12 | def __init__(self, task: BaseDistilabelTask 
class DontKnowFilter(BaseFilter):
    """Rule-based filter that drops rows whose answer is a "don't know" reply"""

    def process(self, *inputs: StepInput) -> "StepOutput":
        """
        Filters the first input batch, keeping only rows without a "don't know" answer
        Args:
            inputs - the outputs of some generation step; only the first batch is read
        Returns:
            a single yielded list with shallow copies of the rows that passed the filter
        """
        kept_rows = []
        for row in inputs[0]:
            if self._is_dont_know(row):
                continue
            # Rebuild the row key by key so the yielded dict is a fresh object.
            kept_rows.append({key: row[key] for key in row})
        yield kept_rows

    @staticmethod
    def _is_dont_know(input_: dict[str, Any]) -> bool:
        """
        Checks whether the "basic_answer" field contains any of DONT_KNOW_PHRASES, ignoring case
        Args:
            input_ - a single row produced by the generation step
        Returns:
            True if one of the pre-defined phrases occurs in the answer
        """
        answer = input_["basic_answer"].lower()
        for phrase in DONT_KNOW_PHRASES:
            if phrase.lower() in answer:
                return True
        return False
def get_closest_substring(long: str, short: str) -> str:
    """
    Finds the closest substring to short string in longer one

    Candidate substrings start at an occurrence of one of the whitespace-separated
    tokens of `short` inside `long` and end at the same or a later occurrence.
    The candidate with the highest difflib.SequenceMatcher ratio against `short`
    is returned.

    Args:
        long: str - longer string
        short: str - shorter string
    Returns:
        closest substring of longer, or an empty string when `short` has no tokens
        or none of them occurs in `long`
    """
    # Drop duplicate tokens (order preserved) and escape them so that regex
    # metacharacters in `short` are matched literally.
    tokens = list(dict.fromkeys(short.split()))
    if not tokens:
        return ""

    matches = list(re.finditer("|".join(re.escape(token) for token in tokens), long))
    if not matches:
        return ""

    # Single-occurrence spans are included so that one match is enough to produce
    # a result (pairs alone would leave max() with nothing to pick from).
    spans = [(match, match) for match in matches] + list(combinations(matches, 2))

    # SequenceMatcher caches details about its second sequence, so `short` is set
    # once and only the candidate (first sequence) changes between comparisons.
    matcher = SequenceMatcher(None, b=short)

    def similarity(span: tuple) -> float:
        matcher.set_seq1(long[span[0].start() : span[1].end()])
        return matcher.ratio()

    first, last = max(spans, key=similarity)
    return long[first.start() : last.end()]
def basic_document_search_factory() -> DocumentSearch:
    """
    Factory for basic example document search instance.

    The instance uses an in-memory vector store with a LiteLLM embedder and is passed
    through `_add_example_documents` before being returned. That coroutine is driven
    with `asyncio.run`, which cannot be used while another event loop is already
    running in the same thread.
    """
    embedder = LiteLLMEmbedder()
    vector_store = InMemoryVectorStore(embedder=embedder)
    search = DocumentSearch(vector_store=vector_store)
    asyncio.run(_add_example_documents(search))
    return search
def get_evaluation_pipeline_for_target(evaluation_target: WithConstructionConfig) -> EvaluationPipeline:
    """
    Instantiates the evaluation pipeline registered for the type of the given object
    Args:
        evaluation_target: WithConstructionConfig object to be evaluated
    Returns:
        instance of evaluation pipeline
    Raises:
        ValueError for classes with no registered evaluation pipeline
    """
    # The first registered type the target is an instance of wins (registry order).
    pipeline_type = next(
        (
            candidate
            for target_type, candidate in _target_to_evaluation_pipeline.items()
            if isinstance(evaluation_target, target_type)
        ),
        None,
    )
    if pipeline_type is None:
        raise ValueError(f"Evaluation pipeline not implemented for {evaluation_target.__class__}")
    return pipeline_type(evaluation_target=evaluation_target)
44 | """ 45 | super().__init__() 46 | self.evaluation_target = evaluation_target 47 | 48 | async def prepare(self) -> None: 49 | """ 50 | Prepare pipeline for evaluation. Optional step. 51 | """ 52 | pass 53 | 54 | @abstractmethod 55 | async def __call__(self, data: Iterable[EvaluationDataT]) -> Iterable[EvaluationResultT]: 56 | """ 57 | Run the evaluation pipeline. 58 | 59 | Args: 60 | data: The evaluation data. 61 | 62 | Returns: 63 | The evaluation result. 64 | """ 65 | -------------------------------------------------------------------------------- /packages/ragbits-evaluate/src/ragbits/evaluate/py.typed: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/deepsense-ai/ragbits/c37428621adc5ab30528f41ccd22d655f421c2e8/packages/ragbits-evaluate/src/ragbits/evaluate/py.typed -------------------------------------------------------------------------------- /packages/ragbits-guardrails/README.md: -------------------------------------------------------------------------------- 1 | # Ragbits Guardrails 2 | 3 | Ragbits Guardrails is a Python package that contains utilities for ensuring the safety and relevance of responses generated by Ragbits components. 
4 | 5 | ## Installation 6 | 7 | You can install the latest version of Ragbits Guardrails using pip: 8 | 9 | ```bash 10 | pip install ragbits-guardrails 11 | ``` 12 | 13 | ## Quickstart 14 | Example of using the OpenAI Moderation Guardrail to verify a message: 15 | 16 | ```python 17 | import asyncio 18 | from ragbits.guardrails.base import GuardrailManager, GuardrailVerificationResult 19 | from ragbits.guardrails.openai_moderation import OpenAIModerationGuardrail 20 | 21 | 22 | async def verify_message(message: str) -> list[GuardrailVerificationResult]: 23 | manager = GuardrailManager([OpenAIModerationGuardrail()]) 24 | return await manager.verify(message) 25 | 26 | 27 | if __name__ == '__main__': 28 | print(asyncio.run(verify_message("Test message"))) 29 | ``` 30 | 31 | ## Documentation 32 | * [How-To Guides - Guardrails](https://ragbits.deepsense.ai/how-to/use_guardrails/) 33 | 37 | -------------------------------------------------------------------------------- /packages/ragbits-guardrails/pyproject.toml: -------------------------------------------------------------------------------- 1 | [project] 2 | name = "ragbits-guardrails" 3 | version = "1.0.0" 4 | description = "Guardrails module for Ragbits components" 5 | readme = "README.md" 6 | requires-python = ">=3.10" 7 | license = "MIT" 8 | authors = [ 9 | { name = "deepsense.ai", email = "ragbits@deepsense.ai"} 10 | ] 11 | keywords = [ 12 | "Retrieval Augmented Generation", 13 | "RAG", 14 | "Large Language Models", 15 | "LLMs", 16 | "Generative AI", 17 | "GenAI", 18 | "Evaluation" 19 | ] 20 | classifiers = [ 21 | "Development Status :: 4 - Beta", 22 | "Environment :: Console", 23 | "Intended Audience :: Science/Research", 24 | "License :: OSI Approved :: MIT License", 25 | "Natural Language :: English", 26 | "Operating System :: OS Independent", 27 | "Programming Language :: Python :: 3.10", 28 | "Programming Language :: Python :: 3.11", 29 | "Programming Language :: Python :: 3.12", 30 | "Programming Language :: 
Python :: 3.13", 31 | "Topic :: Scientific/Engineering :: Artificial Intelligence", 32 | "Topic :: Software Development :: Libraries :: Python Modules", 33 | ] 34 | dependencies = ["ragbits-core==1.0.0"] 35 | 36 | [project.urls] 37 | "Homepage" = "https://github.com/deepsense-ai/ragbits" 38 | "Bug Reports" = "https://github.com/deepsense-ai/ragbits/issues" 39 | "Documentation" = "https://ragbits.deepsense.ai/" 40 | "Source" = "https://github.com/deepsense-ai/ragbits" 41 | 42 | [project.optional-dependencies] 43 | openai = [ 44 | "openai>=1.57.3,<2.0.0", 45 | ] 46 | 47 | [tool.uv] 48 | dev-dependencies = [ 49 | "pre-commit~=3.8.0", 50 | "pytest~=8.3.3", 51 | "pytest-cov~=5.0.0", 52 | "pytest-asyncio~=0.24.0", 53 | "pip-licenses>=4.0.0,<5.0.0" 54 | ] 55 | 56 | [build-system] 57 | requires = ["hatchling"] 58 | build-backend = "hatchling.build" 59 | 60 | [tool.hatch.metadata] 61 | allow-direct-references = true 62 | 63 | [tool.hatch.build.targets.wheel] 64 | packages = ["src/ragbits"] 65 | 66 | [tool.pytest.ini_options] 67 | asyncio_mode = "auto" 68 | -------------------------------------------------------------------------------- /packages/ragbits-guardrails/src/ragbits/guardrails/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/deepsense-ai/ragbits/c37428621adc5ab30528f41ccd22d655f421c2e8/packages/ragbits-guardrails/src/ragbits/guardrails/__init__.py -------------------------------------------------------------------------------- /packages/ragbits-guardrails/src/ragbits/guardrails/base.py: -------------------------------------------------------------------------------- 1 | from abc import ABC, abstractmethod 2 | 3 | from pydantic import BaseModel 4 | 5 | from ragbits.core.prompt import Prompt 6 | 7 | 8 | class GuardrailVerificationResult(BaseModel): 9 | """ 10 | Class representing result of guardrail verification 11 | """ 12 | 13 | guardrail_name: str 14 | succeeded: bool 15 | fail_reason: 
class GuardrailManager:
    """
    Runs a collection of guardrails against a single input
    """

    def __init__(self, guardrails: list[Guardrail]):
        """
        Args:
            guardrails: guardrails to run, in the order they should be applied
        """
        self._guardrails = guardrails

    async def verify(self, input_to_verify: Prompt | str) -> list[GuardrailVerificationResult]:
        """
        Runs every guardrail on the input, one after another

        Args:
            input_to_verify: prompt or output of the model to check

        Returns:
            verification results, in the same order as the guardrails
        """
        results = []
        for guardrail in self._guardrails:
            # Each guardrail is awaited before the next one starts.
            outcome = await guardrail.verify(input_to_verify)
            results.append(outcome)
        return results
def main() -> None:
    """
    Install pre-commit or pre-push git hooks.

    Asks which hook to install, removes any existing pre-commit / pre-push hook file
    and writes HOOK_BODY as an executable script under the selected name in .git/hooks.
    """
    hooks_dir = Path(__file__).parent.parent / ".git" / "hooks"
    hooks_dir.mkdir(exist_ok=True)

    hook_type = list_input("Select a hook to install", choices=["pre-commit", "pre-push"])

    # Only the selected hook is kept, so both possible files are removed first.
    (hooks_dir / "pre-commit").unlink(missing_ok=True)
    (hooks_dir / "pre-push").unlink(missing_ok=True)

    hook_path = hooks_dir / hook_type
    # HOOK_BODY begins with a newline; strip it so "#!" is the very first thing in the
    # file, otherwise the shebang line is not honoured. The encoding is explicit because
    # the script contains emoji that locale default encodings (e.g. cp1252) cannot
    # encode, and LF line endings are forced so the shell script stays valid everywhere.
    hook_path.write_text(HOOK_BODY.lstrip(), encoding="utf-8", newline="\n")
    hook_path.chmod(0o755)

    pprint(f"[cyan]Git hook for [b]{hook_type}[/b] installed!")
2 | logs 3 | *.log 4 | npm-debug.log* 5 | yarn-debug.log* 6 | yarn-error.log* 7 | pnpm-debug.log* 8 | lerna-debug.log* 9 | 10 | node_modules 11 | dist 12 | dist-ssr 13 | *.local 14 | 15 | # Editor directories and files 16 | .vscode/* 17 | !.vscode/extensions.json 18 | .idea 19 | .DS_Store 20 | *.suo 21 | *.ntvs* 22 | *.njsproj 23 | *.sln 24 | *.sw? 25 | .vite 26 | -------------------------------------------------------------------------------- /ui/README.md: -------------------------------------------------------------------------------- 1 | # React + TypeScript + Vite 2 | 3 | This template provides a minimal setup to get React working in Vite with HMR and some ESLint rules. 4 | 5 | Currently, two official plugins are available: 6 | 7 | - [@vitejs/plugin-react](https://github.com/vitejs/vite-plugin-react/blob/main/packages/plugin-react/README.md) uses [Babel](https://babeljs.io/) for Fast Refresh 8 | - [@vitejs/plugin-react-swc](https://github.com/vitejs/vite-plugin-react-swc) uses [SWC](https://swc.rs/) for Fast Refresh 9 | 10 | ## Expanding the ESLint configuration 11 | 12 | If you are developing a production application, we recommend updating the configuration to enable type aware lint rules: 13 | 14 | - Configure the top-level `parserOptions` property like this: 15 | 16 | ```js 17 | export default tseslint.config({ 18 | languageOptions: { 19 | // other options... 
20 | parserOptions: { 21 | project: ["./tsconfig.node.json", "./tsconfig.app.json"], 22 | tsconfigRootDir: import.meta.dirname, 23 | }, 24 | }, 25 | }); 26 | ``` 27 | 28 | - Replace `tseslint.configs.recommended` to `tseslint.configs.recommendedTypeChecked` or `tseslint.configs.strictTypeChecked` 29 | - Optionally add `...tseslint.configs.stylisticTypeChecked` 30 | - Install [eslint-plugin-react](https://github.com/jsx-eslint/eslint-plugin-react) and update the config: 31 | 32 | ```js 33 | // eslint.config.js 34 | import react from "eslint-plugin-react"; 35 | 36 | export default tseslint.config({ 37 | // Set the react version 38 | settings: { react: { version: "18.3" } }, 39 | plugins: { 40 | // Add the react plugin 41 | react, 42 | }, 43 | rules: { 44 | // other rules... 45 | // Enable its recommended rules 46 | ...react.configs.recommended.rules, 47 | ...react.configs["jsx-runtime"].rules, 48 | }, 49 | }); 50 | ``` 51 | -------------------------------------------------------------------------------- /ui/assets/ragbits.svg: -------------------------------------------------------------------------------- 1 | 2 | 🐰 3 | -------------------------------------------------------------------------------- /ui/eslint.config.js: -------------------------------------------------------------------------------- 1 | import js from "@eslint/js"; 2 | import globals from "globals"; 3 | import reactHooks from "eslint-plugin-react-hooks"; 4 | import reactRefresh from "eslint-plugin-react-refresh"; 5 | import tseslint from "typescript-eslint"; 6 | 7 | export default tseslint.config( 8 | { ignores: ["dist"] }, 9 | { 10 | extends: [js.configs.recommended, ...tseslint.configs.recommended], 11 | files: ["**/*.{ts,tsx}"], 12 | languageOptions: { 13 | ecmaVersion: 2020, 14 | globals: globals.browser, 15 | }, 16 | plugins: { 17 | "react-hooks": reactHooks, 18 | "react-refresh": reactRefresh, 19 | }, 20 | rules: { 21 | ...reactHooks.configs.recommended.rules, 22 | 
"react-refresh/only-export-components": [ 23 | "warn", 24 | { allowConstantExport: true }, 25 | ], 26 | }, 27 | }, 28 | ); 29 | -------------------------------------------------------------------------------- /ui/index.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | Ragbits 8 | 9 | 10 | 11 |
12 | 13 | 14 | 15 | -------------------------------------------------------------------------------- /ui/package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "frontend", 3 | "private": true, 4 | "version": "0.0.0", 5 | "type": "module", 6 | "scripts": { 7 | "dev": "vite", 8 | "build": "tsc -b && vite build", 9 | "lint": "eslint .", 10 | "preview": "vite preview", 11 | "format": "prettier --write .", 12 | "format:check": "prettier --check ." 13 | }, 14 | "dependencies": { 15 | "@heroicons/react": "^2.2.0", 16 | "@heroui/react": "^2.6.14", 17 | "@hookform/resolvers": "^5.0.1", 18 | "axios": "^1.8.3", 19 | "framer-motion": "^12.2.0", 20 | "github-markdown-css": "^5.8.1", 21 | "lodash": "^4.17.21", 22 | "react": "^18.3.1", 23 | "react-dom": "^18.3.1", 24 | "react-hook-form": "^7.55.0", 25 | "react-markdown": "^9.0.3", 26 | "remark-gfm": "^4.0.0", 27 | "uuid": "^11.1.0", 28 | "zod": "^3.24.2" 29 | }, 30 | "devDependencies": { 31 | "@eslint/js": "^9.17.0", 32 | "@iconify/react": "^5.2.0", 33 | "@tailwindcss/typography": "^0.5.16", 34 | "@types/react": "^18.3.18", 35 | "@types/react-dom": "^18.3.5", 36 | "@vitejs/plugin-react": "^4.3.4", 37 | "autoprefixer": "^10.4.20", 38 | "eslint": "^9.17.0", 39 | "eslint-plugin-react-hooks": "^5.0.0", 40 | "eslint-plugin-react-refresh": "^0.4.16", 41 | "globals": "^15.14.0", 42 | "postcss": "^8.5.1", 43 | "prettier": "^3.4.2", 44 | "prettier-plugin-tailwindcss": "^0.6.11", 45 | "tailwindcss": "^3.4.17", 46 | "typescript": "~5.6.2", 47 | "typescript-eslint": "^8.18.2", 48 | "vite": "^6.3.5" 49 | } 50 | } 51 | -------------------------------------------------------------------------------- /ui/postcss.config.js: -------------------------------------------------------------------------------- 1 | export default { 2 | plugins: { 3 | tailwindcss: {}, 4 | autoprefixer: {}, 5 | }, 6 | }; 7 | -------------------------------------------------------------------------------- 
/**
 * Returns the current value of HistoryContext.
 *
 * @throws Error when the context value is missing (falsy), i.e. the hook is
 * used outside of a HistoryContext provider.
 */
export const useHistoryContext = () => {
  const context = useContext(HistoryContext);
  if (!context) {
    // Name this hook in the message (it previously referred to an unrelated
    // "useChat"/"ChatProvider"), so the failure points at the right place.
    throw new Error(
      "useHistoryContext must be used within a HistoryContext provider",
    );
  }
  return context;
};
/**
 * Subscribes `callback` to everything that can change the theme snapshot.
 *
 * The snapshot is read from localStorage and falls back to the
 * `prefers-color-scheme` media query, so both the window "storage" event and
 * the media query "change" event have to notify the store.
 *
 * @param callback - invoked whenever the snapshot may have changed
 * @returns a function that removes both listeners
 */
function subscribe(callback: () => void) {
  const colorSchemeQuery = window.matchMedia("(prefers-color-scheme: dark)");

  window.addEventListener("storage", callback);
  // Without this listener a change of the system theme is never picked up
  // while no theme is saved in localStorage.
  colorSchemeQuery.addEventListener("change", callback);

  return () => {
    window.removeEventListener("storage", callback);
    colorSchemeQuery.removeEventListener("change", callback);
  };
}
DelayedTooltip = (props: TooltipProps) => { 4 | return ; 5 | }; 6 | 7 | export default DelayedTooltip; 8 | -------------------------------------------------------------------------------- /ui/src/core/components/PromptInput/PromptInputText.tsx: -------------------------------------------------------------------------------- 1 | import type { TextAreaProps } from "@heroui/react"; 2 | import { forwardRef } from "react"; 3 | import { Textarea } from "@heroui/react"; 4 | 5 | const PromptInputText = forwardRef( 6 | ({ classNames = {}, ...props }, ref) => { 7 | return ( 8 |