├── .cursor
    └── rules
    │   └── repository-setup.mdc
├── .editorconfig
├── .github
    ├── ISSUE_TEMPLATE
    │   ├── 01_feature_request.yml
    │   └── 02_bug_report.yml
    ├── PULL_REQUEST_TEMPLATE
    │   └── pull_request_template.md
    ├── scripts
    │   ├── check_changelog_update.sh
    │   ├── check_source_changes.sh
    │   └── deploy_docs.sh
    └── workflows
    │   ├── main-checks.yml
    │   ├── prepare-release.yml
    │   ├── publish-docs.yaml
    │   ├── publish-pypi.yml
    │   ├── pull-request-checks.yml
    │   ├── shared-packages.yml
    │   └── shared-ui.yml
├── .gitignore
├── .libraries-whitelist.txt
├── .license-whitelist.txt
├── .pre-commit-config.yaml
├── CONTRIBUTING.md
├── LICENSE
├── README.md
├── check_licenses.sh
├── docs
    ├── api_reference
    │   ├── chat
    │   │   ├── compressors
    │   │   │   ├── base.md
    │   │   │   └── llm.md
    │   │   └── interface
    │   │   │   └── chat_interface.md
    │   ├── core
    │   │   ├── audit
    │   │   │   ├── metrics.md
    │   │   │   └── traces.md
    │   │   ├── embeddings.md
    │   │   ├── hybrid.md
    │   │   ├── llms.md
    │   │   ├── prompt.md
    │   │   ├── sources.md
    │   │   └── vector-stores.md
    │   ├── document_search
    │   │   ├── documents
    │   │   │   ├── documents.md
    │   │   │   └── elements.md
    │   │   ├── index.md
    │   │   ├── ingest
    │   │   │   ├── enrichers.md
    │   │   │   ├── parsers.md
    │   │   │   └── strategies.md
    │   │   └── retrieval
    │   │   │   ├── rephrasers.md
    │   │   │   └── rerankers.md
    │   └── guardrails
    │   │   └── index.md
    ├── cli
    │   └── main.md
    ├── how-to
    │   ├── audit
    │   │   ├── use_metrics.md
    │   │   └── use_tracing.md
    │   ├── chatbots
    │   │   └── api.md
    │   ├── document_search
    │   │   ├── ingest-documents.md
    │   │   └── search-documents.md
    │   ├── evaluate
    │   │   ├── custom_dataloader.md
    │   │   ├── custom_evaluation_pipeline.md
    │   │   ├── custom_metric.md
    │   │   ├── evaluate.md
    │   │   ├── generate_dataset.md
    │   │   └── optimize.md
    │   ├── guardrails
    │   │   └── use_guardrails.md
    │   ├── llms
    │   │   ├── use_llms.md
    │   │   └── use_local_llms.md
    │   ├── project
    │   │   ├── component_preferences.md
    │   │   └── custom_components.md
    │   ├── prompts
    │   │   ├── promptfoo.md
    │   │   ├── use_images_in_prompts.md
    │   │   └── use_prompting.md
    │   ├── sources
    │   │   └── load-dataset.md
    │   └── vector_stores
    │   │   ├── hybrid.md
    │   │   ├── sparse_vectors.md
    │   │   └── use_pgVector_store.md
    ├── index.md
    ├── quickstart
    │   ├── quickstart1_prompts.md
    │   └── quickstart2_rag.md
    └── stylesheets
    │   └── extra.css
├── examples
    ├── api
    │   ├── chat.py
    │   └── offline_chat.py
    ├── apps
    │   └── documents_chat.py
    ├── conversations
    │   └── recontextualize_message.py
    ├── core
    │   ├── audit
    │   │   ├── config
    │   │   │   └── grafana
    │   │   │   │   ├── grafana-dashboards.yaml
    │   │   │   │   └── ragbits-dashboard.json
    │   │   └── otel.py
    │   └── prompt
    │   │   ├── multimodal.py
    │   │   ├── multimodal_with_few_shots.py
    │   │   ├── text.py
    │   │   └── text_with_few_shots.py
    ├── document-search
    │   ├── basic.py
    │   ├── chroma.py
    │   ├── configurable.py
    │   ├── distributed.py
    │   ├── images
    │   │   ├── bear.jpg
    │   │   ├── game.jpg
    │   │   └── tree.jpg
    │   ├── multimodal.py
    │   ├── pgvector.py
    │   └── qdrant.py
    ├── evaluation
    │   ├── dataset-generator
    │   │   ├── config
    │   │   │   └── generate.yaml
    │   │   └── generate.py
    │   └── document-search
    │   │   ├── advanced
    │   │       ├── README.md
    │   │       ├── config
    │   │       │   ├── dataloader
    │   │       │   │   └── hf.yaml
    │   │       │   ├── experiments
    │   │       │   │   ├── chunking-1000.yaml
    │   │       │   │   ├── chunking-250.yaml
    │   │       │   │   └── chunking-500.yaml
    │   │       │   ├── metrics
    │   │       │   │   ├── precision_recall_f1.yaml
    │   │       │   │   └── ranked_retrieval.yaml
    │   │       │   ├── optimization.yaml
    │   │       │   ├── pipeline
    │   │       │   │   ├── document_search.yaml
    │   │       │   │   ├── document_search_optimization.yaml
    │   │       │   │   ├── parser_router
    │   │       │   │   │   ├── unstructured.yaml
    │   │       │   │   │   └── unstructured_optimization.yaml
    │   │       │   │   ├── rephraser
    │   │       │   │   │   └── noop.yaml
    │   │       │   │   ├── reranker
    │   │       │   │   │   └── noop.yaml
    │   │       │   │   ├── source
    │   │       │   │   │   └── hf.yaml
    │   │       │   │   └── vector_store
    │   │       │   │   │   ├── chroma.yaml
    │   │       │   │   │   └── chroma_optimization.yaml
    │   │       │   └── retrieval.yaml
    │   │       ├── evaluate.py
    │   │       └── optimize.py
    │   │   └── basic
    │   │       ├── evaluate.py
    │   │       └── optimize.py
    └── guardrails
    │   └── openai_moderation.py
├── mkdocs.yml
├── mkdocs_hooks.py
├── packages
    ├── ragbits-agents
    │   ├── CHANGELOG.md
    │   ├── README.md
    │   ├── pyproject.toml
    │   └── src
    │   │   └── ragbits
    │   │       └── agents
    │   │           ├── __init__.py
    │   │           ├── _main.py
    │   │           ├── py.typed
    │   │           └── types.py
    ├── ragbits-chat
    │   ├── CHANGELOG.md
    │   ├── README.md
    │   ├── pyproject.toml
    │   ├── src
    │   │   └── ragbits
    │   │   │   └── chat
    │   │   │       ├── __init__.py
    │   │   │       ├── api.py
    │   │   │       ├── cli.py
    │   │   │       ├── history
    │   │   │           ├── __init__.py
    │   │   │           └── compressors
    │   │   │           │   ├── __init__.py
    │   │   │           │   ├── base.py
    │   │   │           │   └── llm.py
    │   │   │       ├── interface
    │   │   │           ├── __init__.py
    │   │   │           ├── _interface.py
    │   │   │           ├── forms.py
    │   │   │           └── types.py
    │   │   │       ├── persistence
    │   │   │           ├── __init__.py
    │   │   │           ├── base.py
    │   │   │           ├── file.py
    │   │   │           └── sql.py
    │   │   │       ├── py.typed
    │   │   │       └── ui-build
    │   │   │           ├── assets
    │   │   │               ├── ExamplePluginComponent-CkxrO9jk.js
    │   │   │               ├── FeedbackFormPluginComponent-Bmct8_5y.js
    │   │   │               ├── index-B86z3tbJ.css
    │   │   │               ├── index-ByuhG0Hl.js
    │   │   │               ├── index-CMvp94wz.js
    │   │   │               └── ragbits-9U4hpuUb.svg
    │   │   │           └── index.html
    │   └── tests
    │   │   └── unit
    │   │       ├── history
    │   │           └── test_llm_compressor.py
    │   │       ├── persistence
    │   │           └── test_sql.py
    │   │       └── test_api.py
    ├── ragbits-cli
    │   ├── CHANGELOG.md
    │   ├── README.md
    │   ├── pyproject.toml
    │   ├── src
    │   │   └── ragbits
    │   │   │   └── cli
    │   │   │       ├── __init__.py
    │   │   │       ├── _utils.py
    │   │   │       ├── py.typed
    │   │   │       └── state.py
    │   └── tests
    │   │   └── unit
    │   │       └── test_state.py
    ├── ragbits-core
    │   ├── CHANGELOG.md
    │   ├── README.md
    │   ├── pyproject.toml
    │   ├── src
    │   │   └── ragbits
    │   │   │   └── core
    │   │   │       ├── __init__.py
    │   │   │       ├── audit
    │   │   │           ├── __init__.py
    │   │   │           ├── metrics
    │   │   │           │   ├── __init__.py
    │   │   │           │   ├── base.py
    │   │   │           │   └── otel.py
    │   │   │           └── traces
    │   │   │           │   ├── __init__.py
    │   │   │           │   ├── base.py
    │   │   │           │   ├── cli.py
    │   │   │           │   └── otel.py
    │   │   │       ├── cli.py
    │   │   │       ├── config.py
    │   │   │       ├── embeddings
    │   │   │           ├── __init__.py
    │   │   │           ├── base.py
    │   │   │           ├── dense
    │   │   │           │   ├── __init__.py
    │   │   │           │   ├── base.py
    │   │   │           │   ├── fastembed.py
    │   │   │           │   ├── litellm.py
    │   │   │           │   ├── local.py
    │   │   │           │   ├── noop.py
    │   │   │           │   └── vertex_multimodal.py
    │   │   │           ├── exceptions.py
    │   │   │           └── sparse
    │   │   │           │   ├── __init__.py
    │   │   │           │   ├── bag_of_tokens.py
    │   │   │           │   ├── base.py
    │   │   │           │   └── fastembed.py
    │   │   │       ├── llms
    │   │   │           ├── __init__.py
    │   │   │           ├── base.py
    │   │   │           ├── exceptions.py
    │   │   │           ├── factory.py
    │   │   │           ├── litellm.py
    │   │   │           ├── local.py
    │   │   │           └── mock.py
    │   │   │       ├── options.py
    │   │   │       ├── prompt
    │   │   │           ├── __init__.py
    │   │   │           ├── _cli.py
    │   │   │           ├── base.py
    │   │   │           ├── discovery.py
    │   │   │           ├── exceptions.py
    │   │   │           ├── parsers.py
    │   │   │           ├── prompt.py
    │   │   │           └── promptfoo.py
    │   │   │       ├── py.typed
    │   │   │       ├── sources
    │   │   │           ├── __init__.py
    │   │   │           ├── azure.py
    │   │   │           ├── base.py
    │   │   │           ├── exceptions.py
    │   │   │           ├── gcs.py
    │   │   │           ├── git.py
    │   │   │           ├── hf.py
    │   │   │           ├── local.py
    │   │   │           ├── s3.py
    │   │   │           └── web.py
    │   │   │       ├── types.py
    │   │   │       ├── utils
    │   │   │           ├── __init__.py
    │   │   │           ├── _pyproject.py
    │   │   │           ├── config_handling.py
    │   │   │           ├── decorators.py
    │   │   │           ├── dict_transformations.py
    │   │   │           ├── helpers.py
    │   │   │           ├── pydantic.py
    │   │   │           └── secrets.py
    │   │   │       └── vector_stores
    │   │   │           ├── __init__.py
    │   │   │           ├── _cli.py
    │   │   │           ├── base.py
    │   │   │           ├── chroma.py
    │   │   │           ├── hybrid.py
    │   │   │           ├── hybrid_strategies.py
    │   │   │           ├── in_memory.py
    │   │   │           ├── pgvector.py
    │   │   │           └── qdrant.py
    │   └── tests
    │   │   ├── assets
    │   │       ├── img
    │   │       │   ├── test.png
    │   │       │   └── test2.jpg
    │   │       └── md
    │   │       │   ├── bar.md
    │   │       │   └── foo.md
    │   │   ├── cli
    │   │       ├── __init__.py
    │   │       ├── test_cli_trace_handler.py
    │   │       └── test_vector_store.py
    │   │   ├── conftest.py
    │   │   ├── integration
    │   │       ├── sources
    │   │       │   ├── test_git.py
    │   │       │   ├── test_hf.py
    │   │       │   └── test_s3.py
    │   │       └── vector_stores
    │   │       │   ├── __init__.py
    │   │       │   ├── test_vector_store.py
    │   │       │   └── test_vector_store_sparse.py
    │   │   └── unit
    │   │       ├── __init__.py
    │   │       ├── audit
    │   │           ├── test_cli.py
    │   │           ├── test_metrics.py
    │   │           └── test_trace.py
    │   │       ├── embeddings
    │   │           ├── test_bag_of_tokens.py
    │   │           ├── test_fastembed.py
    │   │           ├── test_from_config.py
    │   │           ├── test_litellm.py
    │   │           ├── test_local.py
    │   │           ├── test_noop.py
    │   │           ├── test_vector_size.py
    │   │           └── test_vertex_multimodal.py
    │   │       ├── llms
    │   │           ├── __init__.py
    │   │           ├── factory
    │   │           │   ├── __init__.py
    │   │           │   └── test_get_preferred_llm.py
    │   │           ├── test_base.py
    │   │           ├── test_from_config.py
    │   │           └── test_litellm.py
    │   │       ├── prompts
    │   │           ├── __init__.py
    │   │           ├── discovery
    │   │           │   ├── __init__.py
    │   │           │   ├── prompt_classes_for_tests.py
    │   │           │   ├── ragbits_tests_pkg_with_prompts
    │   │           │   │   ├── __init__.py
    │   │           │   │   └── prompts
    │   │           │   │   │   ├── __init__.py
    │   │           │   │   │   ├── temp_prompt1.py
    │   │           │   │   │   └── temp_prompt2.py
    │   │           │   └── test_prompt_discovery.py
    │   │           ├── test_parsers.py
    │   │           └── test_prompt.py
    │   │       ├── sources
    │   │           ├── test_aws.py
    │   │           ├── test_azure.py
    │   │           ├── test_exceptions.py
    │   │           ├── test_gcs.py
    │   │           ├── test_git.py
    │   │           ├── test_hf.py
    │   │           ├── test_local.py
    │   │           ├── test_source_discriminator.py
    │   │           └── test_web.py
    │   │       ├── test_options.py
    │   │       ├── utils
    │   │           ├── __init__.py
    │   │           ├── pyproject
    │   │           │   ├── test_find.py
    │   │           │   ├── test_get_config.py
    │   │           │   └── test_get_instace.py
    │   │           ├── test_config_handling.py
    │   │           ├── test_decorators.py
    │   │           ├── test_dict_transformations.py
    │   │           ├── test_helpers.py
    │   │           ├── test_secrets.py
    │   │           └── testprojects
    │   │           │   ├── bad_factory_project
    │   │           │       └── pyproject.toml
    │   │           │   ├── factory_project
    │   │           │       └── pyproject.toml
    │   │           │   ├── happy_project
    │   │           │       └── pyproject.toml
    │   │           │   ├── project_with_instance_factory
    │   │           │       └── pyproject.toml
    │   │           │   └── project_with_instances_yaml
    │   │           │       ├── instances.yaml
    │   │           │       └── pyproject.toml
    │   │       └── vector_stores
    │   │           ├── test_chroma.py
    │   │           ├── test_from_config.py
    │   │           ├── test_hybrid.py
    │   │           ├── test_hybrid_strategies.py
    │   │           ├── test_in_memory.py
    │   │           ├── test_pgvector.py
    │   │           └── test_qdrant.py
    ├── ragbits-document-search
    │   ├── CHANGELOG.md
    │   ├── README.md
    │   ├── pyproject.toml
    │   ├── src
    │   │   └── ragbits
    │   │   │   └── document_search
    │   │   │       ├── __init__.py
    │   │   │       ├── _main.py
    │   │   │       ├── cli.py
    │   │   │       ├── documents
    │   │   │           ├── __init__.py
    │   │   │           ├── document.py
    │   │   │           └── element.py
    │   │   │       ├── ingestion
    │   │   │           ├── __init__.py
    │   │   │           ├── enrichers
    │   │   │           │   ├── __init__.py
    │   │   │           │   ├── base.py
    │   │   │           │   ├── exceptions.py
    │   │   │           │   ├── image.py
    │   │   │           │   └── router.py
    │   │   │           ├── parsers
    │   │   │           │   ├── __init__.py
    │   │   │           │   ├── base.py
    │   │   │           │   ├── docling.py
    │   │   │           │   ├── exceptions.py
    │   │   │           │   ├── router.py
    │   │   │           │   └── unstructured.py
    │   │   │           └── strategies
    │   │   │           │   ├── __init__.py
    │   │   │           │   ├── base.py
    │   │   │           │   ├── batched.py
    │   │   │           │   ├── ray.py
    │   │   │           │   └── sequential.py
    │   │   │       ├── py.typed
    │   │   │       └── retrieval
    │   │   │           ├── __init__.py
    │   │   │           ├── rephrasers
    │   │   │               ├── __init__.py
    │   │   │               ├── base.py
    │   │   │               ├── llm.py
    │   │   │               └── noop.py
    │   │   │           └── rerankers
    │   │   │               ├── __init__.py
    │   │   │               ├── answerai.py
    │   │   │               ├── base.py
    │   │   │               ├── litellm.py
    │   │   │               ├── llm.py
    │   │   │               ├── noop.py
    │   │   │               └── rrf.py
    │   └── tests
    │   │   ├── assets
    │   │       ├── img
    │   │       │   └── transformers_paper_page.png
    │   │       ├── md
    │   │       │   ├── bar.md
    │   │       │   ├── foo.md
    │   │       │   └── test_file.md
    │   │       └── pdf
    │   │       │   └── transformers_paper_page.pdf
    │   │   ├── cli
    │   │       ├── custom_cli_source.py
    │   │       ├── test_ingest.py
    │   │       └── test_search.py
    │   │   ├── integration
    │   │       ├── __init__.py
    │   │       ├── test_docling.py
    │   │       ├── test_rerankers.py
    │   │       └── test_unstructured.py
    │   │   └── unit
    │   │       ├── test_config.py
    │   │       ├── test_document_parser_router.py
    │   │       ├── test_document_parsers.py
    │   │       ├── test_document_search.py
    │   │       ├── test_document_search_ingest_errors.py
    │   │       ├── test_documents.py
    │   │       ├── test_element_enricher_router.py
    │   │       ├── test_element_enrichers.py
    │   │       ├── test_elements.py
    │   │       ├── test_ingest_strategies.py
    │   │       ├── test_llm_reranker.py
    │   │       ├── test_rephrasers.py
    │   │       ├── test_rerankers.py
    │   │       └── testprojects
    │   │           ├── empty_project
    │   │               └── pyproject.toml
    │   │           ├── project_with_instance_factory
    │   │               ├── __init__.py
    │   │               ├── factories.py
    │   │               └── pyproject.toml
    │   │           ├── project_with_instances_yaml
    │   │               ├── instances.yaml
    │   │               └── pyproject.toml
    │   │           └── project_with_nested_yaml
    │   │               ├── instances.yaml
    │   │               └── pyproject.toml
    ├── ragbits-evaluate
    │   ├── CHANGELOG.md
    │   ├── README.md
    │   ├── pyproject.toml
    │   ├── src
    │   │   └── ragbits
    │   │   │   └── evaluate
    │   │   │       ├── __init__.py
    │   │   │       ├── cli.py
    │   │   │       ├── config.py
    │   │   │       ├── dataloaders
    │   │   │           ├── __init__.py
    │   │   │           ├── base.py
    │   │   │           ├── document_search.py
    │   │   │           ├── exceptions.py
    │   │   │           └── question_answer.py
    │   │   │       ├── dataset_generator
    │   │   │           ├── __init__.py
    │   │   │           ├── pipeline.py
    │   │   │           ├── prompts
    │   │   │           │   ├── __init__.py
    │   │   │           │   ├── corpus_generation.py
    │   │   │           │   └── qa.py
    │   │   │           ├── tasks
    │   │   │           │   ├── __init__.py
    │   │   │           │   ├── corpus_generation.py
    │   │   │           │   ├── filter
    │   │   │           │   │   ├── __init__.py
    │   │   │           │   │   ├── base.py
    │   │   │           │   │   └── dont_know.py
    │   │   │           │   └── text_generation
    │   │   │           │   │   ├── __init__.py
    │   │   │           │   │   ├── base.py
    │   │   │           │   │   └── qa.py
    │   │   │           └── utils.py
    │   │   │       ├── evaluator.py
    │   │   │       ├── factories
    │   │   │           └── __init__.py
    │   │   │       ├── metrics
    │   │   │           ├── __init__.py
    │   │   │           ├── base.py
    │   │   │           ├── document_search.py
    │   │   │           └── question_answer.py
    │   │   │       ├── optimizer.py
    │   │   │       ├── pipelines
    │   │   │           ├── __init__.py
    │   │   │           ├── base.py
    │   │   │           ├── document_search.py
    │   │   │           └── question_answer.py
    │   │   │       ├── py.typed
    │   │   │       └── utils.py
    │   └── tests
    │   │   ├── cli
    │   │       └── test_run_evaluation.py
    │   │   └── unit
    │   │       ├── test_evaluator.py
    │   │       ├── test_metrics.py
    │   │       └── test_optimizer.py
    ├── ragbits-guardrails
    │   ├── CHANGELOG.md
    │   ├── README.md
    │   ├── pyproject.toml
    │   ├── src
    │   │   └── ragbits
    │   │   │   └── guardrails
    │   │   │       ├── __init__.py
    │   │   │       ├── base.py
    │   │   │       ├── openai_moderation.py
    │   │   │       └── py.typed
    │   └── tests
    │   │   └── unit
    │   │       └── test_openai_moderation.py
    └── ragbits
    │   ├── CHANGELOG.md
    │   └── pyproject.toml
├── pyproject.toml
├── scripts
    ├── create_ragbits_package.py
    ├── create_release_notes.py
    ├── install_git_hooks.py
    └── update_ragbits_package.py
├── ui
    ├── .env.example
    ├── .gitignore
    ├── README.md
    ├── assets
    │   └── ragbits.svg
    ├── eslint.config.js
    ├── index.html
    ├── package-lock.json
    ├── package.json
    ├── postcss.config.js
    ├── prettier.config.js
    ├── src
    │   ├── App.tsx
    │   ├── contexts
    │   │   ├── HistoryContext
    │   │   │   ├── HistoryContext.ts
    │   │   │   ├── HistoryContextProvider.tsx
    │   │   │   └── useHistoryContext.ts
    │   │   └── ThemeContext
    │   │   │   ├── ThemeContext.ts
    │   │   │   ├── ThemeContextProvider.tsx
    │   │   │   └── useThemeContext.ts
    │   ├── core
    │   │   ├── components
    │   │   │   ├── ChatMessage.tsx
    │   │   │   ├── DelayedTooltip.tsx
    │   │   │   ├── Layout.tsx
    │   │   │   └── PromptInput
    │   │   │   │   ├── PromptInput.tsx
    │   │   │   │   └── PromptInputText.tsx
    │   │   └── utils
    │   │   │   ├── api.ts
    │   │   │   ├── eventSource.ts
    │   │   │   ├── plugins
    │   │   │       ├── PluginManager.ts
    │   │   │       ├── PluginWrapper.tsx
    │   │   │       ├── usePluginManager.ts
    │   │   │       └── utils.ts
    │   │   │   ├── request.ts
    │   │   │   └── types.ts
    │   ├── globals.css
    │   ├── main.tsx
    │   ├── plugins
    │   │   ├── ExamplePlugin
    │   │   │   ├── ExamplePluginComponent.tsx
    │   │   │   └── index.tsx
    │   │   └── FeedbackFormPlugin
    │   │   │   ├── FeedbackFormPluginComponent.tsx
    │   │   │   ├── index.tsx
    │   │   │   └── types.ts
    │   ├── types
    │   │   ├── api.ts
    │   │   ├── history.ts
    │   │   ├── plugins.ts
    │   │   └── utility.ts
    │   └── vite-env.d.ts
    ├── tailwind.config.js
    ├── tsconfig.app.json
    ├── tsconfig.json
    ├── tsconfig.node.json
    └── vite.config.ts
└── uv.lock


/.cursor/rules/repository-setup.mdc:
--------------------------------------------------------------------------------
 1 | ---
 2 | description: 
 3 | globs: 
 4 | alwaysApply: true
 5 | ---
 6 | 
 7 | # Repository setup and pre-commit checks
 8 | 
 9 | This repository uses `uv` for package management. Use `uv pip` instead of `pip` to install packages, and `uv run` to run Python.
10 | 
11 | Run the following checks after implementing any changes:
12 | 
13 | uv run ruff format
14 | uv run ruff check --fix
15 | uv run mypy <PATH>
16 | uv run pytest


--------------------------------------------------------------------------------
/.editorconfig:
--------------------------------------------------------------------------------
 1 | # This is a standard to preconfigure editors
 2 | # check: https://editorconfig.org/
 3 | root = true
 4 | 
 5 | # 4 space indentation
 6 | [*.py]
 7 | charset = utf-8
 8 | indent_style = space
 9 | indent_size = 4
10 | trim_trailing_whitespace = true
11 | insert_final_newline = false
12 | end_of_line = lf
13 | 


--------------------------------------------------------------------------------
/.github/ISSUE_TEMPLATE/01_feature_request.yml:
--------------------------------------------------------------------------------
 1 | name: 🚀 Feature Request
 2 | description: Submit a proposal/request for a new ragbits feature.
 3 | title: "feat: "
 4 | labels: ["feature"]
 5 | body:
 6 |   - type: markdown
 7 |     attributes:
 8 |       value: |
 9 |         Thanks for contributing to ragbits!
10 |   - type: textarea
11 |     id: feature-description
12 |     attributes:
13 |       label: Feature description
14 |       description: A clear and concise description of the feature proposal
15 |       placeholder: Tell us what you want!
16 |     validations:
17 |       required: true
18 |   - type: textarea
19 |     id: feature-motivation
20 |     attributes:
21 |       label: Motivation
22 |       description: A clear and concise description of what the problem is, e.g., I'm always frustrated when [...]
23 |       placeholder: Why do you need this feature?
24 |     validations:
25 |       required: true
26 |   - type: textarea
27 |     id: feature-context
28 |     attributes:
29 |       label: Additional context
30 |       description: Add any other context or screenshots about the feature request here.
31 |       placeholder: Screenshots, code snippets, etc.
32 | 
33 | 
34 | 


--------------------------------------------------------------------------------
/.github/ISSUE_TEMPLATE/02_bug_report.yml:
--------------------------------------------------------------------------------
 1 | name: 🐞 Bug Report
 2 | description: File a bug report
 3 | title: "bug: <short_name>"
 4 | labels: ["bug"]
 5 | body:
 6 |   - type: markdown
 7 |     attributes:
 8 |       value: |
 9 |         Thanks for taking the time to fill out this bug report!
10 |   - type: textarea
11 |     id: what-happened
12 |     attributes:
13 |       label: What happened?
14 |       description: Also tell us, what did you expect to happen?
15 |       placeholder: Tell us what you see!
16 |       value: "A bug happened!"
17 |     validations:
18 |       required: true
19 |   - type: textarea
20 |     id: how-to-reproduce
21 |     attributes:
22 |       label: How can we reproduce it?
23 |       description: Please provide a code snippet to reproduce the bug.
24 |       placeholder: import ragbits
25 |       render: python
26 |   - type: textarea
27 |     id: logs
28 |     attributes:
29 |       label: Relevant log output
30 |       description: Please copy and paste any relevant log output. This will be automatically formatted into code, so no need for backticks.
31 |       render: shell


--------------------------------------------------------------------------------
/.github/PULL_REQUEST_TEMPLATE/pull_request_template.md:
--------------------------------------------------------------------------------
1 | ### Checklist
2 | 
3 | - [ ] I have updated the documentation accordingly.
4 | - [ ] I have updated the CHANGELOG.md file accordingly.
5 | 


--------------------------------------------------------------------------------
/.github/scripts/check_changelog_update.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | echo "Fetching main branch..."
 4 | git fetch origin main --depth=1
 5 | 
 6 | echo "Identifying changed files between the current branch and main branch..."
 7 | CHANGED_FILES=$(git diff --name-only origin/main | tr '\n' ' ')
 8 | 
 9 | if [ -z "$CHANGED_FILES" ]; then
10 |   echo "No files have been changed in this branch."
11 |   exit 0
12 | fi
13 | 
14 | CHANGED_PACKAGES=$(echo "$CHANGED_FILES" | grep -oE 'packages/[^/]+/src' | cut -d '/' -f2 | sort -u)
15 | 
16 | if [ -z "$CHANGED_PACKAGES" ]; then
17 |   echo "No package changes detected. Skipping changelog check."
18 |   exit 0
19 | fi
20 | 
21 | echo "Found changes in the following packages: $CHANGED_PACKAGES"
22 | 
23 | # Look for "Changelog-ignore: <package-name>" in the commit message (possibly multiple entries in separate lines)
24 | IGNORED_PACKAGES=$(git log --pretty=format:%B origin/main..HEAD | grep -oP '^Changelog-ignore: \K[^ ]+' | sort -u)
25 | 
26 | for IGNORED_PACKAGE in $IGNORED_PACKAGES; do
27 |   if echo "$CHANGED_PACKAGES" | grep -q "^$IGNORED_PACKAGE$"; then
28 |     echo "Ignoring changelog check for package: $IGNORED_PACKAGE"
29 |     CHANGED_PACKAGES=$(echo "$CHANGED_PACKAGES" | grep -v "^$IGNORED_PACKAGE$")
30 |   fi
31 | done
32 | 
33 | for PACKAGE in $CHANGED_PACKAGES; do
34 |   CHANGELOG="packages/$PACKAGE/CHANGELOG.md"
35 |   echo "Checking changelog for package: $PACKAGE"
36 | 
37 |   if ! diff -u <(git show origin/main:$CHANGELOG | grep -Pzo '(?s)(## Unreleased.*?)(?=\n## |\Z)' | tr -d '\0') <(grep -Pzo '(?s)(## Unreleased.*?)(?=\n## |\Z)' $CHANGELOG | tr -d '\0') | grep -q '^\+'; then
38 |     echo "No updates detected in changelog for package $PACKAGE. Please add an entry under '## Unreleased'."
39 |     exit 1
40 |   fi
41 | done
42 | 
43 | echo "All modified packages have their changelog updates."
44 | 


--------------------------------------------------------------------------------
/.github/scripts/check_source_changes.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | # Get the changed files between the specified commits
 4 | CHANGED_FILES=$(git diff --name-only "$1" "$2")
 5 | 
 6 | # Check if any non-UI files have changed
 7 | PACKAGES_CHANGED=$(echo "$CHANGED_FILES" | grep -qv '^ui/' && echo "true" || echo "false")
 8 | 
 9 | # Check if any UI files have changed
10 | UI_CHANGED=$(echo "$CHANGED_FILES" | grep -q '^ui/' && echo "true" || echo "false")
11 | 
12 | # Set the GitHub outputs
13 | echo "packages-changed=$PACKAGES_CHANGED" >> "$GITHUB_OUTPUT"
14 | echo "ui-changed=$UI_CHANGED" >> "$GITHUB_OUTPUT"
15 | 


--------------------------------------------------------------------------------
/.github/scripts/deploy_docs.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | #
 3 | # Builds the MkDocs site, uploads it to the documentation bucket and
 4 | # invalidates the CDN cache.
 5 | #
 6 | # Required environment:
 7 | #   GCP_KEY  base64-encoded service-account key file used to authenticate gcloud
 8 | 
 9 | set -euxo pipefail
10 | 
11 | # Remove the decoded key file when the script exits, on success or failure.
12 | trap 'rm -f gcp_creds.json' EXIT
13 | 
14 | # Decode the key with tracing switched off so `set -x` does not print it.
15 | # The variable is quoted to avoid word splitting and globbing, and the file is
16 | # overwritten (not appended to) so a stale file cannot corrupt the JSON.
17 | set +x
18 | printf '%s' "$GCP_KEY" | base64 -d > gcp_creds.json
19 | set -x
20 | 
21 | gcloud auth activate-service-account --key-file gcp_creds.json
22 | gcloud config set project ds-internal-db-ally
23 | 
24 | # Build the documentation
25 | uv run mkdocs build
26 | 
27 | # Upload built docs to a bucket
28 | gcloud storage cp -r site/* gs://ragbits-documentation
29 | 
30 | # Invalidate cached content in the CDN
31 | gcloud compute url-maps invalidate-cdn-cache ragbits-documentation-lb \
32 |     --path "/*" --async


--------------------------------------------------------------------------------
/.github/workflows/main-checks.yml:
--------------------------------------------------------------------------------
 1 | name: Main branch checks
 2 | 
 3 | on:  # Triggered by pushes to the main branch only
 4 |   push:
 5 |     branches:
 6 |       - main
 7 | 
 8 | jobs:
 9 |   check-changelog-update:  # Runs the changelog verification script
10 |     name: Check changelog update
11 |     runs-on: ubuntu-latest
12 |     steps:
13 |       - name: Checkout code
14 |         uses: actions/checkout@v4
15 |         with:
16 |           fetch-depth: 0  # Full history rather than a shallow clone
17 | 
18 |       - name: Verify changelog updates
19 |         run: |
20 |           ./.github/scripts/check_changelog_update.sh
21 | 
22 |   check-source-changes:  # Decides which of the downstream jobs need to run
23 |     name: Check source changes
24 |     runs-on: ubuntu-latest
25 |     outputs:  # Read by the `packages` and `ui` jobs below
26 |       packages-changed: ${{ steps.filter.outputs.packages-changed }}
27 |       ui-changed: ${{ steps.filter.outputs.ui-changed }}
28 |     steps:
29 |       - uses: actions/checkout@v4
30 |         with:
31 |           fetch-depth: 0  # Full history rather than a shallow clone; the script below diffs two commits
32 | 
33 |       - name: Check for changes
34 |         id: filter  # Referenced as steps.filter in the job outputs above
35 |         run: |
36 |           ./.github/scripts/check_source_changes.sh ${{ github.event.before }} ${{ github.sha }}
37 | 
38 |   packages:  # Reusable workflow; runs only when packages-changed is 'true'
39 |     needs: check-source-changes
40 |     if: ${{ needs.check-source-changes.outputs.packages-changed == 'true' }}
41 |     uses: ./.github/workflows/shared-packages.yml
42 | 
43 |   ui:  # Reusable workflow; runs only when ui-changed is 'true'
44 |     needs: check-source-changes
45 |     if: ${{ needs.check-source-changes.outputs.ui-changed == 'true' }}
46 |     uses: ./.github/workflows/shared-ui.yml
47 | 


--------------------------------------------------------------------------------
/.github/workflows/publish-docs.yaml:
--------------------------------------------------------------------------------
 1 | name: Publish documentation
 2 | 
 3 | on:
 4 |   release:
 5 |     types: # release events are filtered by activity type, not by branch
 6 |       - published
 7 |   workflow_dispatch:
 8 | 
 9 | jobs:
10 |   deploy:
11 |     runs-on: ubuntu-latest
12 |     container: gcr.io/google.com/cloudsdktool/google-cloud-cli:latest
13 |     environment: documentation
14 |     permissions:
15 |       contents: write
16 |     steps:
17 |       - uses: actions/checkout@v4
18 | 
19 |       - name: Install uv
20 |         uses: astral-sh/setup-uv@v2
21 |         with:
22 |           version: ${{ vars.UV_VERSION || '0.6.9' }}
23 | 
24 |       - name: Set up Python 3.10
25 |         uses: actions/setup-python@v4
26 |         with:
27 |           python-version: "3.10"
28 | 
29 |       - name: Cache Dependencies
30 |         uses: actions/cache@v3
31 |         with:
32 |           path: ~/.cache/uv
33 |           key: ${{ runner.os }}-pip-${{ hashFiles('**/pyproject.toml') }}
34 |           restore-keys: |
35 |             ${{ runner.os }}-pip-
36 | 
37 |       - name: Deploy docs
38 |         shell: bash
39 |         run: uv run ./.github/scripts/deploy_docs.sh
40 |         env:
41 |           GCP_KEY: ${{ secrets.GCP_KEY }}
42 | 
43 | 


--------------------------------------------------------------------------------
/.github/workflows/publish-pypi.yml:
--------------------------------------------------------------------------------
 1 | name: Publish release
 2 | 
 3 | on:
 4 |   pull_request_target:
 5 |     types:
 6 |       - closed
 7 | 
 8 | jobs:
 9 |   publish-release:
10 |     # Only merged release/* pull requests opened by the release bot publish.
11 |     if: startsWith(github.head_ref, 'release/') && github.event.pull_request.merged == true && github.event.pull_request.user.login == 'ds-ragbits-robot'
12 |     runs-on: ubuntu-latest
13 |     steps:
14 |       - uses: actions/checkout@v4
15 | 
16 |       - name: Install uv
17 |         uses: astral-sh/setup-uv@v2
18 |         with:
19 |           version: ${{ vars.UV_VERSION || '0.6.9' }}
20 | 
21 |       - name: Set up Python
22 |         uses: actions/setup-python@v4
23 |         with:
24 |           python-version: "3.10"
25 | 
26 |       - name: Get tag name
27 |         id: tag_name
28 |         # The branch name reaches the script through the environment instead
29 |         # of being expanded into the script text, so it is treated as data.
30 |         env:
31 |           HEAD_REF: ${{ github.event.pull_request.head.ref }}
32 |         run: |
33 |           # Keep everything after the last "-" of the branch name.
34 |           TAG_NAME=$(echo "$HEAD_REF" | sed 's/.*-//')
35 |           echo "new_tag=$TAG_NAME" >> "$GITHUB_OUTPUT"
36 | 
37 |       - name: Create release notes
38 |         run: |
39 |           uv run scripts/create_release_notes.py
40 | 
41 |       - name: Publish release
42 |         run: |
43 |           gh release create "$NEW_TAG" \
44 |             --title "$NEW_TAG" \
45 |             --notes-file RELEASE_NOTES.md
46 |         env:
47 |           GH_TOKEN: ${{ secrets.GH_TOKEN }}
48 |           NEW_TAG: ${{ steps.tag_name.outputs.new_tag }}
49 | 
50 |       - name: Build packages
51 |         run: |
52 |           for dir in packages/*/; do uv build "$dir" --out-dir dist; done
53 | 
54 |       - name: Publish packages
55 |         run: |
56 |           uv tool run twine upload dist/*
57 |         env:
58 |           TWINE_USERNAME: __token__
59 |           TWINE_PASSWORD: ${{ secrets.PYPI_TOKEN }}
60 | 


--------------------------------------------------------------------------------
/.github/workflows/pull-request-checks.yml:
--------------------------------------------------------------------------------
 1 | name: Pull request checks
 2 | 
 3 | on:
 4 |   pull_request:
 5 | 
 6 | jobs:
 7 |   check-changelog-update:
 8 |     name: Check changelog update
 9 |     runs-on: ubuntu-latest
10 |     steps:
11 |       - name: Checkout code
12 |         uses: actions/checkout@v4
13 |         with:
14 |           fetch-depth: 0
15 | 
16 |       - name: Verify changelog updates
17 |         run: |
18 |           ./.github/scripts/check_changelog_update.sh
19 | 
20 |   check-pr-title:
21 |     name: Check pull request title
22 |     runs-on: ubuntu-latest
23 |     steps:
24 |       - uses: amannn/action-semantic-pull-request@v5
25 |         env:
26 |           GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
27 | 
28 |   check-source-changes:
29 |     name: Check source changes
30 |     runs-on: ubuntu-latest
31 |     outputs:
32 |       packages-changed: ${{ steps.filter.outputs.packages-changed }}
33 |       ui-changed: ${{ steps.filter.outputs.ui-changed }}
34 |     steps:
35 |       - uses: actions/checkout@v4
36 |         with:
37 |           fetch-depth: 0
38 | 
39 |       - name: Check for changes
40 |         id: filter
41 |         run: |
42 |           ./.github/scripts/check_source_changes.sh ${{ github.event.pull_request.base.sha }} ${{ github.event.pull_request.head.sha }}
43 | 
44 |   packages:
45 |     needs: check-source-changes
46 |     if: ${{ needs.check-source-changes.outputs.packages-changed == 'true' }}
47 |     uses: ./.github/workflows/shared-packages.yml
48 | 
49 |   ui:
50 |     needs: check-source-changes
51 |     if: ${{ needs.check-source-changes.outputs.ui-changed == 'true' }}
52 |     uses: ./.github/workflows/shared-ui.yml
53 | 


--------------------------------------------------------------------------------
/.github/workflows/shared-ui.yml:
--------------------------------------------------------------------------------
 1 | name: Shared ui checks
 2 | 
 3 | on:
 4 |   workflow_call:
 5 | 
 6 | jobs:
 7 |   lints:
 8 |     name: Run linters
 9 |     continue-on-error: false
10 |     runs-on: ubuntu-latest
11 |     steps:
12 |       - uses: actions/checkout@v4
13 | 
14 |       - name: Set up Node.js
15 |         uses: actions/setup-node@v4
16 |         with:
17 |           node-version: "lts/*"
18 | 
19 |       - name: Install UI dependencies
20 |         run: npm i
21 |         working-directory: ui
22 | 
23 |       - name: Run ESLint
24 |         run: npm run lint
25 |         working-directory: ui
26 | 
27 |       - name: Run Prettier
28 |         run: npm run format:check
29 |         working-directory: ui
30 | 
31 |       - name: Check build
32 |         run: npm run build
33 |         working-directory: ui
34 | 
35 |       - name: Check Ragbits Chat UI build sync
36 |         run: |
37 |           git diff --quiet || {
38 |             echo "ragbits-chat package ui build not synced"
39 |             exit 1
40 |           }
41 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
  1 | # Directories
  2 | .vscode/
  3 | .idea/
  4 | .neptune/
  5 | .pytest_cache/
  6 | .mypy_cache/
  7 | venv/
  8 | .venv/
  9 | __pycache__/
 10 | **.egg-info/
 11 | 
 12 | # Byte-compiled / optimized / DLL files
 13 | __pycache__/
 14 | *.py[cod]
 15 | *$py.class
 16 | 
 17 | # C extensions
 18 | *.so
 19 | 
 20 | # Distribution / packaging
 21 | .Python
 22 | env/
 23 | build/
 24 | develop-eggs/
 25 | dist/
 26 | downloads/
 27 | eggs/
 28 | .eggs/
 29 | lib/
 30 | lib64/
 31 | parts/
 32 | sdist/
 33 | var/
 34 | *.egg-info/
 35 | .installed.cfg
 36 | *.egg
 37 | 
 38 | # Sphinx documentation
 39 | docs/_build/
 40 | public/
 41 | # autogenerated package license table
 42 | docs/licenses_table.rst
 43 | 
 44 | # license dump file
 45 | licenses.txt
 46 | 
 47 | # File formats
 48 | *.onnx
 49 | *.pyc
 50 | *.pt
 51 | *.pth
 52 | *.pkl
 53 | *.mar
 54 | *.torchscript
 55 | **/.ipynb_checkpoints
 56 | **/dist/
 57 | **/checkpoints/
 58 | **/outputs/
 59 | **/multirun/
 60 | 
 61 | # Other env files
 62 | .python-version
 63 | pyvenv.cfg
 64 | pip-selfcheck.json
 65 | 
 66 | # Unit test / coverage reports
 67 | htmlcov/
 68 | .tox/
 69 | .coverage
 70 | .coverage.*
 71 | .cache
 72 | nosetests.xml
 73 | coverage.xml
 74 | *,cover
 75 | .hypothesis/
 76 | 
 77 | # dotenv
 78 | .env
 79 | 
 80 | # coverage and pytest reports
 81 | coverage.xml
 82 | report.xml
 83 | 
 84 | # CMake
 85 | cmake-build-*/
 86 | 
 87 | # Terraform
 88 | **/.terraform.lock.hcl
 89 | **/.terraform
 90 | 
 91 | # mkdocs generated files
 92 | site/
 93 | 
 94 | # build artifacts
 95 | dist/
 96 | 
 97 | # examples
 98 | chroma/
 99 | qdrant/
100 | 
101 | .aider*
102 | 
103 | .DS_Store
104 | 


--------------------------------------------------------------------------------
/.libraries-whitelist.txt:
--------------------------------------------------------------------------------
 1 | pkg_resources
 2 | tiktoken
 3 | chardet
 4 | chroma-hnswlib
 5 | rouge
 6 | distilabel
 7 | rerankers
 8 | py_rust_stemmers
 9 | mirakuru
10 | psycopg
11 | pytest-postgresql
12 | python-bidi
13 | 


--------------------------------------------------------------------------------
/.license-whitelist.txt:
--------------------------------------------------------------------------------
 1 | 3-Clause BSD License
 2 | Apache 2
 3 | Apache License 2
 4 | Apache Software License
 5 | Apache Software License, BSD License
 6 | Apache Software License, MIT License
 7 | Apache-2
 8 | Apache License, Version 2
 9 | Apache License v2.0
10 | BSD
11 | BSD License
12 | BSD License, Apache Software License
13 | CC0 1.0 Universal (CC0 1.0) Public Domain Dedication
14 | Freely Distributable
15 | ISC License (ISCL)
16 | MIT
17 | MIT License
18 | MIT License, Mozilla Public License 2.0 (MPL 2.0)
19 | Mozilla Public License 2.0 (MPL 2.0)
20 | Public Domain
21 | Python Software Foundation License
22 | Python Software Foundation License, MIT License
23 | Unlicense
24 | Proprietary License
25 | Historical Permission Notice and Disclaimer (HPND)
26 | ISC
27 | 


--------------------------------------------------------------------------------
/.pre-commit-config.yaml:
--------------------------------------------------------------------------------
 1 | default_language_version:
 2 |     python: python3.10
 3 | repos:
 4 |     - repo: https://github.com/pre-commit/pre-commit-hooks
 5 |       rev: v5.0.0
 6 |       hooks:
 7 |           - id: check-case-conflict
 8 |           - id: check-merge-conflict
 9 |           - id: trailing-whitespace
10 |             exclude: .cursor/|ui-build/*
11 |           - id: check-ast
12 |           - id: check-added-large-files
13 |           - id: check-toml
14 |           - id: check-json
15 |           - id: check-yaml
16 | 


--------------------------------------------------------------------------------
/CONTRIBUTING.md:
--------------------------------------------------------------------------------
 1 | # Installation
 2 | 
 3 | ## Build from source
 4 | 
 5 | Dependencies needed to build and run Ragbits from the source code:
 6 | 
 7 | 1. [**uv**](https://docs.astral.sh/uv/getting-started/installation/)
 8 | 2. [**python**](https://docs.astral.sh/uv/guides/install-python/) 3.10
 9 | 
10 | 
11 | ## Linting and formatting
12 | We use `ruff` for linting and formatting our code. To format your code, run:
13 | 
14 | ```bash
15 | $ uv run ruff format
16 | ```
17 | 
18 | To lint the code, run:
19 | ```bash
20 | $ uv run ruff check --fix
21 | ```
22 | 
23 | ## Type checking
24 | We use `mypy` for type checking. To perform type checking, simply run:
25 | 
26 | ```bash
27 | $ uv run mypy .
28 | ```
29 | 
30 | ## Testing
31 | We use `pytest` for testing. To run the tests, simply run:
32 | 
33 | ```bash
34 | $ uv run pytest
35 | ```
36 | 
37 | Running integration tests requires PostgreSQL with the pgvector extension installed.
38 | The minimum supported version of pgvector is 0.7.0, which added support for sparse vectors.
39 | 
40 | On Ubuntu Linux you can get it by installing the `postgresql-17-pgvector` package.
41 | 
42 | If it is not in your system's default repositories, you can install it from the official PostgreSQL Apt Repository:
43 | 
44 | ```bash
45 | sudo apt install postgresql-common
46 | sudo /usr/share/postgresql-common/pgdg/apt.postgresql.org.sh
47 | sudo apt install postgresql-17-pgvector
48 | ```
49 | 
50 | ## Install pre-commit or pre-push hooks
51 | 
52 | We also recommend running the checkers in a pre-commit or pre-push hook. To set it up, run:
53 | 
54 | ```bash
55 | $ uv run scripts/install_git_hooks.py
56 | ```
57 | 
58 | Then decide whether you want to run the checks before each commit or before each push.
59 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | The MIT License (MIT)
 2 | 
 3 | Copyright (c) 2024 deepsense.ai sp. z o.o.
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in
13 | all copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
21 | THE SOFTWARE.


--------------------------------------------------------------------------------
/check_licenses.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | set -e
 3 | 
 4 | uv run pip-licenses --from=mixed  --ignore-packages `cat .libraries-whitelist.txt`> licenses.txt
 5 | cat licenses.txt
 6 | 
 7 | FOUND=$(tail -n +2 licenses.txt | grep -v -f .license-whitelist.txt | wc -l)
 8 | 
 9 | if [[ $FOUND -gt 0 ]]; then
10 |   echo "Detected license/s not on the whitelist ($FOUND found)."
11 |   tail -n +2 licenses.txt | grep -v -f .license-whitelist.txt
12 |   exit 1
13 | else
14 |   echo "Okay."
15 |   exit 0
16 | fi
17 | 


--------------------------------------------------------------------------------
/docs/api_reference/chat/compressors/base.md:
--------------------------------------------------------------------------------
1 | # Conversation History Compressors
2 | 
3 | Conversation History Compressors are able to take conversation history and represent it as a single string. What's included in the string depends on the particular compressor.
4 | 
5 | ::: ragbits.chat.history.compressors.base.ConversationHistoryCompressor
6 | 


--------------------------------------------------------------------------------
/docs/api_reference/chat/compressors/llm.md:
--------------------------------------------------------------------------------
1 | # Standalone Message Compressor
2 | 
3 | A compressor that uses an LLM to recontextualize the last message in the history, i.e. create a standalone version of the message that includes the necessary context.
4 | 
5 | ::: ragbits.chat.history.compressors.llm.StandaloneMessageCompressor
6 | 
7 | ::: ragbits.chat.history.compressors.llm.LastMessageAndHistory
8 | 
9 | ::: ragbits.chat.history.compressors.llm.StandaloneMessageCompressorPrompt


--------------------------------------------------------------------------------
/docs/api_reference/chat/interface/chat_interface.md:
--------------------------------------------------------------------------------
1 | # Chat Interface
2 | 
3 | The `ChatInterface` is the main interface for the chat service. It defines the core functionality required for a chat service
4 | that can return various types of responses such as:
5 | 
6 | * Text: Regular text responses streamed chunk by chunk
7 | * References: Source documents used to generate the answer
8 | 
9 | ::: ragbits.chat.interface.ChatInterface


--------------------------------------------------------------------------------
/docs/api_reference/core/audit/metrics.md:
--------------------------------------------------------------------------------
 1 | # Metrics
 2 | 
 3 | ::: ragbits.core.audit.metrics.set_metric_handlers
 4 | 
 5 | ::: ragbits.core.audit.metrics.clear_metric_handlers
 6 | 
 7 | ::: ragbits.core.audit.metrics.create_histogram
 8 | 
 9 | ::: ragbits.core.audit.metrics.record
10 | 
11 | ::: ragbits.core.audit.metrics.base.HistogramMetric
12 | 
13 | ::: ragbits.core.audit.metrics.base.MetricHandler
14 | 
15 | ::: ragbits.core.audit.metrics.otel.OtelMetricHandler
16 | 


--------------------------------------------------------------------------------
/docs/api_reference/core/audit/traces.md:
--------------------------------------------------------------------------------
 1 | # Traces
 2 | 
 3 | ::: ragbits.core.audit.traces.set_trace_handlers
 4 | 
 5 | ::: ragbits.core.audit.traces.clear_trace_handlers
 6 | 
 7 | ::: ragbits.core.audit.traces.trace
 8 | 
 9 | ::: ragbits.core.audit.traces.traceable
10 | 
11 | ::: ragbits.core.audit.traces.base.TraceHandler
12 | 
13 | ::: ragbits.core.audit.traces.cli.CLITraceHandler
14 | 
15 | ::: ragbits.core.audit.traces.otel.OtelTraceHandler
16 | 


--------------------------------------------------------------------------------
/docs/api_reference/core/embeddings.md:
--------------------------------------------------------------------------------
 1 | # Embedders
 2 | 
 3 | ::: ragbits.core.embeddings.base.Embedder
 4 | 
 5 | ::: ragbits.core.embeddings.dense.DenseEmbedder
 6 | 
 7 | ::: ragbits.core.embeddings.dense.local.LocalEmbedder
 8 | 
 9 | ::: ragbits.core.embeddings.dense.litellm.LiteLLMEmbedder
10 | 
11 | ::: ragbits.core.embeddings.dense.fastembed.FastEmbedEmbedder
12 | 
13 | ::: ragbits.core.embeddings.sparse.base.SparseEmbedder
14 | 
15 | ::: ragbits.core.embeddings.sparse.fastembed.FastEmbedSparseEmbedder
16 | 
17 | ::: ragbits.core.embeddings.sparse.bag_of_tokens.BagOfTokens


--------------------------------------------------------------------------------
/docs/api_reference/core/hybrid.md:
--------------------------------------------------------------------------------
1 | # Hybrid Vector Store & Fusion Strategies
2 | 
3 | ::: ragbits.core.vector_stores.hybrid.HybridSearchVectorStore
4 | 
5 | ::: ragbits.core.vector_stores.hybrid_strategies.OrderedHybridRetrivalStrategy
6 | 
7 | ::: ragbits.core.vector_stores.hybrid_strategies.ReciprocalRankFusion
8 | 
9 | ::: ragbits.core.vector_stores.hybrid_strategies.DistributionBasedScoreFusion


--------------------------------------------------------------------------------
/docs/api_reference/core/llms.md:
--------------------------------------------------------------------------------
1 | # LLMs
2 | 
3 | ::: ragbits.core.llms.LLM
4 | 
5 | ::: ragbits.core.llms.local.LocalLLM
6 | 
7 | ::: ragbits.core.llms.litellm.LiteLLM


--------------------------------------------------------------------------------
/docs/api_reference/core/prompt.md:
--------------------------------------------------------------------------------
1 | # Prompt
2 | 
3 | ::: ragbits.core.prompt.Prompt


--------------------------------------------------------------------------------
/docs/api_reference/core/sources.md:
--------------------------------------------------------------------------------
 1 | # Sources
 2 | 
 3 | ::: ragbits.core.sources.base.Source
 4 | 
 5 | ::: ragbits.core.sources.azure.AzureBlobStorageSource
 6 | 
 7 | ::: ragbits.core.sources.gcs.GCSSource
 8 | 
 9 | ::: ragbits.core.sources.git.GitSource
10 | 
11 | ::: ragbits.core.sources.hf.HuggingFaceSource
12 | 
13 | ::: ragbits.core.sources.local.LocalFileSource
14 | 
15 | ::: ragbits.core.sources.s3.S3Source
16 | 
17 | ::: ragbits.core.sources.web.WebSource
18 | 


--------------------------------------------------------------------------------
/docs/api_reference/core/vector-stores.md:
--------------------------------------------------------------------------------
 1 | # Vector Stores
 2 | 
 3 | ::: ragbits.core.vector_stores.base.VectorStoreEntry
 4 | 
 5 | ::: ragbits.core.vector_stores.base.VectorStoreOptions
 6 | 
 7 | ::: ragbits.core.vector_stores.base.VectorStore
 8 | 
 9 | ::: ragbits.core.vector_stores.in_memory.InMemoryVectorStore
10 | 
11 | ::: ragbits.core.vector_stores.chroma.ChromaVectorStore
12 | 
13 | ::: ragbits.core.vector_stores.qdrant.QdrantVectorStore
14 | 
15 | ::: ragbits.core.vector_stores.pgvector.PgVectorStore


--------------------------------------------------------------------------------
/docs/api_reference/document_search/documents/documents.md:
--------------------------------------------------------------------------------
 1 | # Documents
 2 | 
 3 | ::: ragbits.document_search.documents.document.Document
 4 | 
 5 | ::: ragbits.document_search.documents.document.TextDocument
 6 | 
 7 | ::: ragbits.document_search.documents.document.DocumentMeta
 8 | 
 9 | ::: ragbits.document_search.documents.document.DocumentType
10 | 


--------------------------------------------------------------------------------
/docs/api_reference/document_search/documents/elements.md:
--------------------------------------------------------------------------------
1 | # Elements
2 | 
3 | ::: ragbits.document_search.documents.element.Element
4 | 
5 | ::: ragbits.document_search.documents.element.TextElement
6 | 
7 | ::: ragbits.document_search.documents.element.ImageElement
8 | 


--------------------------------------------------------------------------------
/docs/api_reference/document_search/index.md:
--------------------------------------------------------------------------------
1 | # Document Search
2 | 
3 | ::: ragbits.document_search.DocumentSearchOptions
4 | 
5 | ::: ragbits.document_search.DocumentSearch
6 | 


--------------------------------------------------------------------------------
/docs/api_reference/document_search/ingest/enrichers.md:
--------------------------------------------------------------------------------
1 | # Element Enrichers
2 | 
3 | ::: ragbits.document_search.ingestion.enrichers.router.ElementEnricherRouter
4 | 
5 | ::: ragbits.document_search.ingestion.enrichers.base.ElementEnricher
6 | 
7 | ::: ragbits.document_search.ingestion.enrichers.image.ImageElementEnricher
8 | 


--------------------------------------------------------------------------------
/docs/api_reference/document_search/ingest/parsers.md:
--------------------------------------------------------------------------------
 1 | # Document Parsers
 2 | 
 3 | ::: ragbits.document_search.ingestion.parsers.router.DocumentParserRouter
 4 | 
 5 | ::: ragbits.document_search.ingestion.parsers.base.DocumentParser
 6 | 
 7 | ::: ragbits.document_search.ingestion.parsers.base.TextDocumentParser
 8 | 
 9 | ::: ragbits.document_search.ingestion.parsers.base.ImageDocumentParser
10 | 
11 | ::: ragbits.document_search.ingestion.parsers.docling.DoclingDocumentParser
12 | 
13 | ::: ragbits.document_search.ingestion.parsers.unstructured.UnstructuredDocumentParser
14 | 


--------------------------------------------------------------------------------
/docs/api_reference/document_search/ingest/strategies.md:
--------------------------------------------------------------------------------
 1 | # Ingest Strategies
 2 | 
 3 | ::: ragbits.document_search.ingestion.strategies.base.IngestStrategy
 4 | 
 5 | ::: ragbits.document_search.ingestion.strategies.sequential.SequentialIngestStrategy
 6 | 
 7 | ::: ragbits.document_search.ingestion.strategies.batched.BatchedIngestStrategy
 8 | 
 9 | ::: ragbits.document_search.ingestion.strategies.ray.RayDistributedIngestStrategy
10 | 


--------------------------------------------------------------------------------
/docs/api_reference/document_search/retrieval/rephrasers.md:
--------------------------------------------------------------------------------
 1 | # Query Rephrasers
 2 | 
 3 | ::: ragbits.document_search.retrieval.rephrasers.base.QueryRephraserOptions
 4 | 
 5 | ::: ragbits.document_search.retrieval.rephrasers.llm.LLMQueryRephraserOptions
 6 | 
 7 | ::: ragbits.document_search.retrieval.rephrasers.base.QueryRephraser
 8 | 
 9 | ::: ragbits.document_search.retrieval.rephrasers.llm.LLMQueryRephraser
10 | 
11 | ::: ragbits.document_search.retrieval.rephrasers.noop.NoopQueryRephraser
12 | 


--------------------------------------------------------------------------------
/docs/api_reference/document_search/retrieval/rerankers.md:
--------------------------------------------------------------------------------
 1 | # Rerankers
 2 | 
 3 | ::: ragbits.document_search.retrieval.rerankers.base.RerankerOptions
 4 | 
 5 | ::: ragbits.document_search.retrieval.rerankers.litellm.LiteLLMRerankerOptions
 6 | 
 7 | ::: ragbits.document_search.retrieval.rerankers.llm.LLMRerankerOptions
 8 | 
 9 | ::: ragbits.document_search.retrieval.rerankers.base.Reranker
10 | 
11 | ::: ragbits.document_search.retrieval.rerankers.answerai.AnswerAIReranker
12 | 
13 | ::: ragbits.document_search.retrieval.rerankers.litellm.LiteLLMReranker
14 | 
15 | ::: ragbits.document_search.retrieval.rerankers.llm.LLMReranker
16 | 
17 | ::: ragbits.document_search.retrieval.rerankers.noop.NoopReranker
18 | 
19 | ::: ragbits.document_search.retrieval.rerankers.rrf.ReciprocalRankFusionReranker
20 | 


--------------------------------------------------------------------------------
/docs/api_reference/guardrails/index.md:
--------------------------------------------------------------------------------
1 | # Guardrails
2 | 
3 | ::: ragbits.guardrails.base.Guardrail
4 | ::: ragbits.guardrails.base.GuardrailManager
5 | ::: ragbits.guardrails.base.GuardrailVerificationResult
6 | ::: ragbits.guardrails.openai_moderation.OpenAIModerationGuardrail


--------------------------------------------------------------------------------
/docs/cli/main.md:
--------------------------------------------------------------------------------
 1 | # Ragbits CLI
 2 | 
 3 | Ragbits comes with a command-line interface (CLI) that provides several commands for working with the Ragbits platform. It can be accessed by running the `ragbits` command in your terminal.
 4 | 
 5 | Commands that operate on Ragbits components, such as [`ragbits vector-store`](#ragbits-vector-store), use the project's preferred component implementations if a component configuration is not explicitly provided. To learn how to set component preferences in your project, see the [How-To: Set preferred components in Ragbits project](../how-to/project/component_preferences.md) guide.
 6 | 
 7 | ::: mkdocs-click
 8 |     :module: ragbits.cli
 9 |     :command: _click_app
10 |     :prog_name: ragbits
11 |     :style: table
12 |     :list_subcommands: true
13 |     :depth: 1


--------------------------------------------------------------------------------
/docs/how-to/evaluate/custom_dataloader.md:
--------------------------------------------------------------------------------
 1 | # How-To: Create custom data loader with Ragbits
 2 | 
 3 | Ragbits provides a base interface for data loading, `ragbits.evaluate.dataloaders.base.DataLoader`, designed specifically for evaluation purposes. A ready-to-use implementation, `ragbits.evaluate.dataloaders.hf.HFLoader`, is available for handling datasets in huggingface format.
 4 | 
 5 | To create a custom DataLoader for your specific needs, you need to implement the `load` method in a class that inherits from the `DataLoader` interface.
 6 | 
 7 | Please find the [working example](optimize.md#define-the-data-loader) here.
 8 | 
 9 | **Note:** This interface is not to be confused with PyTorch's `DataLoader`, as it serves a distinct purpose within the Ragbits evaluation framework.
10 | 


--------------------------------------------------------------------------------
/docs/how-to/evaluate/custom_evaluation_pipeline.md:
--------------------------------------------------------------------------------
1 | # How-To: Create custom evaluation pipeline in Ragbits
2 | 
3 | Ragbits provides a ready-to-use evaluation pipeline for document search, implemented within the `ragbits.evaluate.document_search.DocumentSearchPipeline` module.
4 | 
5 | To create a custom evaluation pipeline for your specific use case, you need to implement the `__call__` method as part of the `ragbits.evaluate.pipelines.base.EvaluationPipeline` interface.
6 | 
7 | 
8 | Please find the [working example](optimize.md#define-the-optimized-pipeline-structure) here.


--------------------------------------------------------------------------------
/docs/how-to/evaluate/custom_metric.md:
--------------------------------------------------------------------------------
1 | # How-To: Create custom evaluation metric in Ragbits
2 | 
3 | The `ragbits.evaluate` package provides metrics that measure the quality of a document search pipeline on your data, implemented in `ragbits.evaluate.metrics.document_search`.
4 | However, you are not limited to these. To implement custom metrics for your specific use case, inherit from the `ragbits.evaluate.metrics.base.Metric`
5 | abstract class and implement the `compute` method.
6 | 
7 | Please find the [working example](optimize.md#define-the-metrics) here.


--------------------------------------------------------------------------------
/docs/how-to/project/custom_components.md:
--------------------------------------------------------------------------------
 1 | # How-To: Register custom components
 2 | 
 3 | Ragbits allows you to extend its functionality by adding custom implementations of various components, such as [`sources`][ragbits.core.sources.Source] or [`elements`][ragbits.document_search.documents.element.Element]. In most cases, you just need to import them directly in your code and use them, but in some cases, such as source ingest via CLI, you need to import them implicitly to avoid errors.
 4 | 
 5 | To register your component classes, include their module paths in the `modules_to_import` section of your `pyproject.toml` file:
 6 | 
 7 | ```toml
 8 | [tool.ragbits.core]
 9 | modules_to_import = [
10 |     "python.path.to.custom_source",
11 |     "python.path.to.custom_element",
12 |     ...
13 | ]
14 | ```
15 | 
16 | And that's it. Ragbits reads `pyproject.toml` every time you run it and imports the modules listed there, so you can be sure that your components will always be available at runtime.
17 | 
18 | !!! tip
19 |     It is a good practice to put all custom components in the `modules_to_import` section to avoid potential errors in the future.
20 | 


--------------------------------------------------------------------------------
/docs/how-to/prompts/promptfoo.md:
--------------------------------------------------------------------------------
 1 | # How-To: Test prompts with promptfoo and Ragbits
 2 | 
 3 | Ragbits' [`Prompt`][ragbits.core.prompt.Prompt] abstraction can be seamlessly integrated with the `promptfoo` tool. After installing `promptfoo` as
 4 | specified in the [promptfoo documentation](https://www.promptfoo.dev/docs/installation/), you can generate promptfoo
 5 | configuration files for all the prompts discovered by our autodiscover mechanism by running the following command:
 6 | 
 7 | ```bash
 8 | ragbits prompts promptfoo
 9 | ```
10 | 
11 | This command will generate YAML files in the directory specified by `--target-path` (`promptfooconfigs` by
12 | default). Each generated file should look like this:
13 | 
14 | ```yaml
15 | prompts:
16 |   - file:///path/to/your/prompt:PromptClass.to_promptfoo
17 | ```
18 | 
19 | You can then edit the generated file to add your custom `promptfoo` configurations. Once your `promptfoo` configuration
20 | file is ready, you can run `promptfoo` with the following command:
21 | 
22 | ```bash
23 | promptfoo eval -c /path/to/generated/promptfoo-config.yaml
24 | ```
25 | 
26 | **Important: To ensure compatibility, make sure Node.js version 20 is installed.**
27 | 


--------------------------------------------------------------------------------
/docs/stylesheets/extra.css:
--------------------------------------------------------------------------------
1 | :root {
2 |   --md-accent-fg-color:  #1B54FF;
3 | }
4 | 
5 | .md-header__title {
6 |   margin-left: 0.5rem !important;
7 | }
8 | 


--------------------------------------------------------------------------------
/examples/conversations/recontextualize_message.py:
--------------------------------------------------------------------------------
 1 | """
 2 | Ragbits Conversations Example: Recontextualize Last Message
 3 | 
 4 | This example demonstrates how to use the `StandaloneMessageCompressor` compressor to recontextualize
 5 | the last message in a conversation history.
 6 | """
 7 | 
 8 | # /// script
 9 | # requires-python = ">=3.10"
10 | # dependencies = [
11 | #     "ragbits-conversations",
12 | # ]
13 | # ///
14 | 
15 | import asyncio
16 | 
17 | from ragbits.conversations.history.compressors.llm import StandaloneMessageCompressor
18 | from ragbits.core.llms.litellm import LiteLLM
19 | from ragbits.core.prompt import ChatFormat
20 | 
21 | # Sample chat history: the final user turn only makes sense together with the earlier turns about Jim
22 | conversation: ChatFormat = [
23 |     {"role": "user", "content": "Who's working on Friday?"},
24 |     {"role": "assistant", "content": "Jim"},
25 |     {"role": "user", "content": "Where is he based?"},
26 |     {"role": "assistant", "content": "At our California Head Office"},
27 |     {"role": "user", "content": "Is he a senior staff member?"},
28 |     {"role": "assistant", "content": "Yes, he's a senior manager"},
29 |     {"role": "user", "content": "What's his phone number (including the prefix for his state)?"},
30 | ]
31 | 
32 | 
33 | async def main() -> None:
34 |     """
35 |     Main function to demonstrate the StandaloneMessageCompressor compressor.
36 |     """
37 |     # Initialize the LiteLLM client
38 |     llm = LiteLLM("gpt-4o")
39 | 
40 |     # Initialize the StandaloneMessageCompressor compressor
41 |     compressor = StandaloneMessageCompressor(llm, history_len=10)
42 | 
43 |     # Compress the conversation history
44 |     recontextualized_message = await compressor.compress(conversation)
45 | 
46 |     # Print the recontextualized message
47 |     print("Recontextualized Message:")
48 |     print(recontextualized_message)
49 | 
50 | 
51 | if __name__ == "__main__":
52 |     asyncio.run(main())  # start the async example only when the file is executed as a script
53 | 


--------------------------------------------------------------------------------
/examples/core/audit/config/grafana/grafana-dashboards.yaml:
--------------------------------------------------------------------------------
1 | apiVersion: 1
2 | 
3 | providers:
4 |   - name: "Ragbits"
5 |     type: file
6 |     options:
7 |       path: /otel-lgtm/ragbits-dashboard.json
8 |       foldersFromFilesStructure: false
9 | 


--------------------------------------------------------------------------------
/examples/core/prompt/text.py:
--------------------------------------------------------------------------------
 1 | """
 2 | Ragbits Core Example: Text Prompt
 3 | 
 4 | This example shows how to use the `Prompt` class to generate themed text using an LLM.
 5 | We define an `AnimalPrompt` that generates names for a given animal type.
 6 | 
 7 | The script performs the following steps:
 8 | 
 9 |     1. Define input and output formats using Pydantic models.
10 |     2. Implement the `AnimalPrompt` class with a structured system prompt.
11 |     3. Initialize the `LiteLLM` class to generate text.
12 |     4. Generate a name based on the specified animal.
13 |     5. Print the generated name.
14 | 
15 | To run the script, execute the following command:
16 | 
17 |     ```bash
18 |     uv run examples/core/prompt/text.py
19 |     ```
20 | """
21 | 
22 | # /// script
23 | # requires-python = ">=3.10"
24 | # dependencies = [
25 | #     "ragbits-core",
26 | # ]
27 | # ///
28 | import asyncio
29 | 
30 | from pydantic import BaseModel
31 | 
32 | from ragbits.core.llms import LiteLLM
33 | from ragbits.core.prompt import Prompt
34 | 
35 | 
36 | class AnimalPromptInput(BaseModel):
37 |     """
38 |     Input format for the AnimalPrompt.
39 |     """
40 | 
41 |     animal: str  # presumably fills the `{{ animal }}` placeholder of AnimalPrompt.user_prompt
42 | 
43 | 
44 | class AnimalPromptOutput(BaseModel):
45 |     """
46 |     Output format for the AnimalPrompt.
47 |     """
48 | 
49 |     name: str  # presumably the generated name; main() prints `response.name`
50 | 
51 | 
52 | class AnimalPrompt(Prompt[AnimalPromptInput, AnimalPromptOutput]):
53 |     """
54 |     Prompt that generates animal names.
55 |     """
56 |     # Parametrized with AnimalPromptInput (input format) and AnimalPromptOutput (output format)
57 |     system_prompt = """
58 |     You are an animal name generator. Use provided animal kind as a base.
59 |     """
60 |     # `{{ animal }}` below matches the `animal` field declared on AnimalPromptInput
61 |     user_prompt = """
62 |     Animal: {{ animal }}
63 |     """
64 | 
65 | 
66 | async def main() -> None:
67 |     """
68 |     Run the example.
69 |     """
70 |     llm = LiteLLM(model_name="gpt-4o-2024-08-06", use_structured_output=True)
71 |     prompt = AnimalPrompt(AnimalPromptInput(animal="cat"))
72 |     response = await llm.generate(prompt)
73 |     print(response.name)
74 | 
75 | 
76 | if __name__ == "__main__":
77 |     asyncio.run(main())  # start the async example only when the file is executed as a script
78 | 


--------------------------------------------------------------------------------
/examples/document-search/images/bear.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/deepsense-ai/ragbits/c37428621adc5ab30528f41ccd22d655f421c2e8/examples/document-search/images/bear.jpg


--------------------------------------------------------------------------------
/examples/document-search/images/game.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/deepsense-ai/ragbits/c37428621adc5ab30528f41ccd22d655f421c2e8/examples/document-search/images/game.jpg


--------------------------------------------------------------------------------
/examples/document-search/images/tree.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/deepsense-ai/ragbits/c37428621adc5ab30528f41ccd22d655f421c2e8/examples/document-search/images/tree.jpg


--------------------------------------------------------------------------------
/examples/evaluation/dataset-generator/config/generate.yaml:
--------------------------------------------------------------------------------
 1 | input_name: query
 2 | name: synthetic-RAG-data
 3 | tasks:
 4 |   - type: ragbits.evaluate.dataset_generator.tasks.corpus_generation:CorpusGenerationStep
 5 |     llm:
 6 |       provider_type: ragbits.core.llms.litellm:LiteLLM
 7 |       kwargs:
 8 |         model_name: gpt-4o
 9 |     kwargs:
10 |       num_per_topic: 5
11 |       prompt_class: ragbits.evaluate.dataset_generator.prompts.corpus_generation:BasicCorpusGenerationPrompt
12 |   - type: ragbits.evaluate.dataset_generator.tasks.text_generation.qa:QueryGenTask
13 |     llm:
14 |       provider_type: distilabel.llms:OpenAILLM
15 |       kwargs:
16 |         model: gpt-4o
17 |     kwargs:
18 |       prompt_class: ragbits.evaluate.dataset_generator.prompts.qa:QueryGenPrompt
19 |   - type: ragbits.evaluate.dataset_generator.tasks.text_generation.qa:AnswerGenTask
20 |     llm:
21 |       provider_type: distilabel.llms:OpenAILLM
22 |       kwargs:
23 |         model: gpt-4o
24 |     kwargs:
25 |       prompt_class: ragbits.evaluate.dataset_generator.prompts.qa:BasicAnswerGenPrompt
26 |   - type: ragbits.evaluate.dataset_generator.tasks.text_generation.qa:PassagesGenTask
27 |     llm:
28 |       provider_type: distilabel.llms:OpenAILLM
29 |       kwargs:
30 |         model: gpt-4o
31 |     kwargs:
32 |       prompt_class: ragbits.evaluate.dataset_generator.prompts.qa:PassagesGenPrompt
33 |     filters:
34 |       - ragbits.evaluate.dataset_generator.tasks.filter.dont_know:DontKnowFilter
35 | 


--------------------------------------------------------------------------------
/examples/evaluation/dataset-generator/generate.py:
--------------------------------------------------------------------------------
 1 | import hydra
 2 | from omegaconf import DictConfig
 3 | 
 4 | from ragbits.evaluate.dataset_generator.pipeline import DatasetGenerationPipeline
 5 | from ragbits.evaluate.utils import log_dataset_to_file
 6 | 
 7 | 
 8 | @hydra.main(config_path="config", config_name="generate", version_base="3.2")
 9 | def main(config: DictConfig) -> None:
10 |     """
11 |     A main function for dataset generation example
12 |     Args:
13 |         config (DictConfig) - configuration should follow
14 |         ragbits.evaluate.dataset_generator.DatasetGenerationPipelineConfig data model
15 |     Returns:
16 |         None
17 |     """
18 |     TOPICS = ["conspiracy theories", "machine learning"]
19 |     generation_pipeline = DatasetGenerationPipeline.from_dict_config(dict_config=config)
20 |     result_dataset = generation_pipeline(corpus=TOPICS)
21 |     log_dataset_to_file(dataset=result_dataset)
22 | 
23 | 
24 | if __name__ == "__main__":
25 |     main()  # called without arguments: the @hydra.main decorator supplies `config`
26 | 


--------------------------------------------------------------------------------
/examples/evaluation/document-search/advanced/README.md:
--------------------------------------------------------------------------------
 1 | # Document Search Evaluation
 2 | 
 3 | ## Evaluation
 4 | 
 5 | ### Evaluation on ingested data
 6 | 
 7 | ```sh
 8 | uv run evaluate.py
 9 | ```
10 | 
11 | ```sh
12 | uv run evaluate.py +experiments=chunking-250
13 | ```
14 | 
15 | ```sh
16 | uv run evaluate.py --multirun +experiments=chunking-250,chunking-500,chunking-1000
17 | ```
18 | 
19 | ### Logging
20 | 
21 | ```sh
22 | uv run evaluate.py logger.local=True
23 | ```
24 | 
25 | ```sh
26 | uv run evaluate.py logger.neptune=True
27 | ```
28 | 
29 | ## Optimization
30 | 
31 | ```sh
32 | uv run optimize.py
33 | ```
34 | 
35 | ### Monitoring
36 | 
37 | ```sh
38 | uv run optimize.py neptune_callback=True
39 | ```
40 | 


--------------------------------------------------------------------------------
/examples/evaluation/document-search/advanced/config/dataloader/hf.yaml:
--------------------------------------------------------------------------------
1 | type: ragbits.evaluate.dataloaders.document_search:DocumentSearchDataLoader
2 | config:
3 |   source:
4 |     type: ragbits.core.sources:HuggingFaceSource
5 |     config:
6 |       path: "deepsense-ai/synthetic-rag-dataset_v1.0"
7 |       split: "train"
8 | 


--------------------------------------------------------------------------------
/examples/evaluation/document-search/advanced/config/experiments/chunking-1000.yaml:
--------------------------------------------------------------------------------
 1 | # @package _global_
 2 | 
 3 | task:
 4 |   name: chunking-1000
 5 | 
 6 | pipeline:
 7 |   config:
 8 |     parser_router:
 9 |       txt:
10 |         config:
11 |           chunking_kwargs:
12 |             max_characters: 1000
13 |             new_after_n_chars: 200
14 |       md:
15 |         config:
16 |           chunking_kwargs:
17 |             max_characters: 1000
18 |             new_after_n_chars: 200
19 |     vector_store:
20 |       config:
21 |         index_name: chunk-1000
22 | 


--------------------------------------------------------------------------------
/examples/evaluation/document-search/advanced/config/experiments/chunking-250.yaml:
--------------------------------------------------------------------------------
 1 | # @package _global_
 2 | 
 3 | task:
 4 |   name: chunking-250
 5 | 
 6 | pipeline:
 7 |   config:
 8 |     parser_router:
 9 |       txt:
10 |         config:
11 |           chunking_kwargs:
12 |             max_characters: 250
13 |             new_after_n_chars: 50
14 |       md:
15 |         config:
16 |           chunking_kwargs:
17 |             max_characters: 250
18 |             new_after_n_chars: 50
19 |     vector_store:
20 |       config:
21 |         index_name: chunk-250
22 | 


--------------------------------------------------------------------------------
/examples/evaluation/document-search/advanced/config/experiments/chunking-500.yaml:
--------------------------------------------------------------------------------
 1 | # @package _global_
 2 | 
 3 | task:
 4 |   name: chunking-500
 5 | 
 6 | pipeline:
 7 |   config:
 8 |     parser_router:
 9 |       txt:
10 |         config:
11 |           chunking_kwargs:
12 |             max_characters: 500
13 |             new_after_n_chars: 100
14 |       md:
15 |         config:
16 |           chunking_kwargs:
17 |             max_characters: 500
18 |             new_after_n_chars: 100
19 |     vector_store:
20 |       config:
21 |         index_name: chunk-500
22 | 


--------------------------------------------------------------------------------
/examples/evaluation/document-search/advanced/config/metrics/precision_recall_f1.yaml:
--------------------------------------------------------------------------------
1 | precision_recall_f1:
2 |   type: ragbits.evaluate.metrics.document_search:DocumentSearchPrecisionRecallF1
3 |   config:
4 |     matching_strategy:
5 |       type: RougeChunkMatch
6 |       config:
7 |         threshold: 0.5
8 | 


--------------------------------------------------------------------------------
/examples/evaluation/document-search/advanced/config/metrics/ranked_retrieval.yaml:
--------------------------------------------------------------------------------
1 | ranked_retrieval:
2 |   type: ragbits.evaluate.metrics.document_search:DocumentSearchRankedRetrievalMetrics
3 |   config:
4 |     matching_strategy:
5 |       type: RougeChunkMatch
6 |       config:
7 |         threshold: 0.5
8 | 


--------------------------------------------------------------------------------
/examples/evaluation/document-search/advanced/config/optimization.yaml:
--------------------------------------------------------------------------------
 1 | defaults:
 2 |   - dataloader@evaluator.evaluation.dataloader: hf
 3 |   - pipeline@evaluator.evaluation.pipeline: document_search_optimization
 4 |   - metrics@evaluator.evaluation.metrics:
 5 |     - precision_recall_f1
 6 |     - ranked_retrieval
 7 |   - _self_
 8 | 
 9 | optimizer:
10 |   direction: maximize
11 |   n_trials: 5
12 |   max_retries_for_trial: 1
13 | 
14 | neptune_callback: False
15 | 


--------------------------------------------------------------------------------
/examples/evaluation/document-search/advanced/config/pipeline/document_search.yaml:
--------------------------------------------------------------------------------
 1 | defaults:
 2 |   - vector_store@config.vector_store: chroma
 3 |   - rephraser@config.rephraser: noop
 4 |   - reranker@config.reranker: noop
 5 |   - parser_router@config.parser_router: unstructured
 6 |   - source@config.source: hf
 7 |   - _self_
 8 | 
 9 | type: ragbits.evaluate.pipelines.document_search:DocumentSearchPipeline
10 | 


--------------------------------------------------------------------------------
/examples/evaluation/document-search/advanced/config/pipeline/document_search_optimization.yaml:
--------------------------------------------------------------------------------
 1 | defaults:
 2 |   - vector_store@config.vector_store: chroma_optimization
 3 |   - rephraser@config.rephraser: noop
 4 |   - reranker@config.reranker: noop
 5 |   - parser_router@config.parser_router: unstructured_optimization
 6 |   - source@config.source: hf
 7 |   - _self_
 8 | 
 9 | type: ragbits.evaluate.pipelines.document_search:DocumentSearchPipeline
10 | 


--------------------------------------------------------------------------------
/examples/evaluation/document-search/advanced/config/pipeline/parser_router/unstructured.yaml:
--------------------------------------------------------------------------------
 1 | txt:
 2 |   type: ragbits.document_search.ingestion.parsers.unstructured:UnstructuredDocumentParser
 3 |   config:
 4 |     use_api: false
 5 |     partition_kwargs:
 6 |       strategy: hi_res
 7 |     chunking_kwargs:
 8 |       include_orig_elements: true
 9 |       max_characters: 1000
10 |       new_after_n_chars: 1000
11 |       overlap: 0
12 |       overlap_all: 0
13 | 
14 | md:
15 |   type: ragbits.document_search.ingestion.parsers.unstructured:UnstructuredDocumentParser
16 |   config:
17 |     use_api: false
18 |     partition_kwargs:
19 |       strategy: hi_res
20 |     chunking_kwargs:
21 |       include_orig_elements: true
22 |       max_characters: 1000
23 |       new_after_n_chars: 1000
24 |       overlap: 0
25 |       overlap_all: 0
26 | 


--------------------------------------------------------------------------------
/examples/evaluation/document-search/advanced/config/pipeline/parser_router/unstructured_optimization.yaml:
--------------------------------------------------------------------------------
 1 | txt:
 2 |   type: ragbits.document_search.ingestion.parsers.unstructured:UnstructuredDocumentParser
 3 |   config:
 4 |     use_api: false
 5 |     partition_kwargs:
 6 |       strategy: hi_res
 7 |     chunking_kwargs:
 8 |       include_orig_elements: true
 9 |       max_characters:
10 |         optimize: true
11 |         range:
12 |           - 500
13 |           - 1000
14 |       new_after_n_chars: 1000
15 |       overlap: 0
16 |       overlap_all: 0
17 | 
18 | md:
19 |   type: ragbits.document_search.ingestion.parsers.unstructured:UnstructuredDocumentParser
20 |   config:
21 |     use_api: false
22 |     partition_kwargs:
23 |       strategy: hi_res
24 |     chunking_kwargs:
25 |       include_orig_elements: true
26 |       max_characters: 1000
27 |       new_after_n_chars: 1000
28 |       overlap: 0
29 |       overlap_all: 0
30 | 


--------------------------------------------------------------------------------
/examples/evaluation/document-search/advanced/config/pipeline/rephraser/noop.yaml:
--------------------------------------------------------------------------------
1 | type: NoopQueryRephraser
2 | 


--------------------------------------------------------------------------------
/examples/evaluation/document-search/advanced/config/pipeline/reranker/noop.yaml:
--------------------------------------------------------------------------------
1 | type: NoopReranker
2 | 


--------------------------------------------------------------------------------
/examples/evaluation/document-search/advanced/config/pipeline/source/hf.yaml:
--------------------------------------------------------------------------------
1 | type: ragbits.core.sources.hf:HuggingFaceSource
2 | config:
3 |   path: "micpst/hf-docs"
4 |   split: "train[:5]"
5 | 


--------------------------------------------------------------------------------
/examples/evaluation/document-search/advanced/config/pipeline/vector_store/chroma.yaml:
--------------------------------------------------------------------------------
 1 | type: ragbits.core.vector_stores.chroma:ChromaVectorStore
 2 | config:
 3 |   client:
 4 |     type: EphemeralClient
 5 |   index_name: baseline
 6 |   distance_method: l2
 7 |   default_options:
 8 |     k: 3
 9 |     score_threshold: -1.2
10 |   embedder:
11 |     type: ragbits.core.embeddings.dense:LiteLLMEmbedder
12 |     config:
13 |       model_name: "text-embedding-3-small"
14 |       default_options:
15 |         dimensions: 768
16 |         encoding_format: float
17 | 


--------------------------------------------------------------------------------
/examples/evaluation/document-search/advanced/config/pipeline/vector_store/chroma_optimization.yaml:
--------------------------------------------------------------------------------
 1 | type: ragbits.core.vector_stores.chroma:ChromaVectorStore
 2 | config:
 3 |   client:
 4 |     type: EphemeralClient
 5 |   index_name: baseline
 6 |   distance_method: l2
 7 |   default_options:
 8 |     k: 3
 9 |     score_threshold: -1.2
10 |   embedder:
11 |     type: ragbits.core.embeddings.dense:LiteLLMEmbedder
12 |     config:
13 |       optimize: true
14 |       choices:
15 |         - model_name: "text-embedding-3-small"
16 |           default_options:
17 |             dimensions:
18 |               optimize: true
19 |               range:
20 |                 - 32
21 |                 - 512
22 |             encoding_format: float
23 |         - model_name: "text-embedding-3-large"
24 |           default_options:
25 |             dimensions:
26 |               optimize: true
27 |               range:
28 |                 - 512
29 |                 - 1024
30 |             encoding_format: float
31 | 


--------------------------------------------------------------------------------
/examples/evaluation/document-search/advanced/config/retrieval.yaml:
--------------------------------------------------------------------------------
 1 | defaults:
 2 |   - dataloader@evaluation.dataloader: hf
 3 |   - pipeline@evaluation.pipeline: document_search
 4 |   - metrics@evaluation.metrics:
 5 |     - precision_recall_f1
 6 |     - ranked_retrieval
 7 |   - _self_
 8 | 
 9 | evaluator:
10 |   batch_size: 5
11 |   num_retries: 1
12 | 
13 | logger:
14 |   local: True
15 |   neptune: False
16 | 


--------------------------------------------------------------------------------
/examples/evaluation/document-search/advanced/evaluate.py:
--------------------------------------------------------------------------------
 1 | # /// script
 2 | # requires-python = ">=3.10"
 3 | # dependencies = [
 4 | #     "ragbits-core[chroma,hf]",
 5 | #     "ragbits-document-search",
 6 | #     "ragbits-evaluate[relari]",
 7 | # ]
 8 | # ///
 9 | import asyncio
10 | import logging
11 | from typing import cast
12 | 
13 | import hydra
14 | from omegaconf import DictConfig, OmegaConf
15 | 
16 | from ragbits.evaluate.evaluator import Evaluator
17 | from ragbits.evaluate.utils import log_evaluation_to_file, log_evaluation_to_neptune
18 | 
19 | logging.getLogger("LiteLLM").setLevel(logging.ERROR)
20 | logging.getLogger("httpx").setLevel(logging.ERROR)
21 | 
22 | 
23 | async def evaluate(config: DictConfig) -> None:
24 |     """
25 |     Document search evaluation runner.
26 | 
27 |     Args:
28 |         config: Hydra configuration.
29 |     """
30 |     print("Starting evaluation...")
31 | 
32 |     evaluator_config = cast(dict, OmegaConf.to_container(config))
33 |     results = await Evaluator.run_from_config(evaluator_config)
34 | 
35 |     if config.logger.local:
36 |         output_dir = log_evaluation_to_file(results)
37 |         print(f"Evaluation results saved under directory: {output_dir}")
38 | 
39 |     if config.logger.neptune:
40 |         log_evaluation_to_neptune(results, config)
41 |         print("Evaluation results uploaded to Neptune")
42 | 
43 | 
44 | @hydra.main(config_path="config", config_name="retrieval", version_base="3.2")
45 | def main(config: DictConfig) -> None:
46 |     """
47 |     Hydra entry point: run the asynchronous `evaluate` coroutine to completion.
48 | 
49 |     Args:
50 |         config: Hydra configuration, forwarded unchanged to `evaluate`.
51 |     """
52 |     asyncio.run(evaluate(config))
53 | 
54 | 
55 | if __name__ == "__main__":
56 |     main()  # called without arguments: the @hydra.main decorator supplies `config`
57 | 


--------------------------------------------------------------------------------
/examples/evaluation/document-search/advanced/optimize.py:
--------------------------------------------------------------------------------
 1 | # /// script
 2 | # requires-python = ">=3.10"
 3 | # dependencies = [
 4 | #     "ragbits-core[chroma,hf]",
 5 | #     "ragbits-document-search",
 6 | #     "ragbits-evaluate[relari]",
 7 | # ]
 8 | # ///
 9 | import logging
10 | from typing import cast
11 | 
12 | import hydra
13 | from omegaconf import DictConfig, OmegaConf
14 | 
15 | from ragbits.evaluate.optimizer import Optimizer
16 | from ragbits.evaluate.utils import log_optimization_to_file
17 | 
18 | logging.getLogger("LiteLLM").setLevel(logging.ERROR)
19 | logging.getLogger("httpx").setLevel(logging.ERROR)
20 | 
21 | 
22 | @hydra.main(config_path="config", config_name="optimization", version_base="3.2")
23 | def main(config: DictConfig) -> None:
24 |     """
25 |     Runs the optimization process.
26 | 
27 |     Args:
28 |         config: Hydra configuration.
29 |     """
30 |     print("Starting optimization...")
31 | 
32 |     optimizer_config = cast(dict, OmegaConf.to_container(config))
33 |     configs_with_scores = Optimizer.run_from_config(optimizer_config)
34 | 
35 |     output_dir = log_optimization_to_file(configs_with_scores)
36 |     print(f"Optimization results saved under directory: {output_dir}")
37 | 
38 | 
39 | if __name__ == "__main__":
40 |     main()  # called without arguments: the @hydra.main decorator supplies `config`
41 | 


--------------------------------------------------------------------------------
/examples/guardrails/openai_moderation.py:
--------------------------------------------------------------------------------
 1 | # /// script
 2 | # requires-python = ">=3.10"
 3 | # dependencies = [
 4 | #     "ragbits-core",
 5 | #     "openai",
 6 | # ]
 7 | # ///
 8 | import asyncio
 9 | from argparse import ArgumentParser
10 | 
11 | from ragbits.guardrails.base import GuardrailManager
12 | from ragbits.guardrails.openai_moderation import OpenAIModerationGuardrail
13 | 
14 | 
15 | async def guardrail_run(message: str) -> None:
16 |     """
17 |     Example of using the OpenAIModerationGuardrail. Requires the OPENAI_API_KEY environment variable to be set.
18 |     """
19 |     manager = GuardrailManager([OpenAIModerationGuardrail()])
20 |     res = await manager.verify(message)
21 |     print(res)
22 | 
23 | 
24 | if __name__ == "__main__":
25 |     args = ArgumentParser()
26 |     args.add_argument("message", nargs="+", type=str, help="Message to validate")
27 |     parsed_args = args.parse_args()
28 | 
29 |     asyncio.run(guardrail_run("".join(parsed_args.message)))
30 | 


--------------------------------------------------------------------------------
/mkdocs_hooks.py:
--------------------------------------------------------------------------------
 1 | from typing import Literal
 2 | 
 3 | from ragbits import cli
 4 | 
 5 | 
 6 | def on_startup(command: Literal["build", "gh-deploy", "serve"], dirty: bool) -> None:
 7 |     """
 8 |     Hook that runs during mkdocs startup; it only calls `cli._init_for_mkdocs()`.
 9 | 
10 |     Args:
11 |         command: The mkdocs command that is being run (not used by this hook).
12 |         dirty: Whether the --dirty flag was passed (not used by this hook).
13 |     """
14 |     cli._init_for_mkdocs()
15 | 


--------------------------------------------------------------------------------
/packages/ragbits-agents/CHANGELOG.md:
--------------------------------------------------------------------------------
 1 | # CHANGELOG
 2 | 
 3 | ## Unreleased
 4 | 
 5 | ## 1.0.0 (2025-06-04)
 6 | 
 7 | ### Changed
 8 | 
 9 | - ragbits-core updated to version v1.0.0
10 | 
11 | ## 0.20.1 (2025-06-04)
12 | 
13 | ### Changed
14 | 
15 | - ragbits-core updated to version v0.20.1
16 | 
17 | ## 0.20.0 (2025-06-03)
18 | 
19 | ### Changed
20 | 
21 | - ragbits-core updated to version v0.20.0
22 | 
23 | ## 0.19.1 (2025-05-27)
24 | 
25 | ### Changed
26 | 
27 | - ragbits-core updated to version v0.19.1
28 | 
29 | ## 0.19.0 (2025-05-27)
30 | 
31 | ### Changed
32 | 
33 | - ragbits-core updated to version v0.19.0
34 | 
35 | - Add Agent interface (#569)
36 | - Initial release of the package (#569)
37 | 


--------------------------------------------------------------------------------
/packages/ragbits-agents/README.md:
--------------------------------------------------------------------------------
 1 | # Ragbits Agents
 2 | 
 3 | Ragbits Agents contains primitives for building agentic systems.
 4 | 
 5 | The package is in an experimental phase, so the API may change in the future.
 6 | 
 7 | ## Installation
 8 | 
 9 | To install the Ragbits Agents package, run:
10 | 
11 | ```sh
12 | pip install ragbits-agents
13 | ```
14 | 
15 | <!--
16 | TODO: Add a minimalistic example inspired by the Quickstart chapter on Ragbits Evaluate once it is ready.
17 | -->
18 | 
19 | <!--
20 | TODO:
21 | * Add link to the Quickstart chapter on Ragbits Evaluate once it is ready.
22 | * Add link to API Reference once classes from the Evaluate package are added to the API Reference.
23 | -->
24 | 


--------------------------------------------------------------------------------
/packages/ragbits-agents/pyproject.toml:
--------------------------------------------------------------------------------
 1 | [project]
 2 | name = "ragbits-agents"
 3 | version = "1.0.0"
 4 | description = "Building blocks for rapid development of GenAI applications"
 5 | readme = "README.md"
 6 | requires-python = ">=3.10"
 7 | license = "MIT"
 8 | authors = [
 9 |     { name = "deepsense.ai", email = "ragbits@deepsense.ai"}
10 | ]
11 | keywords = [
12 |     "Retrieval Augmented Generation",
13 |     "RAG",
14 |     "Large Language Models",
15 |     "LLMs",
16 |     "Generative AI",
17 |     "GenAI",
18 |     "Agents",
19 | ]
20 | classifiers = [
21 |     "Development Status :: 4 - Beta",
22 |     "Environment :: Console",
23 |     "Intended Audience :: Science/Research",
24 |     "License :: OSI Approved :: MIT License",
25 |     "Natural Language :: English",
26 |     "Operating System :: OS Independent",
27 |     "Programming Language :: Python :: 3.10",
28 |     "Programming Language :: Python :: 3.11",
29 |     "Programming Language :: Python :: 3.12",
30 |     "Programming Language :: Python :: 3.13",
31 |     "Topic :: Scientific/Engineering :: Artificial Intelligence",
32 |     "Topic :: Software Development :: Libraries :: Python Modules",
33 | ]
34 | dependencies = ["ragbits-core==1.0.0"]
35 | 
36 | [project.urls]
37 | "Homepage" = "https://github.com/deepsense-ai/ragbits"
38 | "Bug Reports" = "https://github.com/deepsense-ai/ragbits/issues"
39 | "Documentation" = "https://ragbits.deepsense.ai/"
40 | "Source" = "https://github.com/deepsense-ai/ragbits"
41 | 
42 | [build-system]
43 | requires = ["hatchling"]
44 | build-backend = "hatchling.build"
45 | 
46 | [tool.hatch.metadata]
47 | allow-direct-references = true
48 | 
49 | [tool.hatch.build.targets.wheel]
50 | packages = ["src/ragbits"]
51 | 


--------------------------------------------------------------------------------
/packages/ragbits-agents/src/ragbits/agents/__init__.py:
--------------------------------------------------------------------------------
1 | from ragbits.agents._main import Agent, AgentOptions
2 | from ragbits.agents.types import QuestionAnswerAgent, QuestionAnswerPromptInput, QuestionAnswerPromptOutput
3 | 
4 | __all__ = ["Agent", "AgentOptions", "QuestionAnswerAgent", "QuestionAnswerPromptInput", "QuestionAnswerPromptOutput"]
5 | 


--------------------------------------------------------------------------------
/packages/ragbits-agents/src/ragbits/agents/py.typed:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/deepsense-ai/ragbits/c37428621adc5ab30528f41ccd22d655f421c2e8/packages/ragbits-agents/src/ragbits/agents/py.typed


--------------------------------------------------------------------------------
/packages/ragbits-agents/src/ragbits/agents/types.py:
--------------------------------------------------------------------------------
 1 | from typing import Any, TypeVar
 2 | 
 3 | from pydantic import BaseModel
 4 | 
 5 | from ragbits.agents._main import Agent
 6 | from ragbits.core.llms.base import LLMClientOptionsT
 7 | 
 8 | QuestionAnswerPromptInputT = TypeVar("QuestionAnswerPromptInputT", bound="QuestionAnswerPromptInput")
 9 | QuestionAnswerPromptOutputT = TypeVar("QuestionAnswerPromptOutputT", bound="QuestionAnswerPromptOutput | str")
10 | # Agent alias that stays generic over LLMClientOptionsT and the two question-answer TypeVars
11 | QuestionAnswerAgent = Agent[LLMClientOptionsT, QuestionAnswerPromptInputT, QuestionAnswerPromptOutputT]
12 | 
13 | 
14 | class QuestionAnswerPromptInput(BaseModel):
15 |     """
16 |     Input for the question answer agent.
17 |     """
18 | 
19 |     question: str
20 |     """The question to answer."""
21 |     context: Any | None = None
22 |     """The context to answer the question."""
23 | 
24 | 
25 | class QuestionAnswerPromptOutput(BaseModel):
26 |     """
27 |     Output for the question answer agent.
28 |     """
29 | 
30 |     answer: str
31 |     """The answer to the question."""
32 | 


--------------------------------------------------------------------------------
/packages/ragbits-chat/README.md:
--------------------------------------------------------------------------------
 1 | # Ragbits Chat
 2 | 
 3 | ragbits-chat is a Python package that provides tools for building conversational AI applications.
 4 | 
 5 | The package includes:
 6 | - Framework for building chat experiences
 7 | - History management for conversation tracking
 8 | - UI components for building chat interfaces
 9 | 
10 | For detailed information, please refer to the [API documentation](https://ragbits.deepsense.ai/how-to/chatbots/api/).


--------------------------------------------------------------------------------
/packages/ragbits-chat/pyproject.toml:
--------------------------------------------------------------------------------
 1 | [project]
 2 | name = "ragbits-chat"
 3 | version = "1.0.0"
 4 | description = "Building blocks for rapid development of GenAI applications"
 5 | readme = "README.md"
 6 | requires-python = ">=3.10"
 7 | license = "MIT"
 8 | authors = [
 9 |     { name = "deepsense.ai", email = "ragbits@deepsense.ai"}
10 | ]
11 | keywords = [
12 |     "Retrieval Augmented Generation",
13 |     "RAG",
14 |     "Large Language Models",
15 |     "LLMs",
16 |     "Generative AI",
17 |     "GenAI",
18 |     "Prompt Management"
19 | ]
20 | classifiers = [
21 |     "Development Status :: 4 - Beta",
22 |     "Environment :: Console",
23 |     "Intended Audience :: Science/Research",
24 |     "License :: OSI Approved :: MIT License",
25 |     "Natural Language :: English",
26 |     "Operating System :: OS Independent",
27 |     "Programming Language :: Python :: 3.10",
28 |     "Programming Language :: Python :: 3.11",
29 |     "Programming Language :: Python :: 3.12",
30 |     "Programming Language :: Python :: 3.13",
31 |     "Topic :: Scientific/Engineering :: Artificial Intelligence",
32 |     "Topic :: Software Development :: Libraries :: Python Modules",
33 | ]
34 | dependencies = ["fastapi>=0.115.0,<1.0.0", "uvicorn>=0.31.0,<1.0.0", "ragbits-core==1.0.0"]
35 | 
36 | [project.urls]
37 | "Homepage" = "https://github.com/deepsense-ai/ragbits"
38 | "Bug Reports" = "https://github.com/deepsense-ai/ragbits/issues"
39 | "Documentation" = "https://ragbits.deepsense.ai/"
40 | "Source" = "https://github.com/deepsense-ai/ragbits"
41 | 
42 | [project.optional-dependencies]
43 | sql = [
44 |     "sqlalchemy>=2.0.39,<3.0.0",
45 | ]
46 | 
47 | [tool.uv]
48 | dev-dependencies = [
49 |     "pre-commit~=3.8.0",
50 |     "pytest~=8.3.3",
51 |     "pytest-cov~=5.0.0",
52 |     "pytest-asyncio~=0.24.0",
53 |     "pip-licenses>=4.0.0,<5.0.0"
54 | ]
55 | 
56 | [build-system]
57 | requires = ["hatchling"]
58 | build-backend = "hatchling.build"
59 | 
60 | [tool.hatch.metadata]
61 | allow-direct-references = true
62 | 
63 | [tool.hatch.build.targets.wheel]
64 | packages = ["src/ragbits"]
65 | 
66 | [tool.pytest.ini_options]
67 | asyncio_mode = "auto"
68 | 


--------------------------------------------------------------------------------
/packages/ragbits-chat/src/ragbits/chat/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/deepsense-ai/ragbits/c37428621adc5ab30528f41ccd22d655f421c2e8/packages/ragbits-chat/src/ragbits/chat/__init__.py


--------------------------------------------------------------------------------
/packages/ragbits-chat/src/ragbits/chat/cli.py:
--------------------------------------------------------------------------------
 1 | import typer
 2 | 
 3 | from ragbits.chat.api import RagbitsAPI
 4 | 
 5 | ds_app = typer.Typer(no_args_is_help=True)
 6 | 
 7 | 
 8 | def register(app: typer.Typer) -> None:
 9 |     """
10 |     Register the CLI commands for the package.
11 | 
12 |     Args:
13 |         app: The Typer object to register the commands with.
14 |     """
15 |     app.add_typer(ds_app, name="api", help="Commands for running API service")
16 | 
17 | 
18 | @ds_app.command()
19 | def run(
20 |     chat_interface: str = typer.Argument(..., help="Path to a module with chat function"),
21 |     host: str = typer.Option("127.0.0.1", "--host", help="Host to bind the API server to"),
22 |     port: int = typer.Option(8000, "--port", help="Port to bind the API server to"),
23 |     cors_origins: list[str] = typer.Option(  # noqa: B008
24 |         None,
25 |         "--cors-origin",
26 |         help="Allowed CORS origins. Can be specified multiple times.",
27 |     ),
28 |     ui_build_dir: str = typer.Option(
29 |         None,
30 |         "--ui-build-dir",
31 |         help="Path to a custom UI build directory. If not specified, uses the default package UI.",
32 |     ),
33 | ) -> None:
34 |     """
35 |     Run API service with UI demo
36 |     """
37 |     api = RagbitsAPI(
38 |         chat_interface=chat_interface,
39 |         cors_origins=cors_origins,
40 |         ui_build_dir=ui_build_dir,
41 |     )
42 |     api.run(host=host, port=port)
43 | 


--------------------------------------------------------------------------------
/packages/ragbits-chat/src/ragbits/chat/history/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/deepsense-ai/ragbits/c37428621adc5ab30528f41ccd22d655f421c2e8/packages/ragbits-chat/src/ragbits/chat/history/__init__.py


--------------------------------------------------------------------------------
/packages/ragbits-chat/src/ragbits/chat/history/compressors/__init__.py:
--------------------------------------------------------------------------------
1 | from .base import ConversationHistoryCompressor
2 | from .llm import StandaloneMessageCompressor
3 | 
4 | __all__ = ["ConversationHistoryCompressor", "StandaloneMessageCompressor"]
5 | 


--------------------------------------------------------------------------------
/packages/ragbits-chat/src/ragbits/chat/history/compressors/base.py:
--------------------------------------------------------------------------------
 1 | from abc import ABC, abstractmethod
 2 | from typing import ClassVar
 3 | 
 4 | from ragbits.chat.history import compressors
 5 | from ragbits.core.prompt.base import ChatFormat
 6 | from ragbits.core.utils.config_handling import WithConstructionConfig
 7 | 
 8 | 
 9 | class ConversationHistoryCompressor(WithConstructionConfig, ABC):
10 |     """
11 |     An abstract class for conversation history compressors,
12 |     i.e. a class that takes the entire conversation history
13 |     and returns a single string representation of it.
14 | 
15 |     The exact logic of what the string should include and represent
16 |     depends on the specific implementation.
17 | 
18 |     Usually used to provide the LLM with additional context from the conversation history.
19 |     """
20 | 
21 |     default_module: ClassVar = compressors
22 |     configuration_key: ClassVar = "history_compressor"
23 | 
24 |     @abstractmethod
25 |     async def compress(self, conversation: ChatFormat) -> str:
26 |         """
27 |         Compresses the conversation history to a single string.
28 | 
29 |         Args:
30 |             conversation: List of dicts with "role" and "content" keys, representing the chat history so far.
31 |         """
32 | 


--------------------------------------------------------------------------------
/packages/ragbits-chat/src/ragbits/chat/interface/__init__.py:
--------------------------------------------------------------------------------
1 | from ._interface import ChatInterface
2 | 
3 | __all__ = ["ChatInterface"]
4 | 


--------------------------------------------------------------------------------
/packages/ragbits-chat/src/ragbits/chat/interface/forms.py:
--------------------------------------------------------------------------------
 1 | from pydantic import BaseModel, Field
 2 | 
 3 | 
 4 | class FormField(BaseModel):
 5 |     """Field in a feedback form."""
 6 | 
 7 |     name: str = Field(description="Name of the field")
 8 |     type: str = Field(description="Type of the field (text, select, etc.)")
 9 |     required: bool = Field(description="Whether the field is required")
10 |     label: str = Field(description="Display label for the field")
11 |     options: list[str] | None = Field(None, description="Options for select fields")
12 | 
13 | 
14 | class FeedbackForm(BaseModel):
15 |     """Model for feedback form structure."""
16 | 
17 |     title: str = Field(description="Title of the form")
18 |     fields: list[FormField] = Field(description="Fields in the form")
19 | 
20 | 
21 | class FeedbackConfig(BaseModel):
22 |     """Configuration for feedback collection."""
23 | 
24 |     like_enabled: bool = Field(default=False, description="Whether like feedback is enabled")
25 |     like_form: FeedbackForm | None = Field(default=None, description="The form to use for like feedback")
26 | 
27 |     dislike_enabled: bool = Field(default=False, description="Whether dislike feedback is enabled")
28 |     dislike_form: FeedbackForm | None = Field(default=None, description="The form to use for dislike feedback")
29 | 


--------------------------------------------------------------------------------
/packages/ragbits-chat/src/ragbits/chat/persistence/__init__.py:
--------------------------------------------------------------------------------
1 | from ragbits.chat.persistence.base import HistoryPersistenceStrategy
2 | 
3 | __all__ = ["HistoryPersistenceStrategy"]
4 | 


--------------------------------------------------------------------------------
/packages/ragbits-chat/src/ragbits/chat/persistence/base.py:
--------------------------------------------------------------------------------
 1 | from abc import ABC, abstractmethod
 2 | 
 3 | from ragbits.chat.interface.types import ChatContext, ChatResponse
 4 | 
 5 | 
 6 | class HistoryPersistenceStrategy(ABC):
 7 |     """Base class for history persistence strategies."""
 8 | 
 9 |     @abstractmethod
10 |     async def save_interaction(
11 |         self,
12 |         message: str,
13 |         response: str,
14 |         extra_responses: list[ChatResponse],
15 |         context: ChatContext,
16 |         timestamp: float,
17 |     ) -> None:
18 |         """
19 |         Save a chat interaction including the input message and responses.
20 | 
21 |         Args:
22 |             message: The user's input message
23 |             response: The main response text
24 |             extra_responses: List of additional responses (references, state updates, etc.)
25 |             context: The chat context (a ChatContext instance) associated with the interaction
26 |             timestamp: Unix timestamp of when the interaction occurred
27 |         """
28 |         pass
29 | 


--------------------------------------------------------------------------------
/packages/ragbits-chat/src/ragbits/chat/persistence/file.py:
--------------------------------------------------------------------------------
 1 | import json
 2 | from pathlib import Path
 3 | 
 4 | from ..interface.types import ChatContext, ChatResponse
 5 | from .base import HistoryPersistenceStrategy
 6 | 
 7 | 
 8 | class FileHistoryPersistence(HistoryPersistenceStrategy):
 9 |     """Strategy that appends chat history to one JSONL file per conversation in a directory."""
10 | 
11 |     def __init__(self, base_path: str | Path):
12 |         self.base_path = Path(base_path)
13 | 
14 |     def _get_file_path(self, conversation_id: str) -> Path:
15 |         """Get the conversation's file path, derived from the conversation ID only."""
16 |         return self.base_path / f"{conversation_id}.jsonl"
17 | 
18 |     async def save_interaction(
19 |         self,
20 |         message: str,
21 |         response: str,
22 |         extra_responses: list[ChatResponse],
23 |         context: ChatContext,
24 |         timestamp: float,
25 |     ) -> None:
26 |         """
27 |         Append a chat interaction as a single JSON line to the conversation's file.
28 | 
29 |         Args:
30 |             message: The user's input message
31 |             response: The main response text
32 |             extra_responses: List of additional responses (references, state updates, etc.)
33 |             context: The chat context for the interaction; its conversation_id selects the target file
34 |             timestamp: Unix timestamp of when the interaction occurred
35 |         """
36 |         # Create interaction record
37 |         interaction = {
38 |             "message": message,
39 |             "context": context.model_dump(mode="json"),
40 |             "response": response,
41 |             "extra_responses": [r.model_dump(mode="json") for r in extra_responses],
42 |             "timestamp": timestamp,
43 |         }
44 | 
45 |         # Get current file path and ensure parent directory exists
46 |         file_path = self._get_file_path(context.conversation_id or "no_conversation_id")
47 |         file_path.parent.mkdir(parents=True, exist_ok=True)
48 | 
49 |         # Append to file
50 |         with open(file_path, "a") as f:
51 |             f.write(json.dumps(interaction) + "\n")
52 | 


--------------------------------------------------------------------------------
/packages/ragbits-chat/src/ragbits/chat/py.typed:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/deepsense-ai/ragbits/c37428621adc5ab30528f41ccd22d655f421c2e8/packages/ragbits-chat/src/ragbits/chat/py.typed


--------------------------------------------------------------------------------
/packages/ragbits-chat/src/ragbits/chat/ui-build/assets/ExamplePluginComponent-CkxrO9jk.js:
--------------------------------------------------------------------------------
1 | import{H as e}from"./index-ByuhG0Hl.js";const i=()=>e.jsxs("div",{children:[e.jsx("h1",{children:"Example Plugin"}),e.jsx("p",{children:"This is an example plugin."})]});export{i as default};
2 | 


--------------------------------------------------------------------------------
/packages/ragbits-chat/src/ragbits/chat/ui-build/assets/ragbits-9U4hpuUb.svg:
--------------------------------------------------------------------------------
1 | <svg xmlns="http://www.w3.org/2000/svg">
2 |     <text y="24" font-size="24">🐰</text>
3 | </svg>


--------------------------------------------------------------------------------
/packages/ragbits-chat/src/ragbits/chat/ui-build/index.html:
--------------------------------------------------------------------------------
 1 | <!doctype html>
 2 | <html lang="en" class="h-full light">
 3 |   <head>
 4 |     <meta charset="UTF-8" />
 5 |     <link rel="icon" type="image/svg+xml" href="/assets/ragbits-9U4hpuUb.svg" />
 6 |     <meta name="viewport" content="width=device-width, initial-scale=1.0" />
 7 |     <title>Ragbits</title>
 8 |     <script type="module" crossorigin src="/assets/index-ByuhG0Hl.js"></script>
 9 |     <link rel="stylesheet" crossorigin href="/assets/index-B86z3tbJ.css">
10 |   </head>
11 | 
12 |   <body class="h-full">
13 |     <div id="root" class="h-full"></div>
14 |   </body>
15 | </html>
16 | 


--------------------------------------------------------------------------------
/packages/ragbits-cli/pyproject.toml:
--------------------------------------------------------------------------------
 1 | [project]
 2 | name = "ragbits-cli"
 3 | version = "1.0.0"
 4 | description = "A CLI application for ragbits - building blocks for rapid development of GenAI applications"
 5 | readme = "README.md"
 6 | requires-python = ">=3.10"
 7 | license = "MIT"
 8 | authors = [
 9 |     { name = "deepsense.ai", email = "ragbits@deepsense.ai"}
10 | ]
11 | keywords = [
12 |     "Retrieval Augmented Generation",
13 |     "RAG",
14 |     "Large Language Models",
15 |     "LLMs",
16 |     "Generative AI",
17 |     "GenAI",
18 |     "Prompt Management"
19 | ]
20 | classifiers = [
21 |     "Development Status :: 4 - Beta",
22 |     "Environment :: Console",
23 |     "Intended Audience :: Science/Research",
24 |     "License :: OSI Approved :: MIT License",
25 |     "Natural Language :: English",
26 |     "Operating System :: OS Independent",
27 |     "Programming Language :: Python :: 3.10",
28 |     "Programming Language :: Python :: 3.11",
29 |     "Programming Language :: Python :: 3.12",
30 |     "Programming Language :: Python :: 3.13",
31 |     "Topic :: Scientific/Engineering :: Artificial Intelligence",
32 |     "Topic :: Software Development :: Libraries :: Python Modules",
33 | ]
34 | dependencies = ["typer>=0.12.5,<1.0.0", "ragbits-core==1.0.0"]
35 | 
36 | [project.urls]
37 | "Homepage" = "https://github.com/deepsense-ai/ragbits"
38 | "Bug Reports" = "https://github.com/deepsense-ai/ragbits/issues"
39 | "Documentation" = "https://ragbits.deepsense.ai/"
40 | "Source" = "https://github.com/deepsense-ai/ragbits"
41 | 
42 | [project.scripts]
43 | ragbits = "ragbits.cli:main"
44 | rbts = "ragbits.cli:main"
45 | 
46 | [build-system]
47 | requires = ["hatchling"]
48 | build-backend = "hatchling.build"
49 | 
50 | [tool.hatch.metadata]
51 | allow-direct-references = true
52 | 
53 | [tool.hatch.build.targets.wheel]
54 | packages = ["src/ragbits"]
55 | 
56 | [tool.pytest.ini_options]
57 | asyncio_mode = "auto"
58 | 


--------------------------------------------------------------------------------
/packages/ragbits-cli/src/ragbits/cli/py.typed:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/deepsense-ai/ragbits/c37428621adc5ab30528f41ccd22d655f421c2e8/packages/ragbits-cli/src/ragbits/cli/py.typed


--------------------------------------------------------------------------------
/packages/ragbits-core/README.md:
--------------------------------------------------------------------------------
 1 | # Ragbits Core
 2 | 
 3 | Ragbits Core is a collection of utilities and tools that are used across all Ragbits packages. It includes fundamentals, such as utilities for logging, configuration, prompt creation, classes for communicating with LLMs, embedders, vector stores, and more.
 4 | 
 5 | ## Installation
 6 | 
 7 | ```sh
 8 | pip install ragbits-core
 9 | ```
10 | 
11 | ## Quick Start
12 | 
13 | ```python
14 | import asyncio
15 | from pydantic import BaseModel
16 | from ragbits.core.prompt import Prompt
17 | from ragbits.core.llms.litellm import LiteLLM
18 | 
19 | class Dog(BaseModel):
20 |     breed: str
21 |     age: int
22 |     temperament: str
23 | 
24 | class DogNamePrompt(Prompt[Dog, str]):
25 |     system_prompt = """
26 |     You are a dog name generator. You come up with funny names for dogs given the dog details.
27 |     """
28 | 
29 |     user_prompt = """
30 |     The dog is a {breed} breed, {age} years old, and has a {temperament} temperament.
31 |     """
32 | 
33 | async def main() -> None:
34 |     llm = LiteLLM("gpt-4o")
35 |     dog = Dog(breed="Golden Retriever", age=3, temperament="friendly")
36 |     prompt = DogNamePrompt(dog)
37 |     response = await llm.generate(prompt)
38 |     print(response)
39 | 
40 | 
41 | if __name__ == "__main__":
42 |     asyncio.run(main())
43 | ```
44 | 
45 | ## Documentation
46 | * [Quickstart 1: Working with Prompts and LLMs](https://ragbits.deepsense.ai/quickstart/quickstart1_prompts/)
47 | * [How-To Guides - Core](https://ragbits.deepsense.ai/how-to/prompts/use_prompting/)
48 | * [API Reference - Core](https://ragbits.deepsense.ai/api_reference/core/prompt/)
49 | 


--------------------------------------------------------------------------------
/packages/ragbits-core/src/ragbits/core/__init__.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | 
 3 | import typer
 4 | 
 5 | from ragbits.core.audit.traces import set_trace_handlers
 6 | from ragbits.core.config import import_modules_from_config
 7 | 
 8 | if os.getenv("RAGBITS_VERBOSE", "0") == "1":
 9 |     typer.echo('Verbose mode is enabled with environment variable "RAGBITS_VERBOSE".')
10 |     set_trace_handlers("cli")
11 | 
12 | import_modules_from_config()
13 | 


--------------------------------------------------------------------------------
/packages/ragbits-core/src/ragbits/core/audit/__init__.py:
--------------------------------------------------------------------------------
 1 | from ragbits.core.audit.metrics import clear_metric_handlers, create_histogram, record, set_metric_handlers
 2 | from ragbits.core.audit.metrics.base import HistogramMetric, MetricHandler
 3 | from ragbits.core.audit.traces import clear_trace_handlers, set_trace_handlers, trace, traceable
 4 | from ragbits.core.audit.traces.base import TraceHandler
 5 | 
 6 | __all__ = [
 7 |     "HistogramMetric",
 8 |     "MetricHandler",
 9 |     "TraceHandler",
10 |     "clear_metric_handlers",
11 |     "clear_trace_handlers",
12 |     "create_histogram",
13 |     "record",
14 |     "set_metric_handlers",
15 |     "set_trace_handlers",
16 |     "trace",
17 |     "traceable",
18 | ]
19 | 


--------------------------------------------------------------------------------
/packages/ragbits-core/src/ragbits/core/audit/metrics/otel.py:
--------------------------------------------------------------------------------
 1 | from opentelemetry.metrics import Histogram, MeterProvider, get_meter
 2 | 
 3 | from ragbits.core.audit.metrics.base import MetricHandler
 4 | 
 5 | 
 6 | class OtelMetricHandler(MetricHandler[Histogram]):
 7 |     """
 8 |     OpenTelemetry metric handler.
 9 |     """
10 | 
11 |     def __init__(self, provider: MeterProvider | None = None, metric_prefix: str = "ragbits") -> None:
12 |         """
13 |         Initialize the OtelMetricHandler instance.
14 | 
15 |         Args:
16 |             provider: The meter provider to use.
17 |             metric_prefix: Prefix for all metric names.
18 |         """
19 |         super().__init__(metric_prefix=metric_prefix)
20 |         self._meter = get_meter(name=__name__, meter_provider=provider)
21 | 
22 |     def create_histogram(self, name: str, unit: str = "", description: str = "") -> Histogram:
23 |         """
24 |         Create a histogram metric.
25 | 
26 |         Args:
27 |             name: The histogram metric name.
28 |             unit: The histogram metric unit.
29 |             description: The histogram metric description.
30 | 
31 |         Returns:
32 |             The initialized histogram metric.
33 |         """
34 |         return self._meter.create_histogram(name=name, unit=unit, description=description)
35 | 
36 |     def record(self, metric: Histogram, value: int | float, attributes: dict | None = None) -> None:  # noqa: PLR6301
37 |         """
38 |         Record the value for a specified histogram metric.
39 | 
40 |         Args:
41 |             metric: The histogram metric to record.
42 |             value: The value to record for the metric.
43 |             attributes: Additional metadata for the metric.
44 |         """
45 |         metric.record(value, attributes=attributes)
46 | 


--------------------------------------------------------------------------------
/packages/ragbits-core/src/ragbits/core/cli.py:
--------------------------------------------------------------------------------
 1 | import typer
 2 | 
 3 | from ragbits.core.prompt._cli import prompts_app
 4 | from ragbits.core.vector_stores._cli import vector_stores_app
 5 | 
 6 | 
 7 | def register(app: typer.Typer) -> None:
 8 |     """
 9 |     Register the CLI commands for the package.
10 | 
11 |     Args:
12 |         app: The Typer object to register the commands with.
13 |     """
14 |     app.add_typer(prompts_app, name="prompts", help="Commands for managing prompts")
15 |     app.add_typer(vector_stores_app, name="vector-store", help="Commands for managing vector stores")
16 | 


--------------------------------------------------------------------------------
/packages/ragbits-core/src/ragbits/core/embeddings/__init__.py:
--------------------------------------------------------------------------------
 1 | from .base import Embedder, EmbedderOptionsT, SparseVector, VectorSize
 2 | from .dense import DenseEmbedder, LiteLLMEmbedder, NoopEmbedder
 3 | from .sparse import BagOfTokens, BagOfTokensOptions, SparseEmbedder, SparseEmbedderOptionsT
 4 | 
 5 | __all__ = [
 6 |     "BagOfTokens",
 7 |     "BagOfTokensOptions",
 8 |     "DenseEmbedder",
 9 |     "Embedder",
10 |     "EmbedderOptionsT",
11 |     "LiteLLMEmbedder",
12 |     "NoopEmbedder",
13 |     "SparseEmbedder",
14 |     "SparseEmbedderOptionsT",
15 |     "SparseVector",
16 |     "VectorSize",
17 | ]
18 | 


--------------------------------------------------------------------------------
/packages/ragbits-core/src/ragbits/core/embeddings/dense/__init__.py:
--------------------------------------------------------------------------------
 1 | from .base import DenseEmbedder
 2 | from .litellm import LiteLLMEmbedder, LiteLLMEmbedderOptions
 3 | from .noop import NoopEmbedder
 4 | 
 5 | __all__ = [
 6 |     "DenseEmbedder",
 7 |     "LiteLLMEmbedder",
 8 |     "LiteLLMEmbedderOptions",
 9 |     "NoopEmbedder",
10 | ]
11 | 


--------------------------------------------------------------------------------
/packages/ragbits-core/src/ragbits/core/embeddings/dense/base.py:
--------------------------------------------------------------------------------
 1 | from abc import ABC, abstractmethod
 2 | 
 3 | from ragbits.core.embeddings.base import Embedder, EmbedderOptionsT, VectorSize
 4 | 
 5 | 
 6 | class DenseEmbedder(Embedder[EmbedderOptionsT], ABC):  # noqa: F821
 7 |     """
 8 |     Abstract client for communication with dense embedding models.
 9 |     """
10 | 
11 |     @abstractmethod
12 |     async def embed_text(self, data: list[str], options: EmbedderOptionsT | None = None) -> list[list[float]]:
13 |         """
14 |         Creates embeddings for the given strings.
15 | 
16 |         Args:
17 |             data: List of strings to get embeddings for.
18 |             options: Additional settings used by the Embedder model.
19 | 
20 |         Returns:
21 |             List of embeddings for the given strings.
22 |         """
23 | 
24 |     @abstractmethod
25 |     async def get_vector_size(self) -> VectorSize:
26 |         """
27 |         Get information about the dense vector size/dimensions returned by this embedder.
28 | 
29 |         Returns:
30 |             VectorSize object with is_sparse=False and the embedding dimension.
31 |         """
32 | 
33 |     async def embed_image(self, images: list[bytes], options: EmbedderOptionsT | None = None) -> list[list[float]]:
34 |         """
35 |         Creates embeddings for the given images.
36 | 
37 |         Args:
38 |             images: List of images to get embeddings for.
39 |             options: Additional settings used by the Embedder model.
40 | 
41 |         Returns:
42 |             List of embeddings for the given images.
43 |         """
44 |         raise NotImplementedError("Image embeddings are not supported by this model.")
45 | 


--------------------------------------------------------------------------------
/packages/ragbits-core/src/ragbits/core/embeddings/exceptions.py:
--------------------------------------------------------------------------------
 1 | class EmbeddingError(Exception):
 2 |     """
 3 |     Base class for all exceptions raised by the EmbeddingClient.
 4 |     """
 5 | 
 6 |     def __init__(self, message: str) -> None:
 7 |         super().__init__(message)
 8 |         self.message = message
 9 | 
10 | 
11 | class EmbeddingConnectionError(EmbeddingError):
12 |     """
13 |     Raised when there is an error connecting to the embedding API.
14 |     """
15 | 
16 |     def __init__(self, message: str = "Connection error.") -> None:
17 |         super().__init__(message)
18 | 
19 | 
20 | class EmbeddingStatusError(EmbeddingError):
21 |     """
22 |     Raised when an API response has a status code of 4xx or 5xx.
23 |     """
24 | 
25 |     def __init__(self, message: str, status_code: int) -> None:
26 |         super().__init__(message)
27 |         self.status_code = status_code
28 | 
29 | 
30 | class EmbeddingResponseError(EmbeddingError):
31 |     """
32 |     Raised when an API response has an invalid schema.
33 |     """
34 | 
35 |     def __init__(self, message: str = "Data returned by API invalid for expected schema.") -> None:
36 |         super().__init__(message)
37 | 
38 | 
39 | class EmbeddingEmptyResponseError(EmbeddingError):
40 |     """
41 |     Raised when an API response is empty.
42 |     """
43 | 
44 |     def __init__(self, message: str = "Empty response returned by API.") -> None:
45 |         super().__init__(message)
46 | 


--------------------------------------------------------------------------------
/packages/ragbits-core/src/ragbits/core/embeddings/sparse/__init__.py:
--------------------------------------------------------------------------------
 1 | from ..base import SparseVector
 2 | from .bag_of_tokens import BagOfTokens, BagOfTokensOptions
 3 | from .base import SparseEmbedder, SparseEmbedderOptionsT
 4 | 
 5 | __all__ = [
 6 |     "BagOfTokens",
 7 |     "BagOfTokensOptions",
 8 |     "SparseEmbedder",
 9 |     "SparseEmbedderOptionsT",
10 |     "SparseVector",
11 | ]
12 | 


--------------------------------------------------------------------------------
/packages/ragbits-core/src/ragbits/core/embeddings/sparse/base.py:
--------------------------------------------------------------------------------
 1 | from abc import ABC, abstractmethod
 2 | from typing import TypeVar
 3 | 
 4 | from ragbits.core.embeddings.base import Embedder, SparseVector, VectorSize
 5 | from ragbits.core.options import Options
 6 | 
 7 | SparseEmbedderOptionsT = TypeVar("SparseEmbedderOptionsT", bound=Options)
 8 | 
 9 | 
10 | class SparseEmbedder(Embedder[SparseEmbedderOptionsT], ABC):
11 |     """Sparse embedding interface"""
12 | 
13 |     @abstractmethod
14 |     async def embed_text(self, texts: list[str], options: SparseEmbedderOptionsT | None = None) -> list[SparseVector]:
15 |         """
16 |         Transforms a list of texts into sparse vectors.
17 | 
18 |         Args:
19 |             texts: list of input texts.
20 |             options: optional embedding options
21 | 
22 |         Returns:
23 |             list of sparse embeddings.
24 |         """
25 | 
26 |     @abstractmethod
27 |     async def get_vector_size(self) -> VectorSize:
28 |         """
29 |         Get information about the sparse vector size/dimensions returned by this embedder.
30 | 
31 |         Returns:
32 |             VectorSize object with is_sparse=True and the vocabulary size.
33 |         """
34 | 
35 |     async def embed_image(
36 |         self, images: list[bytes], options: SparseEmbedderOptionsT | None = None
37 |     ) -> list[SparseVector]:
38 |         """
39 |         Creates embeddings for the given images.
40 | 
41 |         Args:
42 |             images: List of images to get embeddings for.
43 |             options: Additional settings used by the Embedder model.
44 | 
45 |         Returns:
46 |             List of sparse embeddings for the given images.
47 |         """
48 |         raise NotImplementedError("Image embeddings are not supported by this model.")
49 | 


--------------------------------------------------------------------------------
/packages/ragbits-core/src/ragbits/core/llms/__init__.py:
--------------------------------------------------------------------------------
1 | from .base import LLM
2 | from .litellm import LiteLLM, LiteLLMOptions
3 | from .local import LocalLLMOptions
4 | 
5 | __all__ = ["LLM", "LiteLLM", "LiteLLMOptions", "LocalLLMOptions"]
6 | 


--------------------------------------------------------------------------------
/packages/ragbits-core/src/ragbits/core/llms/exceptions.py:
--------------------------------------------------------------------------------
 1 | class LLMError(Exception):
 2 |     """
 3 |     Base class for all exceptions raised by the LLMClient.
 4 |     """
 5 | 
 6 |     def __init__(self, message: str) -> None:
 7 |         super().__init__(message)
 8 |         self.message = message
 9 | 
10 | 
11 | class LLMConnectionError(LLMError):
12 |     """
13 |     Raised when there is an error connecting to the LLM API.
14 |     """
15 | 
16 |     def __init__(self, message: str = "Connection error.") -> None:
17 |         super().__init__(message)
18 | 
19 | 
20 | class LLMStatusError(LLMError):
21 |     """
22 |     Raised when an API response has a status code of 4xx or 5xx.
23 |     """
24 | 
25 |     def __init__(self, message: str, status_code: int) -> None:
26 |         super().__init__(message)
27 |         self.status_code = status_code
28 | 
29 | 
30 | class LLMResponseError(LLMError):
31 |     """
32 |     Raised when an API response has an invalid schema.
33 |     """
34 | 
35 |     def __init__(self, message: str = "Data returned by API invalid for expected schema.") -> None:
36 |         super().__init__(message)
37 | 
38 | 
39 | class LLMEmptyResponseError(LLMError):
40 |     """
41 |     Raised when an API response is empty.
42 |     """
43 | 
44 |     def __init__(self, message: str = "Empty response returned by API.") -> None:
45 |         super().__init__(message)
46 | 
47 | 
48 | class LLMNotSupportingImagesError(LLMError):
49 |     """
50 |     Raised when there are images in the prompt, but LLM doesn't support them.
51 |     """
52 | 
53 |     def __init__(self, message: str = "There are images in the prompt, but given LLM doesn't support them.") -> None:
54 |         super().__init__(message)
55 | 


--------------------------------------------------------------------------------
/packages/ragbits-core/src/ragbits/core/llms/factory.py:
--------------------------------------------------------------------------------
 1 | from ragbits.core.config import core_config
 2 | from ragbits.core.llms.base import LLM, LLMType
 3 | from ragbits.core.llms.litellm import LiteLLM
 4 | 
 5 | 
 6 | def get_preferred_llm(llm_type: LLMType = LLMType.TEXT) -> LLM:
 7 |     """
 8 |     Get an instance of the preferred LLM using the factory function
 9 |     specified in the configuration.
10 | 
11 |     Args:
12 |         llm_type: type of the LLM to get, defaults to text
13 | 
14 |     Returns:
15 |         LLM: An instance of the preferred LLM.
16 | 
17 |     """
18 |     factory = core_config.llm_preference_factories[llm_type]
19 |     return LLM.subclass_from_factory(factory)
20 | 
21 | 
22 | def simple_litellm_factory() -> LLM:
23 |     """
24 |     A basic LLM factory that creates a LiteLLM instance with the default model,
25 |     default options, and assumes that the API key is set in the environment.
26 | 
27 |     Returns:
28 |         LLM: An instance of the LiteLLM class.
29 |     """
30 |     return LiteLLM()
31 | 
32 | 
33 | def simple_litellm_vision_factory() -> LLM:
34 |     """
35 |     A basic LLM factory that creates a LiteLLM instance with the vision enabled model,
36 |     default options, and assumes that the API key is set in the environment.
37 | 
38 |     Returns:
39 |         LLM: An instance of the LiteLLM class.
40 |     """
41 |     return LiteLLM(model_name="gpt-4o-mini")
42 | 
43 | 
44 | def simple_litellm_structured_output_factory() -> LLM:
45 |     """
46 |     A basic LLM factory that creates a LiteLLM instance with the support for structured output.
47 | 
48 |     Returns:
49 |         LLM: An instance of the LiteLLM class.
50 |     """
51 |     return LiteLLM(model_name="gpt-4o-mini-2024-07-18", use_structured_output=True)
52 | 


--------------------------------------------------------------------------------
/packages/ragbits-core/src/ragbits/core/options.py:
--------------------------------------------------------------------------------
 1 | from abc import ABC
 2 | from typing import Any, ClassVar, TypeVar
 3 | 
 4 | from pydantic import BaseModel, ConfigDict
 5 | from typing_extensions import Self
 6 | 
 7 | from ragbits.core.types import NotGiven
 8 | 
 9 | OptionsT = TypeVar("OptionsT", bound="Options")
10 | 
11 | 
12 | class Options(BaseModel, ABC):
13 |     """
14 |     A dataclass that represents all available options. Thanks to the extra='allow' configuration, it allows for
15 |     additional fields that are not defined in the class.
16 |     """
17 | 
18 |     model_config = ConfigDict(extra="allow", arbitrary_types_allowed=True)
19 |     _not_given: ClassVar[Any] = None
20 | 
21 |     def __or__(self, other: "Options") -> Self:
22 |         """
23 |         Merges two Options, prioritizing non-NOT_GIVEN values from the 'other' object.
24 |         """
25 |         self_dict = self.model_dump()
26 |         other_dict = other.model_dump()
27 | 
28 |         updated_dict = {
29 |             key: other_dict[key]
30 |             if not isinstance(other_dict.get(key), NotGiven) and key in other_dict
31 |             else self_dict[key]
32 |             for key in self_dict.keys() | other_dict.keys()
33 |         }
34 | 
35 |         return self.__class__(**updated_dict)
36 | 
37 |     def dict(self) -> dict[str, Any]:  # type: ignore # mypy complains about overriding BaseModel.dict
38 |         """
39 |         Creates a dictionary representation of the Options instance.
40 |         If a value is None, it will be replaced with a provider-specific not-given sentinel.
41 | 
42 |         Returns:
43 |             A dictionary representation of the Options instance.
44 |         """
45 |         options = self.model_dump()
46 | 
47 |         return {
48 |             key: self._not_given if value is None or isinstance(value, NotGiven) else value
49 |             for key, value in options.items()
50 |         }
51 | 


--------------------------------------------------------------------------------
/packages/ragbits-core/src/ragbits/core/prompt/__init__.py:
--------------------------------------------------------------------------------
1 | from ragbits.core.prompt.prompt import ChatFormat, Prompt
2 | 
3 | __all__ = ["ChatFormat", "Prompt"]
4 | 


--------------------------------------------------------------------------------
/packages/ragbits-core/src/ragbits/core/prompt/exceptions.py:
--------------------------------------------------------------------------------
 1 | class PromptError(Exception):
 2 |     """
 3 |     Base class for all exceptions raised by the Prompt.
 4 |     """
 5 | 
 6 |     def __init__(self, message: str) -> None:
 7 |         super().__init__(message)
 8 |         self.message = message
 9 | 
10 | 
11 | class PromptWithImagesOfInvalidFormat(PromptError):
12 |     """
13 |     Raised when there is an image attached to the prompt that is not in the correct format.
14 |     """
15 | 
16 |     def __init__(
17 |         self, message: str = "Invalid format of image in prompt detected. Use one of supported OpenAI mime types"
18 |     ) -> None:
19 |         super().__init__(message)
20 | 


--------------------------------------------------------------------------------
/packages/ragbits-core/src/ragbits/core/prompt/promptfoo.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | from pathlib import Path
 3 | 
 4 | try:
 5 |     import yaml
 6 | 
 7 |     HAS_PYYAML = True
 8 | except ImportError:
 9 |     HAS_PYYAML = False
10 | 
11 | from rich.console import Console
12 | 
13 | from ragbits.core.config import core_config
14 | from ragbits.core.prompt.discovery import PromptDiscovery
15 | 
16 | 
17 | def generate_configs(
18 |     file_pattern: str = core_config.prompt_path_pattern,
19 |     root_path: Path | None = None,
20 |     target_path: Path = Path("promptfooconfigs"),
21 | ) -> None:
22 |     """
23 |     Generates promptfoo configuration files for all discovered prompts.
24 | 
25 |     Args:
26 |         file_pattern: The file pattern to search for Prompt objects. Defaults to "**/prompt_*.py"
27 |         root_path: The root path to search for Prompt objects. Defaults to the directory where the script is run.
28 |         target_path: The path to save the promptfoo configuration files. Defaults to "promptfooconfigs".
29 |     """
30 |     root_path = root_path or Path.cwd()
31 | 
32 |     if not HAS_PYYAML:
33 |         Console(stderr=True).print(
34 |             "To generate configs for promptfoo, you need the PyYAML library. Please install it using the following"
35 |             " command:\n[b]pip install ragbits-core\\[promptfoo][/b]"
36 |         )
37 |         return
38 | 
39 |     prompts = PromptDiscovery(file_pattern=file_pattern, root_path=root_path).discover()
40 |     Console().print(
41 |         f"Discovered {len(prompts)} prompts."
42 |         f" Saving promptfoo configuration files to [bold green]{target_path}[/] folder ..."
43 |     )
44 | 
45 |     if not target_path.exists():
46 |         target_path.mkdir()
47 |     for prompt in prompts:
48 |         with open(target_path / f"{prompt.__qualname__}.yaml", "w", encoding="utf-8") as f:
49 |             prompt_path = f"file://{prompt.__module__.replace('.', os.sep)}.py:{prompt.__qualname__}.to_promptfoo"
50 |             yaml.dump({"prompts": [prompt_path]}, f)
51 | 


--------------------------------------------------------------------------------
/packages/ragbits-core/src/ragbits/core/py.typed:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/deepsense-ai/ragbits/c37428621adc5ab30528f41ccd22d655f421c2e8/packages/ragbits-core/src/ragbits/core/py.typed


--------------------------------------------------------------------------------
/packages/ragbits-core/src/ragbits/core/sources/__init__.py:
--------------------------------------------------------------------------------
 1 | from ragbits.core.sources.base import Source  # noqa: I001
 2 | from ragbits.core.sources.azure import AzureBlobStorageSource
 3 | from ragbits.core.sources.gcs import GCSSource
 4 | from ragbits.core.sources.git import GitSource
 5 | from ragbits.core.sources.hf import HuggingFaceSource
 6 | from ragbits.core.sources.local import LocalFileSource
 7 | from ragbits.core.sources.s3 import S3Source
 8 | from ragbits.core.sources.web import WebSource
 9 | 
10 | __all__ = [
11 |     "AzureBlobStorageSource",
12 |     "GCSSource",
13 |     "GitSource",
14 |     "HuggingFaceSource",
15 |     "LocalFileSource",
16 |     "S3Source",
17 |     "Source",
18 |     "WebSource",
19 | ]
20 | 


--------------------------------------------------------------------------------
/packages/ragbits-core/src/ragbits/core/sources/exceptions.py:
--------------------------------------------------------------------------------
 1 | class SourceError(Exception):
 2 |     """
 3 |     Class for all exceptions raised by the document source.
 4 |     """
 5 | 
 6 |     def __init__(self, message: str) -> None:
 7 |         super().__init__(message)
 8 |         self.message = message
 9 | 
10 | 
11 | class SourceConnectionError(SourceError):
12 |     """
13 |     Raised when there is an error connecting to the document source.
14 |     """
15 | 
16 |     def __init__(self) -> None:
17 |         super().__init__("Connection error.")
18 | 
19 | 
20 | class SourceNotFoundError(SourceError):
21 |     """
22 |     Raised when the document is not found.
23 |     """
24 | 
25 |     def __init__(self, source_id: str) -> None:
26 |         super().__init__(f"Source with ID {source_id} not found.")
27 |         self.source_id = source_id
28 | 
29 | 
30 | class SourceDownloadError(SourceError):
31 |     """
32 |     Raised when an error occurs during the download of the source.
33 |     """
34 | 
35 |     def __init__(self, url: str, code: int) -> None:
36 |         super().__init__(f"Download of {url} failed with code {code}.")
37 |         self.url = url
38 |         self.code = code
39 | 


--------------------------------------------------------------------------------
/packages/ragbits-core/src/ragbits/core/types.py:
--------------------------------------------------------------------------------
 1 | from typing import Literal
 2 | 
 3 | from typing_extensions import override
 4 | 
 5 | 
 6 | # Sentinel class used until PEP 0661 is accepted
 7 | class NotGiven:
 8 |     """
 9 |     A sentinel singleton class used to distinguish omitted keyword arguments
10 |     from those passed in with the value None (which may have different behavior).
11 | 
12 |     For example:
13 | 
14 |     ```py
15 |     def get(timeout: Union[int, NotGiven, None] = NotGiven()) -> Response: ...
16 | 
17 | 
18 |     get(timeout=1)  # 1s timeout
19 |     get(timeout=None)  # No timeout
20 |     get()  # Default timeout behavior, which may not be statically known at the method definition.
21 |     ```
22 |     """
23 | 
24 |     def __bool__(self) -> Literal[False]:
25 |         return False
26 | 
27 |     @override
28 |     def __repr__(self) -> str:
29 |         return "NOT_GIVEN"
30 | 
31 | 
32 | NOT_GIVEN = NotGiven()
33 | 


--------------------------------------------------------------------------------
/packages/ragbits-core/src/ragbits/core/utils/__init__.py:
--------------------------------------------------------------------------------
1 | from .secrets import get_secret_key
2 | 
3 | __all__ = ["get_secret_key"]
4 | 


--------------------------------------------------------------------------------
/packages/ragbits-core/src/ragbits/core/utils/helpers.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | from collections.abc import Iterable, Iterator
 3 | from itertools import islice
 4 | from typing import TypeVar
 5 | 
 6 | T = TypeVar("T")
 7 | 
 8 | 
 9 | def env_vars_not_set(env_vars: list[str]) -> bool:
10 |     """
11 |     Checks if no environment variable is set.
12 | 
13 |     Args:
14 |         env_vars: The list of environment variables to check.
15 | 
16 |     Returns:
17 |         True if no environment variable is set, otherwise False.
18 |     """
19 |     return all(os.environ.get(env_var) is None for env_var in env_vars)
20 | 
21 | 
22 | def batched(data: Iterable[T], batch_size: int | None = None) -> Iterator[list[T]]:
23 |     """
24 |     Batches the data into chunks of the given size.
25 | 
26 |     Args:
27 |         data: The data to batch.
28 |         batch_size: The size of the batch. If None, no batching is performed.
29 | 
30 |     Returns:
31 |         An iterator of batches of the data when batch_size is provided,
32 |         or of a single batch holding all the data when batch_size is None (nothing is yielded for empty data).
33 |     """
34 |     it = iter(data)
35 |     while batch := list(islice(it, batch_size)):
36 |         yield batch
37 | 


--------------------------------------------------------------------------------
/packages/ragbits-core/src/ragbits/core/utils/pydantic.py:
--------------------------------------------------------------------------------
 1 | from typing import Annotated, Any
 2 | 
 3 | from pydantic import PlainSerializer, PlainValidator
 4 | 
 5 | 
 6 | def _pydantic_hex_to_bytes(val: Any) -> bytes:  # noqa: ANN401
 7 |     """
 8 |     Deserialize hex string to bytes.
 9 |     """
10 |     if isinstance(val, bytes):
11 |         return val
12 |     elif isinstance(val, bytearray):
13 |         return bytes(val)
14 |     elif isinstance(val, str):
15 |         return bytes.fromhex(val)
16 |     raise ValueError(f"Cannot convert {val} to bytes.")
17 | 
18 | 
19 | def _pydantic_bytes_to_hex(val: bytes) -> str:
20 |     """
21 |     Serialize bytes to hex string.
22 |     """
23 |     return val.hex()
24 | 
25 | 
26 | SerializableBytes = Annotated[
27 |     bytes, PlainValidator(_pydantic_hex_to_bytes), PlainSerializer(_pydantic_bytes_to_hex, return_type=str)
28 | ]
29 | 


--------------------------------------------------------------------------------
/packages/ragbits-core/src/ragbits/core/utils/secrets.py:
--------------------------------------------------------------------------------
 1 | import base64
 2 | import logging
 3 | import os
 4 | import secrets
 5 | import warnings
 6 | from functools import lru_cache
 7 | 
 8 | logger = logging.getLogger(__name__)
 9 | 
10 | # Environment variable name for the secret key
11 | RAGBITS_KEY_ENV_VAR = "RAGBITS_SECRET_KEY"
12 | 
13 | # Default key length in bytes (32 bytes = 256 bits)
14 | DEFAULT_KEY_LENGTH = 32
15 | 
16 | 
17 | @lru_cache(maxsize=1)
18 | def get_secret_key(env_var: str = RAGBITS_KEY_ENV_VAR, key_length: int = DEFAULT_KEY_LENGTH) -> str:
19 |     """
20 |     Get a secret key from environment variable with fallback to a randomly generated key.
21 |     The result is memoized via lru_cache(maxsize=1), so consecutive calls with the same arguments return the same key.
22 | 
23 |     Args:
24 |         env_var: The environment variable name to check for the secret key
25 |         key_length: Number of random bytes to generate (then base64-encoded) if no key is provided
26 | 
27 |     Returns:
28 |         The secret key as a string
29 |     """
30 |     # Try to get from environment variable
31 |     secret_key = os.environ.get(env_var)
32 | 
33 |     if secret_key:
34 |         logger.debug(f"Using secret key from environment variable: {env_var}")
35 |         return secret_key
36 | 
37 |     # Generate a random key. NOTE(review): the warning below prints the key itself - confirm this is intended.
38 |     random_key = base64.urlsafe_b64encode(secrets.token_bytes(key_length)).decode("utf-8")
39 |     warnings.warn(
40 |         f"No secret key found in environment variable {env_var}. "
41 |         f"Using an ephemeral randomly generated key: '{random_key}'. "
42 |         f"This key will be regenerated on restart, breaking any existing signatures. "
43 |         f"Set the {env_var} environment variable to use a persistent key.",
44 |         UserWarning,
45 |         stacklevel=2,
46 |     )
47 | 
48 |     return random_key
49 | 


--------------------------------------------------------------------------------
/packages/ragbits-core/src/ragbits/core/vector_stores/__init__.py:
--------------------------------------------------------------------------------
1 | from ragbits.core.vector_stores.base import VectorStore, VectorStoreEntry, VectorStoreOptions, WhereQuery
2 | from ragbits.core.vector_stores.in_memory import InMemoryVectorStore
3 | 
4 | __all__ = ["InMemoryVectorStore", "VectorStore", "VectorStoreEntry", "VectorStoreOptions", "WhereQuery"]
5 | 


--------------------------------------------------------------------------------
/packages/ragbits-core/tests/assets/img/test.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/deepsense-ai/ragbits/c37428621adc5ab30528f41ccd22d655f421c2e8/packages/ragbits-core/tests/assets/img/test.png


--------------------------------------------------------------------------------
/packages/ragbits-core/tests/assets/img/test2.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/deepsense-ai/ragbits/c37428621adc5ab30528f41ccd22d655f421c2e8/packages/ragbits-core/tests/assets/img/test2.jpg


--------------------------------------------------------------------------------
/packages/ragbits-core/tests/assets/md/bar.md:
--------------------------------------------------------------------------------
1 | bar


--------------------------------------------------------------------------------
/packages/ragbits-core/tests/assets/md/foo.md:
--------------------------------------------------------------------------------
1 | foo


--------------------------------------------------------------------------------
/packages/ragbits-core/tests/cli/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/deepsense-ai/ragbits/c37428621adc5ab30528f41ccd22d655f421c2e8/packages/ragbits-core/tests/cli/__init__.py


--------------------------------------------------------------------------------
/packages/ragbits-core/tests/conftest.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | import shutil
 3 | import tempfile
 4 | from pathlib import Path
 5 | 
 6 | import pytest
 7 | 
 8 | from ragbits.core.sources.base import LOCAL_STORAGE_DIR_ENV
 9 | 
10 | 
11 | @pytest.fixture(scope="module", autouse=True)
12 | def configure_local_storage_dir():
13 |     random_tmp_dir = Path(tempfile.mkdtemp())
14 |     os.environ[LOCAL_STORAGE_DIR_ENV] = random_tmp_dir.as_posix()
15 |     yield
16 |     shutil.rmtree(random_tmp_dir)
17 | 


--------------------------------------------------------------------------------
/packages/ragbits-core/tests/integration/sources/test_hf.py:
--------------------------------------------------------------------------------
 1 | import pytest
 2 | 
 3 | from ragbits.core.sources.exceptions import SourceNotFoundError
 4 | from ragbits.core.sources.hf import HuggingFaceSource
 5 | from ragbits.core.utils.helpers import env_vars_not_set
 6 | 
 7 | HF_TOKEN_ENV = "HF_TOKEN"  # noqa: S105
 8 | HF_DATASET_PATH = "micpst/hf-docs"
 9 | 
10 | 
11 | @pytest.mark.skipif(
12 |     env_vars_not_set([HF_TOKEN_ENV]),  # noqa: S105
13 |     reason="Hugging Face environment variables not set",
14 | )
15 | async def test_huggingface_source_fetch() -> None:
16 |     source = HuggingFaceSource(path=HF_DATASET_PATH, row=0)
17 |     path = await source.fetch()
18 | 
19 |     assert path.is_file()
20 |     assert path.name == "README.md"
21 |     assert (
22 |         path.read_text()
23 |         == " `tokenizers-linux-x64-musl`\n\nThis is the **x86_64-unknown-linux-musl** binary for `tokenizers`\n"
24 |     )
25 | 
26 | 
27 | @pytest.mark.skipif(
28 |     env_vars_not_set([HF_TOKEN_ENV]),
29 |     reason="Hugging Face environment variables not set",
30 | )
31 | async def test_huggingface_source_fetch_not_found() -> None:
32 |     source = HuggingFaceSource(path=HF_DATASET_PATH, row=1000)
33 | 
34 |     with pytest.raises(SourceNotFoundError) as exc:
35 |         await source.fetch()
36 | 
37 |     assert str(exc.value) == "Source with ID hf:micpst/hf-docs/train/1000 not found."
38 | 


--------------------------------------------------------------------------------
/packages/ragbits-core/tests/integration/vector_stores/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/deepsense-ai/ragbits/c37428621adc5ab30528f41ccd22d655f421c2e8/packages/ragbits-core/tests/integration/vector_stores/__init__.py


--------------------------------------------------------------------------------
/packages/ragbits-core/tests/unit/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/deepsense-ai/ragbits/c37428621adc5ab30528f41ccd22d655f421c2e8/packages/ragbits-core/tests/unit/__init__.py


--------------------------------------------------------------------------------
/packages/ragbits-core/tests/unit/audit/test_metrics.py:
--------------------------------------------------------------------------------
 1 | from unittest.mock import MagicMock
 2 | 
 3 | import pytest
 4 | 
 5 | from ragbits.core.audit.metrics import create_histogram, record, set_metric_handlers
 6 | from ragbits.core.audit.metrics.base import MetricHandler
 7 | 
 8 | 
 9 | class MockMetricHandler(MetricHandler[MagicMock]):
10 |     def create_histogram(self, name: str, unit: str = "", description: str = "") -> MagicMock:  # noqa: PLR6301
11 |         return MagicMock()
12 | 
13 |     def record(self, metric: MagicMock, value: int | float, attributes: dict | None = None) -> None: ...
14 | 
15 | 
16 | @pytest.fixture
17 | def mock_handler() -> MockMetricHandler:
18 |     handler = MockMetricHandler()
19 |     set_metric_handlers(handler)
20 |     return handler
21 | 
22 | 
23 | def test_record_with_default_create_histogram(mock_handler: MockMetricHandler) -> None:
24 |     metric = MagicMock()
25 |     mock_handler.create_histogram = MagicMock(return_value=metric)  # type: ignore
26 |     mock_handler.record = MagicMock()  # type: ignore
27 | 
28 |     record("test_metric", 1)
29 | 
30 |     mock_handler.create_histogram.assert_called_once_with(
31 |         name="ragbits_test_metric",
32 |         unit="",
33 |         description="",
34 |     )
35 |     mock_handler.record.assert_called_once_with(metric=metric, value=1, attributes={})
36 | 
37 | 
38 | def test_record_with_create_histogram(mock_handler: MockMetricHandler) -> None:
39 |     metric = MagicMock()
40 |     mock_handler.create_histogram = MagicMock(return_value=metric)  # type: ignore
41 |     mock_handler.record = MagicMock()  # type: ignore
42 | 
43 |     metric_name = create_histogram(name="test_metric", unit="test_unit", description="test_description")
44 |     record(metric_name, 1)
45 | 
46 |     mock_handler.create_histogram.assert_called_once_with(
47 |         name="ragbits_test_metric",
48 |         unit="test_unit",
49 |         description="test_description",
50 |     )
51 |     mock_handler.record.assert_called_once_with(metric=metric, value=1, attributes={})
52 | 


--------------------------------------------------------------------------------
/packages/ragbits-core/tests/unit/llms/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/deepsense-ai/ragbits/c37428621adc5ab30528f41ccd22d655f421c2e8/packages/ragbits-core/tests/unit/llms/__init__.py


--------------------------------------------------------------------------------
/packages/ragbits-core/tests/unit/llms/factory/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/deepsense-ai/ragbits/c37428621adc5ab30528f41ccd22d655f421c2e8/packages/ragbits-core/tests/unit/llms/factory/__init__.py


--------------------------------------------------------------------------------
/packages/ragbits-core/tests/unit/llms/factory/test_get_preferred_llm.py:
--------------------------------------------------------------------------------
 1 | import pytest
 2 | 
 3 | from ragbits.core.config import core_config
 4 | from ragbits.core.llms.base import LLMType
 5 | from ragbits.core.llms.factory import get_preferred_llm
 6 | from ragbits.core.llms.litellm import LiteLLM
 7 | 
 8 | 
 9 | def mock_llm_factory() -> LiteLLM:
10 |     """
11 |     A mock LLM factory that creates a LiteLLM instance with a mock model name.
12 | 
13 |     Returns:
14 |         LiteLLM: An instance of the LiteLLM.
15 |     """
16 |     return LiteLLM(model_name="mock_model")
17 | 
18 | 
19 | def test_get_preferred_llm(monkeypatch: pytest.MonkeyPatch) -> None:
20 |     """
21 |     Test the get_preferred_llm function.
22 |     """
23 |     monkeypatch.setattr(
24 |         core_config,
25 |         "llm_preference_factories",
26 |         {LLMType.TEXT: "unit.llms.factory.test_get_preferred_llm:mock_llm_factory"},
27 |     )
28 | 
29 |     llm = get_preferred_llm()
30 |     assert isinstance(llm, LiteLLM)
31 |     assert llm.model_name == "mock_model"
32 | 


--------------------------------------------------------------------------------
/packages/ragbits-core/tests/unit/prompts/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/deepsense-ai/ragbits/c37428621adc5ab30528f41ccd22d655f421c2e8/packages/ragbits-core/tests/unit/prompts/__init__.py


--------------------------------------------------------------------------------
/packages/ragbits-core/tests/unit/prompts/discovery/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/deepsense-ai/ragbits/c37428621adc5ab30528f41ccd22d655f421c2e8/packages/ragbits-core/tests/unit/prompts/discovery/__init__.py


--------------------------------------------------------------------------------
/packages/ragbits-core/tests/unit/prompts/discovery/prompt_classes_for_tests.py:
--------------------------------------------------------------------------------
 1 | from abc import ABC
 2 | 
 3 | from pydantic import BaseModel
 4 | 
 5 | from ragbits.core.prompt import Prompt
 6 | 
 7 | 
 8 | class PromptForTestInput(BaseModel):
 9 |     """
10 |     Input format for the PromptForTest.
11 |     """
12 | 
13 |     theme: str
14 |     nsfw_allowed: bool = False
15 |     var1: str
16 |     var2: str
17 |     var3: str
18 |     var4: str
19 | 
20 | 
21 | class PromptForTestOutput(BaseModel):
22 |     """
23 |     Output format for the PromptForTest.
24 |     """
25 | 
26 |     text: str
27 | 
28 | 
29 | class PromptForTest(Prompt[PromptForTestInput, PromptForTestOutput]):
30 |     system_prompt = "fake system prompt"
31 |     user_prompt = "fake user prompt"
32 | 
33 | 
34 | class PromptForTestInput2(BaseModel):
35 |     """
36 |     Input format for the PromptForTest2.
37 |     """
38 | 
39 |     theme: str
40 | 
41 | 
42 | class PromptForTestOutput2(BaseModel):
43 |     """
44 |     Output format for the PromptForTest2.
45 |     """
46 | 
47 |     text: str
48 | 
49 | 
50 | class PromptForTest2(Prompt[PromptForTestInput2, PromptForTestOutput2]):
51 |     system_prompt = "fake system prompt2"
52 |     user_prompt = "fake user prompt2"
53 | 
54 | 
55 | class MyBasePrompt(Prompt, ABC):
56 |     system_prompt = "my base system prompt"
57 |     user_prompt = "temp user prompt"
58 | 
59 | 
60 | class MyPromptWithBase(MyBasePrompt):
61 |     user_prompt = "custom user prompt"
62 | 
63 | 
64 | class PromptWithoutInput(Prompt):
65 |     system_prompt = "fake system prompt without typing"
66 |     user_prompt = "fake user prompt without typing"
67 | 


--------------------------------------------------------------------------------
/packages/ragbits-core/tests/unit/prompts/discovery/ragbits_tests_pkg_with_prompts/__init__.py:
--------------------------------------------------------------------------------
1 | from . import prompts
2 | 
3 | __all__ = ["prompts"]
4 | 


--------------------------------------------------------------------------------
/packages/ragbits-core/tests/unit/prompts/discovery/ragbits_tests_pkg_with_prompts/prompts/__init__.py:
--------------------------------------------------------------------------------
1 | from .temp_prompt1 import PromptForTestA
2 | from .temp_prompt2 import PromptForTestB
3 | 
4 | __all__ = ["PromptForTestA", "PromptForTestB"]
5 | 


--------------------------------------------------------------------------------
/packages/ragbits-core/tests/unit/prompts/discovery/ragbits_tests_pkg_with_prompts/prompts/temp_prompt1.py:
--------------------------------------------------------------------------------
 1 | from pydantic import BaseModel
 2 | 
 3 | from ragbits.core.prompt import Prompt
 4 | 
 5 | 
 6 | class PromptForTestInputA(BaseModel):
 7 |     """
 8 |     Input format for the PromptForTestA.
 9 |     """
10 | 
11 |     theme: str
12 |     nsfw_allowed: bool = False
13 |     var1: str
14 |     var2: str
15 |     var3: str
16 |     var4: str
17 | 
18 | 
19 | class PromptForTestOutputA(BaseModel):
20 |     """
21 |     Output format for the PromptForTestA.
22 |     """
23 | 
24 |     text: str
25 | 
26 | 
27 | class PromptForTestA(Prompt[PromptForTestInputA, PromptForTestOutputA]):
28 |     system_prompt = "fake system prompt"
29 |     user_prompt = "fake user prompt"
30 | 


--------------------------------------------------------------------------------
/packages/ragbits-core/tests/unit/prompts/discovery/ragbits_tests_pkg_with_prompts/prompts/temp_prompt2.py:
--------------------------------------------------------------------------------
 1 | from pydantic import BaseModel
 2 | 
 3 | from ragbits.core.prompt import Prompt
 4 | 
 5 | 
 6 | class PromptForTestInputB(BaseModel):
 7 |     """
 8 |     Input format for the PromptForTestB prompt.
 9 |     """
10 | 
11 |     theme: str
12 |     nsfw_allowed: bool = False
13 |     var1: str
14 |     var2: str
15 |     var3: str
16 |     var4: str
17 | 
18 | 
19 | class PromptForTestOutputB(BaseModel):
20 |     """
21 |     Output format for the PromptForTestB prompt.
22 |     """
23 | 
24 |     text: str
25 | 
26 | 
27 | class PromptForTestB(Prompt[PromptForTestInputB, PromptForTestOutputB]):
28 |     system_prompt = "fake system prompt"
29 |     user_prompt = "fake user prompt"
30 | 


--------------------------------------------------------------------------------
/packages/ragbits-core/tests/unit/prompts/discovery/test_prompt_discovery.py:
--------------------------------------------------------------------------------
 1 | from pathlib import Path
 2 | 
 3 | from ragbits.core.prompt.discovery import PromptDiscovery
 4 | 
 5 | current_dir = Path(__file__).parent
 6 | 
 7 | 
 8 | def test_prompt_discovery_from_file():
 9 |     discovery_results = PromptDiscovery(root_path=current_dir).discover()
10 |     print(discovery_results)
11 | 
12 |     assert len(discovery_results) == 5
13 | 
14 |     class_names = [cls.__name__ for cls in discovery_results]
15 |     assert "PromptForTest" in class_names
16 |     assert "PromptForTest2" in class_names
17 |     assert "PromptWithoutInput" in class_names
18 |     assert "PromptForTestInput" not in class_names
19 | 
20 | 
21 | def test_prompt_discovery_from_package():
22 |     discovery_results = PromptDiscovery(
23 |         root_path=current_dir, file_pattern="ragbits_tests_pkg_with_prompts/**/*.py"
24 |     ).discover()
25 | 
26 |     assert len(discovery_results) == 2
27 | 
28 |     class_names = [cls.__name__ for cls in discovery_results]
29 |     assert "PromptForTestA" in class_names
30 |     assert "PromptForTestB" in class_names
31 | 


--------------------------------------------------------------------------------
/packages/ragbits-core/tests/unit/sources/test_aws.py:
--------------------------------------------------------------------------------
 1 | from unittest.mock import patch
 2 | 
 3 | from sympy.testing import pytest
 4 | 
 5 | from ragbits.core.sources.s3 import S3Source
 6 | 
 7 | 
 8 | def test_id():
 9 |     source = S3Source(bucket_name="AA", key="bb/cc.pdf")
10 |     expected_id = "s3:AA/bb/cc.pdf"
11 |     assert source.id == expected_id
12 | 
13 | 
14 | async def test_from_uri_one_file():
15 |     one_file_paths = [
16 |         "s3://bucket/path/to/file",
17 |         "https://s3.us-west-2.amazonaws.com/bucket/path/to/file",
18 |         "https://bucket.s3-us-west-2.amazonaws.com/path/to/file",
19 |     ]
20 |     for path in one_file_paths:
21 |         result = await S3Source.from_uri(path)
22 |         assert result == [S3Source(bucket_name="bucket", key="path/to/file")]
23 | 
24 | 
25 | async def test_from_uri_with_prefix():
26 |     good_paths = [
27 |         "s3://bucket/path/to/files*",
28 |         "https://s3.us-west-2.amazonaws.com/bucket/path/to/files*",
29 |         "https://bucket.s3-us-west-2.amazonaws.com/path/to/files*",
30 |     ]
31 |     with patch("ragbits.core.sources.s3.S3Source.list_sources") as mock_list_sources:
32 |         for path in good_paths:
33 |             await S3Source.from_uri(path)
34 |             mock_list_sources.assert_called_with(bucket_name="bucket", prefix="path/to/files")
35 | 
36 | 
37 | async def test_from_uri_raises_exception():
38 |     wrong_uris = [
39 |         "some string",
40 |         "https://bucket.s3.us-west-2.amazonaws.com/path/to/file**",
41 |         "https://bucket.s3.us-west-2.amazonaws.com/path/*/file*",
42 |         "https://some/random/address",
43 |         "https://s3.us-west-2.amazonaws.pl/path/to/file",
44 |         "s3://short_address",
45 |     ]
46 |     for uri in wrong_uris:
47 |         with pytest.raises(ValueError):
48 |             await S3Source.from_uri(uri)
49 | 


--------------------------------------------------------------------------------
/packages/ragbits-core/tests/unit/sources/test_exceptions.py:
--------------------------------------------------------------------------------
 1 | from ragbits.core.sources.exceptions import (
 2 |     SourceConnectionError,
 3 |     SourceDownloadError,
 4 |     SourceError,
 5 |     SourceNotFoundError,
 6 | )
 7 | 
 8 | 
 9 | def test_source_error_init():
10 |     error = SourceError("Test error message")
11 |     assert error.message == "Test error message"
12 |     assert str(error) == "Test error message"
13 | 
14 | 
15 | def test_source_connection_error_init():
16 |     error = SourceConnectionError()
17 |     assert error.message == "Connection error."
18 |     assert str(error) == "Connection error."
19 | 
20 | 
21 | def test_source_not_found_error_init():
22 |     error = SourceNotFoundError("test-source-id")
23 |     assert error.source_id == "test-source-id"
24 |     assert error.message == "Source with ID test-source-id not found."
25 |     assert str(error) == "Source with ID test-source-id not found."
26 | 
27 | 
28 | def test_web_download_error_init():
29 |     url = "https://example.com/file.pdf"
30 |     code = 404
31 |     error = SourceDownloadError(url, code)
32 | 
33 |     assert error.url == url
34 |     assert error.code == code
35 |     assert error.message == f"Download of {url} failed with code {code}."
36 |     assert str(error) == f"Download of {url} failed with code {code}."
37 | 


--------------------------------------------------------------------------------
/packages/ragbits-core/tests/unit/sources/test_hf.py:
--------------------------------------------------------------------------------
 1 | from unittest.mock import MagicMock, patch
 2 | 
 3 | from ragbits.core.sources.hf import HuggingFaceSource
 4 | 
 5 | 
 6 | async def test_huggingface_source_fetch() -> None:
 7 |     take = MagicMock(return_value=[{"content": "This is the content of the file.", "source": "doc.md"}])
 8 |     skip = MagicMock(return_value=MagicMock(take=take))
 9 |     data = MagicMock(skip=skip)
10 |     source = HuggingFaceSource(path="org/docs", split="train", row=1)
11 | 
12 |     with patch("ragbits.core.sources.hf.load_dataset", return_value=data):
13 |         path = await source.fetch()
14 | 
15 |     assert source.id == "hf:org/docs/train/1"
16 |     assert path.name == "doc.md"
17 |     assert path.read_text() == "This is the content of the file."
18 | 


--------------------------------------------------------------------------------
/packages/ragbits-core/tests/unit/sources/test_local.py:
--------------------------------------------------------------------------------
 1 | from pathlib import Path
 2 | 
 3 | from ragbits.core.sources.local import LocalFileSource
 4 | 
 5 | TEST_FILE_PATH = Path(__file__)
 6 | 
 7 | 
 8 | async def test_local_source_fetch():
 9 |     source = LocalFileSource(path=TEST_FILE_PATH)
10 | 
11 |     path = await source.fetch()
12 | 
13 |     assert path == TEST_FILE_PATH
14 | 
15 | 
16 | async def test_local_source_list_sources():
17 |     example_files = TEST_FILE_PATH.parent.parent.parent / "assets" / "md"
18 | 
19 |     sources = await LocalFileSource.list_sources(example_files, file_pattern="*.md")
20 | 
21 |     assert sum(1 for _ in sources) == 2
22 |     assert all(isinstance(source, LocalFileSource) for source in sources)
23 |     assert all(source.path.suffix == ".md" for source in sources)
24 | 


--------------------------------------------------------------------------------
/packages/ragbits-core/tests/unit/test_options.py:
--------------------------------------------------------------------------------
 1 | import pytest
 2 | 
 3 | from ragbits.core.options import Options
 4 | from ragbits.core.types import NOT_GIVEN, NotGiven
 5 | 
 6 | 
 7 | class OptionA(Options):
 8 |     a: int = 1
 9 |     d: int | NotGiven = NOT_GIVEN
10 | 
11 | 
12 | class OptionsB(Options):
13 |     b: int = 2
14 |     e: int | None = None
15 | 
16 | 
17 | class OptionsC(Options):
18 |     a: int = 2
19 |     c: str = "c"
20 | 
21 | 
22 | @pytest.mark.parametrize(
23 |     ("options", "expected"),
24 |     [
25 |         (OptionA(), {"a": 1, "d": None}),
26 |         (OptionsB(), {"b": 2, "e": None}),
27 |     ],
28 | )
29 | def test_default_options(options: Options, expected: dict) -> None:
30 |     assert options.dict() == expected
31 | 
32 | 
33 | def test_merge_options() -> None:
34 |     options_a = OptionA()
35 |     options_b = OptionsB()
36 |     options_c = OptionsC()
37 | 
38 |     merged = options_a | options_b | options_c
39 | 
40 |     assert merged.dict() == {"a": 2, "b": 2, "c": "c", "d": None, "e": None}
41 | 


--------------------------------------------------------------------------------
/packages/ragbits-core/tests/unit/utils/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/deepsense-ai/ragbits/c37428621adc5ab30528f41ccd22d655f421c2e8/packages/ragbits-core/tests/unit/utils/__init__.py


--------------------------------------------------------------------------------
/packages/ragbits-core/tests/unit/utils/pyproject/test_find.py:
--------------------------------------------------------------------------------
 1 | from pathlib import Path
 2 | 
 3 | import pytest
 4 | 
 5 | from ragbits.core.utils._pyproject import find_pyproject
 6 | 
 7 | projects_dir = Path(__file__).parent.parent / "testprojects"
 8 | 
 9 | 
10 | def test_find_in_current_dir():
11 |     """Test finding a pyproject.toml file in the current directory."""
12 |     found = find_pyproject(projects_dir / "happy_project")
13 |     assert found == projects_dir / "happy_project" / "pyproject.toml"
14 | 
15 | 
16 | def test_find_in_parent_dir():
17 |     """Test finding a pyproject.toml file in a parent directory."""
18 |     found = find_pyproject(projects_dir / "happy_project" / "subdirectory")
19 |     assert found == projects_dir / "happy_project" / "pyproject.toml"
20 | 
21 | 
22 | def test_find_not_found():
23 |     """Test that it raises FileNotFoundError if the file is not found."""
24 |     with pytest.raises(FileNotFoundError):
25 |         find_pyproject(Path("/"))
26 | 


--------------------------------------------------------------------------------
/packages/ragbits-core/tests/unit/utils/pyproject/test_get_config.py:
--------------------------------------------------------------------------------
 1 | from pathlib import Path
 2 | 
 3 | from ragbits.core.utils._pyproject import get_ragbits_config
 4 | 
 5 | projects_dir = Path(__file__).parent.parent / "testprojects"
 6 | 
 7 | 
 8 | def test_get_config():
 9 |     """Test getting config from pyproject.toml file."""
10 |     config = get_ragbits_config(projects_dir / "happy_project")
11 | 
12 |     assert config == {
13 |         "lorem": "ipsum",
14 |         "happy-project": {
15 |             "foo": "bar",
16 |             "is_happy": True,
17 |             "happiness_level": 100,
18 |         },
19 |         "project_base_path": str(projects_dir / "happy_project"),
20 |     }
21 | 
22 | 
23 | def test_get_config_no_file():
24 |     """Test getting config when the pyproject.toml file is not found."""
25 |     config = get_ragbits_config(Path("/"))
26 | 
27 |     assert config == {}
28 | 


--------------------------------------------------------------------------------
/packages/ragbits-core/tests/unit/utils/test_decorators.py:
--------------------------------------------------------------------------------
 1 | import pytest
 2 | 
 3 | from ragbits.core.utils.decorators import requires_dependencies
 4 | 
 5 | 
 6 | def test_single_dependency_installed() -> None:
 7 |     @requires_dependencies("pytest")
 8 |     def some_function() -> str:
 9 |         return "success"
10 | 
11 |     assert some_function() == "success"
12 | 
13 | 
14 | def test_single_dependency_missing() -> None:
15 |     @requires_dependencies("nonexistent_dependency")
16 |     def some_function() -> str:
17 |         return "success"
18 | 
19 |     with pytest.raises(ImportError) as exc:
20 |         some_function()
21 | 
22 |     assert (
23 |         str(exc.value) == "Following dependencies are missing: nonexistent_dependency."
24 |         " Please install them using `pip install nonexistent_dependency`."
25 |     )
26 | 
27 | 
28 | def test_multiple_dependencies_installed() -> None:
29 |     @requires_dependencies(["pytest", "asyncio"])
30 |     def some_function() -> str:
31 |         return "success"
32 | 
33 |     assert some_function() == "success"
34 | 
35 | 
36 | def test_multiple_dependencies_some_missing() -> None:
37 |     @requires_dependencies(["pytest", "nonexistent_dependency"])
38 |     def some_function() -> str:
39 |         return "success"
40 | 
41 |     with pytest.raises(ImportError) as exc:
42 |         some_function()
43 | 
44 |     assert (
45 |         str(exc.value) == "Following dependencies are missing: nonexistent_dependency."
46 |         " Please install them using `pip install nonexistent_dependency`."
47 |     )
48 | 


--------------------------------------------------------------------------------
/packages/ragbits-core/tests/unit/utils/test_helpers.py:
--------------------------------------------------------------------------------
 1 | import pytest
 2 | 
 3 | from ragbits.core.utils.helpers import batched
 4 | 
 5 | 
 6 | @pytest.mark.parametrize(
 7 |     ("input_data", "batch_size", "expected"),
 8 |     [
 9 |         ([], 3, []),
10 |         ([], None, []),
11 |         ([1, 2, 3], None, [[1, 2, 3]]),
12 |         ([1, 2, 3], 5, [[1, 2, 3]]),
13 |         ([1, 2, 3], 3, [[1, 2, 3]]),
14 |         ([1, 2, 3, 4, 5, 6], 2, [[1, 2], [3, 4], [5, 6]]),
15 |         ([1, 2, 3, 4, 5], 2, [[1, 2], [3, 4], [5]]),
16 |     ],
17 |     ids=[
18 |         "empty_iterable",
19 |         "none_batch_size",
20 |         "none_batch_size_with_remainder",
21 |         "batch_size_larger_than_data",
22 |         "batch_size_equal_to_data",
23 |         "batch_size_divides_data_evenly",
24 |         "batch_size_with_remainder",
25 |     ],
26 | )
27 | def test_batched(input_data: list[int], batch_size: int, expected: list[list[int]]) -> None:
28 |     result = list(batched(input_data, batch_size))
29 |     assert result == expected
30 | 


--------------------------------------------------------------------------------
/packages/ragbits-core/tests/unit/utils/test_secrets.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | from unittest.mock import patch
 3 | 
 4 | import pytest
 5 | 
 6 | from ragbits.core.utils.secrets import RAGBITS_KEY_ENV_VAR, get_secret_key
 7 | 
 8 | 
 9 | def test_get_secret_key_from_env():
10 |     """Test getting the secret key from an environment variable."""
11 |     get_secret_key.cache_clear()
12 |     test_key = "test-env-secret-key"
13 |     with patch.dict(os.environ, {RAGBITS_KEY_ENV_VAR: test_key}, clear=True):
14 |         assert get_secret_key() == test_key
15 | 
16 | 
17 | def test_get_secret_key_generates_random():
18 |     """Test that a random key is generated when neither env var nor default is provided."""
19 |     with patch.dict(os.environ, {}, clear=True):
20 |         # The function is cached, so we need to test with different env_var names
21 |         key1 = get_secret_key(env_var="TEST_KEY_1")
22 |         key2 = get_secret_key(env_var="TEST_KEY_2")
23 | 
24 |         # Keys should be different and not empty
25 |         assert key1 != key2
26 |         assert key1
27 |         assert key2
28 | 
29 | 
30 | def test_get_secret_key_warning():
31 |     """Test that a warning is emitted when generating a random key."""
32 |     with (
33 |         patch.dict(os.environ, {}, clear=True),
34 |         pytest.warns(UserWarning, match=f"No secret key found in environment variable {RAGBITS_KEY_ENV_VAR}"),
35 |     ):
36 |         get_secret_key(env_var=RAGBITS_KEY_ENV_VAR)
37 | 
38 | 
39 | def test_get_secret_key_caching():
40 |     """Test that the secret key function caches results."""
41 |     with patch.dict(os.environ, {}, clear=True):
42 |         # The same env_var should produce the same key due to caching
43 |         key1 = get_secret_key(env_var="TEST_CACHE_KEY")
44 |         key2 = get_secret_key(env_var="TEST_CACHE_KEY")
45 |         assert key1 == key2
46 | 


--------------------------------------------------------------------------------
/packages/ragbits-core/tests/unit/utils/testprojects/bad_factory_project/pyproject.toml:
--------------------------------------------------------------------------------
1 | [project]
2 | name = "bad_factory_project"
3 | 
4 | [tool.ragbits.core.llm_preference_factories]
5 | non_existing = "ragbits.core.llms.factory:simple_litellm_factory"
6 | vision = "ragbits.core.llms.factory:simple_litellm_vision_factory"
7 | structured_output = "ragbits.core.llms.factory:simple_litellm_vision_factory"
8 | 


--------------------------------------------------------------------------------
/packages/ragbits-core/tests/unit/utils/testprojects/factory_project/pyproject.toml:
--------------------------------------------------------------------------------
1 | [project]
2 | name = "factory_project"
3 | 
4 | [tool.ragbits.core.llm_preference_factories]
5 | text = "ragbits.core.llms.factory:simple_litellm_factory"
6 | vision = "ragbits.core.llms.factory:simple_litellm_vision_factory"
7 | structured_output = "ragbits.core.llms.factory:simple_litellm_vision_factory"
8 | 


--------------------------------------------------------------------------------
/packages/ragbits-core/tests/unit/utils/testprojects/happy_project/pyproject.toml:
--------------------------------------------------------------------------------
 1 | [project]
 2 | name = "happy-project"
 3 | 
 4 | [tool.ragbits]
 5 | lorem = "ipsum"
 6 | 
 7 | [tool.ragbits.happy-project]
 8 | foo = "bar"
 9 | is_happy = true
10 | happiness_level = 100
11 | 


--------------------------------------------------------------------------------
/packages/ragbits-core/tests/unit/utils/testprojects/project_with_instance_factory/pyproject.toml:
--------------------------------------------------------------------------------
1 | [project]
2 | name = "instance_factory_project"
3 | 
4 | [tool.ragbits.core.component_preference_factories]
5 | example = "unit.utils.test_config_handling:example_factory"
6 | 


--------------------------------------------------------------------------------
/packages/ragbits-core/tests/unit/utils/testprojects/project_with_instances_yaml/instances.yaml:
--------------------------------------------------------------------------------
1 | example:
2 |   type: unit.utils.test_config_handling:ExampleSubclass
3 |   config:
4 |     foo: I am a foo
5 |     bar: 122
6 | 


--------------------------------------------------------------------------------
/packages/ragbits-core/tests/unit/utils/testprojects/project_with_instances_yaml/pyproject.toml:
--------------------------------------------------------------------------------
1 | [project]
2 | name = "project_with_instances_yaml"
3 | 
4 | [tool.ragbits.core]
5 | component_preference_config_path = "instances.yaml"
6 | 


--------------------------------------------------------------------------------
/packages/ragbits-document-search/README.md:
--------------------------------------------------------------------------------
 1 | # Ragbits Document Search
 2 | 
 3 | Ragbits Document Search is a Python package that provides tools for building RAG applications. It helps ingest, index, and search documents to retrieve relevant information for your prompts.
 4 | 
 5 | ## Installation
 6 | 
 7 | You can install the latest version of Ragbits Document Search using pip:
 8 | 
 9 | ```bash
10 | pip install ragbits-document-search
11 | ```
12 | 
13 | ## Quickstart
14 | ```python
15 | import asyncio
16 | 
17 | from ragbits.core.embeddings.litellm import LiteLLMEmbedder
18 | from ragbits.core.vector_stores.in_memory import InMemoryVectorStore
19 | from ragbits.document_search import DocumentSearch
20 | 
21 | async def main() -> None:
22 |     """Run the example."""
23 |     embedder = LiteLLMEmbedder(
24 |         model="text-embedding-3-small",
25 |     )
26 |     vector_store = InMemoryVectorStore(embedder=embedder)
27 |     document_search = DocumentSearch(
28 |         vector_store=vector_store,
29 |     )
30 | 
31 |     # Ingest all .txt files from the "biographies" directory
32 |     await document_search.ingest("file://biographies/*.txt")
33 | 
34 |     # Search the documents for the query
35 |     results = await document_search.search("When was Marie Curie-Sklodowska born?")
36 |     print(results)
37 | 
38 | 
39 | if __name__ == "__main__":
40 |     asyncio.run(main())
41 | ```
42 | 
43 | ## Documentation
44 | * [Quickstart 2: Adding RAG Capabilities](https://ragbits.deepsense.ai/quickstart/quickstart2_rag/)
45 | * [How-To Guides - Document Search](https://ragbits.deepsense.ai/how-to/document_search/ingest-documents/)
46 | * [API Reference - Document Search](https://ragbits.deepsense.ai/api_reference/document_search/)
47 | 


--------------------------------------------------------------------------------
/packages/ragbits-document-search/src/ragbits/document_search/__init__.py:
--------------------------------------------------------------------------------
1 | from ragbits.document_search._main import DocumentSearch, DocumentSearchOptions
2 | 
3 | __all__ = ["DocumentSearch", "DocumentSearchOptions"]
4 | 


--------------------------------------------------------------------------------
/packages/ragbits-document-search/src/ragbits/document_search/documents/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/deepsense-ai/ragbits/c37428621adc5ab30528f41ccd22d655f421c2e8/packages/ragbits-document-search/src/ragbits/document_search/documents/__init__.py


--------------------------------------------------------------------------------
/packages/ragbits-document-search/src/ragbits/document_search/ingestion/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/deepsense-ai/ragbits/c37428621adc5ab30528f41ccd22d655f421c2e8/packages/ragbits-document-search/src/ragbits/document_search/ingestion/__init__.py


--------------------------------------------------------------------------------
/packages/ragbits-document-search/src/ragbits/document_search/ingestion/enrichers/__init__.py:
--------------------------------------------------------------------------------
1 | from ragbits.document_search.ingestion.enrichers.base import ElementEnricher
2 | from ragbits.document_search.ingestion.enrichers.image import ImageElementEnricher
3 | from ragbits.document_search.ingestion.enrichers.router import ElementEnricherRouter
4 | 
5 | __all__ = ["ElementEnricher", "ElementEnricherRouter", "ImageElementEnricher"]
6 | 


--------------------------------------------------------------------------------
/packages/ragbits-document-search/src/ragbits/document_search/ingestion/enrichers/exceptions.py:
--------------------------------------------------------------------------------
 1 | from ragbits.document_search.documents.element import Element
 2 | 
 3 | 
 4 | class EnricherError(Exception):
 5 |     """
 6 |     Class for all exceptions raised by the element enricher and router.
 7 |     """
 8 | 
 9 |     def __init__(self, message: str) -> None:
10 |         super().__init__(message)
11 |         self.message = message
12 | 
13 | 
14 | class EnricherNotFoundError(EnricherError):
15 |     """
16 |     Raised when no enricher was found for the element type.
17 |     """
18 | 
19 |     def __init__(self, element_type: type[Element]) -> None:
20 |         super().__init__(f"No enricher found for the element type {element_type}")
21 |         self.element_type = element_type
22 | 
23 | 
24 | class EnricherElementNotSupportedError(EnricherError):
25 |     """
26 |     Raised when the element type is not supported by the enricher.
27 |     """
28 | 
29 |     def __init__(self, enricher_name: str, element_type: type[Element]) -> None:
30 |         super().__init__(f"Element type {element_type} is not supported by the {enricher_name}")
31 |         self.enricher_name = enricher_name
32 |         self.element_type = element_type
33 | 


--------------------------------------------------------------------------------
/packages/ragbits-document-search/src/ragbits/document_search/ingestion/parsers/__init__.py:
--------------------------------------------------------------------------------
1 | from ragbits.document_search.ingestion.parsers.base import DocumentParser, ImageDocumentParser, TextDocumentParser
2 | from ragbits.document_search.ingestion.parsers.router import DocumentParserRouter
3 | 
4 | __all__ = ["DocumentParser", "DocumentParserRouter", "ImageDocumentParser", "TextDocumentParser"]
5 | 


--------------------------------------------------------------------------------
/packages/ragbits-document-search/src/ragbits/document_search/ingestion/parsers/exceptions.py:
--------------------------------------------------------------------------------
 1 | from ragbits.document_search.documents.document import DocumentType
 2 | 
 3 | 
 4 | class ParserError(Exception):
 5 |     """
 6 |     Class for all exceptions raised by the document parser and router.
 7 |     """
 8 | 
 9 |     def __init__(self, message: str) -> None:
10 |         super().__init__(message)
11 |         self.message = message
12 | 
13 | 
14 | class ParserNotFoundError(ParserError):
15 |     """
16 |     Raised when no parser was found for the document type.
17 |     """
18 | 
19 |     def __init__(self, document_type: DocumentType) -> None:
20 |         super().__init__(f"No parser found for the document type {document_type}")
21 |         self.document_type = document_type
22 | 
23 | 
24 | class ParserDocumentNotSupportedError(ParserError):
25 |     """
26 |     Raised when the document type is not supported by the parser.
27 |     """
28 | 
29 |     def __init__(self, parser_name: str, document_type: DocumentType) -> None:
30 |         super().__init__(f"Document type {document_type.value} is not supported by the {parser_name}")
31 |         self.parser_name = parser_name
32 |         self.document_type = document_type
33 | 


--------------------------------------------------------------------------------
/packages/ragbits-document-search/src/ragbits/document_search/ingestion/strategies/__init__.py:
--------------------------------------------------------------------------------
1 | from ragbits.document_search.ingestion.strategies.base import IngestStrategy
2 | from ragbits.document_search.ingestion.strategies.batched import BatchedIngestStrategy
3 | from ragbits.document_search.ingestion.strategies.ray import RayDistributedIngestStrategy
4 | from ragbits.document_search.ingestion.strategies.sequential import SequentialIngestStrategy
5 | 
6 | __all__ = ["BatchedIngestStrategy", "IngestStrategy", "RayDistributedIngestStrategy", "SequentialIngestStrategy"]
7 | 


--------------------------------------------------------------------------------
/packages/ragbits-document-search/src/ragbits/document_search/ingestion/strategies/sequential.py:
--------------------------------------------------------------------------------
 1 | from ragbits.document_search.ingestion.strategies.batched import BatchedIngestStrategy
 2 | 
 3 | 
 4 | class SequentialIngestStrategy(BatchedIngestStrategy):
 5 |     """
 6 |     Ingest strategy that processes documents in sequence, one at a time.
 7 |     """
 8 | 
 9 |     def __init__(self, num_retries: int = 3, backoff_multiplier: int = 1, backoff_max: int = 60) -> None:
10 |         """
11 |         Initialize the SequentialIngestStrategy instance.
12 | 
13 |         Args:
14 |             num_retries: The number of retries per document ingest task error.
15 |             backoff_multiplier: The base delay multiplier for exponential backoff (in seconds).
16 |             backoff_max: The maximum allowed delay (in seconds) between retries.
17 |         """
18 |         super().__init__(
19 |             batch_size=1,
20 |             num_retries=num_retries,
21 |             backoff_multiplier=backoff_multiplier,
22 |             backoff_max=backoff_max,
23 |         )
24 | 


--------------------------------------------------------------------------------
/packages/ragbits-document-search/src/ragbits/document_search/py.typed:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/deepsense-ai/ragbits/c37428621adc5ab30528f41ccd22d655f421c2e8/packages/ragbits-document-search/src/ragbits/document_search/py.typed


--------------------------------------------------------------------------------
/packages/ragbits-document-search/src/ragbits/document_search/retrieval/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/deepsense-ai/ragbits/c37428621adc5ab30528f41ccd22d655f421c2e8/packages/ragbits-document-search/src/ragbits/document_search/retrieval/__init__.py


--------------------------------------------------------------------------------
/packages/ragbits-document-search/src/ragbits/document_search/retrieval/rephrasers/__init__.py:
--------------------------------------------------------------------------------
 1 | from ragbits.document_search.retrieval.rephrasers.base import QueryRephraser, QueryRephraserOptions
 2 | from ragbits.document_search.retrieval.rephrasers.llm import (
 3 |     LLMQueryRephraser,
 4 |     LLMQueryRephraserOptions,
 5 |     LLMQueryRephraserPrompt,
 6 |     LLMQueryRephraserPromptInput,
 7 | )
 8 | from ragbits.document_search.retrieval.rephrasers.noop import NoopQueryRephraser
 9 | 
10 | __all__ = [
11 |     "LLMQueryRephraser",
12 |     "LLMQueryRephraserOptions",
13 |     "LLMQueryRephraserPrompt",
14 |     "LLMQueryRephraserPromptInput",
15 |     "NoopQueryRephraser",
16 |     "QueryRephraser",
17 |     "QueryRephraserOptions",
18 | ]
19 | 


--------------------------------------------------------------------------------
/packages/ragbits-document-search/src/ragbits/document_search/retrieval/rephrasers/base.py:
--------------------------------------------------------------------------------
 1 | from abc import ABC, abstractmethod
 2 | from collections.abc import Iterable
 3 | from typing import ClassVar, TypeVar
 4 | 
 5 | from ragbits.core.options import Options
 6 | from ragbits.core.utils.config_handling import ConfigurableComponent
 7 | from ragbits.document_search.retrieval import rephrasers
 8 | 
 9 | 
10 | class QueryRephraserOptions(Options):
11 |     """
12 |     Object representing the options for the rephraser.
13 |     """
14 | 
15 | 
16 | QueryRephraserOptionsT = TypeVar("QueryRephraserOptionsT", bound=QueryRephraserOptions)
17 | 
18 | 
class QueryRephraser(ConfigurableComponent[QueryRephraserOptionsT], ABC):
    """
    Abstract base for components that rephrase a query.

    A single input sentence / question may yield several rephrased queries.
    """

    # Options class associated with the rephraser; concrete subclasses assign it.
    options_cls: type[QueryRephraserOptionsT]
    # Class-level settings handed to ConfigurableComponent (their exact use is defined there).
    default_module: ClassVar = rephrasers
    configuration_key: ClassVar = "rephraser"

    @abstractmethod
    async def rephrase(self, query: str, options: QueryRephraserOptionsT | None = None) -> Iterable[str]:
        """
        Produce rephrased variants of a query.

        Args:
            query: The query text to rephrase.
            options: Optional rephraser options for this call.

        Returns:
            An iterable with the rephrased queries.
        """
40 | 


--------------------------------------------------------------------------------
/packages/ragbits-document-search/src/ragbits/document_search/retrieval/rephrasers/noop.py:
--------------------------------------------------------------------------------
 1 | from collections.abc import Iterable
 2 | 
 3 | from ragbits.core.audit.traces import traceable
 4 | from ragbits.document_search.retrieval.rephrasers.base import QueryRephraser, QueryRephraserOptions
 5 | 
 6 | 
 7 | class NoopQueryRephraser(QueryRephraser[QueryRephraserOptions]):
 8 |     """
 9 |     A no-op query paraphraser that does not change the query.
10 |     """
11 | 
12 |     options_cls: type[QueryRephraserOptions] = QueryRephraserOptions
13 | 
14 |     @traceable
15 |     async def rephrase(self, query: str, options: QueryRephraserOptions | None = None) -> Iterable[str]:  # noqa: PLR6301
16 |         """
17 |         Mock implementation which outputs the same query as in input.
18 | 
19 |         Args:
20 |             query: The query to rephrase.
21 |             options: The options for the rephraser.
22 | 
23 |         Returns:
24 |             The list with non-transformed query.
25 |         """
26 |         return [query]
27 | 


--------------------------------------------------------------------------------
/packages/ragbits-document-search/src/ragbits/document_search/retrieval/rerankers/__init__.py:
--------------------------------------------------------------------------------
1 | from ragbits.document_search.retrieval.rerankers.base import Reranker, RerankerOptions
2 | from ragbits.document_search.retrieval.rerankers.noop import NoopReranker
3 | 
4 | __all__ = ["NoopReranker", "Reranker", "RerankerOptions"]
5 | 


--------------------------------------------------------------------------------
/packages/ragbits-document-search/src/ragbits/document_search/retrieval/rerankers/base.py:
--------------------------------------------------------------------------------
 1 | from abc import ABC, abstractmethod
 2 | from collections.abc import Sequence
 3 | from typing import ClassVar, TypeVar
 4 | 
 5 | from ragbits.core.options import Options
 6 | from ragbits.core.types import NOT_GIVEN, NotGiven
 7 | from ragbits.core.utils.config_handling import ConfigurableComponent
 8 | from ragbits.document_search.documents.element import Element
 9 | from ragbits.document_search.retrieval import rerankers
10 | 
11 | 
class RerankerOptions(Options):
    """
    Options accepted by rerankers.

    Attributes:
        top_n: How many entries to return.
        score_threshold: Minimum relevance score an entry needs in order to be returned.
        override_score: When True, reranking overrides the element score.
    """

    # NOT_GIVEN is the default for both optional limits, distinct from an explicit None.
    top_n: int | None | NotGiven = NOT_GIVEN
    score_threshold: float | None | NotGiven = NOT_GIVEN
    override_score: bool = True
25 | 
26 | 
27 | RerankerOptionsT = TypeVar("RerankerOptionsT", bound=RerankerOptions)
28 | 
29 | 
class Reranker(ConfigurableComponent[RerankerOptionsT], ABC):
    """
    Abstract base for components that rerank elements retrieved from a vector store.
    """

    # Options class associated with the reranker; concrete subclasses assign it.
    options_cls: type[RerankerOptionsT]
    # Class-level settings handed to ConfigurableComponent (their exact use is defined there).
    default_module: ClassVar = rerankers
    configuration_key: ClassVar = "reranker"

    @abstractmethod
    async def rerank(
        self,
        elements: Sequence[Sequence[Element]],
        query: str,
        options: RerankerOptionsT | None = None,
    ) -> Sequence[Element]:
        """
        Rerank the given elements against a query.

        Args:
            elements: Groups of elements to rerank (a sequence of sequences).
            query: The query the elements are reranked against.
            options: Optional reranking options for this call.

        Returns:
            A single flat sequence with the reranked elements.
        """
57 | 


--------------------------------------------------------------------------------
/packages/ragbits-document-search/src/ragbits/document_search/retrieval/rerankers/noop.py:
--------------------------------------------------------------------------------
 1 | from collections.abc import Sequence
 2 | from itertools import chain
 3 | 
 4 | from ragbits.core.audit.traces import traceable
 5 | from ragbits.document_search.documents.element import Element
 6 | from ragbits.document_search.retrieval.rerankers.base import Reranker, RerankerOptions
 7 | 
 8 | 
 9 | class NoopReranker(Reranker[RerankerOptions]):
10 |     """
11 |     A no-op reranker that does not change the order of the elements.
12 |     """
13 | 
14 |     options_cls: type[RerankerOptions] = RerankerOptions
15 | 
16 |     @traceable
17 |     async def rerank(  # noqa: PLR6301
18 |         self,
19 |         elements: Sequence[Sequence[Element]],
20 |         query: str,
21 |         options: RerankerOptions | None = None,
22 |     ) -> Sequence[Element]:
23 |         """
24 |         No reranking, returning the elements in the same order.
25 | 
26 |         Args:
27 |             elements: The elements to rerank.
28 |             query: The query to rerank the elements against.
29 |             options: The options for reranking.
30 | 
31 |         Returns:
32 |             The reranked elements.
33 |         """
34 |         return [*{element.id: element for element in chain.from_iterable(elements)}.values()]
35 | 


--------------------------------------------------------------------------------
/packages/ragbits-document-search/tests/assets/img/transformers_paper_page.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/deepsense-ai/ragbits/c37428621adc5ab30528f41ccd22d655f421c2e8/packages/ragbits-document-search/tests/assets/img/transformers_paper_page.png


--------------------------------------------------------------------------------
/packages/ragbits-document-search/tests/assets/md/bar.md:
--------------------------------------------------------------------------------
1 | bar


--------------------------------------------------------------------------------
/packages/ragbits-document-search/tests/assets/md/foo.md:
--------------------------------------------------------------------------------
1 | foo


--------------------------------------------------------------------------------
/packages/ragbits-document-search/tests/assets/md/test_file.md:
--------------------------------------------------------------------------------
1 | # Ragbits
2 | 
3 | Repository for internal experiment with our upcoming LLM framework.
4 | 


--------------------------------------------------------------------------------
/packages/ragbits-document-search/tests/assets/pdf/transformers_paper_page.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/deepsense-ai/ragbits/c37428621adc5ab30528f41ccd22d655f421c2e8/packages/ragbits-document-search/tests/assets/pdf/transformers_paper_page.pdf


--------------------------------------------------------------------------------
/packages/ragbits-document-search/tests/cli/custom_cli_source.py:
--------------------------------------------------------------------------------
 1 | from collections.abc import Iterable
 2 | from pathlib import Path
 3 | from typing import ClassVar
 4 | 
 5 | from typing_extensions import Self
 6 | 
 7 | from ragbits.core.audit.traces import traceable
 8 | from ragbits.core.sources.base import Source
 9 | 
10 | 
class CustomCliSource(Source):
    """
    Custom source implementation used for CLI testing.
    """

    # Path this source points at; returned as-is by `fetch`.
    path: Path
    protocol: ClassVar[str] = "custom_cli_protocol"

    @property
    def id(self) -> str:
        """Unique identifier of the object in the custom CLI source, derived from its path."""
        return f"custom_cli_source:{self.path}"

    @traceable
    async def fetch(self) -> Path:
        """Fetch the custom CLI source; returns the stored path unchanged."""
        return self.path

    @classmethod
    async def list_sources(cls, path: str) -> Iterable[Self]:
        """List all sources from the custom CLI source: a single instance built from `path`."""
        only_source = cls(path=Path(path))
        return [only_source]

    @classmethod
    @traceable
    async def from_uri(cls, path: str) -> Iterable[Self]:
        """Build the custom CLI source from a URI path: a single instance wrapping `path`."""
        only_source = cls(path=Path(path))
        return [only_source]
39 | 


--------------------------------------------------------------------------------
/packages/ragbits-document-search/tests/integration/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/deepsense-ai/ragbits/c37428621adc5ab30528f41ccd22d655f421c2e8/packages/ragbits-document-search/tests/integration/__init__.py


--------------------------------------------------------------------------------
/packages/ragbits-document-search/tests/integration/test_docling.py:
--------------------------------------------------------------------------------
 1 | from pathlib import Path
 2 | 
 3 | import pytest
 4 | 
 5 | from ragbits.document_search.documents.document import DocumentMeta
 6 | from ragbits.document_search.ingestion.parsers.docling import DoclingDocumentParser
 7 | 
 8 | 
 9 | @pytest.mark.parametrize(
10 |     ("document_metadata", "expected_num_elements"),
11 |     [
12 |         pytest.param(
13 |             DocumentMeta.from_literal("Name of Peppa's brother is George."),
14 |             1,
15 |             id="TextDocument",
16 |         ),
17 |         pytest.param(
18 |             DocumentMeta.from_local_path(Path(__file__).parent.parent / "assets" / "md" / "test_file.md"),
19 |             1,
20 |             id="MarkdownDocument",
21 |         ),
22 |         pytest.param(
23 |             DocumentMeta.from_local_path(
24 |                 Path(__file__).parent.parent / "assets" / "img" / "transformers_paper_page.png"
25 |             ),
26 |             6,
27 |             id="ImageDocument",
28 |         ),
29 |         pytest.param(
30 |             DocumentMeta.from_local_path(
31 |                 Path(__file__).parent.parent / "assets" / "pdf" / "transformers_paper_page.pdf"
32 |             ),
33 |             7,
34 |             id="PDFDocument",
35 |         ),
36 |     ],
37 | )
38 | async def test_docling_parser(document_metadata: DocumentMeta, expected_num_elements: int) -> None:
39 |     document = await document_metadata.fetch()
40 |     parser = DoclingDocumentParser()
41 | 
42 |     elements = await parser.parse(document)
43 | 
44 |     assert len(elements) == expected_num_elements
45 | 


--------------------------------------------------------------------------------
/packages/ragbits-document-search/tests/unit/test_documents.py:
--------------------------------------------------------------------------------
 1 | import tempfile
 2 | from pathlib import Path
 3 | 
 4 | from ragbits.core.sources.local import LocalFileSource
 5 | from ragbits.document_search.documents.document import (
 6 |     DocumentMeta,
 7 |     DocumentType,
 8 |     TextDocument,
 9 | )
10 | 
11 | 
async def test_loading_local_file_source():
    """A TXT document backed by a local temporary file is fetched as a TextDocument with the file's content."""
    with tempfile.NamedTemporaryFile() as tmp_file:
        tmp_file.write(b"test")
        # seek() on the buffered file flushes the pending write to the underlying file.
        tmp_file.seek(0)

        document_meta = DocumentMeta(
            document_type=DocumentType.TXT,
            source=LocalFileSource(path=Path(tmp_file.name)),
        )

        fetched = await document_meta.fetch()

        assert isinstance(fetched, TextDocument)
        assert fetched.content == "test"
25 | 


--------------------------------------------------------------------------------
/packages/ragbits-document-search/tests/unit/test_element_enricher_router.py:
--------------------------------------------------------------------------------
 1 | import pytest
 2 | 
 3 | from ragbits.core.utils.config_handling import ObjectConstructionConfig
 4 | from ragbits.document_search.documents.element import ImageElement, TextElement
 5 | from ragbits.document_search.ingestion.enrichers.exceptions import EnricherNotFoundError
 6 | from ragbits.document_search.ingestion.enrichers.image import ImageElementEnricher
 7 | from ragbits.document_search.ingestion.enrichers.router import ElementEnricherRouter
 8 | 
 9 | 
def test_enricher_router_from_config() -> None:
    """A router built from config maps both element types to an ImageElementEnricher instance."""
    enricher_type = "ragbits.document_search.ingestion.enrichers.image:ImageElementEnricher"
    config = {
        element_name: ObjectConstructionConfig.model_validate({"type": enricher_type})
        for element_name in ("TextElement", "ImageElement")
    }

    router = ElementEnricherRouter.from_config(config)

    for element_cls in (TextElement, ImageElement):
        assert isinstance(router._enrichers[element_cls], ImageElementEnricher)
23 | 
24 | 
async def test_enricher_router_get() -> None:
    """`get` returns the very enricher instance registered for the requested element type."""
    image_enricher = ImageElementEnricher()
    router = ElementEnricherRouter({ImageElement: image_enricher})

    resolved = router.get(ImageElement)

    assert resolved is image_enricher
30 | 
31 | 
async def test_enricher_router_get_raises_when_no_enricher_found() -> None:
    """`get` raises EnricherNotFoundError for an element type that has no registered enricher."""
    router = ElementEnricherRouter()
    # Only ImageElement is registered, so the TextElement lookup below has nothing to resolve.
    router._enrichers = {ImageElement: ImageElementEnricher()}

    with pytest.raises(EnricherNotFoundError) as raised:
        router.get(TextElement)

    error = raised.value
    assert error.message == f"No enricher found for the element type {TextElement}"
    assert error.element_type == TextElement
42 | 


--------------------------------------------------------------------------------
/packages/ragbits-document-search/tests/unit/test_elements.py:
--------------------------------------------------------------------------------
 1 | from uuid import UUID
 2 | 
 3 | from pydantic import computed_field
 4 | 
 5 | from ragbits.core.vector_stores.base import VectorStoreEntry
 6 | from ragbits.document_search.documents.document import DocumentType
 7 | from ragbits.document_search.documents.element import Element
 8 | 
 9 | 
def test_resolving_element_type() -> None:
    """`Element.from_vector_db_entry` builds the Element subclass matching the entry's `element_type`."""

    class MyElement(Element):
        element_type: str = "custom_element"
        foo: str

        @computed_field  # type: ignore[prop-decorator]
        @property
        def text_representation(self) -> str:
            return self.foo * 2

    entry_metadata = {
        "element_type": "custom_element",
        "foo": "bar",
        "document_meta": {
            "document_type": "txt",
            "source": {"source_type": "local_file_source", "path": "/example/path"},
        },
    }
    db_entry = VectorStoreEntry(
        id=UUID("1c7d6b27-4ef1-537c-ad7c-676edb8bc8a8"),
        text="test content",
        metadata=entry_metadata,
    )

    element = Element.from_vector_db_entry(db_entry=db_entry, score=0.85)

    assert isinstance(element, MyElement)
    assert element.foo == "bar"
    assert element.key == "barbar"
    assert element.text_representation == "barbar"

    document_meta = element.document_meta
    assert document_meta.document_type == DocumentType.TXT
    assert document_meta.source.source_type == "local_file_source"
    assert element.score == 0.85
43 | 


--------------------------------------------------------------------------------
/packages/ragbits-document-search/tests/unit/testprojects/empty_project/pyproject.toml:
--------------------------------------------------------------------------------
1 | [project]
2 | name = "empty_project"
3 | 


--------------------------------------------------------------------------------
/packages/ragbits-document-search/tests/unit/testprojects/project_with_instance_factory/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/deepsense-ai/ragbits/c37428621adc5ab30528f41ccd22d655f421c2e8/packages/ragbits-document-search/tests/unit/testprojects/project_with_instance_factory/__init__.py


--------------------------------------------------------------------------------
/packages/ragbits-document-search/tests/unit/testprojects/project_with_instance_factory/factories.py:
--------------------------------------------------------------------------------
 1 | import asyncio
 2 | 
 3 | from ragbits.core.embeddings.dense import NoopEmbedder
 4 | from ragbits.core.vector_stores.in_memory import InMemoryVectorStore, VectorStoreOptions
 5 | from ragbits.document_search import DocumentSearch
 6 | from ragbits.document_search.documents.document import DocumentMeta
 7 | from ragbits.document_search.retrieval.rerankers.base import RerankerOptions
 8 | from ragbits.document_search.retrieval.rerankers.noop import NoopReranker
 9 | 
10 | 
def create_document_search_instance_223():
    """Build a DocumentSearch whose reranker `top_n` and vector store `k` are both 223."""
    store_options = VectorStoreOptions(k=223)
    reranker = NoopReranker(default_options=RerankerOptions(top_n=223))
    vector_store = InMemoryVectorStore(embedder=NoopEmbedder(), default_options=store_options)
    return DocumentSearch(reranker=reranker, vector_store=vector_store)
18 | 
19 | 
def create_document_search_instance_825():
    """Build a DocumentSearch whose reranker `top_n` and vector store `k` are both 825."""
    store_options = VectorStoreOptions(k=825)
    reranker = NoopReranker(default_options=RerankerOptions(top_n=825))
    vector_store = InMemoryVectorStore(embedder=NoopEmbedder(), default_options=store_options)
    return DocumentSearch(reranker=reranker, vector_store=vector_store)
27 | 
28 | 
async def _add_example_documents(document_search: DocumentSearch) -> None:
    """Ingest three literal example documents (Foo, Bar, Baz) into the given DocumentSearch."""
    example_texts = ("Foo document", "Bar document", "Baz document")
    example_documents = [DocumentMeta.from_literal(text) for text in example_texts]
    await document_search.ingest(example_documents)
36 | 
37 | 
def create_document_search_instance_with_documents():
    """Build a DocumentSearch on an in-memory store and ingest the example documents into it."""
    in_memory_store = InMemoryVectorStore(embedder=NoopEmbedder())
    search = DocumentSearch(vector_store=in_memory_store)
    # The factory is synchronous, so the async ingestion is driven to completion here.
    asyncio.run(_add_example_documents(search))
    return search
42 | 


--------------------------------------------------------------------------------
/packages/ragbits-document-search/tests/unit/testprojects/project_with_instance_factory/pyproject.toml:
--------------------------------------------------------------------------------
1 | [project]
2 | name = "instance_factory_project"
3 | 
4 | [tool.ragbits.core.component_preference_factories]
5 | document_search = "project_with_instance_factory.factories:create_document_search_instance_223"
6 | 


--------------------------------------------------------------------------------
/packages/ragbits-document-search/tests/unit/testprojects/project_with_instances_yaml/instances.yaml:
--------------------------------------------------------------------------------
 1 | reranker:
 2 |   type: NoopReranker
 3 |   config:
 4 |     default_options:
 5 |       top_n: 17
 6 | vector_store:
 7 |   type: InMemoryVectorStore
 8 |   config:
 9 |     embedder:
10 |       type: NoopEmbedder
11 |     default_options:
12 |       k: 147
13 | 


--------------------------------------------------------------------------------
/packages/ragbits-document-search/tests/unit/testprojects/project_with_instances_yaml/pyproject.toml:
--------------------------------------------------------------------------------
1 | [project]
2 | name = "project_with_instances_yaml"
3 | 
4 | [tool.ragbits.core]
5 | component_preference_config_path = "instances.yaml"
6 | 


--------------------------------------------------------------------------------
/packages/ragbits-document-search/tests/unit/testprojects/project_with_nested_yaml/instances.yaml:
--------------------------------------------------------------------------------
 1 | reranker:
 2 |   type: NoopReranker
 3 |   config:
 4 |     default_options:
 5 |       top_n: 23
 6 | vector_store:
 7 |   type: InMemoryVectorStore
 8 |   config:
 9 |     embedder:
10 |       type: NoopEmbedder
11 |     default_options:
12 |       k: 147
13 | document_search:
14 |   type: DocumentSearch
15 |   config:
16 |     reranker:
17 |       type: NoopReranker
18 |       config:
19 |         default_options:
20 |           top_n: 17
21 |     vector_store:
22 |       type: InMemoryVectorStore
23 |       config:
24 |         embedder:
25 |           type: NoopEmbedder
26 |         default_options:
27 |           k: 12
28 | 


--------------------------------------------------------------------------------
/packages/ragbits-document-search/tests/unit/testprojects/project_with_nested_yaml/pyproject.toml:
--------------------------------------------------------------------------------
1 | [project]
2 | name = "project_with_instances_yaml"
3 | 
4 | [tool.ragbits.core]
5 | component_preference_config_path = "instances.yaml"
6 | 


--------------------------------------------------------------------------------
/packages/ragbits-evaluate/README.md:
--------------------------------------------------------------------------------
 1 | # Ragbits Evaluate
 2 | 
 3 | Ragbits Evaluate is a package that contains tools for evaluating the performance of AI pipelines defined with Ragbits components. It also helps with automatically finding the best hyperparameter configurations for them.
 4 | 
 5 | ## Installation
 6 | 
 7 | To install the Ragbits Evaluate package, run:
 8 | 
 9 | ```sh
10 | pip install ragbits-evaluate
11 | ```
12 | 
13 | <!--
14 | TODO: Add a minimalistic example inspired by the Quickstart chapter on Ragbits Evaluate once it is ready.
15 | -->
16 | 
17 | ## Documentation
18 | <!--
19 | TODO:
20 | * Add link to the Quickstart chapter on Ragbits Evaluate once it is ready.
21 | * Add link to API Reference once classes from the Evaluate package are added to the API Reference.
22 | -->
23 | * [How-To Guides - Evaluate](https://ragbits.deepsense.ai/how-to/evaluate/optimize/)
24 | 


--------------------------------------------------------------------------------
/packages/ragbits-evaluate/src/ragbits/evaluate/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/deepsense-ai/ragbits/c37428621adc5ab30528f41ccd22d655f421c2e8/packages/ragbits-evaluate/src/ragbits/evaluate/__init__.py


--------------------------------------------------------------------------------
/packages/ragbits-evaluate/src/ragbits/evaluate/config.py:
--------------------------------------------------------------------------------
 1 | from ragbits.core.config import CoreConfig
 2 | from ragbits.core.utils._pyproject import get_config_instance
 3 | 
 4 | 
 5 | class EvaluateConfig(CoreConfig):
 6 |     """
 7 |     Configuration for the ragbits-evaluate package, loaded from downstream projects' pyproject.toml files.
 8 |     """
 9 | 
10 | 
11 | eval_config = get_config_instance(EvaluateConfig, subproject="evaluate")
12 | 


--------------------------------------------------------------------------------
/packages/ragbits-evaluate/src/ragbits/evaluate/dataloaders/__init__.py:
--------------------------------------------------------------------------------
1 | from ragbits.evaluate.dataloaders.base import DataLoader
2 | 
3 | __all__ = ["DataLoader"]
4 | 


--------------------------------------------------------------------------------
/packages/ragbits-evaluate/src/ragbits/evaluate/dataloaders/exceptions.py:
--------------------------------------------------------------------------------
 1 | from pathlib import Path
 2 | 
 3 | 
 4 | class DataLoaderError(Exception):
 5 |     """
 6 |     Class for all exceptions raised by the data loader.
 7 |     """
 8 | 
 9 |     def __init__(self, message: str, data_path: Path) -> None:
10 |         super().__init__(message)
11 |         self.message = message
12 |         self.data_path = data_path
13 | 
14 | 
class DataLoaderIncorrectFormatDataError(DataLoaderError):
    """
    Raised when a dataset does not have the expected format.

    Attributes:
        required_features: Names of the features the dataset is required to contain.
    """

    def __init__(self, required_features: list[str], data_path: Path) -> None:
        description = f"Dataset {data_path} is incorrectly formatted. Required features: {required_features}"
        super().__init__(message=description, data_path=data_path)
        self.required_features = required_features
26 | 


--------------------------------------------------------------------------------
/packages/ragbits-evaluate/src/ragbits/evaluate/dataset_generator/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/deepsense-ai/ragbits/c37428621adc5ab30528f41ccd22d655f421c2e8/packages/ragbits-evaluate/src/ragbits/evaluate/dataset_generator/__init__.py


--------------------------------------------------------------------------------
/packages/ragbits-evaluate/src/ragbits/evaluate/dataset_generator/prompts/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/deepsense-ai/ragbits/c37428621adc5ab30528f41ccd22d655f421c2e8/packages/ragbits-evaluate/src/ragbits/evaluate/dataset_generator/prompts/__init__.py


--------------------------------------------------------------------------------
/packages/ragbits-evaluate/src/ragbits/evaluate/dataset_generator/prompts/corpus_generation.py:
--------------------------------------------------------------------------------
 1 | from pydantic import BaseModel
 2 | 
 3 | from ragbits.core.prompt import Prompt
 4 | 
 5 | 
 6 | class BasicCorpusGenerationPromptInput(BaseModel):
 7 |     """A definition of input for corpus generation task"""
 8 | 
 9 |     query: str
10 | 
11 | 
class BasicCorpusGenerationPrompt(Prompt[BasicCorpusGenerationPromptInput]):
    """A basic prompt asking for a single short factoid about the requested topic."""

    # Adjacent string literals are concatenated verbatim, so every sentence carries its own
    # terminating period and trailing space; without them the instructions run together
    # ("...a user.Do not...", "...single sentenceFor each...").
    system_prompt: str = (
        "You are a provider of random factoids on topic requested by a user. "
        "Do not write a long essays, the response for given query should be a single sentence. "
        "For each query provide only a single fact about a given topic. "
        "Use as few tokens as possible."
    )
    user_prompt: str = "Provide factoids about {{ query }}"
22 | 


--------------------------------------------------------------------------------
/packages/ragbits-evaluate/src/ragbits/evaluate/dataset_generator/tasks/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/deepsense-ai/ragbits/c37428621adc5ab30528f41ccd22d655f421c2e8/packages/ragbits-evaluate/src/ragbits/evaluate/dataset_generator/tasks/__init__.py


--------------------------------------------------------------------------------
/packages/ragbits-evaluate/src/ragbits/evaluate/dataset_generator/tasks/filter/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/deepsense-ai/ragbits/c37428621adc5ab30528f41ccd22d655f421c2e8/packages/ragbits-evaluate/src/ragbits/evaluate/dataset_generator/tasks/filter/__init__.py


--------------------------------------------------------------------------------
/packages/ragbits-evaluate/src/ragbits/evaluate/dataset_generator/tasks/filter/base.py:
--------------------------------------------------------------------------------
 1 | from abc import ABC, abstractmethod
 2 | 
 3 | from distilabel.steps import Step, StepInput, StepOutput
 4 | 
 5 | from ..corpus_generation import CorpusGenerationStep
 6 | from ..text_generation.base import BaseDistilabelTask
 7 | 
 8 | 
 9 | class BaseFilter(Step, ABC):
10 |     """Base class for filtering the outputs of pipeline steps"""
11 | 
12 |     def __init__(self, task: BaseDistilabelTask | CorpusGenerationStep):
13 |         super().__init__()
14 |         self._task = task
15 | 
16 |     @property
17 |     def inputs(self) -> list[str]:
18 |         """
19 |         Property describing input fields for a filter
20 |         Returns:
21 |             list of input fields for a filter
22 |         """
23 |         return self._task.outputs
24 | 
25 |     @property
26 |     def outputs(self) -> list[str]:
27 |         """
28 |         Property describing output fields for a filter
29 |         Returns:
30 |             list of output fields for a filter
31 |         """
32 |         return self._task.outputs
33 | 
34 |     @abstractmethod
35 |     def process(self, *inputs: StepInput) -> "StepOutput":
36 |         """
37 |         Abstract method for filter step processing
38 |         Args:
39 |             inputs - inputs to a filter
40 |         Returns:
41 |             filtered outputs
42 |         """
43 |         pass
44 | 


--------------------------------------------------------------------------------
/packages/ragbits-evaluate/src/ragbits/evaluate/dataset_generator/tasks/filter/dont_know.py:
--------------------------------------------------------------------------------
 1 | from typing import Any
 2 | 
 3 | from distilabel.steps import StepInput, StepOutput
 4 | 
 5 | from .base import BaseFilter
 6 | 
# Phrases whose presence in an answer marks it as a "don't know" response.
# DontKnowFilter._is_dont_know lowercases both the phrase and the answer, so matching is case-insensitive.
DONT_KNOW_PHRASES: list[str] = [
    "I don't know",
    "I do not know",
    "don't know",
]
12 | 
13 | 
class DontKnowFilter(BaseFilter):
    """A class for basic rule-based filtering of don't know answers"""

    def process(self, *inputs: StepInput) -> "StepOutput":
        """
        Drops the rows recognised as "don't know" answers
        Args:
            inputs - the outputs of some generation step; only the first positional batch is read
        Returns:
            yields a single list with a copy of every row that does not contain the pre-defined phrases
        """
        kept_rows = []
        for row in inputs[0]:
            if self._is_dont_know(row):
                continue
            kept_rows.append({field: row[field] for field in row})
        yield kept_rows

    @staticmethod
    def _is_dont_know(input_: dict[str, Any]) -> bool:
        # Case-insensitive substring check of the "basic_answer" field against every known phrase.
        for phrase in DONT_KNOW_PHRASES:
            if phrase.lower() in input_["basic_answer"].lower():
                return True
        return False
35 | 


--------------------------------------------------------------------------------
/packages/ragbits-evaluate/src/ragbits/evaluate/dataset_generator/tasks/text_generation/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/deepsense-ai/ragbits/c37428621adc5ab30528f41ccd22d655f421c2e8/packages/ragbits-evaluate/src/ragbits/evaluate/dataset_generator/tasks/text_generation/__init__.py


--------------------------------------------------------------------------------
/packages/ragbits-evaluate/src/ragbits/evaluate/dataset_generator/utils.py:
--------------------------------------------------------------------------------
 1 | import json
 2 | import re
 3 | import warnings
 4 | from difflib import SequenceMatcher
 5 | from itertools import combinations
 6 | 
 7 | 
 8 | def get_closest_substring(long: str, short: str) -> str:
 9 |     """
10 |     Finds the closest substring to short string in longer one
11 |     Args:
12 |         long: str - longer string
13 |         short: str - shorter string
14 |     Returns:
15 |         closest substring of longer
16 |     """
17 |     a, b = max(
18 |         combinations(re.finditer("|".join(short.split()), long), 2),
19 |         key=lambda c: SequenceMatcher(None, long[c[0].start() : c[1].end()], short).ratio(),
20 |     )
21 |     return long[a.start() : b.end()]
22 | 
23 | 
24 | def get_passages_list(raw_passages: str) -> list[str]:
25 |     """
26 |     Formats LLM output to list of passages
27 |     Args:
28 |         raw_passages: string representing raw passages returned by llm
29 |     Returns:
30 |         list of parsed passages
31 |     """
32 |     match = re.search(r"\[(.*?)\]", raw_passages, re.DOTALL)
33 | 
34 |     if match:
35 |         passages_content = match.group(1)
36 |         try:
37 |             return json.loads("[" + passages_content + "]")
38 |         except (SyntaxError, ValueError):
39 |             warnings.warn("Unable to evaluate the passages content. Check the format.", category=UserWarning)
40 |             return []
41 |     else:
42 |         warnings.warn(message="No brackets found in the input string.", category=UserWarning)
43 |         return []
44 | 


--------------------------------------------------------------------------------
/packages/ragbits-evaluate/src/ragbits/evaluate/factories/__init__.py:
--------------------------------------------------------------------------------
 1 | import asyncio
 2 | 
 3 | from continuous_eval.metrics.retrieval.matching_strategy import RougeChunkMatch
 4 | from datasets import load_dataset
 5 | 
 6 | from ragbits.core.embeddings.dense import LiteLLMEmbedder
 7 | from ragbits.core.sources.hf import HuggingFaceSource
 8 | from ragbits.core.vector_stores.in_memory import InMemoryVectorStore
 9 | from ragbits.document_search import DocumentSearch
10 | from ragbits.document_search.documents.document import DocumentMeta
11 | from ragbits.evaluate.dataloaders.document_search import DocumentSearchDataLoader
12 | from ragbits.evaluate.metrics import MetricSet
13 | from ragbits.evaluate.metrics.document_search import DocumentSearchPrecisionRecallF1
14 | 
15 | 
16 | async def _add_example_documents(document_search: DocumentSearch) -> None:
17 |     dataset = load_dataset(path="deepsense-ai/synthetic-rag-dataset_v1.0", split="train")
18 |     documents = [DocumentMeta.from_literal(doc) for chunks in dataset["chunks"] for doc in chunks]
19 |     await document_search.ingest(documents)
20 | 
21 | 
22 | def basic_document_search_factory() -> DocumentSearch:
23 |     """
24 |     Factory for basic example document search instance.
25 |     """
26 |     document_search: DocumentSearch = DocumentSearch(vector_store=InMemoryVectorStore(embedder=LiteLLMEmbedder()))
27 |     asyncio.run(_add_example_documents(document_search))
28 |     return document_search
29 | 
30 | 
31 | def synthetic_rag_dataset() -> DocumentSearchDataLoader:
32 |     """
33 |     Factory for a data loader that reads the synthetic RAG dataset through a HuggingFaceSource.
34 |     """
35 |     return DocumentSearchDataLoader(source=HuggingFaceSource(path="deepsense-ai/synthetic-rag-dataset_v1.0"))
36 | 
37 | 
38 | def precision_recall_f1() -> MetricSet:
39 |     """
40 |     Factory of precision recall f1 metric set for retrival evaluation.
41 |     """
42 |     return MetricSet(DocumentSearchPrecisionRecallF1(matching_strategy=RougeChunkMatch()))
43 | 


--------------------------------------------------------------------------------
/packages/ragbits-evaluate/src/ragbits/evaluate/metrics/__init__.py:
--------------------------------------------------------------------------------
1 | from ragbits.evaluate.metrics.base import Metric, MetricSet
2 | 
3 | __all__ = ["Metric", "MetricSet"]
4 | 


--------------------------------------------------------------------------------
/packages/ragbits-evaluate/src/ragbits/evaluate/pipelines/__init__.py:
--------------------------------------------------------------------------------
 1 | from ragbits.core.utils.config_handling import WithConstructionConfig
 2 | from ragbits.document_search import DocumentSearch
 3 | from ragbits.evaluate.pipelines.base import EvaluationData, EvaluationPipeline, EvaluationResult
 4 | from ragbits.evaluate.pipelines.document_search import DocumentSearchPipeline
 5 | 
 6 | __all__ = ["DocumentSearchPipeline", "EvaluationData", "EvaluationPipeline", "EvaluationResult"]
 7 | 
 8 | _target_to_evaluation_pipeline: dict[type[WithConstructionConfig], type[EvaluationPipeline]] = {
 9 |     DocumentSearch: DocumentSearchPipeline,
10 | }
11 | 
12 | 
13 | def get_evaluation_pipeline_for_target(evaluation_target: WithConstructionConfig) -> EvaluationPipeline:
14 |     """
15 |     A function instantiating evaluation pipeline for given WithConstructionConfig object
16 |     Args:
17 |         evaluation_target: WithConstructionConfig object to be evaluated
18 |     Returns:
19 |         instance of evaluation pipeline
20 |     Raises:
21 |         ValueError for classes with no registered evaluation pipeline
22 |     """
23 |     for supported_type, evaluation_pipeline_type in _target_to_evaluation_pipeline.items():
24 |         if isinstance(evaluation_target, supported_type):
25 |             return evaluation_pipeline_type(evaluation_target=evaluation_target)
26 |     raise ValueError(f"Evaluation pipeline not implemented for {evaluation_target.__class__}")
27 | 


--------------------------------------------------------------------------------
/packages/ragbits-evaluate/src/ragbits/evaluate/pipelines/base.py:
--------------------------------------------------------------------------------
 1 | from abc import ABC, abstractmethod
 2 | from collections.abc import Iterable
 3 | from dataclasses import dataclass
 4 | from types import ModuleType
 5 | from typing import ClassVar, Generic, TypeVar
 6 | 
 7 | from pydantic import BaseModel
 8 | 
 9 | from ragbits.core.utils.config_handling import WithConstructionConfig
10 | from ragbits.evaluate import pipelines
11 | 
12 | EvaluationDataT = TypeVar("EvaluationDataT", bound="EvaluationData")
13 | EvaluationResultT = TypeVar("EvaluationResultT", bound="EvaluationResult")
14 | EvaluationTargetT = TypeVar("EvaluationTargetT", bound=WithConstructionConfig)
15 | 
16 | 
17 | class EvaluationData(BaseModel, ABC):
18 |     """
19 |     Base pydantic model for the data of a single evaluation; it declares no fields of its own.
20 |     """
21 | 
22 | 
23 | @dataclass
24 | class EvaluationResult(ABC):
25 |     """
26 |     Base dataclass for the result of a single evaluation; it declares no fields of its own.
27 |     """
28 | 
29 | 
30 | class EvaluationPipeline(WithConstructionConfig, Generic[EvaluationTargetT, EvaluationDataT, EvaluationResultT], ABC):
31 |     """
32 |     Abstract evaluation pipeline wrapping an evaluation target.
33 | 
34 |     Generic over the target type, the evaluation data type and the evaluation result type.
35 |     Subclasses must implement `__call__`; `prepare` is an optional hook that does nothing by default.
36 |     """
37 | 
38 |     # Class-level settings for WithConstructionConfig.
39 |     # NOTE(review): their exact semantics are defined in ragbits.core.utils.config_handling - confirm there.
40 |     default_module: ClassVar[ModuleType | None] = pipelines
41 |     configuration_key: ClassVar[str] = "pipeline"
42 | 
43 |     def __init__(self, evaluation_target: EvaluationTargetT) -> None:
44 |         """
45 |         Initialize the evaluation pipeline.
46 | 
47 |         Args:
48 |             evaluation_target: Evaluation target instance, stored as `self.evaluation_target`.
49 |         """
50 |         super().__init__()
51 |         self.evaluation_target = evaluation_target
52 | 
53 |     async def prepare(self) -> None:
54 |         """
55 |         Prepare pipeline for evaluation. Optional step; the base implementation is a no-op.
56 |         """
57 |         pass
58 | 
59 |     @abstractmethod
60 |     async def __call__(self, data: Iterable[EvaluationDataT]) -> Iterable[EvaluationResultT]:
61 |         """
62 |         Run the evaluation pipeline.
63 | 
64 |         Args:
65 |             data: Iterable of evaluation data items.
66 | 
67 |         Returns:
68 |             Iterable of evaluation results.
69 |         """
70 | 


--------------------------------------------------------------------------------
/packages/ragbits-evaluate/src/ragbits/evaluate/py.typed:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/deepsense-ai/ragbits/c37428621adc5ab30528f41ccd22d655f421c2e8/packages/ragbits-evaluate/src/ragbits/evaluate/py.typed


--------------------------------------------------------------------------------
/packages/ragbits-guardrails/README.md:
--------------------------------------------------------------------------------
 1 | # Ragbits Guardrails
 2 | 
 3 | Ragbits Guardrails is a Python package that contains utilities for ensuring the safety and relevance of responses generated by Ragbits components.
 4 | 
 5 | ## Installation
 6 | 
 7 | You can install the latest version of Ragbits Guardrails using pip:
 8 | 
 9 | ```bash
10 | pip install ragbits-guardrails
11 | ```
12 | 
13 | ## Quickstart
14 | Example of using the OpenAI Moderation Guardrail to verify a message:
15 | 
16 | ```python
17 | import asyncio
18 | from ragbits.guardrails.base import GuardrailManager, GuardrailVerificationResult
19 | from ragbits.guardrails.openai_moderation import OpenAIModerationGuardrail
20 | 
21 | 
22 | async def verify_message(message: str) -> list[GuardrailVerificationResult]:
23 |     manager = GuardrailManager([OpenAIModerationGuardrail()])
24 |     return await manager.verify(message)
25 | 
26 | 
27 | if __name__ == '__main__':
28 |     print(asyncio.run(verify_message("Test message")))
29 | ```
30 | 
31 | ## Documentation
32 | * [How-To Guides - Guardrails](https://ragbits.deepsense.ai/how-to/use_guardrails/)
33 | <!--
34 | TODO:
35 | * Add link to API Reference once classes from the Guardrails package are added to the API Reference.
36 | -->
37 | 


--------------------------------------------------------------------------------
/packages/ragbits-guardrails/pyproject.toml:
--------------------------------------------------------------------------------
 1 | [project]
 2 | name = "ragbits-guardrails"
 3 | version = "1.0.0"
 4 | description = "Guardrails module for Ragbits components"
 5 | readme = "README.md"
 6 | requires-python = ">=3.10"
 7 | license = "MIT"
 8 | authors = [
 9 |     { name = "deepsense.ai", email = "ragbits@deepsense.ai"}
10 | ]
11 | keywords = [
12 |     "Retrieval Augmented Generation",
13 |     "RAG",
14 |     "Large Language Models",
15 |     "LLMs",
16 |     "Generative AI",
17 |     "GenAI",
18 |     "Evaluation"
19 | ]
20 | classifiers = [
21 |     "Development Status :: 4 - Beta",
22 |     "Environment :: Console",
23 |     "Intended Audience :: Science/Research",
24 |     "License :: OSI Approved :: MIT License",
25 |     "Natural Language :: English",
26 |     "Operating System :: OS Independent",
27 |     "Programming Language :: Python :: 3.10",
28 |     "Programming Language :: Python :: 3.11",
29 |     "Programming Language :: Python :: 3.12",
30 |     "Programming Language :: Python :: 3.13",
31 |     "Topic :: Scientific/Engineering :: Artificial Intelligence",
32 |     "Topic :: Software Development :: Libraries :: Python Modules",
33 | ]
34 | dependencies = ["ragbits-core==1.0.0"]
35 | 
36 | [project.urls]
37 | "Homepage" = "https://github.com/deepsense-ai/ragbits"
38 | "Bug Reports" = "https://github.com/deepsense-ai/ragbits/issues"
39 | "Documentation" = "https://ragbits.deepsense.ai/"
40 | "Source" = "https://github.com/deepsense-ai/ragbits"
41 | 
42 | [project.optional-dependencies]
43 | openai = [
44 |     "openai>=1.57.3,<2.0.0",
45 | ]
46 | 
47 | [tool.uv]
48 | dev-dependencies = [
49 |     "pre-commit~=3.8.0",
50 |     "pytest~=8.3.3",
51 |     "pytest-cov~=5.0.0",
52 |     "pytest-asyncio~=0.24.0",
53 |     "pip-licenses>=4.0.0,<5.0.0"
54 | ]
55 | 
56 | [build-system]
57 | requires = ["hatchling"]
58 | build-backend = "hatchling.build"
59 | 
60 | [tool.hatch.metadata]
61 | allow-direct-references = true
62 | 
63 | [tool.hatch.build.targets.wheel]
64 | packages = ["src/ragbits"]
65 | 
66 | [tool.pytest.ini_options]
67 | asyncio_mode = "auto"
68 | 


--------------------------------------------------------------------------------
/packages/ragbits-guardrails/src/ragbits/guardrails/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/deepsense-ai/ragbits/c37428621adc5ab30528f41ccd22d655f421c2e8/packages/ragbits-guardrails/src/ragbits/guardrails/__init__.py


--------------------------------------------------------------------------------
/packages/ragbits-guardrails/src/ragbits/guardrails/base.py:
--------------------------------------------------------------------------------
 1 | from abc import ABC, abstractmethod
 2 | 
 3 | from pydantic import BaseModel
 4 | 
 5 | from ragbits.core.prompt import Prompt
 6 | 
 7 | 
 8 | class GuardrailVerificationResult(BaseModel):
 9 |     """
10 |     Class representing result of a single guardrail verification
11 |     """
12 | 
13 |     guardrail_name: str  # name of the guardrail that produced this result
14 |     succeeded: bool  # outcome flag of the verification
15 |     fail_reason: str | None  # may be None; has no default, so it must always be passed explicitly
16 | 
17 | 
18 | class Guardrail(ABC):
19 |     """
20 |     Abstract base class of a guardrail; subclasses implement the asynchronous `verify` check
21 |     """
22 | 
23 |     @abstractmethod
24 |     async def verify(self, input_to_verify: Prompt | str) -> GuardrailVerificationResult:
25 |         """
26 |         Verifies whether provided input meets certain criteria
27 | 
28 |         Args:
29 |             input_to_verify: prompt or output of the model to check
30 | 
31 |         Returns:
32 |             verification result (guardrail name, success flag and fail reason)
33 |         """
34 | 
35 | 
36 | class GuardrailManager:
37 |     """
38 |     Class responsible for running guardrails
39 |     """
40 | 
41 |     def __init__(self, guardrails: list[Guardrail]):
42 |         self._guardrails = guardrails
43 | 
44 |     async def verify(self, input_to_verify: Prompt | str) -> list[GuardrailVerificationResult]:
45 |         """
46 |         Verifies whether provided input meets certain criteria
47 | 
48 |         Args:
49 |             input_to_verify: prompt or output of the model to check
50 | 
51 |         Returns:
52 |             list of verification result
53 |         """
54 |         return [await guardrail.verify(input_to_verify) for guardrail in self._guardrails]
55 | 


--------------------------------------------------------------------------------
/packages/ragbits-guardrails/src/ragbits/guardrails/py.typed:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/deepsense-ai/ragbits/c37428621adc5ab30528f41ccd22d655f421c2e8/packages/ragbits-guardrails/src/ragbits/guardrails/py.typed


--------------------------------------------------------------------------------
/scripts/install_git_hooks.py:
--------------------------------------------------------------------------------
 1 | # /// script
 2 | # requires-python = ">=3.10"
 3 | # dependencies = [
 4 | #     "inquirer",
 5 | #     "rich",
 6 | # ]
 7 | # ///
 8 | # To run this script and install git hooks, run the following command:
 9 | #
10 | #   uv run scripts/install_git_hooks.py
11 | #
12 | from pathlib import Path
13 | 
14 | from inquirer.shortcuts import list_input
15 | from rich import print as pprint
16 | 
17 | HOOK_BODY = """
18 | #!/usr/bin/env bash
19 | 
20 | echo "🧹 Running formatting...\n"
21 | uv run ruff format --check
22 | 
23 | if [ $? -ne 0 ]
24 | then
25 |     echo "⚠ Formatting failed. Running autofix & aborting..."
26 |     uv run ruff format
27 |     exit 1
28 | fi
29 | 
30 | echo "✅ Formatting passed!"
31 | echo "\n📜 Running linting...\n"
32 | 
33 | uv run ruff check
34 | 
35 | if [ $? -ne 0 ]
36 | then
37 |     echo "⚠ Linting failed. Aborting..."
38 |     exit 1
39 | fi
40 | 
41 | echo "✅ Linting passed!"
42 | 
43 | echo "\n📚 Making sure that docs build...\n"
44 | 
45 | uv run mkdocs build --strict
46 | 
47 | if [ $? -ne 0 ]
48 | then
49 |     echo "⚠ Docs build failed. Aborting..."
50 |     exit 1
51 | fi
52 | 
53 | echo "\n🔎 Running type checking...\n"
54 | 
55 | uv run mypy .
56 | 
57 | if [ $? -ne 0 ]
58 | then
59 |     echo "⚠ Type checking failed. Aborting..."
60 |     exit 1
61 | fi
62 | 
63 | echo "✅ Type checking passed!"
64 | """
65 | 
66 | 
67 | def main() -> None:
68 |     """
69 |     Install pre-commit or pre-push git hooks.
70 |     """
71 |     hooks_dir = Path(__file__).parent.parent / ".git" / "hooks"
72 |     hooks_dir.mkdir(exist_ok=True)
73 | 
74 |     hook_type = list_input("Select a hook to install", choices=["pre-commit", "pre-push"])
75 | 
76 |     (hooks_dir / "pre-commit").unlink(missing_ok=True)
77 |     (hooks_dir / "pre-push").unlink(missing_ok=True)
78 | 
79 |     pre_commit_hook = hooks_dir / hook_type
80 |     pre_commit_hook.write_text(HOOK_BODY)
81 |     pre_commit_hook.chmod(0o755)
82 | 
83 |     pprint(f"[cyan]Git hook for [b]{hook_type}[/b] installed!")
84 | 
85 | 
86 | if __name__ == "__main__":
87 |     main()
88 | 


--------------------------------------------------------------------------------
/ui/.env.example:
--------------------------------------------------------------------------------
1 | VITE_API_URL=http://localhost:8000 # API URL for development


--------------------------------------------------------------------------------
/ui/.gitignore:
--------------------------------------------------------------------------------
 1 | # Logs
 2 | logs
 3 | *.log
 4 | npm-debug.log*
 5 | yarn-debug.log*
 6 | yarn-error.log*
 7 | pnpm-debug.log*
 8 | lerna-debug.log*
 9 | 
10 | node_modules
11 | dist
12 | dist-ssr
13 | *.local
14 | 
15 | # Editor directories and files
16 | .vscode/*
17 | !.vscode/extensions.json
18 | .idea
19 | .DS_Store
20 | *.suo
21 | *.ntvs*
22 | *.njsproj
23 | *.sln
24 | *.sw?
25 | .vite
26 | 


--------------------------------------------------------------------------------
/ui/README.md:
--------------------------------------------------------------------------------
 1 | # React + TypeScript + Vite
 2 | 
 3 | This template provides a minimal setup to get React working in Vite with HMR and some ESLint rules.
 4 | 
 5 | Currently, two official plugins are available:
 6 | 
 7 | - [@vitejs/plugin-react](https://github.com/vitejs/vite-plugin-react/blob/main/packages/plugin-react/README.md) uses [Babel](https://babeljs.io/) for Fast Refresh
 8 | - [@vitejs/plugin-react-swc](https://github.com/vitejs/vite-plugin-react-swc) uses [SWC](https://swc.rs/) for Fast Refresh
 9 | 
10 | ## Expanding the ESLint configuration
11 | 
12 | If you are developing a production application, we recommend updating the configuration to enable type aware lint rules:
13 | 
14 | - Configure the top-level `parserOptions` property like this:
15 | 
16 | ```js
17 | export default tseslint.config({
18 |   languageOptions: {
19 |     // other options...
20 |     parserOptions: {
21 |       project: ["./tsconfig.node.json", "./tsconfig.app.json"],
22 |       tsconfigRootDir: import.meta.dirname,
23 |     },
24 |   },
25 | });
26 | ```
27 | 
28 | - Replace `tseslint.configs.recommended` with `tseslint.configs.recommendedTypeChecked` or `tseslint.configs.strictTypeChecked`
29 | - Optionally add `...tseslint.configs.stylisticTypeChecked`
30 | - Install [eslint-plugin-react](https://github.com/jsx-eslint/eslint-plugin-react) and update the config:
31 | 
32 | ```js
33 | // eslint.config.js
34 | import react from "eslint-plugin-react";
35 | 
36 | export default tseslint.config({
37 |   // Set the react version
38 |   settings: { react: { version: "18.3" } },
39 |   plugins: {
40 |     // Add the react plugin
41 |     react,
42 |   },
43 |   rules: {
44 |     // other rules...
45 |     // Enable its recommended rules
46 |     ...react.configs.recommended.rules,
47 |     ...react.configs["jsx-runtime"].rules,
48 |   },
49 | });
50 | ```
51 | 


--------------------------------------------------------------------------------
/ui/assets/ragbits.svg:
--------------------------------------------------------------------------------
1 | <svg xmlns="http://www.w3.org/2000/svg">
2 |     <text y="24" font-size="24">🐰</text>
3 | </svg>


--------------------------------------------------------------------------------
/ui/eslint.config.js:
--------------------------------------------------------------------------------
 1 | import js from "@eslint/js";
 2 | import globals from "globals";
 3 | import reactHooks from "eslint-plugin-react-hooks";
 4 | import reactRefresh from "eslint-plugin-react-refresh";
 5 | import tseslint from "typescript-eslint";
 6 | 
 7 | export default tseslint.config(
 8 |   { ignores: ["dist"] },
 9 |   {
10 |     extends: [js.configs.recommended, ...tseslint.configs.recommended],
11 |     files: ["**/*.{ts,tsx}"],
12 |     languageOptions: {
13 |       ecmaVersion: 2020,
14 |       globals: globals.browser,
15 |     },
16 |     plugins: {
17 |       "react-hooks": reactHooks,
18 |       "react-refresh": reactRefresh,
19 |     },
20 |     rules: {
21 |       ...reactHooks.configs.recommended.rules,
22 |       "react-refresh/only-export-components": [
23 |         "warn",
24 |         { allowConstantExport: true },
25 |       ],
26 |     },
27 |   },
28 | );
29 | 


--------------------------------------------------------------------------------
/ui/index.html:
--------------------------------------------------------------------------------
 1 | <!doctype html>
 2 | <html lang="en" class="h-full light">
 3 |   <head>
 4 |     <meta charset="UTF-8" />
 5 |     <link rel="icon" type="image/svg+xml" href="assets/ragbits.svg" />
 6 |     <meta name="viewport" content="width=device-width, initial-scale=1.0" />
 7 |     <title>Ragbits</title>
 8 |   </head>
 9 | 
10 |   <body class="h-full">
11 |     <div id="root" class="h-full"></div>
12 |     <script type="module" src="/src/main.tsx"></script>
13 |   </body>
14 | </html>
15 | 


--------------------------------------------------------------------------------
/ui/package.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "name": "frontend",
 3 |   "private": true,
 4 |   "version": "0.0.0",
 5 |   "type": "module",
 6 |   "scripts": {
 7 |     "dev": "vite",
 8 |     "build": "tsc -b && vite build",
 9 |     "lint": "eslint .",
10 |     "preview": "vite preview",
11 |     "format": "prettier --write .",
12 |     "format:check": "prettier --check ."
13 |   },
14 |   "dependencies": {
15 |     "@heroicons/react": "^2.2.0",
16 |     "@heroui/react": "^2.6.14",
17 |     "@hookform/resolvers": "^5.0.1",
18 |     "axios": "^1.8.3",
19 |     "framer-motion": "^12.2.0",
20 |     "github-markdown-css": "^5.8.1",
21 |     "lodash": "^4.17.21",
22 |     "react": "^18.3.1",
23 |     "react-dom": "^18.3.1",
24 |     "react-hook-form": "^7.55.0",
25 |     "react-markdown": "^9.0.3",
26 |     "remark-gfm": "^4.0.0",
27 |     "uuid": "^11.1.0",
28 |     "zod": "^3.24.2"
29 |   },
30 |   "devDependencies": {
31 |     "@eslint/js": "^9.17.0",
32 |     "@iconify/react": "^5.2.0",
33 |     "@tailwindcss/typography": "^0.5.16",
34 |     "@types/react": "^18.3.18",
35 |     "@types/react-dom": "^18.3.5",
36 |     "@vitejs/plugin-react": "^4.3.4",
37 |     "autoprefixer": "^10.4.20",
38 |     "eslint": "^9.17.0",
39 |     "eslint-plugin-react-hooks": "^5.0.0",
40 |     "eslint-plugin-react-refresh": "^0.4.16",
41 |     "globals": "^15.14.0",
42 |     "postcss": "^8.5.1",
43 |     "prettier": "^3.4.2",
44 |     "prettier-plugin-tailwindcss": "^0.6.11",
45 |     "tailwindcss": "^3.4.17",
46 |     "typescript": "~5.6.2",
47 |     "typescript-eslint": "^8.18.2",
48 |     "vite": "^6.3.5"
49 |   }
50 | }
51 | 


--------------------------------------------------------------------------------
/ui/postcss.config.js:
--------------------------------------------------------------------------------
1 | export default {
2 |   plugins: {
3 |     tailwindcss: {},
4 |     autoprefixer: {},
5 |   },
6 | };
7 | 


--------------------------------------------------------------------------------
/ui/prettier.config.js:
--------------------------------------------------------------------------------
1 | export default {
2 |   plugins: ["prettier-plugin-tailwindcss"],
3 | };
4 | 


--------------------------------------------------------------------------------
/ui/src/contexts/HistoryContext/HistoryContext.ts:
--------------------------------------------------------------------------------
1 | import { createContext } from "react";
2 | import { HistoryContext as IHistoryContext } from "../../types/history";
3 | 
4 | export const HistoryContext = createContext<IHistoryContext | undefined>(
5 |   undefined,
6 | );
7 | 


--------------------------------------------------------------------------------
/ui/src/contexts/HistoryContext/useHistoryContext.ts:
--------------------------------------------------------------------------------
 1 | import { useContext } from "react";
 2 | import { HistoryContext } from "./HistoryContext";
 3 | 
 4 | /**
 5 |  * Returns the value provided by the nearest HistoryContext provider.
 6 |  *
 7 |  * @throws Error when no provider value is available (the context is undefined).
 8 |  */
 9 | export const useHistoryContext = () => {
10 |   const context = useContext(HistoryContext);
11 |   if (!context) {
12 |     throw new Error(
13 |       "useHistoryContext must be used within a HistoryContext provider",
14 |     );
15 |   }
16 |   return context;
17 | };
18 | 


--------------------------------------------------------------------------------
/ui/src/contexts/ThemeContext/ThemeContext.ts:
--------------------------------------------------------------------------------
 1 | import { createContext } from "react";
 2 | 
 3 | export enum Theme {
 4 |   LIGHT = "light",
 5 |   DARK = "dark",
 6 | }
 7 | 
 8 | export interface IThemeContext {
 9 |   theme: Theme;
10 |   setTheme: (theme: Theme) => void;
11 | }
12 | 
13 | export const ThemeContext = createContext<IThemeContext | null>(null);
14 | 


--------------------------------------------------------------------------------
/ui/src/contexts/ThemeContext/ThemeContextProvider.tsx:
--------------------------------------------------------------------------------
 1 | import {
 2 |   FC,
 3 |   ReactNode,
 4 |   useCallback,
 5 |   useMemo,
 6 |   useSyncExternalStore,
 7 | } from "react";
 8 | import { ThemeContext, Theme } from "./ThemeContext";
 9 | 
10 | function getPreferredTheme() {
11 |   // Follow the colour-scheme preference reported by the media query.
12 |   const prefersDark = window.matchMedia("(prefers-color-scheme: dark)");
13 |   return prefersDark.matches ? Theme.DARK : Theme.LIGHT;
14 | }
15 | 
16 | function getSnapshot() {
17 |   const stored = window.localStorage.getItem("theme");
18 |   if (stored !== Theme.DARK && stored !== Theme.LIGHT) {
19 |     return getPreferredTheme(); // nothing valid persisted under "theme"
20 |   }
21 |   return stored;
22 | }
23 | 
24 | function subscribe(callback: () => void) {
25 |   window.addEventListener("storage", callback); // setTheme dispatches it too
26 |   return () => window.removeEventListener("storage", callback);
27 | }
28 | 
29 | export const ThemeContextProvider: FC<{ children: ReactNode }> = (props) => {
30 |   // The theme is read from the external store built on getSnapshot/subscribe.
31 |   const currentTheme = useSyncExternalStore(
32 |     subscribe,
33 |     getSnapshot,
34 |     getSnapshot,
35 |   );
36 | 
37 |   const setTheme = useCallback((nextTheme: Theme) => {
38 |     window.localStorage.setItem("theme", nextTheme);
39 |     // Dispatch a "storage" event by hand so this window's subscribers
40 |     // re-read the snapshot right after the write.
41 |     window.dispatchEvent(new Event("storage"));
42 |   }, []);
43 | 
44 |   const contextValue = useMemo(() => {
45 |     return { theme: currentTheme as Theme, setTheme };
46 |   }, [currentTheme, setTheme]);
47 | 
48 |   return (
49 |     <ThemeContext.Provider value={contextValue}>
50 |       {props.children}
51 |     </ThemeContext.Provider>
52 |   );
53 | };
54 | 


--------------------------------------------------------------------------------
/ui/src/contexts/ThemeContext/useThemeContext.ts:
--------------------------------------------------------------------------------
 1 | import { useContext } from "react";
 2 | import { ThemeContext, IThemeContext } from "./ThemeContext";
 3 | 
 4 | /**
 5 |  * Reads the theme context.
 6 |  *
 7 |  * @throws Error when the context value is missing, i.e. the hook is used
 8 |  *   outside of a ThemeContextProvider.
 9 |  */
10 | export function useThemeContext(): IThemeContext {
11 |   const themeContext = useContext(ThemeContext);
12 |   if (themeContext) {
13 |     return themeContext;
14 |   }
15 |   throw new Error(
16 |     "useThemeContext must be used within a ThemeContextProvider",
17 |   );
18 | }
19 | 


--------------------------------------------------------------------------------
/ui/src/core/components/DelayedTooltip.tsx:
--------------------------------------------------------------------------------
1 | import { Tooltip, TooltipProps } from "@heroui/react";
2 | 
3 | // Tooltip preset: delay={300} and closeDelay={0} by default; props are spread
4 | // last, so values passed by the caller override both.
5 | const DelayedTooltip = (props: TooltipProps) => (
6 |   <Tooltip delay={300} closeDelay={0} {...props} />
7 | );
8 | 
9 | export default DelayedTooltip;
10 | 


--------------------------------------------------------------------------------
/ui/src/core/components/PromptInput/PromptInputText.tsx:
--------------------------------------------------------------------------------
 1 | import type { TextAreaProps } from "@heroui/react";
 2 | import { forwardRef } from "react";
 3 | import { Textarea } from "@heroui/react";
 4 | 
 5 | /**
 6 |  * Textarea used for the prompt input; the ref is forwarded to the Textarea.
 7 |  * Remaining props are spread last, so a caller-supplied `className` replaces
 8 |  * the default "min-h-[50px]".
 9 |  */
10 | const PromptInputText = forwardRef<HTMLTextAreaElement, TextAreaProps>(
11 |   (props, ref) => {
12 |     const { classNames = {}, ...textareaProps } = props;
13 | 
14 |     return (
15 |       <Textarea
16 |         ref={ref}
17 |         className="min-h-[50px]"
18 |         classNames={classNames}
19 |         {...textareaProps}
20 |       />
21 |     );
22 |   },
23 | );
24 | 
25 | export default PromptInputText;
26 | 


--------------------------------------------------------------------------------
/ui/src/core/utils/api.ts:
--------------------------------------------------------------------------------
 1 | import {
 2 |   ChatResponse,
 3 |   ChatResponseType,
 4 |   Message,
 5 |   MessageRole,
 6 | } from "../../types/api";
 7 | import { HistoryContext } from "../../types/history";
 8 | 
 9 | export const buildApiUrl = (path: string) => {
10 |   // Development builds target VITE_API_URL (or the local default); other
11 |   // builds use an empty base, so the returned URL is just `path`.
12 |   const devUrl = import.meta.env.VITE_API_URL ?? "http://127.0.0.1:8000";
13 |   const baseUrl = import.meta.env.DEV ? devUrl : "";
14 | 
15 |   // Ensure that baseUrl doesn't end with a slash (a single one is removed)
16 |   const trimmedBase = baseUrl.endsWith("/") ? baseUrl.slice(0, -1) : baseUrl;
17 | 
18 |   return `${trimmedBase}${path}`;
19 | };
20 | 
21 | export function mapHistoryToMessages(
22 |   history: HistoryContext["history"],
23 | ): Message[] {
24 |   const messages: Message[] = [];
25 |   for (const entry of history) {
26 |     // Note: Exclude system messages as they are relevant only in the UI
27 |     if (entry.role === MessageRole.SYSTEM) {
28 |       continue;
29 |     }
30 |     messages.push({ role: entry.role, content: entry.content });
31 |   }
32 |   return messages;
33 | }
34 | 
35 | /**
36 |  * Type guard: true when `data` is a non-null object with a `type` that is one
37 |  * of the ChatResponseType values and a `content` of type object or string.
38 |  */
39 | export function isChatResponse(data: unknown): data is ChatResponse {
40 |   if (typeof data !== "object" || data === null) {
41 |     return false;
42 |   }
43 |   if (!("type" in data) || !("content" in data)) {
44 |     return false;
45 |   }
46 | 
47 |   const knownTypes = Object.values(ChatResponseType);
48 |   if (!knownTypes.includes(data.type as ChatResponseType)) {
49 |     return false;
50 |   }
51 | 
52 |   const contentKind = typeof data.content;
53 |   return contentKind === "object" || contentKind === "string";
54 | }
55 | 


--------------------------------------------------------------------------------
/ui/src/core/utils/plugins/PluginManager.ts:
--------------------------------------------------------------------------------
 1 | import { Plugin } from "../../../types/plugins";
 2 | 
 3 | type PluginState = Record<string, { isActivated: boolean; config: Plugin }>;
 4 | 
 5 | /**
 6 |  * In-memory registry of plugins. Each entry keeps the plugin config and an
 7 |  * activation flag; listeners are called after every register, and after each
 8 |  * activate/deactivate that actually changes the flag.
 9 |  */
10 | class PluginManager {
11 |   private plugins: PluginState = {};
12 |   private listeners: Set<() => void> = new Set();
13 | 
14 |   /** Stores the plugin under its name (replacing any entry), deactivated. */
15 |   register(plugin: Plugin) {
16 |     const entry = { isActivated: false, config: plugin };
17 |     this.plugins[plugin.name] = entry;
18 |     this.notify();
19 |   }
20 | 
21 |   /** Activates a registered, inactive plugin and calls its onActivate. */
22 |   activate(name: string) {
23 |     const entry = this.plugins[name];
24 |     if (entry && !entry.isActivated) {
25 |       entry.isActivated = true;
26 |       entry.config.onActivate?.();
27 |       this.notify();
28 |     }
29 |   }
30 | 
31 |   /** Deactivates a registered, active plugin and calls its onDeactivate. */
32 |   deactivate(name: string) {
33 |     const entry = this.plugins[name];
34 |     if (entry && entry.isActivated) {
35 |       entry.isActivated = false;
36 |       entry.config.onDeactivate?.();
37 |       this.notify();
38 |     }
39 |   }
40 | 
41 |   /** True only for a registered plugin whose activation flag is set. */
42 |   isPluginActivated(name: string): boolean {
43 |     const entry = this.plugins[name];
44 |     return entry ? entry.isActivated : false;
45 |   }
46 | 
47 |   /** Returns the plugin config, or null when it is unknown or inactive. */
48 |   getPlugin(name: string): Plugin | null {
49 |     const entry = this.plugins[name];
50 |     return entry && entry.isActivated ? entry.config : null;
51 |   }
52 | 
53 |   /** Adds a change listener and returns a function that removes it. */
54 |   subscribe(listener: () => void) {
55 |     this.listeners.add(listener);
56 |     return () => this.listeners.delete(listener);
57 |   }
58 | 
59 |   private notify() {
60 |     for (const listener of this.listeners) {
61 |       listener();
62 |     }
63 |   }
64 | }
65 | 
66 | export const pluginManager = new PluginManager();
67 | 


--------------------------------------------------------------------------------
/ui/src/core/utils/plugins/PluginWrapper.tsx:
--------------------------------------------------------------------------------
 1 | import { Component, ReactNode, Suspense } from "react";
 2 | import { Plugin } from "../../../types/plugins";
 3 | import { Skeleton } from "@heroui/react";
 4 | import { usePluginManager } from "./usePluginManager";
 5 | import { PropsOf } from "../../../types/utility";
 6 | 
 7 | /** Props accepted by `PluginWrapper`. */
 8 | interface PluginWrapperProps<
 9 |   T extends Plugin,
10 |   C extends keyof T["components"],
11 | > {
12 |   /** Plugin definition; its `name` is used to look up the activation state. */
13 |   plugin: T;
14 |   /** Key of the entry in the plugin's `components` map to render. */
15 |   component: C;
16 |   /** Props forwarded to the rendered plugin component. */
17 |   componentProps: PropsOf<T["components"][C]>;
18 |   /** Inline width/height applied to the loading skeleton. */
19 |   skeletonSize?: { width: string; height: string };
20 |   /** When true, nothing is shown while the component is loading. */
21 |   disableSkeleton?: boolean;
22 | }
23 | 
24 | /**
25 |  * Error boundary around a plugin component. Errors thrown while the plugin
26 |  * renders (including a failed lazy import) are logged and the plugin renders
27 |  * nothing, so a broken plugin cannot take down the surrounding UI. A
28 |  * try/catch around JSX cannot do this because the component is rendered by
29 |  * React after the enclosing function has already returned.
30 |  */
31 | class PluginErrorBoundary extends Component<
32 |   { children: ReactNode },
33 |   { hasError: boolean }
34 | > {
35 |   state = { hasError: false };
36 | 
37 |   static getDerivedStateFromError() {
38 |     return { hasError: true };
39 |   }
40 | 
41 |   componentDidCatch(error: Error) {
42 |     console.error(error);
43 |   }
44 | 
45 |   render() {
46 |     return this.state.hasError ? null : this.props.children;
47 |   }
48 | }
49 | 
50 | /**
51 |  * Renders a single component of a plugin while that plugin is activated in
52 |  * the plugin manager; renders nothing when it is not activated or when the
53 |  * requested component key does not exist. The component is wrapped in
54 |  * `Suspense` (skeleton fallback unless `disableSkeleton` is set) and in an
55 |  * error boundary.
56 |  */
57 | const PluginWrapper = <T extends Plugin, C extends keyof T["components"]>({
58 |   plugin,
59 |   component,
60 |   skeletonSize,
61 |   disableSkeleton,
62 |   componentProps,
63 | }: PluginWrapperProps<T, C>) => {
64 |   const managedPlugin = usePluginManager(plugin.name);
65 | 
66 |   if (!managedPlugin) {
67 |     return null;
68 |   }
69 | 
70 |   const PluginComponent = managedPlugin.components[component as string];
71 |   const skeletonStyle = skeletonSize
72 |     ? { width: skeletonSize.width, height: skeletonSize.height }
73 |     : {};
74 |   const fallback = disableSkeleton ? null : (
75 |     <Skeleton className="rounded-lg" style={skeletonStyle} />
76 |   );
77 | 
78 |   return (
79 |     <PluginErrorBoundary>
80 |       <Suspense fallback={fallback}>
81 |         {PluginComponent ? (
82 |           <PluginComponent {...(componentProps || {})} />
83 |         ) : null}
84 |       </Suspense>
85 |     </PluginErrorBoundary>
86 |   );
87 | };
88 | 
89 | export default PluginWrapper;
90 | 


--------------------------------------------------------------------------------
/ui/src/core/utils/plugins/usePluginManager.ts:
--------------------------------------------------------------------------------
 1 | import { useSyncExternalStore } from "react";
 2 | import { pluginManager } from "./PluginManager";
 3 | 
 4 | /**
 5 |  * Store subscription passed to `useSyncExternalStore`. It lives at module
 6 |  * scope so its identity is stable; React re-subscribes whenever it receives a
 7 |  * different `subscribe` function between renders.
 8 |  */
 9 | const subscribe = (callback: () => void) => pluginManager.subscribe(callback);
10 | 
11 | /**
12 |  * Returns the plugin registered under `pluginName` while it is activated, or
13 |  * null otherwise, and re-renders the calling component when that value
14 |  * changes after the plugin manager notifies its subscribers.
15 |  */
16 | export const usePluginManager = (pluginName: string) => {
17 |   const getSnapshot = () => pluginManager.getPlugin(pluginName);
18 | 
19 |   return useSyncExternalStore(subscribe, getSnapshot);
20 | };
21 | 


--------------------------------------------------------------------------------
/ui/src/core/utils/plugins/utils.ts:
--------------------------------------------------------------------------------
 1 | import { FunctionComponent, LazyExoticComponent } from "react";
 2 | import { Plugin } from "../../../types/plugins";
 3 | 
 4 | /**
 5 |  * Identity helper for declaring a plugin: returns its argument unchanged.
 6 |  * The generic parameter lets TypeScript infer and keep the concrete shape of
 7 |  * the plugin's `components` map.
 8 |  */
 9 | export function createPlugin<
10 |   // eslint-disable-next-line @typescript-eslint/no-explicit-any
11 |   T extends Record<string, LazyExoticComponent<FunctionComponent<any>>>,
12 | >(plugin: Plugin<T>): Plugin<T> {
13 |   return plugin;
14 | }
15 | 


--------------------------------------------------------------------------------
/ui/src/core/utils/types.ts:
--------------------------------------------------------------------------------
1 | /**
2 |  * Exhaustiveness helper: typed to accept only `never`, so reaching it at
3 |  * runtime means a value was not handled; always throws.
4 |  */
5 | export function exhaustiveGuard(_value: never): never {
6 |   const serialized = JSON.stringify(_value);
7 |   throw new Error(`Unhandled value: ${serialized}`);
8 | }
9 | 


--------------------------------------------------------------------------------
/ui/src/globals.css:
--------------------------------------------------------------------------------
1 | @tailwind base;
2 | @tailwind components;
3 | @tailwind utilities;
4 | 
5 | .markdown-container pre {
6 |   overflow: auto; /* scroll when the content exceeds the box */
7 |   max-width: 100%; /* never grow wider than the containing block */
8 | }
9 | 


--------------------------------------------------------------------------------
/ui/src/main.tsx:
--------------------------------------------------------------------------------
 1 | import { StrictMode } from "react";
 2 | import { createRoot } from "react-dom/client";
 3 | import { HeroUIProvider } from "@heroui/react";
 4 | import App from "./App";
 5 | import "./globals.css";
 6 | import { HistoryProvider } from "./contexts/HistoryContext/HistoryContextProvider";
 7 | import { ThemeContextProvider } from "./contexts/ThemeContext/ThemeContextProvider";
 8 | import { loadIcons } from "@iconify/react";
 9 | import { pluginManager } from "./core/utils/plugins/PluginManager.ts";
10 | import { FeedbackFormPlugin } from "./plugins/FeedbackFormPlugin";
11 | 
12 | // Register plugins with the shared plugin manager before the app renders.
13 | pluginManager.register(FeedbackFormPlugin);
14 | 
15 | // Preload the icons listed below via @iconify/react.
16 | loadIcons([
17 |   "heroicons:check",
18 |   "heroicons:clipboard",
19 |   "heroicons:arrow-path",
20 |   "heroicons:sun",
21 |   "heroicons:moon",
22 |   "heroicons:arrow-down",
23 |   "heroicons:arrow-up",
24 | ]);
25 | 
26 | createRoot(document.getElementById("root")!).render(
27 |   <StrictMode>
28 |     <HeroUIProvider>
29 |       <HistoryProvider>
30 |         <ThemeContextProvider>
31 |           <App />
32 |         </ThemeContextProvider>
33 |       </HistoryProvider>
34 |     </HeroUIProvider>
35 |   </StrictMode>,
36 | );
37 | 


--------------------------------------------------------------------------------
/ui/src/plugins/ExamplePlugin/ExamplePluginComponent.tsx:
--------------------------------------------------------------------------------
 1 | /** Static placeholder markup: a heading and a short paragraph. */
 2 | const ExamplePluginComponent = () => (
 3 |   <div>
 4 |     <h1>Example Plugin</h1>
 5 |     <p>This is an example plugin.</p>
 6 |   </div>
 7 | );
 8 | 
 9 | export default ExamplePluginComponent;
10 | 


--------------------------------------------------------------------------------
/ui/src/plugins/ExamplePlugin/index.tsx:
--------------------------------------------------------------------------------
 1 | import { lazy } from "react";
 2 | import { createPlugin } from "../../core/utils/plugins/utils";
 3 | 
 4 | /** Value used as the `name` of the example plugin. */
 5 | export const ExamplePluginName = "ExamplePlugin";
 6 | /**
 7 |  * Example plugin definition: exposes one lazily imported component and logs
 8 |  * to the console from its activation and deactivation hooks.
 9 |  */
10 | export const ExamplePlugin = createPlugin({
11 |   name: ExamplePluginName,
12 |   components: {
13 |     ExampleComponent: lazy(() => import("./ExamplePluginComponent")),
14 |   },
15 |   onActivate: () => {
16 |     console.log("ExamplePlugin activated");
17 |   },
18 |   onDeactivate: () => {
19 |     console.log("ExamplePlugin deactivated");
20 |   },
21 | });
22 | 


--------------------------------------------------------------------------------
/ui/src/plugins/FeedbackFormPlugin/index.tsx:
--------------------------------------------------------------------------------
 1 | import { lazy } from "react";
 2 | import { createPlugin } from "../../core/utils/plugins/utils";
 3 | 
 4 | /** Value used as the `name` of the feedback form plugin. */
 5 | export const FeedbackFormPluginName = "FeedbackFormPlugin";
 6 | /**
 7 |  * Feedback form plugin definition: exposes two lazily imported components and
 8 |  * logs to the console from its activation and deactivation hooks.
 9 |  */
10 | export const FeedbackFormPlugin = createPlugin({
11 |   name: FeedbackFormPluginName,
12 |   components: {
13 |     FeedbackFormComponent: lazy(() => import("./FeedbackFormPluginComponent")),
14 |     // NOTE(review): this exposes the example plugin's component from the
15 |     // feedback plugin; confirm it is intentional and not a leftover.
16 |     ExampleComponent: lazy(
17 |       () => import("../ExamplePlugin/ExamplePluginComponent"),
18 |     ),
19 |   },
20 |   onActivate: () => {
21 |     console.log("FeedbackFormPlugin activated");
22 |   },
23 |   onDeactivate: () => {
24 |     console.log("FeedbackFormPlugin deactivated");
25 |   },
26 | });
27 | 


--------------------------------------------------------------------------------
/ui/src/plugins/FeedbackFormPlugin/types.ts:
--------------------------------------------------------------------------------
 1 | import { z } from "zod";
 2 | import { FormSchemaResponse } from "../../types/api.ts";
 3 | 
 4 | /**
 5 |  * Builds a zod validation schema from a `FormSchemaResponse`.
 6 |  *
 7 |  * Without a form schema, a record accepting any values is returned. Otherwise
 8 |  * every field becomes a string entry keyed by `field.name`:
 9 |  * - "select" fields must match one of `field.options`; when not required, an
10 |  *   absent or empty value is accepted as well,
11 |  * - all other field types must be non-empty when required and are optional
12 |  *   otherwise.
13 |  */
14 | export const generateZodSchema = (formSchema: FormSchemaResponse | null) => {
15 |   if (!formSchema) {
16 |     return z.record(z.any());
17 |   }
18 | 
19 |   const shape: Record<string, z.ZodTypeAny> = {};
20 | 
21 |   for (const field of formSchema.fields) {
22 |     if (field.type === "select") {
23 |       const invalidOption = {
24 |         message: `${field.label} must be a valid option`,
25 |       };
26 |       const isListed = (val: string) =>
27 |         field.options?.some((opt) => opt === val);
28 | 
29 |       shape[field.name] = field.required
30 |         ? z.string().refine(isListed, invalidOption)
31 |         : z
32 |             .string()
33 |             .optional()
34 |             .refine((val) => !val || isListed(val), invalidOption);
35 |       continue;
36 |     }
37 | 
38 |     shape[field.name] = field.required
39 |       ? z.string().min(1, `${field.label} is required`)
40 |       : z.string().optional();
41 |   }
42 | 
43 |   return z.object(shape);
44 | };
45 | 


--------------------------------------------------------------------------------
/ui/src/types/history.ts:
--------------------------------------------------------------------------------
 1 | import { ChatResponse, MessageRole, Reference } from "./api";
 2 | 
 3 | /**
 4 |  * Chat messages held in a Map with string keys.
 5 |  * NOTE(review): the key is presumably the message `id`; confirm in the provider.
 6 |  */
 7 | export type HistoryState = Map<string, ChatMessage>;
 8 | 
 9 | /** A parameterless callback, or null when there is none. */
10 | export type UnsubscribeFn = (() => void) | null;
11 | 
12 | /** A single message in the chat history. */
13 | export interface ChatMessage {
14 |   id: string;
15 |   /**
16 |    * Bot messages would have this set to the server ID (sent in the first event, with type of `message_id`)
17 |    */
18 |   serverId?: string;
19 |   role: MessageRole;
20 |   content: string;
21 |   references?: Reference[];
22 | }
23 | 
24 | /** Shape of the value exposed by the history context. */
25 | export interface HistoryContext {
26 |   history: ChatMessage[];
27 |   isLoading: boolean;
28 |   /**
29 |    * Sends a message to the chat window with animations and delayed rendering.
30 |    */
31 |   sendMessage: (text?: string) => void;
32 |   /**
33 |    * Primitive used for adding a message to the history and getting its ID.
34 |    */
35 |   addMessage: (message: Omit<ChatMessage, "id">) => string;
36 |   /**
37 |    * Primitive used for updating a message in the history based on the passed response.
38 |    */
39 |   handleResponse: (chatResponse: ChatResponse, messageId: string) => void;
40 |   clearHistory: () => void;
41 |   stopAnswering: () => void;
42 | }
43 | 


--------------------------------------------------------------------------------
/ui/src/types/plugins.ts:
--------------------------------------------------------------------------------
 1 | import { FunctionComponent, LazyExoticComponent } from "react";
 2 | 
 3 | /**
 4 |  * Definition of a UI plugin.
 5 |  *
 6 |  * `T` is the map from component names to the lazily loaded function
 7 |  * components the plugin exposes.
 8 |  */
 9 | export interface Plugin<
10 |   T extends Record<
11 |     string,
12 |     // eslint-disable-next-line @typescript-eslint/no-explicit-any
13 |     LazyExoticComponent<FunctionComponent<any>>
14 |     // eslint-disable-next-line @typescript-eslint/no-explicit-any
15 |   > = Record<string, LazyExoticComponent<FunctionComponent<any>>>,
16 | > {
17 |   /** Key under which `PluginManager` stores and looks up the plugin. */
18 |   name: string;
19 |   /** Optional hook that `PluginManager.activate` calls on activation. */
20 |   onActivate?: () => void;
21 |   /** Optional hook that `PluginManager.deactivate` calls on deactivation. */
22 |   onDeactivate?: () => void;
23 |   /** Components exposed by the plugin, keyed by name. */
24 |   components: T;
25 | }
26 | 


--------------------------------------------------------------------------------
/ui/src/types/utility.ts:
--------------------------------------------------------------------------------
 1 | import { FunctionComponent } from "react";
 2 | 
 3 | /**
 4 |  * Props type of a function component `T`. Resolves to `undefined` when `T` is
 5 |  * not a function component or when its props type has no keys.
 6 |  */
 7 | export type PropsOf<T> =
 8 |   T extends FunctionComponent<infer P>
 9 |     ? NoProps<P> extends true
10 |       ? undefined
11 |       : P
12 |     : undefined;
13 | /** `true` when `T` has no keys (`keyof T` is `never`), otherwise `false`. */
14 | export type NoProps<T> = keyof T extends never ? true : false;
15 | 


--------------------------------------------------------------------------------
/ui/src/vite-env.d.ts:
--------------------------------------------------------------------------------
1 | /// <reference types="vite/client" />
2 | 


--------------------------------------------------------------------------------
/ui/tailwind.config.js:
--------------------------------------------------------------------------------
 1 | import { heroui } from "@heroui/react";
 2 | 
 3 | /** @type {import('tailwindcss').Config} */
 4 | export default {
 5 |   content: [
 6 |     "./index.html",
 7 |     "./src/**/*.{js,ts,jsx,tsx}",
 8 |     "./node_modules/@heroui/theme/dist/**/*.{js,ts,jsx,tsx}",
 9 |   ],
10 |   theme: {
11 |     extend: {
12 |       backgroundColor: {
13 |         dark: "#1A1A1A",
14 |         light: "#FFFFFF",
15 |       },
16 |       keyframes: {
17 |         "pop-in": {
18 |           "0%": { transform: "scale(0.8)", opacity: 0 },
19 |           "100%": { transform: "scale(1)", opacity: 1 },
20 |         },
21 |       },
22 |       animation: {
23 |         "pop-in": "pop-in 0.2s ease-out forwards",
24 |       },
25 |     },
26 |   },
27 |   plugins: [
28 |     heroui({
29 |       themes: {
30 |         light: {
31 |           colors: {
32 |             background: "#FFFFFF",
33 |             foreground: "#1A1A1A",
34 |             primary: {
35 |               DEFAULT: "#1C54FF",
36 |               foreground: "#FFFFFF",
37 |             },
38 |           },
39 |         },
40 |       },
41 |     }),
42 |     require("@tailwindcss/typography"),
43 |   ],
44 | };
45 | 


--------------------------------------------------------------------------------
/ui/tsconfig.app.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "compilerOptions": {
 3 |     "tsBuildInfoFile": "./node_modules/.tmp/tsconfig.app.tsbuildinfo",
 4 |     "target": "ES2020",
 5 |     "useDefineForClassFields": true,
 6 |     "lib": ["ES2020", "DOM", "DOM.Iterable"],
 7 |     "module": "ESNext",
 8 |     "skipLibCheck": true,
 9 | 
10 |     "moduleResolution": "bundler",
11 |     "allowImportingTsExtensions": true,
12 |     "isolatedModules": true,
13 |     "moduleDetection": "force",
14 |     "noEmit": true,
15 |     "jsx": "react-jsx",
16 | 
17 |     "strict": true,
18 |     "noUnusedLocals": true,
19 |     "noUnusedParameters": true,
20 |     "noFallthroughCasesInSwitch": true,
21 |     "noUncheckedSideEffectImports": true
22 |   },
23 |   "include": ["src"]
24 | }
25 | 


--------------------------------------------------------------------------------
/ui/tsconfig.json:
--------------------------------------------------------------------------------
1 | {
2 |   "files": [],
3 |   "references": [
4 |     { "path": "./tsconfig.app.json" },
5 |     { "path": "./tsconfig.node.json" }
6 |   ]
7 | }
8 | 


--------------------------------------------------------------------------------
/ui/tsconfig.node.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "compilerOptions": {
 3 |     "tsBuildInfoFile": "./node_modules/.tmp/tsconfig.node.tsbuildinfo",
 4 |     "target": "ES2022",
 5 |     "lib": ["ES2023"],
 6 |     "module": "ESNext",
 7 |     "skipLibCheck": true,
 8 | 
 9 |     "moduleResolution": "bundler",
10 |     "allowImportingTsExtensions": true,
11 |     "isolatedModules": true,
12 |     "moduleDetection": "force",
13 |     "noEmit": true,
14 | 
15 |     "strict": true,
16 |     "noUnusedLocals": true,
17 |     "noUnusedParameters": true,
18 |     "noFallthroughCasesInSwitch": true,
19 |     "noUncheckedSideEffectImports": true
20 |   },
21 |   "include": ["vite.config.ts"]
22 | }
23 | 


--------------------------------------------------------------------------------
/ui/vite.config.ts:
--------------------------------------------------------------------------------
 1 | import { defineConfig } from "vite";
 2 | import react from "@vitejs/plugin-react";
 3 | 
 4 | // https://vite.dev/config/
 5 | export default defineConfig({
 6 |   plugins: [react()],
 7 |   build: {
 8 |     // Write the production bundle into the ragbits-chat Python package tree.
 9 |     outDir: "../packages/ragbits-chat/src/ragbits/chat/ui-build",
10 |     // The output directory lies outside this config's own directory, so
11 |     // emptying it before each build is enabled explicitly.
12 |     emptyOutDir: true,
13 |   },
14 | });
15 | 


--------------------------------------------------------------------------------