├── .env.example ├── .github ├── ISSUE_TEMPLATE │ ├── bug_report.md │ └── feature_request.md ├── composites │ ├── get_jfrog_access_token.sh │ ├── get_jfrog_access_token_subject.sh │ └── python-setup │ │ └── action.yml ├── dependabot.yml ├── pull_request_template.md └── workflows │ ├── artifactory.yml │ ├── document-index-execution.yml │ ├── document-index-tests.yml │ ├── sdk-tests.yml │ └── test-execution.yml ├── .gitignore ├── .pre-commit-config.yaml ├── .readthedocs.yaml ├── CHANGELOG.md ├── Concepts.md ├── LICENSE.md ├── README.md ├── RELEASE.md ├── assets ├── AbsoluteEvaluation.drawio.svg ├── RecursiveSummary.drawio.svg ├── RelativeEvaluation.drawio.svg ├── TraceViewer.png ├── Tracer.drawio.svg ├── Tracing.drawio.svg ├── argilla_interface.png ├── argilla_splits.png └── fork.png ├── docker-compose.yaml ├── docs ├── Makefile ├── conf.py ├── index.rst ├── intelligence_layer.connectors.rst ├── intelligence_layer.core.rst ├── intelligence_layer.evaluation.rst ├── intelligence_layer.examples.rst └── make.bat ├── mypy.ini ├── poetry.lock ├── pyproject.toml ├── scripts ├── all.sh ├── clean_hf.py ├── doctest.sh ├── fastapi_example_test.sh ├── lint.sh ├── notebook_runner.sh ├── notebook_runner_document_index.sh └── test.sh ├── src ├── documentation │ ├── attention_manipulation_with_text_controls.ipynb │ ├── classification.ipynb │ ├── data │ │ ├── classify_examples.json │ │ └── classify_examples_multilabel.json │ ├── document_index.ipynb │ ├── elo_qa_eval.ipynb │ ├── evaluate_with_studio.ipynb │ ├── evaluation.ipynb │ ├── fastapi_example.py │ ├── fastapi_tutorial.ipynb │ ├── how_tos │ │ ├── __init__.py │ │ ├── example_data.py │ │ ├── how_to_aggregate_evaluations.ipynb │ │ ├── how_to_create_a_dataset.ipynb │ │ ├── how_to_define_a_task.ipynb │ │ ├── how_to_evaluate_runs.ipynb │ │ ├── how_to_human_evaluation_via_argilla.ipynb │ │ ├── how_to_implement_a_simple_evaluation_and_aggregation_logic.ipynb │ │ ├── how_to_implement_a_task.ipynb │ │ ├── how_to_implement_elo_evaluations.ipynb │ │ ├── how_to_implement_incremental_evaluation.ipynb │ │ ├── how_to_log_and_debug_a_task.ipynb │ │ ├── how_to_resume_a_run_after_a_crash.ipynb │ │ ├── how_to_retrieve_data_for_analysis.ipynb │ │ ├── how_to_run_a_task_on_a_dataset.ipynb │ │ └── studio │ │ │ ├── how_to_execute_a_benchmark.ipynb │ │ │ ├── how_to_upload_existing_datasets_to_studio.ipynb │ │ │ └── how_to_use_studio_with_traces.ipynb │ ├── human_evaluation.ipynb │ ├── parameter_optimization.ipynb │ ├── performance_tips.ipynb │ ├── qa.ipynb │ ├── quickstart_task.ipynb │ ├── summarization.ipynb │ └── task_dependencies.drawio.svg └── intelligence_layer │ ├── __init__.py │ ├── connectors │ ├── __init__.py │ ├── argilla │ │ ├── argilla_client.py │ │ └── argilla_wrapper_client.py │ ├── base │ │ └── json_serializable.py │ ├── data │ │ ├── __init__.py │ │ ├── data.py │ │ ├── exceptions.py │ │ └── models.py │ ├── document_index │ │ └── document_index.py │ ├── kernel │ │ └── kernel.py │ ├── limited_concurrency_client.py │ ├── retrievers │ │ ├── __init__.py │ │ ├── base_retriever.py │ │ ├── document_index_retriever.py │ │ ├── hybrid_qdrant_in_memory_retriever.py │ │ └── qdrant_in_memory_retriever.py │ └── studio │ │ └── studio.py │ ├── core │ ├── __init__.py │ ├── chunk.py │ ├── detect_language.py │ ├── echo.py │ ├── instruct.py │ ├── model.py │ ├── prompt_template.py │ ├── task.py │ ├── text_highlight.py │ └── tracer │ │ ├── __init__.py │ │ ├── composite_tracer.py │ │ ├── file_tracer.py │ │ ├── in_memory_tracer.py │ │ ├── open_telemetry_tracer.py │ │ ├── 
persistent_tracer.py │ │ └── tracer.py │ ├── evaluation │ ├── __init__.py │ ├── aggregation │ │ ├── accumulator.py │ │ ├── aggregation_repository.py │ │ ├── aggregator.py │ │ ├── domain.py │ │ ├── elo_aggregation.py │ │ ├── file_aggregation_repository.py │ │ ├── hugging_face_aggregation_repository.py │ │ └── in_memory_aggregation_repository.py │ ├── benchmark │ │ ├── benchmark.py │ │ ├── get_code.py │ │ ├── studio_benchmark.py │ │ └── trace_information.py │ ├── dataset │ │ ├── dataset_repository.py │ │ ├── domain.py │ │ ├── file_dataset_repository.py │ │ ├── hugging_face_dataset_repository.py │ │ ├── in_memory_dataset_repository.py │ │ ├── single_huggingface_dataset_repository.py │ │ └── studio_dataset_repository.py │ ├── evaluation │ │ ├── __init__.py │ │ ├── domain.py │ │ ├── evaluation_repository.py │ │ ├── evaluator │ │ │ ├── __init__.py │ │ │ ├── argilla_evaluator.py │ │ │ ├── async_evaluator.py │ │ │ ├── base_evaluator.py │ │ │ ├── evaluator.py │ │ │ └── incremental_evaluator.py │ │ ├── file_evaluation_repository.py │ │ ├── graders.py │ │ └── in_memory_evaluation_repository.py │ ├── infrastructure │ │ ├── file_system_based_repository.py │ │ ├── hugging_face_repository.py │ │ └── repository_navigator.py │ ├── run │ │ ├── domain.py │ │ ├── file_run_repository.py │ │ ├── in_memory_run_repository.py │ │ ├── run_repository.py │ │ └── runner.py │ └── run_evaluation.py │ ├── examples │ ├── __init__.py │ ├── classify │ │ ├── __init__.py │ │ ├── classify.py │ │ ├── embedding_based_classify.py │ │ ├── keyword_extract.py │ │ ├── prompt_based_classify.py │ │ └── prompt_based_classify_with_definitions.py │ ├── qa │ │ ├── __init__.py │ │ ├── elo_qa_evaluation_logic.py │ │ ├── long_context_qa.py │ │ ├── multiple_chunk_qa.py │ │ ├── multiple_chunk_retriever_qa.py │ │ ├── retriever_based_qa.py │ │ └── single_chunk_qa.py │ ├── search │ │ ├── __init__.py │ │ ├── expand_chunks.py │ │ └── search.py │ └── summarize │ │ ├── __init__.py │ │ ├── recursive_summarize.py │ │ ├── steerable_long_context_summarize.py │ │ ├── steerable_single_chunk_summarize.py │ │ └── summarize.py │ ├── learning │ ├── __init__.py │ ├── enrich.py │ ├── file_instruction_finetuning_data_repository.py │ ├── instruction_finetuning_data_handler.py │ ├── instruction_finetuning_data_repository.py │ ├── models.py │ └── postgres_instruction_finetuning_data_repository.py │ └── py.typed ├── style_guide.md └── tests ├── __init__.py ├── conftest.py ├── conftest_document_index.py ├── connectors ├── argilla │ └── test_argilla_wrapper_client.py ├── data │ └── test_data.py ├── document_index │ ├── test_async_document_index.py │ └── test_document_index.py ├── kernel │ └── test_kernel.py ├── retrievers │ ├── test_document_index_retriever.py │ ├── test_hybrid_qdrant_in_memory_retriever.py │ └── test_qdrant_in_memory_retriever.py ├── studio │ ├── conftest.py │ ├── test_studio.py │ ├── test_studio_benchmark.py │ └── test_studio_dataset.py └── test_limited_concurrency_client.py ├── core ├── __init__.py ├── test_chunk.py ├── test_detect_language.py ├── test_echo.py ├── test_model.py ├── test_prompt_template.py ├── test_task.py ├── test_text_highlight.py └── tracer │ ├── conftest.py │ ├── fixtures │ └── old_file_trace_format.jsonl │ ├── test_composite_tracer.py │ ├── test_file_tracer.py │ ├── test_in_memory_tracer.py │ ├── test_open_telemetry_tracer.py │ └── test_tracer.py ├── dog-and-cat-cover.jpg ├── evaluation ├── __init__.py ├── aggregation │ ├── conftest.py │ ├── test_accumulator.py │ ├── test_aggregation_repository.py │ ├── test_domain.py │ ├── 
test_elo_calculator.py │ └── test_hugging_face_aggregation_repository.py ├── benchmark │ ├── test_benchmark.py │ └── test_trace_information.py ├── conftest.py ├── dataset │ ├── test_dataset_domain.py │ ├── test_dataset_repository.py │ ├── test_hugging_face_dataset_repository.py │ ├── test_single_huggingface_dataset_repository.py │ └── test_studio_data_repository.py ├── evaluation │ ├── conftest.py │ ├── test_argilla_evaluator.py │ ├── test_async_evaluation_repository.py │ ├── test_elo_evaluation_logic.py │ ├── test_evaluation_repository.py │ ├── test_evaluator_and_aggregator.py │ ├── test_file_evaluation_repository.py │ ├── test_graders.py │ ├── test_incremental_evaluator.py │ └── test_instruct_comparison_argilla_evaluator.py ├── infrastructure │ ├── test_hugging_face_repository.py │ └── test_repository_navigator.py └── run │ ├── test_file_run_repository.py │ ├── test_run.py │ ├── test_run_repository.py │ └── test_runner.py ├── examples ├── classify │ ├── test_classify.py │ ├── test_embedding_based_classify.py │ ├── test_keyword_extract.py │ ├── test_prompt_based_classify.py │ └── test_prompt_based_classify_with_definitions.py ├── qa │ ├── conftest.py │ ├── test_long_context_qa.py │ ├── test_multiple_chunk_qa.py │ ├── test_multiple_chunk_retriever_qa.py │ ├── test_retriever_based_qa.py │ └── test_single_chunk_qa.py ├── search │ ├── test_expand_chunk.py │ └── test_search.py └── summarize │ ├── __init__.py │ ├── conftest.py │ ├── test_recursive_summarize.py │ ├── test_steerable_long_context_summarize.py │ ├── test_summarize.py │ └── very_long_text.txt ├── image_example.jpg └── learning ├── conftest.py ├── test_file_instruction_finetuning_data_repository.py ├── test_instruction_finetuning_data_handler.py └── test_postgres_instruction_finetuning_data_repository.py /.env.example: -------------------------------------------------------------------------------- 1 | ARGILLA_API_URL="http://localhost:6900/" 2 | ARGILLA_API_KEY="argilla.apikey" 3 | 4 | # Your URL for your Studio deployment 5 | STUDIO_URL="https://pharia-studio.*.com" 6 | 7 | # DB Variables 8 | POSTGRES_HOST=localhost 9 | POSTGRES_PORT=5434 10 | POSTGRES_DB=il_sdk 11 | POSTGRES_USER=il_sdk 12 | POSTGRES_PASSWORD=test 13 | 14 | # ---- Things to adapt ---- 15 | CLIENT_URL=... 16 | AA_TOKEN=token 17 | DOCUMENT_INDEX_URL=... 18 | 19 | # needed for hugging face integration 20 | HUGGING_FACE_TOKEN=token 21 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/bug_report.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Bug report 3 | about: Create a report to help us improve 4 | title: '' 5 | labels: '' 6 | assignees: '' 7 | 8 | --- 9 | 10 | 11 | 12 | **Describe the bug** 13 | A clear and concise description of what the bug is. 14 | 15 | **To Reproduce** 16 | Steps to reproduce the behavior: 17 | 1. Go to '...' 18 | 2. Click on '....' 19 | 3. Scroll down to '....' 20 | 4. See error 21 | 22 | **Expected behavior** 23 | A clear and concise description of what you expected to happen. 24 | 25 | **Screenshots** 26 | If applicable, add screenshots to help explain your problem. 27 | 28 | **System (please complete the following information):** 29 | - OS: [e.g. Mac] 30 | - Version [e.g. 10.14] 31 | - Intelligence Layer Version [e.g. 0.1.0] 32 | 33 | **Additional context** 34 | Add any other context about the problem here. 
35 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/feature_request.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Feature request 3 | about: Suggest an idea for this project 4 | title: '' 5 | labels: '' 6 | assignees: '' 7 | 8 | --- 9 | 10 | 11 | 12 | **Is your feature request related to a problem? Please describe.** 13 | A clear and concise description of what the problem is. Ex. I'm always frustrated when [...] 14 | 15 | **Describe the solution you'd like** 16 | A clear and concise description of what you want to happen. 17 | 18 | **Describe alternatives you've considered** 19 | A clear and concise description of any alternative solutions or features you've considered. 20 | 21 | **Additional context** 22 | Add any other context or screenshots about the feature request here. 23 | -------------------------------------------------------------------------------- /.github/composites/get_jfrog_access_token.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | set -euo pipefail 4 | 5 | ID_TOKEN=$(curl -sLS -H "User-Agent: actions/oidc-client" -H "Authorization: Bearer $ACTIONS_ID_TOKEN_REQUEST_TOKEN" \ 6 | "${ACTIONS_ID_TOKEN_REQUEST_URL}&audience=https://alephalpha.jfrog.io" | jq .value | tr -d '"') 7 | 8 | JFROG_ACCESS_TOKEN=$(curl -v \ 9 | -X POST \ 10 | -H "Content-type: application/json" \ 11 | https://alephalpha.jfrog.io/access/api/v1/oidc/token \ 12 | -d \ 13 | "{\"grant_type\": \"urn:ietf:params:oauth:grant-type:token-exchange\", \"subject_token_type\":\"urn:ietf:params:oauth:token-type:id_token\", \"subject_token\": \"$ID_TOKEN\", \"provider_name\": \"github\"}" | jq .access_token -r) 14 | 15 | echo -n $JFROG_ACCESS_TOKEN 16 | -------------------------------------------------------------------------------- /.github/composites/get_jfrog_access_token_subject.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | set -euo pipefail 4 | 5 | JFROG_ACCESS_TOKEN=$1 6 | echo $JFROG_ACCESS_TOKEN | awk -F'.' 
'{print $2}' | sed 's/.\{1,3\}$/&==/' | base64 -d | jq '.sub' -r 7 | -------------------------------------------------------------------------------- /.github/composites/python-setup/action.yml: -------------------------------------------------------------------------------- 1 | name: Checkout and set up python 2 | description: "Installs python, dependencies and handles venv caching" 3 | runs: 4 | using: composite 5 | steps: 6 | - uses: actions/setup-python@v5 7 | with: 8 | python-version: "3.10" 9 | 10 | - name: Install and configure Poetry 11 | uses: snok/install-poetry@v1 12 | with: 13 | virtualenvs-create: true 14 | virtualenvs-in-project: true 15 | installer-parallel: true 16 | virtualenvs-path: .venv 17 | 18 | - name: Load cached venv 19 | id: cached-poetry-dependencies 20 | uses: actions/cache@v4 21 | with: 22 | path: .venv 23 | key: venv-${{ runner.os }}-${{ steps.setup-python.outputs.python-version }}-${{ hashFiles('**/poetry.lock') }} 24 | 25 | - name: Install dependencies 26 | shell: bash 27 | if: steps.cached-poetry-dependencies.outputs.cache-hit != 'true' 28 | run: | 29 | poetry config installer.max-workers 10 30 | poetry install --no-interaction 31 | -------------------------------------------------------------------------------- /.github/dependabot.yml: -------------------------------------------------------------------------------- 1 | # To get started with Dependabot version updates, you'll need to specify which 2 | # package ecosystems to update and where the package manifests are located. 3 | # Please see the documentation for all configuration options: 4 | # https://docs.github.com/github/administering-a-repository/configuration-options-for-dependency-updates 5 | 6 | version: 2 7 | updates: 8 | - package-ecosystem: "pip" 9 | directory: "/" 10 | schedule: 11 | interval: "daily" 12 | groups: 13 | minor: 14 | update-types: 15 | - minor 16 | - patch 17 | patterns: 18 | - "types*" 19 | 20 | - package-ecosystem: "github-actions" 21 | # Workflow files stored in the default location of `.github/workflows`. (You don't need to specify `/.github/workflows` for `directory`. You can use `directory: "/"`.) 22 | directory: "/" 23 | schedule: 24 | interval: "daily" 25 | -------------------------------------------------------------------------------- /.github/pull_request_template.md: -------------------------------------------------------------------------------- 1 | # Description 2 | No description. 3 | 4 | ## Before Merging 5 | - [ ] Review the code changes 6 | - Unused print / comments / TODOs 7 | - Missing docstrings for functions that should have them 8 | - Consistent variable names 9 | - ... 
10 | - [ ] Update `changelog.md` if necessary 11 | - [ ] Commit messages should contain a semantic [label](https://gist.github.com/joshbuchea/6f47e86d2510bce28f8e7f42ae84c716) and the ticket number 12 | - Consider squashing if this is not the case 13 | -------------------------------------------------------------------------------- /.github/workflows/artifactory.yml: -------------------------------------------------------------------------------- 1 | name: Artifactory Deployment of PyPi 2 | 3 | on: 4 | workflow_dispatch: {} 5 | release: 6 | types: [published] 7 | 8 | env: 9 | ARTIFACTORY_URL: https://alephalpha.jfrog.io 10 | ARTIFACTORY_PYPI_REPOSITORY: "intelligence-layer" 11 | ARTIFACTORY_DOCKER_REGISTRY: alephalpha.jfrog.io/intelligence-layer-images 12 | 13 | jobs: 14 | build-and-push-pypi: 15 | permissions: 16 | contents: read 17 | id-token: write 18 | runs-on: ubuntu-latest 19 | steps: 20 | - name: Checkout 21 | uses: actions/checkout@v4 22 | - uses: actions/setup-python@v5 23 | with: 24 | python-version: "3.10" 25 | - name: Install and configure Poetry 26 | uses: snok/install-poetry@v1 27 | with: 28 | virtualenvs-create: true 29 | virtualenvs-in-project: true 30 | installer-parallel: true 31 | - name: Build package and push to Artifactory 32 | run: | 33 | poetry build 34 | poetry config repositories.artifactory $ARTIFACTORY_URL/artifactory/api/pypi/$ARTIFACTORY_PYPI_REPOSITORY 35 | export POETRY_HTTP_BASIC_ARTIFACTORY_PASSWORD=$(.github/composites/get_jfrog_access_token.sh) 36 | export POETRY_HTTP_BASIC_ARTIFACTORY_USERNAME=$(.github/composites/get_jfrog_access_token_subject.sh $POETRY_HTTP_BASIC_ARTIFACTORY_PASSWORD) 37 | poetry publish -r artifactory 38 | -------------------------------------------------------------------------------- /.github/workflows/document-index-execution.yml: -------------------------------------------------------------------------------- 1 | name: Document Index Tests 2 | 3 | on: 4 | schedule: 5 | - cron: '0 5 * * *' # Runs daily at 5:00 UTC 6 | workflow_dispatch: # Allows manual trigger 7 | push: 8 | branches: 9 | - main 10 | paths: 11 | - "src/intelligence_layer/connectors/document_index/**" 12 | - "tests/connectors/document_index/**" 13 | pull_request: 14 | paths: 15 | - "src/intelligence_layer/connectors/document_index/**" 16 | - "tests/connectors/document_index/**" 17 | 18 | concurrency: 19 | group: ${{ github.workflow }}-${{ github.ref }} 20 | cancel-in-progress: true 21 | 22 | jobs: 23 | python-tests: 24 | uses: ./.github/workflows/document-index-tests.yml 25 | secrets: inherit 26 | -------------------------------------------------------------------------------- /.github/workflows/document-index-tests.yml: -------------------------------------------------------------------------------- 1 | on: 2 | workflow_call: 3 | inputs: 4 | runner: 5 | type: string 6 | default: "ubuntu-latest" 7 | timeout: 8 | type: number 9 | default: 15 # mins 10 | 11 | defaults: 12 | run: 13 | shell: bash 14 | 15 | jobs: 16 | document-index-notebooks: 17 | timeout-minutes: ${{inputs.timeout}} 18 | runs-on: ${{inputs.runner}} 19 | 20 | steps: 21 | - name: Checkout repository 22 | uses: actions/checkout@v4 23 | - uses: ./.github/composites/python-setup 24 | 25 | - name: Run Notebooks 26 | env: 27 | AA_TOKEN: ${{ secrets.AA_TOKEN }} 28 | CLIENT_URL: ${{ secrets.CLIENT_URL }} 29 | DOCUMENT_INDEX_URL: ${{secrets.DOCUMENT_INDEX_URL}} 30 | run: | 31 | ./scripts/notebook_runner_document_index.sh 32 | document-index-tests: 33 | timeout-minutes: ${{inputs.timeout}} 34 | runs-on: 
${{inputs.runner}} 35 | continue-on-error: true 36 | env: 37 | DOCUMENT_INDEX_URL: ${{secrets.DOCUMENT_INDEX_URL}} 38 | AA_TOKEN: ${{ secrets.AA_TOKEN }} 39 | steps: 40 | - name: Checkout repository 41 | uses: actions/checkout@v4 42 | - uses: ./.github/composites/python-setup 43 | - name: Run client tests 44 | run: | 45 | TQDM_DISABLE=1 poetry run pytest -m "document_index and not asyncio" 46 | - name: Run async client tests 47 | run: | 48 | TQDM_DISABLE=1 poetry run pytest -m "document_index and asyncio" 49 | -------------------------------------------------------------------------------- /.github/workflows/test-execution.yml: -------------------------------------------------------------------------------- 1 | name: Intelligence Layer SDK Tests 2 | 3 | on: 4 | push: 5 | branches: 6 | - main 7 | # By default, a workflow only runs when a pull_request event's activity type is opened, synchronize, or reopened 8 | pull_request: 9 | # manual trigger 10 | workflow_dispatch: 11 | 12 | concurrency: 13 | group: ${{ github.workflow }}-${{ github.ref }} 14 | cancel-in-progress: true 15 | jobs: 16 | python-tests: 17 | uses: ./.github/workflows/sdk-tests.yml 18 | secrets: inherit 19 | -------------------------------------------------------------------------------- /.pre-commit-config.yaml: -------------------------------------------------------------------------------- 1 | repos: 2 | - repo: https://github.com/pre-commit/pre-commit-hooks 3 | rev: v5.0.0 4 | hooks: 5 | - id: check-json 6 | - id: pretty-format-json 7 | files: .json 8 | args: 9 | - --autofix 10 | - repo: https://github.com/astral-sh/ruff-pre-commit 11 | # Ruff version. 12 | rev: v0.7.3 13 | hooks: 14 | # Run the linter. 15 | - id: ruff 16 | name: ruff-lint 17 | args: [--fix] 18 | types_or: [python, pyi, jupyter] 19 | # Run the formatter. 
20 | - id: ruff-format 21 | types_or: [python, pyi, jupyter] 22 | - repo: https://github.com/kynan/nbstripout 23 | rev: 0.8.1 24 | hooks: 25 | - id: nbstripout 26 | files: ".ipynb" 27 | args: 28 | [ 29 | --drop-empty-cells, 30 | --extra-keys=metadata.kernelspec metadata.language_info.codemirror_mode.version metadata.language_info.pygments_lexer metadata.language_info.version, 31 | ] 32 | 33 | - repo: https://github.com/codespell-project/codespell 34 | rev: v2.3.0 35 | hooks: 36 | - id: codespell 37 | args: 38 | [ 39 | "-L", 40 | "newyorker,te,responde,ist,als,oder,technik,sie,rouge,unter,juli,fiel,couldn,mke, vor,fille,ans", 41 | ] 42 | exclude: '^(poetry\.lock|tests/.*|src/intelligence_layer/examples/qa/multiple_chunk_qa.py|src/intelligence_layer/examples/summarize/.*|src/intelligence_layer/examples/classify/keyword_extract.py|src/intelligence_layer/learning/enrich.py)$' 43 | - repo: https://github.com/jsh9/pydoclint 44 | rev: 0.5.9 45 | hooks: 46 | - id: pydoclint 47 | -------------------------------------------------------------------------------- /.readthedocs.yaml: -------------------------------------------------------------------------------- 1 | # .readthedocs.yaml 2 | # Read the Docs configuration file 3 | # See https://docs.readthedocs.io/en/stable/config-file/v2.html for details 4 | 5 | # Required 6 | version: 2 7 | 8 | # Set the OS, Python version and other tools you might need 9 | build: 10 | os: ubuntu-22.04 11 | tools: 12 | python: "3.11" 13 | jobs: 14 | # according to https://github.com/readthedocs/readthedocs.org/issues/4912#issuecomment-1992286540 15 | post_create_environment: 16 | - python -m pip install poetry 17 | post_install: 18 | - VIRTUAL_ENV=$READTHEDOCS_VIRTUALENV_PATH poetry install 19 | 20 | # Build documentation in the "docs/" directory with Sphinx 21 | sphinx: 22 | configuration: docs/conf.py 23 | # Optionally build your docs in additional formats such as PDF and ePub 24 | # formats: 25 | # - pdf 26 | # - epub 27 | -------------------------------------------------------------------------------- /LICENSE.md: -------------------------------------------------------------------------------- 1 | Copyright (C) Aleph Alpha GmbH - All Rights Reserved 2 | 3 | This source code, databases, and other material is protected under international copyright law. All rights reserved and protected by the copyright holders. This file is confidential and only available to authorized individuals with the permission of the copyright holders. If you encounter this file and do not have permission, please contact the copyright holder. 4 | -------------------------------------------------------------------------------- /RELEASE.md: -------------------------------------------------------------------------------- 1 | # Release cycle TODOs 2 | 3 | - Update CHANGELOG.md 4 | - We committed to updating the changelog with every relevant merge into main. Check the new entries of the changelog and perform adjustments where necessary. 5 | - Update the "version" field of the project in `pyproject.toml` 6 | - We use [semantic versioning](https://semver.org/) 7 | - Commit the changes and merge to main 8 | - Tag the latest commit on main with the new release number (e.g. v0.6.0) 9 | - `git checkout main, git tag , git push origin ` 10 | - Create a new release draft in GitHub (Tags -> Releases -> Draft a new release) and save it as draft 11 | - Copy the changelog into the release description. Also add a link to the commits since the last release at the bottom of the description. 
12 | - Make sure the changes have been merged into the main branch. 13 | - Publish the release. 14 | - Consider updating the changelog of the [docs](https://gitlab.aleph-alpha.de/engineering/docs). The repository for the docs can be found [here](https://gitlab.aleph-alpha.de/engineering/docs). 15 | - Update it when we have big new features we want to communicate or in preparation of the sprint review. 16 | -------------------------------------------------------------------------------- /assets/TraceViewer.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Aleph-Alpha/intelligence-layer-sdk/261ab36a28977d91e2d0c8ca16039edcbab81925/assets/TraceViewer.png -------------------------------------------------------------------------------- /assets/argilla_interface.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Aleph-Alpha/intelligence-layer-sdk/261ab36a28977d91e2d0c8ca16039edcbab81925/assets/argilla_interface.png -------------------------------------------------------------------------------- /assets/argilla_splits.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Aleph-Alpha/intelligence-layer-sdk/261ab36a28977d91e2d0c8ca16039edcbab81925/assets/argilla_splits.png -------------------------------------------------------------------------------- /assets/fork.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Aleph-Alpha/intelligence-layer-sdk/261ab36a28977d91e2d0c8ca16039edcbab81925/assets/fork.png -------------------------------------------------------------------------------- /docker-compose.yaml: -------------------------------------------------------------------------------- 1 | services: 2 | # taken from https://docs.argilla.io/latest/getting_started/how-to-deploy-argilla-with-docker/ 3 | argilla-elastic-search: 4 | image: docker.elastic.co/elasticsearch/elasticsearch:8.12.2 5 | environment: 6 | - node.name=elasticsearch 7 | - cluster.name=es-argilla-local 8 | - discovery.type=single-node 9 | - "ES_JAVA_OPTS=-Xms512m -Xmx512m" 10 | - cluster.routing.allocation.disk.threshold_enabled=false 11 | - xpack.security.enabled=false 12 | ulimits: 13 | memlock: 14 | soft: -1 15 | hard: -1 16 | ports: 17 | - "9200:9200" 18 | - "9300:9300" 19 | healthcheck: 20 | test: 21 | [ 22 | "CMD-SHELL", 23 | "curl --silent --fail localhost:9200/_cluster/health || exit 1", 24 | ] 25 | interval: 5s 26 | timeout: 5s 27 | retries: 3 28 | argilla: 29 | depends_on: 30 | argilla-elastic-search: 31 | condition: service_healthy 32 | image: argilla/argilla-server:v1.29.1 33 | ports: 34 | - "6900:6900" 35 | environment: 36 | ARGILLA_ELASTICSEARCH: "http://argilla-elastic-search:9200" 37 | ARGILLA_ENABLE_TELEMETRY: 0 38 | 39 | USERNAME: argilla 40 | PASSWORD: 12345678 41 | API_KEY: argilla.apikey 42 | open-telemetry-trace-service: 43 | container_name: jaeger_1_35 44 | environment: 45 | COLLECTOR_OTLP_ENABLED: "true" 46 | ports: 47 | - "4317:4317" 48 | - "4318:4318" 49 | - "16686:16686" 50 | image: jaegertracing/all-in-one:1.35 51 | # export GITHUB_TOKEN=... 52 | # echo $GITHUB_TOKEN | docker login ghcr.io -u your_email@for_github --password-stdin 53 | # docker compose pull to update containers 54 | 55 | # export GITLAB_TOKEN=... 
56 | # (optional) export GITLAB_TOKEN=$(op item get YOUR_TOKEN --format json --fields password | jq .value | tr -d '"') 57 | # echo $GITLAB_TOKEN | docker login registry.gitlab.aleph-alpha.de -u your_email@for_gitlab --password-stdin 58 | # docker compose pull to update containers 59 | postgres: 60 | image: postgres:15 61 | ports: 62 | - ${POSTGRES_PORT}:${POSTGRES_PORT} 63 | env_file: ".env" 64 | command: -p ${POSTGRES_PORT} 65 | -------------------------------------------------------------------------------- /docs/Makefile: -------------------------------------------------------------------------------- 1 | # Minimal makefile for Sphinx documentation 2 | # 3 | 4 | # You can set these variables from the command line, and also 5 | # from the environment for the first two. 6 | SPHINXOPTS ?= # -nvT 7 | SPHINXBUILD ?= sphinx-build 8 | SOURCEDIR = . 9 | BUILDDIR = _build 10 | 11 | export SPHINX_APIDOC_OPTIONS=members,show-inheritance 12 | 13 | # Put it first so that "make" without argument is like "make help". 14 | help: 15 | @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 16 | 17 | .PHONY: help Makefile 18 | 19 | # Catch-all target: route all unknown targets to Sphinx using the new 20 | # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). 21 | %: Makefile 22 | # sphinx-apidoc -o . ../src 23 | @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 24 | # rm --force `ls *.rst | grep --fixed-strings --invert-match index.rst` 25 | -------------------------------------------------------------------------------- /docs/conf.py: -------------------------------------------------------------------------------- 1 | # Configuration file for the Sphinx documentation builder. 2 | # 3 | # For the full list of built-in configuration values, see the documentation: 4 | # https://www.sphinx-doc.org/en/master/usage/configuration.html 5 | 6 | # -- Project information ----------------------------------------------------- 7 | # https://www.sphinx-doc.org/en/master/usage/configuration.html#project-information 8 | 9 | project = "Intelligence Layer" 10 | copyright = "2023, Aleph Alpha" 11 | author = "Aleph Alpha" 12 | 13 | # -- General configuration --------------------------------------------------- 14 | # https://www.sphinx-doc.org/en/master/usage/configuration.html#general-configuration 15 | 16 | extensions = [ 17 | "sphinx.ext.autodoc", 18 | "sphinx.ext.napoleon", 19 | "sphinx.ext.viewcode", 20 | "sphinx.ext.doctest", 21 | ] 22 | 23 | autodoc_default_options = { 24 | "members": True, 25 | "show-inheritance": True, 26 | "inherited-members": "BaseModel,RuntimeError", 27 | # BaseModel attributes where the documentation does not add a lot of value 28 | "exclude-members": "model_config,model_fields,model_computed_fields", 29 | } 30 | 31 | templates_path = ["_templates"] 32 | exclude_patterns = ["_build", "Thumbs.db", ".DS_Store"] 33 | 34 | 35 | # -- Options for HTML output ------------------------------------------------- 36 | # https://www.sphinx-doc.org/en/master/usage/configuration.html#options-for-html-output 37 | 38 | html_theme = "sphinx_rtd_theme" 39 | -------------------------------------------------------------------------------- /docs/index.rst: -------------------------------------------------------------------------------- 1 | .. Intelligence Layer documentation master file, created by 2 | sphinx-quickstart on Fri Oct 27 14:17:00 2023. 3 | You can adapt this file completely to your liking, but it should at least 4 | contain the root `toctree` directive. 
5 | 6 | Welcome to Intelligence Layer's documentation! 7 | ============================================== 8 | 9 | This documentation provides API-level code documentation for the Intelligence Layer, covering modules such as connectors, core, evaluation, and examples. 10 | 11 | A comprehensive overview of the Intelligence Layer can be found on GitHub, including `tutorials `_ and `how-tos `_. 12 | This code documentation is intended to serve as a reference companion to the main documentation, providing detailed information on the Intelligence Layer's API and implementation. 13 | 14 | .. toctree:: 15 | :maxdepth: 1 16 | :caption: Contents: 17 | 18 | intelligence_layer.connectors 19 | intelligence_layer.core 20 | intelligence_layer.evaluation 21 | intelligence_layer.examples 22 | 23 | 24 | Indices and tables 25 | ================== 26 | 27 | * :ref:`genindex` 28 | -------------------------------------------------------------------------------- /docs/intelligence_layer.connectors.rst: -------------------------------------------------------------------------------- 1 | intelligence\_layer.connectors 2 | ====================================== 3 | 4 | Module contents 5 | --------------- 6 | 7 | .. automodule:: intelligence_layer.connectors 8 | -------------------------------------------------------------------------------- /docs/intelligence_layer.core.rst: -------------------------------------------------------------------------------- 1 | intelligence\_layer.core 2 | ================================ 3 | 4 | Module contents 5 | --------------- 6 | 7 | .. automodule:: intelligence_layer.core 8 | 9 | .. autoclass:: TextChunk 10 | -------------------------------------------------------------------------------- /docs/intelligence_layer.evaluation.rst: -------------------------------------------------------------------------------- 1 | intelligence\_layer.evaluation 2 | ====================================== 3 | 4 | 5 | Module contents 6 | --------------- 7 | 8 | .. automodule:: intelligence_layer.evaluation 9 | -------------------------------------------------------------------------------- /docs/intelligence_layer.examples.rst: -------------------------------------------------------------------------------- 1 | intelligence\_layer.examples 2 | ====================================== 3 | 4 | 5 | Module contents 6 | --------------- 7 | 8 | .. automodule:: intelligence_layer.examples 9 | -------------------------------------------------------------------------------- /docs/make.bat: -------------------------------------------------------------------------------- 1 | @ECHO OFF 2 | 3 | pushd %~dp0 4 | 5 | REM Command file for Sphinx documentation 6 | 7 | if "%SPHINXBUILD%" == "" ( 8 | set SPHINXBUILD=sphinx-build 9 | ) 10 | set SOURCEDIR=. 11 | set BUILDDIR=_build 12 | 13 | %SPHINXBUILD% >NUL 2>NUL 14 | if errorlevel 9009 ( 15 | echo. 16 | echo.The 'sphinx-build' command was not found. Make sure you have Sphinx 17 | echo.installed, then set the SPHINXBUILD environment variable to point 18 | echo.to the full path of the 'sphinx-build' executable. Alternatively you 19 | echo.may add the Sphinx directory to PATH. 20 | echo. 
21 | echo.If you don't have Sphinx installed, grab it from 22 | echo.https://www.sphinx-doc.org/ 23 | exit /b 1 24 | ) 25 | 26 | if "%1" == "" goto help 27 | 28 | %SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% 29 | goto end 30 | 31 | :help 32 | %SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% 33 | 34 | :end 35 | popd 36 | -------------------------------------------------------------------------------- /mypy.ini: -------------------------------------------------------------------------------- 1 | [mypy] 2 | 3 | warn_unused_ignores = True 4 | -------------------------------------------------------------------------------- /scripts/all.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env -S bash -eu -o pipefail 2 | 3 | ProjectRoot="$(cd $(dirname "$0")/.. && pwd -P)" 4 | 5 | cd "$ProjectRoot" 6 | 7 | # see https://stackoverflow.com/questions/43267413/how-to-set-environment-variables-from-env-file 8 | set -a # automatically export all variables 9 | source .env 10 | set +a 11 | 12 | ./scripts/lint.sh 13 | ./scripts/doctest.sh 14 | ./scripts/notebook_runner.sh 15 | ./scripts/test.sh 16 | python "$(dirname "$0")/clean_hf.py" 17 | -------------------------------------------------------------------------------- /scripts/clean_hf.py: -------------------------------------------------------------------------------- 1 | import os 2 | import warnings 3 | 4 | from dotenv import load_dotenv 5 | from huggingface_hub import HfApi 6 | 7 | 8 | def clean_up_dangling_hf_repos(hugging_face_token: str) -> None: 9 | api = HfApi(token=hugging_face_token) 10 | datasets = list( 11 | api.list_datasets(author="Aleph-Alpha", dataset_name="IL-temp-tests") 12 | ) 13 | if len(datasets) > 0: 14 | warnings.warn("dangling hf datasets found, attempting to delete", stacklevel=2) 15 | for dataset in datasets: 16 | api.delete_repo(dataset.id, repo_type="dataset", missing_ok=True) 17 | 18 | 19 | if __name__ == "__main__": 20 | load_dotenv() 21 | token = os.getenv("HUGGING_FACE_TOKEN") 22 | assert isinstance(token, str) 23 | clean_up_dangling_hf_repos(token) 24 | -------------------------------------------------------------------------------- /scripts/doctest.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env -S bash -eu -o pipefail 2 | 3 | ProjectRoot="$(cd $(dirname "$0")/.. && pwd -P)" 4 | 5 | cd "$ProjectRoot" 6 | 7 | if [ -f .env ]; then 8 | # Export environment variables from .env file 9 | set -a # automatically export all variables 10 | source .env 11 | set +a 12 | fi 13 | (cd docs && poetry run make doctest) 14 | -------------------------------------------------------------------------------- /scripts/fastapi_example_test.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env -S bash -eu -o pipefail 2 | echo "Testing the fastapi app" 3 | # start the server in the background 4 | hypercorn src/documentation/fastapi_example:app --bind localhost:8000 & 5 | server_pid=$! 6 | 7 | attempt_counter=0 8 | max_attempts=10 9 | 10 | trap 'kill $server_pid' EXIT SIGINT 11 | # waiting for server startup 12 | until $(curl -X GET http://localhost:8000 --fail-with-body --output /dev/null --silent --head); do 13 | if [ ${attempt_counter} -eq ${max_attempts} ];then 14 | echo "Max attempts reached" 15 | exit 1 16 | fi 17 | 18 | printf '.' 
19 | attempt_counter=$(($attempt_counter+1)) 20 | sleep 1 21 | done 22 | 23 | curl -X GET http://localhost:8000 --fail-with-body 24 | curl -X POST http://localhost:8000/summary --fail-with-body -H "Content-Type: application/json" -d '{"chunk": "", "language": {"iso_639_1": "en"}}' 25 | 26 | # kill happens at the end with the trap command 27 | exit 0 28 | -------------------------------------------------------------------------------- /scripts/lint.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env -S bash -eu -o pipefail 2 | 3 | cd $(dirname $0)/.. 4 | 5 | poetry run pre-commit run --all-files 6 | poetry run mypy . 7 | -------------------------------------------------------------------------------- /scripts/notebook_runner.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env -S bash -eu -o pipefail 2 | # next line loads AA_TOKEN from .env file when running bash script locally. In CI this is not necessary since AA_TOKEN is environment variable. 3 | [ -f .env ] && source .env 4 | export AA_TOKEN 5 | # Find all .ipynb files in the directory and pass them to xargs for parallel execution 6 | rm -rf src/documentation/.ipynb_checkpoints 7 | rm -rf src/documentation/how_tos/.ipynb_checkpoints 8 | 9 | find src/documentation -name "*.nbconvert.ipynb" -type f -delete 10 | find src/documentation -name "*.ipynb" ! -name "performance_tips.ipynb" ! -name "document_index.ipynb" | xargs --max-args 1 --max-procs 6 poetry run jupyter nbconvert --to notebook --execute 11 | find src/documentation -name "*.nbconvert.ipynb" -type f -delete 12 | 13 | poetry run ./scripts/fastapi_example_test.sh 14 | -------------------------------------------------------------------------------- /scripts/notebook_runner_document_index.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env -S bash -eu -o pipefail 2 | 3 | # Load environment variables if running locally 4 | [ -f .env ] && source .env 5 | export AA_TOKEN 6 | 7 | # Remove Jupyter Notebook checkpoints 8 | rm -rf src/documentation/.ipynb_checkpoints 9 | rm -rf src/documentation/how_tos/.ipynb_checkpoints 10 | 11 | # Remove any previously executed version of the notebook 12 | find src/documentation -name "document_index.nbconvert.ipynb" -type f -delete 13 | 14 | # Execute only document_index.ipynb 15 | poetry run jupyter nbconvert --to notebook --execute src/documentation/document_index.ipynb 16 | 17 | # Remove the execution-generated file 18 | find src/documentation -name "document_index.nbconvert.ipynb" -type f -delete 19 | 20 | -------------------------------------------------------------------------------- /scripts/test.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env -S bash -eu -o pipefail 2 | 3 | TQDM_DISABLE=1 poetry run pytest -n 10 -m "not document_index" 4 | -------------------------------------------------------------------------------- /src/documentation/data/classify_examples.json: -------------------------------------------------------------------------------- 1 | [ 2 | { 3 | "label": "Finance", 4 | "message": "I just traveled to Paris for a conference, where can I get the train ride refunded?" 5 | }, 6 | { 7 | "label": "Sales", 8 | "message": "Hello, we would like to get in contact with your sales team, because we are interested in your solution." 
9 | }, 10 | { 11 | "label": "Communications", 12 | "message": "We are working on a documentation on AI and would like to film a piece about you. Would you be interested?" 13 | }, 14 | { 15 | "label": "Research", 16 | "message": "I am working with Stanford and was hoping to win you over for a research collaboration." 17 | }, 18 | { 19 | "label": "IT Support", 20 | "message": "My laptop is broken" 21 | }, 22 | { 23 | "label": "Communications", 24 | "message": "Can you send your models via email?" 25 | }, 26 | { 27 | "label": "Research", 28 | "message": "We should do a research collaboration." 29 | }, 30 | { 31 | "label": "Research", 32 | "message": "My company has been working on time series and signal processing for a long time. It would make sense to define a joint go to market and research strategy." 33 | }, 34 | { 35 | "label": "Human Resources", 36 | "message": "Full stack developer in your area available now." 37 | }, 38 | { 39 | "label": "Product", 40 | "message": "Hi,\n\nI recently bought your offering. I am having trouble running your docker container in my environment. It fails to start. Can you help?" 41 | }, 42 | { 43 | "label": "Product", 44 | "message": "Hello,\n\nI am getting strange errors from your API. It is saying the queue is full, but I am only sending one task at a time. Why is this happening?" 45 | }, 46 | { 47 | "label": "Product", 48 | "message": "Can you show me a demo of different use cases your offering can solve?" 49 | }, 50 | { 51 | "label": "Human Resources", 52 | "message": "Hey, I did not get a t-shirt in the onboarding. Could I still get one?" 53 | }, 54 | { 55 | "label": "Customer", 56 | "message": "Hi, can you name me a couple of timeslots for a first call? Would be really interested in learning more about the product?" 57 | }, 58 | { 59 | "label": "Product", 60 | "message": "Hi Jan, is your product ISO 37301 compliant?" 61 | }, 62 | { 63 | "label": "IT Support", 64 | "message": "I can\u2019t login to Mattermost or Sharepoint, how can I gain access?" 65 | }, 66 | { 67 | "label": "Finance", 68 | "message": "I did not get paid last month, when do I get paid? What is going on?" 69 | }, 70 | { 71 | "label": "Security", 72 | "message": "Hi, I want to get a new badge, the photo of me looks ugly and I just got new glasses so it does not look like me. " 73 | }, 74 | { 75 | "label": "Marketing", 76 | "message": "I have a question concerning your marketing strategy, would you have time to hop on a call?" 77 | }, 78 | { 79 | "label": "CEO Office", 80 | "message": "Dear Jonas Andrulis,\n\nWe have met each other at the event in N\u00fcrnberg, can we meet for a follow up in your Office in Heidelberg?" 81 | }, 82 | { 83 | "label": "Security", 84 | "message": "Your hTTPs Certificate is not valid on your www.aleph-alpha.de" 85 | }, 86 | { 87 | "label": "Human Resources", 88 | "message": "I want to take a week off immediately" 89 | }, 90 | { 91 | "label": "Human Resources", 92 | "message": "I want to take a sabbatical" 93 | }, 94 | { 95 | "label": "Human Resources", 96 | "message": "How can I work more, I want to work weekends, can I get paid overtime?" 
97 | } 98 | ] 99 | -------------------------------------------------------------------------------- /src/documentation/fastapi_example.py: -------------------------------------------------------------------------------- 1 | import http 2 | import os 3 | from collections.abc import Sequence 4 | from http import HTTPStatus 5 | from typing import Annotated 6 | 7 | from aleph_alpha_client import Client 8 | from dotenv import load_dotenv 9 | from fastapi import Depends, FastAPI, HTTPException, Request, Response 10 | from fastapi.datastructures import URL 11 | 12 | from intelligence_layer.connectors import AlephAlphaClientProtocol 13 | from intelligence_layer.core import Llama3InstructModel, NoOpTracer, Task 14 | from intelligence_layer.examples import ( 15 | SingleChunkSummarizeInput, 16 | SteerableSingleChunkSummarize, 17 | SummarizeOutput, 18 | ) 19 | 20 | # Minimal FastAPI app ########################################################## 21 | 22 | app = FastAPI() 23 | 24 | 25 | @app.get("/") 26 | def root() -> Response: 27 | return Response(content="Hello World", status_code=HTTPStatus.OK) 28 | 29 | 30 | # Authentication ############################################################### 31 | 32 | 33 | class AuthService: 34 | def is_valid_token(self, token: str, permissions: Sequence[str], url: URL) -> bool: 35 | # Add your authentication logic here 36 | print(f"Checking permission for route: {url.path}") 37 | return True 38 | 39 | 40 | class PermissionChecker: 41 | def __init__(self, permissions: Sequence[str] = []): 42 | self.permissions = permissions 43 | 44 | def __call__( 45 | self, 46 | request: Request, 47 | auth_service: Annotated[AuthService, Depends(AuthService)], 48 | ) -> None: 49 | token = request.headers.get("Authorization") or "" 50 | try: 51 | if not auth_service.is_valid_token(token, self.permissions, request.url): 52 | raise HTTPException(HTTPStatus.UNAUTHORIZED) 53 | except RuntimeError as e: 54 | raise HTTPException(HTTPStatus.INTERNAL_SERVER_ERROR) from e 55 | 56 | 57 | permission_checker_for_user = PermissionChecker(["User"]) 58 | 59 | 60 | # Intelligence Layer Task ###################################################### 61 | 62 | load_dotenv() 63 | 64 | 65 | def client() -> Client: 66 | return Client( 67 | token=os.environ["AA_TOKEN"], 68 | host=os.environ["CLIENT_URL"], 69 | ) 70 | 71 | 72 | def default_model( 73 | app_client: Annotated[AlephAlphaClientProtocol, Depends(client)], 74 | ) -> Llama3InstructModel: 75 | return Llama3InstructModel(client=app_client) 76 | 77 | 78 | def summary_task( 79 | model: Annotated[Llama3InstructModel, Depends(default_model)], 80 | ) -> SteerableSingleChunkSummarize: 81 | return SteerableSingleChunkSummarize(model=model) 82 | 83 | 84 | @app.post( 85 | "/summary", 86 | dependencies=[Depends(PermissionChecker(["User"]))], 87 | status_code=http.HTTPStatus.OK, 88 | ) 89 | def summary_task_route( 90 | input: SingleChunkSummarizeInput, 91 | task: Annotated[ 92 | Task[SingleChunkSummarizeInput, SummarizeOutput], Depends(summary_task) 93 | ], 94 | ) -> SummarizeOutput: 95 | return task.run(input, NoOpTracer()) 96 | -------------------------------------------------------------------------------- /src/documentation/how_tos/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Aleph-Alpha/intelligence-layer-sdk/261ab36a28977d91e2d0c8ca16039edcbab81925/src/documentation/how_tos/__init__.py -------------------------------------------------------------------------------- 
/src/documentation/how_tos/how_to_aggregate_evaluations.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "from example_data import DummyAggregationLogic, example_data\n", 10 | "\n", 11 | "from intelligence_layer.evaluation.aggregation.aggregator import Aggregator\n", 12 | "from intelligence_layer.evaluation.aggregation.in_memory_aggregation_repository import (\n", 13 | " InMemoryAggregationRepository,\n", 14 | ")" 15 | ] 16 | }, 17 | { 18 | "cell_type": "markdown", 19 | "metadata": {}, 20 | "source": [ 21 | "# How to aggregate evaluations\n", 22 | "0. Run the evaluations of all your tasks and datasets (see [here](./how_to_evaluate_runs.ipynb)).\n", 23 | " - When aggregating multiple evaluations, all of them need the same data types \n", 24 | "1. Initialize all necessary repositories for the `Aggregator`, and an `AggregationLogic`\n", 25 | "2. Run the `Aggregator` to aggregate all examples and create a single `AggregationOverview`\n", 26 | "3. (Optional) Save the `AggregationOverview.id` for later retrieval\n", 27 | "\n", 28 | "### Example" 29 | ] 30 | }, 31 | { 32 | "cell_type": "code", 33 | "execution_count": null, 34 | "metadata": {}, 35 | "outputs": [], 36 | "source": [ 37 | "# Step 0\n", 38 | "\n", 39 | "\n", 40 | "my_example_data = example_data()\n", 41 | "print()\n", 42 | "\n", 43 | "evaluation_ids = [\n", 44 | " my_example_data.evaluation_overview_1.id,\n", 45 | " my_example_data.evaluation_overview_2.id,\n", 46 | "]\n", 47 | "\n", 48 | "# Step 1\n", 49 | "evaluation_repository = my_example_data.evaluation_repository\n", 50 | "aggregation_repository = InMemoryAggregationRepository()\n", 51 | "aggregation_logic = DummyAggregationLogic()\n", 52 | "\n", 53 | "# Step 2\n", 54 | "aggregator = Aggregator(\n", 55 | " evaluation_repository,\n", 56 | " aggregation_repository,\n", 57 | " \"MyAggregationDescription\",\n", 58 | " aggregation_logic,\n", 59 | ")\n", 60 | "aggregation_overview = aggregator.aggregate_evaluation(\n", 61 | " *evaluation_ids, labels=set([\"label_a\"]), metadata=dict({\"key\": \"value\"})\n", 62 | ")\n", 63 | "\n", 64 | "# Step 3\n", 65 | "print(aggregation_overview.id)\n", 66 | "print(aggregation_overview.labels)\n", 67 | "print(aggregation_overview.metadata)" 68 | ] 69 | } 70 | ], 71 | "metadata": { 72 | "language_info": { 73 | "codemirror_mode": { 74 | "name": "ipython" 75 | }, 76 | "file_extension": ".py", 77 | "mimetype": "text/x-python", 78 | "name": "python", 79 | "nbconvert_exporter": "python" 80 | } 81 | }, 82 | "nbformat": 4, 83 | "nbformat_minor": 2 84 | } 85 | -------------------------------------------------------------------------------- /src/documentation/how_tos/how_to_create_a_dataset.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "from collections.abc import Sequence\n", 10 | "\n", 11 | "from dotenv import load_dotenv\n", 12 | "from pydantic import BaseModel\n", 13 | "\n", 14 | "from intelligence_layer.evaluation import Example, InMemoryDatasetRepository\n", 15 | "\n", 16 | "load_dotenv()" 17 | ] 18 | }, 19 | { 20 | "cell_type": "markdown", 21 | "metadata": {}, 22 | "source": [ 23 | "# How to create a dataset\n", 24 | "\n", 25 | "0. Collect data for examples.\n", 26 | "1. 
Convert data to `Example`s.\n", 27 | "1. Create a `DatasetRepository`.\n", 28 | "2. Store `Example`s to `DatasetRepository`.\n", 29 | "3. Remember the dataset id." 30 | ] 31 | }, 32 | { 33 | "cell_type": "markdown", 34 | "metadata": {}, 35 | "source": [ 36 | "### Example" 37 | ] 38 | }, 39 | { 40 | "cell_type": "code", 41 | "execution_count": null, 42 | "metadata": {}, 43 | "outputs": [], 44 | "source": [ 45 | "class StoryTaskInput(BaseModel): # Should already be implemented in your task\n", 46 | " topic: str\n", 47 | " targeted_word_count: int\n", 48 | "\n", 49 | "\n", 50 | "class StoryTaskExpectedOutput(BaseModel): # Should already be implemented in your task\n", 51 | " keywords: Sequence[str]\n", 52 | "\n", 53 | "\n", 54 | "# Step 1\n", 55 | "examples = [\n", 56 | " Example(\n", 57 | " input=StoryTaskInput(topic=\"rain\", targeted_word_count=42),\n", 58 | " expected_output=StoryTaskExpectedOutput(keywords=[\"wet\"]),\n", 59 | " metadata={\n", 60 | " \"author\": \"Shakespeare\"\n", 61 | " }, # the metadata is optional and can contain custom information\n", 62 | " ),\n", 63 | " # ...\n", 64 | "]\n", 65 | "\n", 66 | "# Step 2 - Use FileDatasetRepository or HuggingFaceDatasetRepository for persistence\n", 67 | "dataset_repository = InMemoryDatasetRepository()\n", 68 | "\n", 69 | "# Step 3\n", 70 | "dataset = dataset_repository.create_dataset(\n", 71 | " examples=examples,\n", 72 | " dataset_name=\"StoryDataset\",\n", 73 | " labels=set([\"label1\", \"label2\"]),\n", 74 | " metadata=dict({\"key_a\": [\"a\", \"b\"], \"key_b\": \"value\"}),\n", 75 | ")\n", 76 | "\n", 77 | "# Step 4\n", 78 | "print(dataset.id)\n", 79 | "print(dataset.labels)\n", 80 | "print(dataset.metadata)" 81 | ] 82 | } 83 | ], 84 | "metadata": { 85 | "language_info": { 86 | "codemirror_mode": { 87 | "name": "ipython" 88 | }, 89 | "file_extension": ".py", 90 | "mimetype": "text/x-python", 91 | "name": "python", 92 | "nbconvert_exporter": "python" 93 | } 94 | }, 95 | "nbformat": 4, 96 | "nbformat_minor": 2 97 | } 98 | -------------------------------------------------------------------------------- /src/documentation/how_tos/how_to_define_a_task.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# How to define a task\n", 8 | "\n", 9 | "1. Think about what you want to do and define the requirements for your task\n", 10 | "2. Define the corresponding input and output in the form of Python classes\n", 11 | "3. Check if any existing task can be used to fulfill these requirements (see the [Use-case index](../../../README.md#use-case-index))\n", 12 | "4. Implement the task with the defined input and output types, see [How to implement a task](how_to_implement_a_task.ipynb) \n" 13 | ] 14 | }, 15 | { 16 | "cell_type": "markdown", 17 | "metadata": {}, 18 | "source": [ 19 | "### Example\n", 20 | "\n", 21 | "**Step 1**\n", 22 | "- I want the LLM to tell a joke about a specific topic\n", 23 | "- It should work for any topic\n", 24 | "- It should fail if there is no topic given by the user\n", 25 | "\n", 26 | "\n", 27 | "**Step 2**\n", 28 | "\n", 29 | "```python\n", 30 | "class TellAJokeTaskInput(BaseModel):\n", 31 | " topic: str\n", 32 | "\n", 33 | "class TellAJokeTaskOutput(BaseModel):\n", 34 | " joke: str\n", 35 | "```\n", 36 | "\n", 37 | "**Step 3**\n", 38 | "On first glance any of the QA tasks seem to fulfill the requirements. 
However, here only the topic for the joke should be specified by the user and the request to tell a joke should be handled by the task itself. \n", 39 | "\n", 40 | "\n", 41 | "**Step 4**\n", 42 | "```python\n", 43 | "class TellAJokeTask(Task[TellAJokeTaskInput, TellAJokeTaskOutput]):\n", 44 | " ...\n", 45 | "```" 46 | ] 47 | } 48 | ], 49 | "metadata": { 50 | "language_info": { 51 | "codemirror_mode": { 52 | "name": "ipython" 53 | }, 54 | "file_extension": ".py", 55 | "mimetype": "text/x-python", 56 | "name": "python", 57 | "nbconvert_exporter": "python" 58 | } 59 | }, 60 | "nbformat": 4, 61 | "nbformat_minor": 2 62 | } 63 | -------------------------------------------------------------------------------- /src/documentation/how_tos/how_to_evaluate_runs.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "from example_data import DummyEvaluationLogic, example_data\n", 10 | "\n", 11 | "from intelligence_layer.evaluation import Evaluator, InMemoryEvaluationRepository" 12 | ] 13 | }, 14 | { 15 | "cell_type": "markdown", 16 | "metadata": {}, 17 | "source": [ 18 | "# How to evaluate runs\n", 19 | "0. Run your tasks on the datasets where you want to evaluate them on (see [here](./how_to_run_a_task_on_a_dataset.ipynb))\n", 20 | " - When evaluating multiple runs, all of them need the same data types \n", 21 | "2. Initialize all necessary repositories for the `Evaluator`, and an `EvaluationLogic`.\n", 22 | "3. Run the evaluator to evaluate all examples and create a single `EvaluationOverview`\n", 23 | "4. (Optional) Save the evaluation id for later use" 24 | ] 25 | }, 26 | { 27 | "cell_type": "markdown", 28 | "metadata": {}, 29 | "source": [ 30 | "### Example" 31 | ] 32 | }, 33 | { 34 | "cell_type": "code", 35 | "execution_count": null, 36 | "metadata": {}, 37 | "outputs": [], 38 | "source": [ 39 | "# Step 0\n", 40 | "\n", 41 | "my_example_data = example_data()\n", 42 | "print()\n", 43 | "run_ids = [my_example_data.run_overview_1.id, my_example_data.run_overview_2.id]\n", 44 | "\n", 45 | "# Step 1\n", 46 | "dataset_repository = my_example_data.dataset_repository\n", 47 | "run_repository = my_example_data.run_repository\n", 48 | "evaluation_repository = InMemoryEvaluationRepository()\n", 49 | "evaluation_logic = DummyEvaluationLogic()\n", 50 | "\n", 51 | "# Step 3\n", 52 | "evaluator = Evaluator(\n", 53 | " dataset_repository,\n", 54 | " run_repository,\n", 55 | " evaluation_repository,\n", 56 | " \"My dummy evaluation\",\n", 57 | " evaluation_logic,\n", 58 | ")\n", 59 | "\n", 60 | "evaluation_overview = evaluator.evaluate_runs(\n", 61 | " *run_ids, labels=set({\"label\"}), metadata=dict({\"key\": \"value\"})\n", 62 | ")\n", 63 | "\n", 64 | "# Step 4\n", 65 | "print(evaluation_overview.id)\n", 66 | "print(evaluation_overview.metadata)\n", 67 | "print(evaluation_overview.labels)" 68 | ] 69 | } 70 | ], 71 | "metadata": { 72 | "language_info": { 73 | "codemirror_mode": { 74 | "name": "ipython" 75 | }, 76 | "file_extension": ".py", 77 | "mimetype": "text/x-python", 78 | "name": "python", 79 | "nbconvert_exporter": "python" 80 | } 81 | }, 82 | "nbformat": 4, 83 | "nbformat_minor": 2 84 | } 85 | -------------------------------------------------------------------------------- /src/documentation/how_tos/how_to_implement_elo_evaluations.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | 
"cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "from documentation.how_tos.example_data import DummyEloEvaluationLogic, example_data\n", 10 | "from intelligence_layer.evaluation import (\n", 11 | " IncrementalEvaluator,\n", 12 | " InMemoryEvaluationRepository,\n", 13 | ")" 14 | ] 15 | }, 16 | { 17 | "cell_type": "markdown", 18 | "metadata": {}, 19 | "source": [ 20 | "# How to implement elo evaluations\n", 21 | "0. Run your tasks on the datasets you want to evaluate (see [here](./how_to_run_a_task_on_a_dataset.ipynb))\n", 22 | " - When evaluating multiple runs, all of them need the same data types \n", 23 | "2. Initialize all necessary repositories for the `IncrementalEvaluator`, and an `EloEvaluationLogic` that is specific to your use case. \n", 24 | "3. Run the evaluator to evaluate all examples and create a single `EvaluationOverview`\n", 25 | "4. (Optional) Save the evaluation id for later use" 26 | ] 27 | }, 28 | { 29 | "cell_type": "markdown", 30 | "metadata": {}, 31 | "source": [ 32 | "### Example" 33 | ] 34 | }, 35 | { 36 | "cell_type": "code", 37 | "execution_count": null, 38 | "metadata": {}, 39 | "outputs": [], 40 | "source": [ 41 | "# Step 0\n", 42 | "\n", 43 | "\n", 44 | "my_example_data = example_data()\n", 45 | "print()\n", 46 | "run_ids = [my_example_data.run_overview_1.id, my_example_data.run_overview_2.id]\n", 47 | "\n", 48 | "# Step 1\n", 49 | "dataset_repository = my_example_data.dataset_repository\n", 50 | "run_repository = my_example_data.run_repository\n", 51 | "evaluation_repository = InMemoryEvaluationRepository()\n", 52 | "evaluation_logic = DummyEloEvaluationLogic()\n", 53 | "\n", 54 | "# Step 2\n", 55 | "evaluator = IncrementalEvaluator(\n", 56 | " dataset_repository,\n", 57 | " run_repository,\n", 58 | " evaluation_repository,\n", 59 | " \"My dummy evaluation\",\n", 60 | " evaluation_logic,\n", 61 | ")\n", 62 | "\n", 63 | "evaluation_overview = evaluator.evaluate_runs(*run_ids)\n", 64 | "\n", 65 | "# Step 3\n", 66 | "print(evaluation_overview.id)" 67 | ] 68 | } 69 | ], 70 | "metadata": { 71 | "language_info": { 72 | "codemirror_mode": { 73 | "name": "ipython" 74 | }, 75 | "file_extension": ".py", 76 | "mimetype": "text/x-python", 77 | "name": "python", 78 | "nbconvert_exporter": "python" 79 | } 80 | }, 81 | "nbformat": 4, 82 | "nbformat_minor": 2 83 | } 84 | -------------------------------------------------------------------------------- /src/documentation/how_tos/how_to_log_and_debug_a_task.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "import random\n", 10 | "from uuid import uuid4\n", 11 | "\n", 12 | "from aleph_alpha_client import Prompt\n", 13 | "from dotenv import load_dotenv\n", 14 | "\n", 15 | "from intelligence_layer.connectors import StudioClient\n", 16 | "from intelligence_layer.core import (\n", 17 | " CompleteInput,\n", 18 | " InMemoryTracer,\n", 19 | " LuminousControlModel,\n", 20 | " Task,\n", 21 | " TaskSpan,\n", 22 | ")\n", 23 | "\n", 24 | "load_dotenv()" 25 | ] 26 | }, 27 | { 28 | "cell_type": "markdown", 29 | "metadata": {}, 30 | "source": [ 31 | "# How to log and debug a task\n", 32 | "The Intelligence Layer offers logging and debugging via a `Tracer`. 
\n", 33 | "Here are several steps you can use to debug tasks with the trace feature:\n", 34 | "\n", 35 | "-----\n", 36 | "Most logging of a task (input, output, time) is done simply by inheriting from `Task`. This logs to a trace.\n", 37 | "\n", 38 | " - If you don't care about logging and tracing, use the `NoOpTracer`.\n", 39 | " - To create custom logging messages in a trace use `task_span.log()`.\n", 40 | " - To map a complex execution flow of a task into a single trace, pass the `task_span` of the `do_run` to other execution methods (e.g. `Task.run()` or `model.complete()`). \n", 41 | " - If the execution method is not provided by the intelligence layer, the tracing of input and output has to happen manually. See the implementation of `Task.run()` for an example.\n", 42 | " - Use the [submit trace functionality of the `StudioClient`](./how_to_use_studio_with_traces.ipynb) to view and inspect a trace in Studio" 43 | ] 44 | }, 45 | { 46 | "cell_type": "markdown", 47 | "metadata": {}, 48 | "source": [ 49 | "### Example" 50 | ] 51 | }, 52 | { 53 | "cell_type": "code", 54 | "execution_count": null, 55 | "metadata": {}, 56 | "outputs": [], 57 | "source": [ 58 | "class DummyTask(Task[str, str]):\n", 59 | " def __init__(self, model: LuminousControlModel | None = None) -> None:\n", 60 | " self._model = model if model else LuminousControlModel()\n", 61 | "\n", 62 | " def do_run(self, input: str, task_span: TaskSpan) -> str:\n", 63 | " should_output = random.random()\n", 64 | " # log a custom message and value\n", 65 | " task_span.log(\n", 66 | " \"My very important log message that logs a random value\", should_output\n", 67 | " )\n", 68 | " if should_output > 0.5:\n", 69 | " model_input = CompleteInput(prompt=Prompt.from_text(input), temperature=0.2)\n", 70 | " # Create a trace tree by passing task_span to .run or .complete methods.\n", 71 | " completion = self._model.complete(model_input, task_span)\n", 72 | " return completion.completions[0].completion\n", 73 | " else:\n", 74 | " return \"Nope!\"\n", 75 | "\n", 76 | "\n", 77 | "tracer = InMemoryTracer()\n", 78 | "DummyTask().run(\"\", tracer)\n", 79 | "\n", 80 | "project_name = str(uuid4())\n", 81 | "studio_client = StudioClient(project=project_name)\n", 82 | "my_project = studio_client.create_project(project=project_name)\n", 83 | "\n", 84 | "submitted_trace_id = studio_client.submit_from_tracer(tracer)\n", 85 | "\n", 86 | "\n", 87 | "pass" 88 | ] 89 | } 90 | ], 91 | "metadata": { 92 | "language_info": { 93 | "codemirror_mode": { 94 | "name": "ipython" 95 | }, 96 | "file_extension": ".py", 97 | "mimetype": "text/x-python", 98 | "name": "python", 99 | "nbconvert_exporter": "python" 100 | } 101 | }, 102 | "nbformat": 4, 103 | "nbformat_minor": 2 104 | } 105 | -------------------------------------------------------------------------------- /src/documentation/how_tos/how_to_resume_a_run_after_a_crash.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "import pytest\n", 10 | "from example_data import DummyTaskCanFail, example_data\n", 11 | "\n", 12 | "from intelligence_layer.evaluation.run.in_memory_run_repository import (\n", 13 | " InMemoryRunRepository,\n", 14 | ")\n", 15 | "from intelligence_layer.evaluation.run.runner import Runner\n", 16 | "\n", 17 | "my_example_data = example_data()\n", 18 | "\n", 19 | "dataset_repository = my_example_data.dataset_repository\n", 20 
| "run_repository = InMemoryRunRepository()\n", 21 | "task = DummyTaskCanFail()\n", 22 | "\n", 23 | "runner = Runner(task, dataset_repository, run_repository, \"MyRunDescription\")" 24 | ] 25 | }, 26 | { 27 | "cell_type": "markdown", 28 | "metadata": {}, 29 | "source": [ 30 | "# How to resume a run after a crash\n", 31 | "\n", 32 | "0. Run task on a dataset, see [here](./how_to_run_a_task_on_a_dataset.ipynb).\n", 33 | "1. A crash occurs.\n", 34 | "2. Re-run task on the same dataset with `resume_from_recovery_data` set to `True`." 35 | ] 36 | }, 37 | { 38 | "cell_type": "code", 39 | "execution_count": null, 40 | "metadata": {}, 41 | "outputs": [], 42 | "source": [ 43 | "# Steps 0 & 1: Run task for dataset\n", 44 | "with pytest.raises(Exception): # noqa: B017\n", 45 | " run_overview = runner.run_dataset(my_example_data.dataset.id, abort_on_error=True)" 46 | ] 47 | }, 48 | { 49 | "cell_type": "markdown", 50 | "metadata": {}, 51 | "source": [ 52 | "A failure has occurred. Note, this might be a crash of the computer or an unexpected uncaught exception. \n", 53 | "\n", 54 | "For demonstration purposes, we set `abort_on_error=True`, such that an exception is raised. Further, we catch the exception for purely technical reasons of our CI. Feel free to remove the pytest scope on your local setup when running this notebook.\n", 55 | "\n", 56 | "Even though the run crashed, the `RunRepository` stores recovery data and is able to continue `run_dataset` by setting `resume_from_recovery_data` to `True`. This way, the already successfully calculated outputs do not have to be re-calculated again, and only the missing examples are processed:" 57 | ] 58 | }, 59 | { 60 | "cell_type": "code", 61 | "execution_count": null, 62 | "metadata": {}, 63 | "outputs": [], 64 | "source": [ 65 | "# Step 2: Re-run the same run with `resume_from_recovery_data` enabled\n", 66 | "run_overview = runner.run_dataset(\n", 67 | " my_example_data.dataset.id, abort_on_error=True, resume_from_recovery_data=True\n", 68 | ")" 69 | ] 70 | }, 71 | { 72 | "cell_type": "code", 73 | "execution_count": null, 74 | "metadata": {}, 75 | "outputs": [], 76 | "source": [ 77 | "print(run_overview)" 78 | ] 79 | }, 80 | { 81 | "cell_type": "markdown", 82 | "metadata": {}, 83 | "source": [ 84 | "Note: The `FileSystemRepository` persists the recovery data in the file system. The run can therefore be resumed even in case of a complete program or even computer crash. \n", 85 | "\n", 86 | "On the other hand, the `InMemoryRunRepository` retains the recovery data only as long as the repository resides in computer memory. A crash of the process will lead to the loss of the recovery data. In that case, all examples will have to be recalculated." 
87 | ] 88 | } 89 | ], 90 | "metadata": { 91 | "language_info": { 92 | "codemirror_mode": { 93 | "name": "ipython" 94 | }, 95 | "file_extension": ".py", 96 | "mimetype": "text/x-python", 97 | "name": "python", 98 | "nbconvert_exporter": "python" 99 | } 100 | }, 101 | "nbformat": 4, 102 | "nbformat_minor": 2 103 | } 104 | -------------------------------------------------------------------------------- /src/documentation/how_tos/how_to_run_a_task_on_a_dataset.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "from example_data import DummyTask, example_data\n", 10 | "\n", 11 | "from intelligence_layer.evaluation.run.in_memory_run_repository import (\n", 12 | " InMemoryRunRepository,\n", 13 | ")\n", 14 | "from intelligence_layer.evaluation.run.runner import Runner" 15 | ] 16 | }, 17 | { 18 | "cell_type": "markdown", 19 | "metadata": {}, 20 | "source": [ 21 | "# How to run a task on a dataset\n", 22 | "0. Create a suitable dataset (see [here](./how_to_create_a_dataset.ipynb)) and a task (see [here](./how_to_implement_a_task.ipynb)).\n", 23 | "1. Initialize the task and a `RunRepository`, and open the correct `DatasetRepository`\n", 24 | " - The `DatasetRepository` needs to contain the dataset.\n", 25 | " - The `RunRepository` stores results.\n", 26 | "2. Use the `Runner` to run the task on the given dataset via `run_dataset`\n", 27 | "3. Save the id of the resulting `RunOverview`\n", 28 | "\n", 29 | "### Example" 30 | ] 31 | }, 32 | { 33 | "cell_type": "code", 34 | "execution_count": null, 35 | "metadata": {}, 36 | "outputs": [], 37 | "source": [ 38 | "# Step 0\n", 39 | "my_example_data = example_data()\n", 40 | "print()\n", 41 | "\n", 42 | "# Step 1\n", 43 | "dataset_repository = my_example_data.dataset_repository\n", 44 | "run_repository = InMemoryRunRepository()\n", 45 | "task = DummyTask()\n", 46 | "\n", 47 | "# Step 2\n", 48 | "runner = Runner(task, dataset_repository, run_repository, \"MyRunDescription\")\n", 49 | "run_overview = runner.run_dataset(my_example_data.dataset.id)\n", 50 | "\n", 51 | "# Step 3\n", 52 | "print(run_overview.id)" 53 | ] 54 | } 55 | ], 56 | "metadata": { 57 | "language_info": { 58 | "codemirror_mode": { 59 | "name": "ipython" 60 | }, 61 | "file_extension": ".py", 62 | "mimetype": "text/x-python", 63 | "name": "python", 64 | "nbconvert_exporter": "python" 65 | } 66 | }, 67 | "nbformat": 4, 68 | "nbformat_minor": 2 69 | } 70 | -------------------------------------------------------------------------------- /src/documentation/how_tos/studio/how_to_execute_a_benchmark.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "from uuid import uuid4\n", 10 | "\n", 11 | "from dotenv import load_dotenv\n", 12 | "\n", 13 | "from documentation.how_tos.example_data import (\n", 14 | " ComplexDummyAggregationLogic,\n", 15 | " ComplexDummyEvaluationLogic,\n", 16 | " ComplexDummyTask,\n", 17 | " example_data,\n", 18 | ")\n", 19 | "from intelligence_layer.connectors import StudioClient\n", 20 | "from intelligence_layer.evaluation import (\n", 21 | " StudioBenchmarkRepository,\n", 22 | " StudioDatasetRepository,\n", 23 | ")\n", 24 | "\n", 25 | "load_dotenv()\n", 26 | "my_example_data = example_data()\n", 27 | "examples = 
my_example_data.complex_examples" 28 | ] 29 | }, 30 | { 31 | "cell_type": "markdown", 32 | "metadata": {}, 33 | "source": [ 34 | "# How to execute Benchmarks\n", 35 | "
\n", 36 | "\n", 37 | "Make sure your account has permissions to use the Studio application.\n", 38 | "\n", 39 | "For an on-prem or local installation, please contact the corresponding team.\n", 40 | "
" 41 | ] 42 | }, 43 | { 44 | "cell_type": "markdown", 45 | "metadata": {}, 46 | "source": [ 47 | "0. Initialize a `StudioClient` with a project.\n", 48 | " - Use an existing project or create a new one with the `StudioClient.create_project` function.\n", 49 | " \n", 50 | "1. Create a `StudioDatasetRepository` and create a new `Dataset` via `StudioDatasetRepository.create_dataset`, which will automatically upload this new `Dataset` to Studio.\n", 51 | "\n", 52 | "2. Create a `StudioBenchmarkRepository` and instantiate a benchmark with your `evaluation_logic` and `aggregation_logic` using the `create_benchmark` function.\n", 53 | "\n", 54 | "3. Execute the `Benchmark` with your initialized `Task`\n", 55 | "\n", 56 | "### Example" 57 | ] 58 | }, 59 | { 60 | "cell_type": "code", 61 | "execution_count": null, 62 | "metadata": {}, 63 | "outputs": [], 64 | "source": [ 65 | "# Step 0\n", 66 | "\n", 67 | "studio_client = StudioClient(\n", 68 | " project=my_example_data.studio_project_name, create_project=True\n", 69 | ")\n", 70 | "\n", 71 | "# Step 1\n", 72 | "studio_dataset_repository = StudioDatasetRepository(studio_client)\n", 73 | "dataset = studio_dataset_repository.create_dataset(examples, \"my_dataset\")\n", 74 | "\n", 75 | "# Step 2\n", 76 | "studio_benchmark_repository = StudioBenchmarkRepository(studio_client)\n", 77 | "evaluation_logic = ComplexDummyEvaluationLogic()\n", 78 | "aggregation_logic = ComplexDummyAggregationLogic()\n", 79 | "benchmark = studio_benchmark_repository.create_benchmark(\n", 80 | " dataset.id, evaluation_logic, aggregation_logic, f\"my_benchmark-{uuid4()}\"\n", 81 | ")\n", 82 | "\n", 83 | "# Step 3\n", 84 | "\n", 85 | "task = ComplexDummyTask()\n", 86 | "benchmark.execute(task, \"my_task\")" 87 | ] 88 | } 89 | ], 90 | "metadata": { 91 | "language_info": { 92 | "codemirror_mode": { 93 | "name": "ipython" 94 | }, 95 | "file_extension": ".py", 96 | "mimetype": "text/x-python", 97 | "name": "python", 98 | "nbconvert_exporter": "python" 99 | } 100 | }, 101 | "nbformat": 4, 102 | "nbformat_minor": 2 103 | } 104 | -------------------------------------------------------------------------------- /src/documentation/how_tos/studio/how_to_upload_existing_datasets_to_studio.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "from dotenv import load_dotenv\n", 10 | "\n", 11 | "from documentation.how_tos.example_data import example_data\n", 12 | "from intelligence_layer.connectors import StudioClient\n", 13 | "from intelligence_layer.evaluation.dataset.studio_dataset_repository import (\n", 14 | " StudioDatasetRepository,\n", 15 | ")\n", 16 | "\n", 17 | "load_dotenv()\n", 18 | "\n", 19 | "my_example_data = example_data()" 20 | ] 21 | }, 22 | { 23 | "cell_type": "markdown", 24 | "metadata": {}, 25 | "source": [ 26 | "# How to upload (existing) datasets to Studio\n", 27 | "
\n", 28 | "\n", 29 | "Make sure your account has permissions to use the Studio application.\n", 30 | "\n", 31 | "For an on-prem or local installation, please contact the corresponding team.\n", 32 | "
" 33 | ] 34 | }, 35 | { 36 | "cell_type": "markdown", 37 | "metadata": {}, 38 | "source": [ 39 | "\n", 40 | "0. Extract `Dataset` and `Examples` from your `DatasetRepository`.\n", 41 | "\n", 42 | "1. Initialize a `StudioClient` with a project.\n", 43 | " - Use an existing project or create a new one with the `StudioClient.create_project` function.\n", 44 | " \n", 45 | "2. Create a `StudioDatasetRepository` and create a new `Dataset` via `StudioDatasetRepository.create_dataset`, which will automatically upload this new `Dataset` to Studio.\n", 46 | "\n", 47 | "### Example" 48 | ] 49 | }, 50 | { 51 | "cell_type": "code", 52 | "execution_count": null, 53 | "metadata": {}, 54 | "outputs": [], 55 | "source": [ 56 | "# Step 0\n", 57 | "existing_dataset_repo = my_example_data.dataset_repository\n", 58 | "\n", 59 | "existing_dataset = existing_dataset_repo.dataset(dataset_id=my_example_data.dataset.id)\n", 60 | "assert existing_dataset, \"Make sure your dataset still exists.\"\n", 61 | "\n", 62 | "existing_examples = existing_dataset_repo.examples(\n", 63 | " existing_dataset.id, input_type=str, expected_output_type=str\n", 64 | ")\n", 65 | "\n", 66 | "# Step 1\n", 67 | "studio_client = StudioClient(\n", 68 | " project=my_example_data.studio_project_name,\n", 69 | " create_project=True,\n", 70 | ")\n", 71 | "\n", 72 | "# Step 2\n", 73 | "studio_dataset_repo = StudioDatasetRepository(studio_client=studio_client)\n", 74 | "\n", 75 | "studio_dataset = studio_dataset_repo.create_dataset(\n", 76 | " examples=existing_examples,\n", 77 | " dataset_name=existing_dataset.name,\n", 78 | " labels=existing_dataset.labels,\n", 79 | " metadata=existing_dataset.metadata,\n", 80 | ")" 81 | ] 82 | } 83 | ], 84 | "metadata": { 85 | "language_info": { 86 | "codemirror_mode": { 87 | "name": "ipython" 88 | }, 89 | "file_extension": ".py", 90 | "mimetype": "text/x-python", 91 | "name": "python", 92 | "nbconvert_exporter": "python" 93 | } 94 | }, 95 | "nbformat": 4, 96 | "nbformat_minor": 2 97 | } 98 | -------------------------------------------------------------------------------- /src/documentation/how_tos/studio/how_to_use_studio_with_traces.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "from dotenv import load_dotenv\n", 10 | "\n", 11 | "from documentation.how_tos.example_data import DummyTask, example_data\n", 12 | "from intelligence_layer.connectors import StudioClient\n", 13 | "from intelligence_layer.core import InMemoryTracer\n", 14 | "\n", 15 | "load_dotenv()\n", 16 | "\n", 17 | "my_example_data = example_data()" 18 | ] 19 | }, 20 | { 21 | "cell_type": "markdown", 22 | "metadata": {}, 23 | "source": [ 24 | "# How to use Studio for Debugging in a SaaS Configuration\n", 25 | "
\n", 26 | "\n", 27 | "Make sure your account has permissions to use the Studio application.\n", 28 | "\n", 29 | "For an on-prem or local installation, please contact the corresponding team.\n", 30 | "
" 31 | ] 32 | }, 33 | { 34 | "cell_type": "markdown", 35 | "metadata": {}, 36 | "source": [ 37 | "\n", 38 | "0. Generate a trace of your `Task` of interest.\n", 39 | "1. Initialize a `StudioClient` with a project.\n", 40 | " - Use an existing project or create a new one with the `StudioClient.create_project` function.\n", 41 | "2. Submit your traces with the client\n", 42 | " 1. Submit a single trace via `Tracer.export_for_viewing` and `StudioClient.submit_trace`\n", 43 | " 2. [Recommended] submit multiple traces via `StudioClient.submit_from_tracer`. \n", 44 | "\n", 45 | "### Example" 46 | ] 47 | }, 48 | { 49 | "cell_type": "code", 50 | "execution_count": null, 51 | "metadata": {}, 52 | "outputs": [], 53 | "source": [ 54 | "# Step 0\n", 55 | "tracer = InMemoryTracer()\n", 56 | "DummyTask().run(\"My Dummy Run\", tracer=tracer)\n", 57 | "\n", 58 | "# Step 1\n", 59 | "studio_client = StudioClient(\n", 60 | " project=my_example_data.studio_project_name, create_project=True\n", 61 | ")\n", 62 | "\n", 63 | "# Step 2.1\n", 64 | "trace_to_submit = tracer.export_for_viewing()\n", 65 | "trace_id = studio_client.submit_trace(trace_to_submit) # only works for single traces\n", 66 | "\n", 67 | "# Step 2.2\n", 68 | "tracer2 = InMemoryTracer()\n", 69 | "DummyTask().run(\"My Dummy Run2\", tracer=tracer2)\n", 70 | "DummyTask().run(\"My Dummy Run3\", tracer=tracer2)\n", 71 | "ids_of_submitted_traces = studio_client.submit_from_tracer(tracer2)" 72 | ] 73 | } 74 | ], 75 | "metadata": { 76 | "language_info": { 77 | "codemirror_mode": { 78 | "name": "ipython" 79 | }, 80 | "file_extension": ".py", 81 | "mimetype": "text/x-python", 82 | "name": "python", 83 | "nbconvert_exporter": "python" 84 | } 85 | }, 86 | "nbformat": 4, 87 | "nbformat_minor": 2 88 | } 89 | -------------------------------------------------------------------------------- /src/intelligence_layer/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Aleph-Alpha/intelligence-layer-sdk/261ab36a28977d91e2d0c8ca16039edcbab81925/src/intelligence_layer/__init__.py -------------------------------------------------------------------------------- /src/intelligence_layer/connectors/base/json_serializable.py: -------------------------------------------------------------------------------- 1 | from collections.abc import Mapping, Sequence 2 | from typing import TYPE_CHECKING 3 | 4 | from typing_extensions import TypeAliasType 5 | 6 | if TYPE_CHECKING: 7 | JsonSerializable = ( 8 | int 9 | | float 10 | | str 11 | | None 12 | | bool 13 | | Sequence["JsonSerializable"] 14 | | Mapping[str, "JsonSerializable"] 15 | ) 16 | else: 17 | JsonSerializable = TypeAliasType( 18 | "JsonSerializable", 19 | int 20 | | float 21 | | str 22 | | None 23 | | bool 24 | | Sequence["JsonSerializable"] 25 | | Mapping[str, "JsonSerializable"], 26 | ) 27 | 28 | SerializableDict = dict[str, JsonSerializable] 29 | -------------------------------------------------------------------------------- /src/intelligence_layer/connectors/data/__init__.py: -------------------------------------------------------------------------------- 1 | from .data import DataClient 2 | from .exceptions import ( 3 | DataExternalServiceUnavailable, 4 | DataForbiddenError, 5 | DataInternalError, 6 | DataInvalidInput, 7 | DataResourceNotFound, 8 | ) 9 | from .models import ( 10 | DataDataset, 11 | DataFile, 12 | DataFileCreate, 13 | DataRepository, 14 | DataRepositoryCreate, 15 | DatasetCreate, 16 | DataStage, 17 | DataStageCreate, 18 | ) 19 | 
20 | __all__ = [ 21 | "DataClient", 22 | "DataInternalError", 23 | "DataExternalServiceUnavailable", 24 | "DataForbiddenError", 25 | "DataInvalidInput", 26 | "DataResourceNotFound", 27 | "DataRepository", 28 | "DataRepositoryCreate", 29 | "DataDataset", 30 | "DatasetCreate", 31 | "DataStage", 32 | "DataStageCreate", 33 | "DataFile", 34 | "DataFileCreate", 35 | ] 36 | -------------------------------------------------------------------------------- /src/intelligence_layer/connectors/data/exceptions.py: -------------------------------------------------------------------------------- 1 | class DataError(Exception): 2 | """Base class for exceptions in this module.""" 3 | 4 | def __init__(self, *args: object) -> None: 5 | default_message = getattr(self, "DEFAULT_MESSAGE", "") 6 | super().__init__(default_message, *args) 7 | 8 | 9 | class DataInternalError(DataError): 10 | """Exception raised when an internal error occurs.""" 11 | 12 | DEFAULT_MESSAGE = "Internal error: An unexpected error occurred. " 13 | 14 | 15 | class DataResourceNotFound(DataError): 16 | """Exception raised when a resource is not found.""" 17 | 18 | DEFAULT_MESSAGE = "Resource not found: The requested resource was not found. " 19 | 20 | 21 | class DataInvalidInput(DataError): 22 | """Exception raised when the input is invalid.""" 23 | 24 | DEFAULT_MESSAGE = "Invalid input: The input provided is invalid. " 25 | 26 | 27 | class DataExternalServiceUnavailable(DataError): 28 | """Exception raised when an external service is unavailable.""" 29 | 30 | DEFAULT_MESSAGE = ( 31 | "External service unavailable: The external service is unavailable. " 32 | ) 33 | 34 | 35 | class DataForbiddenError(DataError): 36 | """Exception raised when a forbidden error occurs.""" 37 | 38 | DEFAULT_MESSAGE = ( 39 | "Forbidden error: Client does not have permission to access the resource. " 40 | ) 41 | -------------------------------------------------------------------------------- /src/intelligence_layer/connectors/kernel/kernel.py: -------------------------------------------------------------------------------- 1 | from os import getenv 2 | from typing import TypeVar 3 | 4 | import requests 5 | from pydantic import BaseModel 6 | 7 | from intelligence_layer.core import Task, TaskSpan 8 | 9 | Input = TypeVar("Input", bound=BaseModel) 10 | """Interface to be passed to the task with all data needed to run the process. 11 | Ideally, these are specified in terms related to the use-case, rather than lower-level 12 | configuration options.""" 13 | Output = TypeVar("Output", bound=BaseModel) 14 | """Interface of the output returned by the task.""" 15 | 16 | 17 | class KernelTask(Task[Input, Output]): 18 | """A Task that can call a Skill within the Kernel. 19 | 20 | Note: this will not support full tracing in the Intelligence Layer, 21 | but it will allow passing a Kernel Skill as a subtask to a larger 22 | workflow, or allow for passing it to the Evaluation tooling. 23 | 24 | Args: 25 | skill: The name of the skill deployed in Pharia Kernel that should be called. 26 | input_model: The type for the Pydantic model that should be used for serializing the input. 27 | output_model: The type for the Pydantic model that should be used for deserializing the output. 28 | host: The URL to use for accessing Pharia Kernel. Defaults to the env variable `PHARIA_KERNEL_URL` if not provided. 29 | token: The auth token to use for accessing Pharia Kernel. Defaults to the env variable `AA_TOKEN` if not provided. 
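        Example:
            A minimal sketch; the skill name and the Pydantic models below are
            illustrative placeholders, not part of the SDK or of any deployed
            Kernel skill:

                class GreetInput(BaseModel):
                    name: str

                class GreetOutput(BaseModel):
                    greeting: str

                # Calls the (hypothetical) skill "playground/greet" via the Kernel.
                greet_task = KernelTask(
                    skill="playground/greet",
                    input_model=GreetInput,
                    output_model=GreetOutput,
                )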
30 | """ 31 | 32 | def __init__( 33 | self, 34 | skill: str, 35 | input_model: type[Input], 36 | output_model: type[Output], 37 | host: str | None = None, 38 | token: str | None = None, 39 | ): 40 | if host is None: 41 | host = getenv("PHARIA_KERNEL_URL") 42 | assert host, "Define PHARIA_KERNEL_URL with a valid url pointing towards your Pharia Kernel API." 43 | if token is None: 44 | token = getenv("AA_TOKEN") 45 | assert token, "Define environment variable AA_TOKEN with a valid token for the Aleph Alpha API" 46 | 47 | self.skill = skill 48 | self.input_model = input_model 49 | self.output_model = output_model 50 | self.host = host 51 | self.session = requests.Session() 52 | self.session.headers = {"Authorization": f"Bearer {token}"} 53 | 54 | def __del__(self): 55 | if self.session: 56 | self.session.close() 57 | 58 | def do_run(self, input: Input, task_span: TaskSpan) -> Output: 59 | response = self.session.post( 60 | f"{self.host}/v1/skills/{self.skill}/run", 61 | json=input.model_dump(), 62 | ) 63 | 64 | if response.status_code != 200: 65 | raise Exception(f"{response.status_code}: {response.text}") 66 | 67 | return self.output_model(**response.json()) 68 | -------------------------------------------------------------------------------- /src/intelligence_layer/connectors/retrievers/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Aleph-Alpha/intelligence-layer-sdk/261ab36a28977d91e2d0c8ca16039edcbab81925/src/intelligence_layer/connectors/retrievers/__init__.py -------------------------------------------------------------------------------- /src/intelligence_layer/connectors/retrievers/base_retriever.py: -------------------------------------------------------------------------------- 1 | from abc import ABC, abstractmethod 2 | from collections.abc import Sequence 3 | from typing import Any, Generic, Optional, TypeVar 4 | 5 | from pydantic import BaseModel 6 | 7 | 8 | class Document(BaseModel): 9 | """A document. 10 | 11 | Attributes: 12 | text: The document's text. 13 | metadata: Any metadata added to the document. 14 | """ 15 | 16 | text: str 17 | metadata: Any = None 18 | 19 | 20 | class DocumentChunk(BaseModel): 21 | """Part of a :class:`Document`, specifically for retrieval use cases. 22 | 23 | Attributes: 24 | text: Chunk of the document that matched the search query. 25 | metadata: Any metadata added to the document. 26 | start: Start index of the chunk within the document 27 | end: End index of the chunk within the document 28 | """ 29 | 30 | text: str 31 | start: int 32 | end: int 33 | metadata: Any = None 34 | 35 | 36 | ID = TypeVar("ID") 37 | 38 | 39 | class SearchResult(BaseModel, Generic[ID]): 40 | """Contains a text alongside its search score. 41 | 42 | Attributes: 43 | id: Unique identifier of the document 44 | score: The similarity score between the text and the query that was searched with. 45 | Will be between 0 and 1, where 0 means no similarity and 1 perfect similarity. 46 | document_chunk: The document chunk found by search. 47 | """ 48 | 49 | id: ID 50 | score: float 51 | document_chunk: DocumentChunk 52 | 53 | 54 | class BaseRetriever(ABC, Generic[ID]): 55 | """General interface for any retriever. 56 | 57 | Retrievers are used to find texts given a user query. 58 | Each Retriever implementation owns its own logic for retrieval. 59 | For comparison purposes, we assume scores in the `SearchResult` instances to be between 0 and 1. 
60 | """ 61 | 62 | @abstractmethod 63 | def get_relevant_documents_with_scores( 64 | self, query: str 65 | ) -> Sequence[SearchResult[ID]]: 66 | pass 67 | 68 | @abstractmethod 69 | def get_full_document(self, id: ID) -> Optional[Document]: 70 | pass 71 | 72 | 73 | class AsyncBaseRetriever(ABC, Generic[ID]): 74 | """General interface for any asynchronous retriever. 75 | 76 | Asynchronous retrievers are used to find texts given a user query. 77 | Each Retriever implementation owns its own logic for retrieval. 78 | For comparison purposes, we assume scores in the `SearchResult` instances to be between 0 and 1. 79 | """ 80 | 81 | @abstractmethod 82 | async def get_relevant_documents_with_scores( 83 | self, query: str 84 | ) -> Sequence[SearchResult[ID]]: 85 | pass 86 | 87 | @abstractmethod 88 | async def get_full_document(self, id: ID) -> Optional[Document]: 89 | pass 90 | -------------------------------------------------------------------------------- /src/intelligence_layer/core/chunk.py: -------------------------------------------------------------------------------- 1 | from collections.abc import Sequence 2 | from typing import NewType 3 | 4 | from pydantic import BaseModel 5 | from semantic_text_splitter import TextSplitter 6 | 7 | from intelligence_layer.core.model import AlephAlphaModel 8 | from intelligence_layer.core.task import Task 9 | from intelligence_layer.core.tracer.tracer import TaskSpan 10 | 11 | TextChunk = NewType("TextChunk", str) 12 | """Segment of a larger text. 13 | 14 | This type infers that the string is smaller than the context size of the model where it is used. 15 | 16 | LLMs can't process documents larger than their context size. 17 | To handle this, documents have to be split up into smaller segments that fit within their context size. 18 | These smaller segments are referred to as chunks. 19 | """ 20 | 21 | 22 | class ChunkInput(BaseModel): 23 | """The input for a `Chunk`-task. 24 | 25 | Attributes: 26 | text: A text of arbitrary length. 27 | """ 28 | 29 | text: str 30 | 31 | 32 | class ChunkOutput(BaseModel): 33 | """The output of a `ChunkTask`. 34 | 35 | Attributes: 36 | chunks: A list of smaller sections of the input text. 37 | """ 38 | 39 | chunks: Sequence[TextChunk] 40 | 41 | 42 | class Chunk(Task[ChunkInput, ChunkOutput]): 43 | """Splits a longer text into smaller text chunks. 44 | 45 | Provide a text of any length and chunk it into smaller pieces using a 46 | tokenizer that is available within the Aleph Alpha client. 47 | 48 | Args: 49 | model: A valid Aleph Alpha model. 50 | max_tokens_per_chunk: The maximum number of tokens to fit into one chunk. 51 | """ 52 | 53 | def __init__(self, model: AlephAlphaModel, max_tokens_per_chunk: int = 512): 54 | super().__init__() 55 | self._splitter = TextSplitter.from_huggingface_tokenizer( 56 | model.get_tokenizer(), capacity=max_tokens_per_chunk 57 | ) 58 | 59 | def do_run(self, input: ChunkInput, task_span: TaskSpan) -> ChunkOutput: 60 | chunks = [TextChunk(t) for t in self._splitter.chunks(input.text)] 61 | return ChunkOutput(chunks=chunks) 62 | 63 | 64 | class ChunkWithStartEndIndices(BaseModel, frozen=True): 65 | """A `TextChunk` and its `start_index` and `end_index` within the given text. 66 | 67 | Attributes: 68 | chunk: The actual text. 69 | start_index: The character start index of the chunk within the given text. 70 | end_index: The character end index of the chunk within the given text. 
71 | """ 72 | 73 | chunk: TextChunk 74 | start_index: int 75 | end_index: int 76 | 77 | 78 | class ChunkWithIndicesOutput(BaseModel): 79 | """The output of a `ChunkWithIndices`-task. 80 | 81 | Attributes: 82 | chunks_with_indices: A list of smaller sections of the input text with the respective start_index. 83 | """ 84 | 85 | chunks_with_indices: Sequence[ChunkWithStartEndIndices] 86 | 87 | 88 | class ChunkWithIndices(Task[ChunkInput, ChunkWithIndicesOutput]): 89 | """Splits a longer text into smaller text chunks and returns the chunks' start indices. 90 | 91 | Provide a text of any length and chunk it into smaller pieces using a 92 | tokenizer that is available within the Aleph Alpha client. For each chunk, the respective 93 | start index relative to the document is also returned. 94 | 95 | Args: 96 | model: A valid Aleph Alpha model. 97 | max_tokens_per_chunk: The maximum number of tokens to fit into one chunk. 98 | """ 99 | 100 | def __init__(self, model: AlephAlphaModel, max_tokens_per_chunk: int = 512): 101 | super().__init__() 102 | self._splitter = TextSplitter.from_huggingface_tokenizer( 103 | model.get_tokenizer(), capacity=max_tokens_per_chunk, trim=False 104 | ) 105 | 106 | def do_run(self, input: ChunkInput, task_span: TaskSpan) -> ChunkWithIndicesOutput: 107 | chunks_with_indices = [ 108 | ChunkWithStartEndIndices( 109 | chunk=TextChunk(chunk), 110 | start_index=start_index, 111 | end_index=start_index + len(chunk), 112 | ) 113 | for (start_index, chunk) in self._splitter.chunk_indices(input.text) 114 | ] 115 | return ChunkWithIndicesOutput(chunks_with_indices=chunks_with_indices) 116 | -------------------------------------------------------------------------------- /src/intelligence_layer/core/echo.py: -------------------------------------------------------------------------------- 1 | from collections.abc import Sequence 2 | from typing import NewType 3 | 4 | from aleph_alpha_client import Prompt, Text 5 | from pydantic import BaseModel 6 | 7 | from intelligence_layer.core.model import AlephAlphaModel 8 | from intelligence_layer.core.task import Task, Token 9 | from intelligence_layer.core.tracer.tracer import TaskSpan 10 | 11 | LogProb = NewType("LogProb", float) 12 | 13 | 14 | class TokenWithLogProb(BaseModel): 15 | token: Token 16 | prob: LogProb 17 | 18 | 19 | class EchoInput(BaseModel): 20 | """The input for an `Echo` task. 21 | 22 | Attributes: 23 | prompt: The input text that serves as the starting point for the LLM. 24 | expected_completion: The desired completion based on the prompt. 25 | The likelihood of the tokens in this will be examined. 26 | """ 27 | 28 | prompt: Prompt 29 | expected_completion: str 30 | 31 | 32 | class EchoOutput(BaseModel): 33 | """The output of an `Echo` task. 34 | 35 | Attributes: 36 | tokens_with_log_probs: Every token of the `expected_completion` of the 37 | `EchoInput` accompanied by its probability of having been generated 38 | in a completion scenario. 39 | """ 40 | 41 | tokens_with_log_probs: Sequence[TokenWithLogProb] 42 | 43 | 44 | class Echo(Task[EchoInput, EchoOutput]): 45 | """Task that returns probabilities of a completion given a prompt. 46 | 47 | Analyzes the likelihood of generating tokens in the expected completion based on 48 | a given prompt and model. Does not generate any tokens. 49 | 50 | Args: 51 | model: A model to use in the task. 
52 | 53 | Example: 54 | >>> from aleph_alpha_client import Prompt 55 | >>> from intelligence_layer.core import Echo, EchoInput, InMemoryTracer, LuminousControlModel 56 | 57 | >>> model = LuminousControlModel(name="luminous-base-control") 58 | >>> task = Echo(model) 59 | >>> input = EchoInput( 60 | ... prompt=Prompt.from_text("This is a "), 61 | ... expected_completion="happy text", 62 | ... ) 63 | >>> tracer = InMemoryTracer() 64 | >>> output = task.run(input, tracer) 65 | """ 66 | 67 | PROMPT_TEMPLATE_STR: str = "{{prompt}}{{expected_completion}}" 68 | 69 | def __init__(self, model: AlephAlphaModel) -> None: 70 | super().__init__() 71 | self._model = model 72 | 73 | def do_run(self, input: EchoInput, task_span: TaskSpan) -> EchoOutput: 74 | if len(input.prompt.items) != 1: 75 | raise NotImplementedError( 76 | "`Echo` currently only supports prompts with one item." 77 | ) 78 | 79 | if not isinstance(input.prompt.items[0], Text): 80 | raise NotImplementedError( 81 | "`Echo` currently only supports prompts that are of type `Text`." 82 | ) 83 | 84 | echo_output = self._model.echo( 85 | input.prompt.items[0].text, input.expected_completion, task_span 86 | ) 87 | 88 | tokens_with_prob = [ 89 | TokenWithLogProb( 90 | token=token, 91 | prob=LogProb(log_prob or 0.0), 92 | ) 93 | for token, log_prob in echo_output 94 | ] 95 | return EchoOutput(tokens_with_log_probs=tokens_with_prob) 96 | -------------------------------------------------------------------------------- /src/intelligence_layer/core/instruct.py: -------------------------------------------------------------------------------- 1 | from typing import Optional 2 | 3 | from pydantic import BaseModel 4 | 5 | from intelligence_layer.core.model import CompleteInput, CompleteOutput, ControlModel 6 | from intelligence_layer.core.task import Task 7 | from intelligence_layer.core.tracer.tracer import TaskSpan 8 | 9 | 10 | class InstructInput(BaseModel): 11 | instruction: str 12 | input: Optional[str] = None 13 | response_prefix: Optional[str] = None 14 | maximum_tokens: int = 128 15 | 16 | 17 | class Instruct(Task[InstructInput, CompleteOutput]): 18 | def __init__(self, model: ControlModel) -> None: 19 | super().__init__() 20 | self._model = model 21 | 22 | def do_run(self, input: InstructInput, task_span: TaskSpan) -> CompleteOutput: 23 | prompt = self._model.to_instruct_prompt( 24 | instruction=input.instruction, 25 | input=input.input, 26 | response_prefix=input.response_prefix, 27 | ) 28 | return self._model.complete( 29 | CompleteInput(prompt=prompt, maximum_tokens=input.maximum_tokens), task_span 30 | ) 31 | -------------------------------------------------------------------------------- /src/intelligence_layer/core/tracer/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Aleph-Alpha/intelligence-layer-sdk/261ab36a28977d91e2d0c8ca16039edcbab81925/src/intelligence_layer/core/tracer/__init__.py -------------------------------------------------------------------------------- /src/intelligence_layer/core/tracer/file_tracer.py: -------------------------------------------------------------------------------- 1 | from datetime import datetime 2 | from json import loads 3 | from pathlib import Path 4 | from typing import Optional 5 | from uuid import UUID 6 | 7 | from pydantic import BaseModel 8 | 9 | from intelligence_layer.core.tracer.in_memory_tracer import InMemoryTracer 10 | from intelligence_layer.core.tracer.persistent_tracer import ( 11 | LogLine, 12 | 
PersistentSpan, 13 | PersistentTaskSpan, 14 | PersistentTracer, 15 | ) 16 | from intelligence_layer.core.tracer.tracer import Context, PydanticSerializable 17 | 18 | 19 | class FileTracer(PersistentTracer): 20 | """A `Tracer` that logs to a file. 21 | 22 | Each log-entry is represented by a JSON object. The information logged allows 23 | to reconstruct the hierarchical nature of the logs, i.e. all entries have a 24 | _pointer_ to its parent element in form of a parent attribute containing 25 | the uuid of the parent. 26 | 27 | Args: 28 | log_file_path: Denotes the file to log to. 29 | 30 | Attributes: 31 | uuid: a uuid for the tracer. If multiple :class:`FileTracer` instances log to the same file 32 | the child-elements for a tracer can be identified by referring to this id as parent. 33 | """ 34 | 35 | def __init__(self, log_file_path: Path | str) -> None: 36 | super().__init__() 37 | self._log_file_path = Path(log_file_path) 38 | 39 | def _log_entry(self, id: UUID, entry: BaseModel) -> None: 40 | self._log_file_path.parent.mkdir(parents=True, exist_ok=True) 41 | with self._log_file_path.open(mode="a", encoding="utf-8") as f: 42 | f.write( 43 | LogLine( 44 | trace_id=id, entry_type=type(entry).__name__, entry=entry 45 | ).model_dump_json() 46 | + "\n" 47 | ) 48 | 49 | def span( 50 | self, 51 | name: str, 52 | timestamp: Optional[datetime] = None, 53 | ) -> "FileSpan": 54 | span = FileSpan(self._log_file_path, context=self.context) 55 | self._log_span(span, name, timestamp) 56 | return span 57 | 58 | def task_span( 59 | self, 60 | task_name: str, 61 | input: PydanticSerializable, 62 | timestamp: Optional[datetime] = None, 63 | ) -> "FileTaskSpan": 64 | task = FileTaskSpan( 65 | self._log_file_path, 66 | context=self.context, 67 | ) 68 | self._log_task(task, task_name, input, timestamp) 69 | return task 70 | 71 | def traces(self, trace_id: Optional[str] = None) -> InMemoryTracer: 72 | with self._log_file_path.open("r", encoding="utf-8") as f: 73 | traces = (LogLine.model_validate(loads(line)) for line in f) 74 | filtered_traces = ( 75 | (line for line in traces if line.trace_id == trace_id) 76 | if trace_id is not None 77 | else traces 78 | ) 79 | return self._parse_log(filtered_traces) 80 | 81 | def convert_file_for_viewing(self, file_path: Path | str) -> None: 82 | in_memory_tracer = self.traces() 83 | traces = in_memory_tracer.export_for_viewing() 84 | path_to_file = Path(file_path) 85 | with path_to_file.open(mode="w", encoding="utf-8") as file: 86 | for exportedSpan in traces: 87 | file.write(exportedSpan.model_dump_json() + "\n") 88 | 89 | 90 | class FileSpan(PersistentSpan, FileTracer): 91 | """A `Span` created by `FileTracer.span`.""" 92 | 93 | def __init__(self, log_file_path: Path, context: Optional[Context] = None) -> None: 94 | PersistentSpan.__init__(self, context=context) 95 | FileTracer.__init__(self, log_file_path=log_file_path) 96 | 97 | 98 | class FileTaskSpan(PersistentTaskSpan, FileSpan): 99 | """A `TaskSpan` created by `FileTracer.task_span`.""" 100 | 101 | pass 102 | -------------------------------------------------------------------------------- /src/intelligence_layer/core/tracer/open_telemetry_tracer.py: -------------------------------------------------------------------------------- 1 | from collections.abc import Sequence 2 | from datetime import datetime 3 | from typing import Optional 4 | 5 | from opentelemetry.context import attach, detach 6 | from opentelemetry.trace import Span as OpenTSpan 7 | from opentelemetry.trace import StatusCode, set_span_in_context 8 
| from opentelemetry.trace import Tracer as OpenTTracer 9 | from pydantic import BaseModel, SerializeAsAny 10 | 11 | from intelligence_layer.core.tracer.tracer import ( 12 | Context, 13 | ExportedSpan, 14 | JsonSerializer, 15 | PydanticSerializable, 16 | Span, 17 | SpanStatus, 18 | SpanType, 19 | TaskSpan, 20 | Tracer, 21 | ) 22 | 23 | 24 | class OpenTelemetryTracer(Tracer): 25 | """A `Tracer` that uses open telemetry.""" 26 | 27 | def __init__(self, tracer: OpenTTracer) -> None: 28 | self._tracer = tracer 29 | 30 | def span( 31 | self, 32 | name: str, 33 | timestamp: Optional[datetime] = None, 34 | ) -> "OpenTelemetrySpan": 35 | tracer_span = self._tracer.start_span( 36 | name, 37 | attributes={"type": SpanType.SPAN.value}, 38 | start_time=None if not timestamp else _open_telemetry_timestamp(timestamp), 39 | ) 40 | token = attach(set_span_in_context(tracer_span)) 41 | return OpenTelemetrySpan(tracer_span, self._tracer, token, self.context) 42 | 43 | def task_span( 44 | self, 45 | task_name: str, 46 | input: PydanticSerializable, 47 | timestamp: Optional[datetime] = None, 48 | ) -> "OpenTelemetryTaskSpan": 49 | tracer_span = self._tracer.start_span( 50 | task_name, 51 | attributes={"input": _serialize(input), "type": SpanType.TASK_SPAN.value}, 52 | start_time=None if not timestamp else _open_telemetry_timestamp(timestamp), 53 | ) 54 | token = attach(set_span_in_context(tracer_span)) 55 | return OpenTelemetryTaskSpan(tracer_span, self._tracer, token, self.context) 56 | 57 | def export_for_viewing(self) -> Sequence[ExportedSpan]: 58 | raise NotImplementedError( 59 | "The OpenTelemetryTracer does not support export for viewing, as it can not access its own traces." 60 | ) 61 | 62 | 63 | class OpenTelemetrySpan(Span, OpenTelemetryTracer): 64 | """A `Span` created by `OpenTelemetryTracer.span`.""" 65 | 66 | end_timestamp: Optional[datetime] = None 67 | 68 | def __init__( 69 | self, 70 | span: OpenTSpan, 71 | tracer: OpenTTracer, 72 | token: object, 73 | context: Optional[Context] = None, 74 | ) -> None: 75 | OpenTelemetryTracer.__init__(self, tracer) 76 | Span.__init__(self, context=context) 77 | self.open_ts_span = span 78 | self._token = token 79 | 80 | def log( 81 | self, 82 | message: str, 83 | value: PydanticSerializable, 84 | timestamp: Optional[datetime] = None, 85 | ) -> None: 86 | self.open_ts_span.add_event( 87 | message, 88 | {"value": _serialize(value)}, 89 | None if not timestamp else _open_telemetry_timestamp(timestamp), 90 | ) 91 | 92 | def end(self, timestamp: Optional[datetime] = None) -> None: 93 | super().end(timestamp) 94 | self.open_ts_span.set_status( 95 | StatusCode.OK if self.status_code == SpanStatus.OK else StatusCode.ERROR 96 | ) 97 | detach(self._token) 98 | self.open_ts_span.end( 99 | _open_telemetry_timestamp(timestamp) if timestamp is not None else None 100 | ) 101 | 102 | 103 | class OpenTelemetryTaskSpan(TaskSpan, OpenTelemetrySpan): 104 | """A `TaskSpan` created by `OpenTelemetryTracer.task_span`.""" 105 | 106 | output: Optional[PydanticSerializable] = None 107 | 108 | def record_output(self, output: PydanticSerializable) -> None: 109 | self.open_ts_span.set_attribute("output", _serialize(output)) 110 | 111 | 112 | def _open_telemetry_timestamp(t: datetime) -> int: 113 | # Open telemetry expects *nanoseconds* since epoch 114 | t_float = t.timestamp() * 1e9 115 | return int(t_float) 116 | 117 | 118 | def _serialize(s: SerializeAsAny[PydanticSerializable]) -> str: 119 | value = s if isinstance(s, BaseModel) else JsonSerializer(root=s) 120 | return 
value.model_dump_json() 121 | -------------------------------------------------------------------------------- /src/intelligence_layer/evaluation/aggregation/accumulator.py: -------------------------------------------------------------------------------- 1 | from abc import ABC, abstractmethod 2 | from typing import Generic, TypeVar 3 | 4 | T = TypeVar("T") 5 | Output = TypeVar("Output") 6 | 7 | 8 | class Accumulator(ABC, Generic[T, Output]): 9 | """Used for incremental computation. 10 | 11 | For use cases with large amount of data where you don't want to have every value in memory at once, e.g. evaluation. 12 | """ 13 | 14 | @abstractmethod 15 | def add(self, value: T) -> None: 16 | """Responsible for accumulating values. 17 | 18 | Args: 19 | value: the value to add 20 | Returns: 21 | nothing 22 | """ 23 | ... 24 | 25 | @abstractmethod 26 | def extract(self) -> Output: 27 | """Accumulates the final result. 28 | 29 | Returns: 30 | float: 0.0 if no values were added before, else the mean 31 | """ 32 | ... 33 | 34 | 35 | class MeanAccumulator(Accumulator[float, float]): 36 | def __init__(self) -> None: 37 | self._n = 0 38 | self._acc = 0.0 39 | self._squares_acc = 0.0 # Sum of squares of the values 40 | 41 | def add(self, value: float) -> None: 42 | self._n += 1 43 | self._acc += value 44 | self._squares_acc += value**2 45 | 46 | def extract(self) -> float: 47 | """Accumulates the mean. 48 | 49 | :return: 0.0 if no values were added before, else the mean 50 | """ 51 | return 0.0 if self._n == 0 else self._acc / self._n 52 | 53 | def standard_deviation(self) -> float: 54 | """Calculates the standard deviation.""" 55 | if self._n == 0: 56 | return 0.0 57 | mean = self.extract() 58 | variance = (self._squares_acc / self._n) - (mean**2) 59 | return variance**0.5 60 | 61 | def standard_error(self) -> float: 62 | """Calculates the standard error of the mean.""" 63 | if self._n <= 1: 64 | return 0.0 65 | return self.standard_deviation() / (self._n**0.5) 66 | -------------------------------------------------------------------------------- /src/intelligence_layer/evaluation/aggregation/aggregation_repository.py: -------------------------------------------------------------------------------- 1 | from abc import ABC, abstractmethod 2 | from collections.abc import Iterable, Sequence 3 | from typing import Optional 4 | 5 | from intelligence_layer.evaluation.aggregation.domain import ( 6 | AggregatedEvaluation, 7 | AggregationOverview, 8 | ) 9 | 10 | 11 | class AggregationRepository(ABC): 12 | """Base aggregation repository interface. 13 | 14 | Provides methods to store and load aggregated evaluation results: :class:`AggregationOverview`. 15 | """ 16 | 17 | @abstractmethod 18 | def store_aggregation_overview( 19 | self, aggregation_overview: AggregationOverview[AggregatedEvaluation] 20 | ) -> None: 21 | """Stores an :class:`AggregationOverview`. 22 | 23 | Args: 24 | aggregation_overview: The aggregated results to be persisted. 25 | """ 26 | ... 27 | 28 | @abstractmethod 29 | def aggregation_overview( 30 | self, aggregation_id: str, aggregation_type: type[AggregatedEvaluation] 31 | ) -> Optional[AggregationOverview[AggregatedEvaluation]]: 32 | """Returns an :class:`AggregationOverview` for the given ID. 33 | 34 | Args: 35 | aggregation_id: ID of the aggregation overview to retrieve. 36 | aggregation_type: Type of the aggregation. 37 | 38 | Returns: 39 | :class:`EvaluationOverview` if it was found, `None` otherwise. 40 | """ 41 | ... 
42 | 43 | def aggregation_overviews( 44 | self, aggregation_type: type[AggregatedEvaluation] 45 | ) -> Iterable[AggregationOverview[AggregatedEvaluation]]: 46 | """Returns all :class:`AggregationOverview`s sorted by their ID. 47 | 48 | Args: 49 | aggregation_type: Type of the aggregation. 50 | 51 | Yields: 52 | :class:`AggregationOverview`s. 53 | """ 54 | for aggregation_id in self.aggregation_overview_ids(): 55 | aggregation_overview = self.aggregation_overview( 56 | aggregation_id, aggregation_type 57 | ) 58 | if aggregation_overview is not None: 59 | yield aggregation_overview 60 | 61 | @abstractmethod 62 | def aggregation_overview_ids(self) -> Sequence[str]: 63 | """Returns sorted IDs of all stored :class:`AggregationOverview`s. 64 | 65 | Returns: 66 | A :class:`Sequence` of the :class:`AggregationOverview` IDs. 67 | """ 68 | pass 69 | -------------------------------------------------------------------------------- /src/intelligence_layer/evaluation/aggregation/domain.py: -------------------------------------------------------------------------------- 1 | from collections.abc import Iterable, Sequence 2 | from datetime import datetime 3 | from typing import Generic, TypeVar 4 | 5 | from pydantic import BaseModel, SerializeAsAny 6 | 7 | from intelligence_layer.connectors.base.json_serializable import ( 8 | SerializableDict, 9 | ) 10 | from intelligence_layer.evaluation.evaluation.domain import ( 11 | EvaluationFailed, 12 | EvaluationOverview, 13 | ) 14 | from intelligence_layer.evaluation.run.domain import RunOverview 15 | 16 | AggregatedEvaluation = TypeVar("AggregatedEvaluation", bound=BaseModel, covariant=True) 17 | 18 | 19 | class AggregationOverview(BaseModel, Generic[AggregatedEvaluation], frozen=True): 20 | """Complete overview of the results of evaluating a :class:`Task` on a dataset. 21 | 22 | Created when running :meth:`Evaluator.eval_and_aggregate_runs`. Contains high-level information and statistics. 23 | 24 | Attributes: 25 | evaluation_overviews: :class:`EvaluationOverview`s used for aggregation. 26 | id: Aggregation overview ID. 27 | start: Start timestamp of the aggregation. 28 | end: End timestamp of the aggregation. 29 | end: The time when the evaluation run ended 30 | successful_evaluation_count: The number of examples that where successfully evaluated. 31 | crashed_during_evaluation_count: The number of examples that crashed during evaluation. 32 | failed_evaluation_count: The number of examples that crashed during evaluation 33 | plus the number of examples that failed to produce an output for evaluation. 34 | run_ids: IDs of all :class:`RunOverview`s from all linked :class:`EvaluationOverview`s. 35 | description: A short description. 36 | statistics: Aggregated statistics of the run. Whatever is returned by :meth:`Evaluator.aggregate` 37 | labels: Labels for filtering aggregation. Defaults to empty list. 38 | metadata: Additional information about the aggregation. Defaults to empty dict. 
39 | 40 | """ 41 | 42 | evaluation_overviews: frozenset[EvaluationOverview] 43 | id: str 44 | start: datetime 45 | end: datetime 46 | successful_evaluation_count: int 47 | crashed_during_evaluation_count: int 48 | description: str 49 | statistics: SerializeAsAny[AggregatedEvaluation] 50 | labels: set[str] = set() 51 | metadata: SerializableDict = dict() 52 | 53 | @property 54 | def run_ids(self) -> Sequence[str]: 55 | return [overview.id for overview in self.run_overviews()] 56 | 57 | def run_overviews(self) -> Iterable[RunOverview]: 58 | return set( 59 | run_overview 60 | for evaluation_overview in self.evaluation_overviews 61 | for run_overview in evaluation_overview.run_overviews 62 | ) 63 | 64 | @property 65 | def failed_evaluation_count(self) -> int: 66 | return self.crashed_during_evaluation_count + sum( 67 | run_overview.failed_example_count for run_overview in self.run_overviews() 68 | ) 69 | 70 | def raise_on_evaluation_failure(self) -> None: 71 | if self.crashed_during_evaluation_count > 0: 72 | raise EvaluationFailed(self.id, self.crashed_during_evaluation_count) 73 | 74 | def __repr__(self) -> str: 75 | return self.__str__() 76 | 77 | def __str__(self) -> str: 78 | res = ( 79 | f"Aggregation Overview ID = {self.id}\n" 80 | f"Start time = {self.start}\n" 81 | f"End time = {self.end}\n" 82 | f"Successful example count = {self.successful_evaluation_count}\n" 83 | f"Count of examples crashed during evaluation = {self.failed_evaluation_count}\n" 84 | f'Description = "{self.description}"\n' 85 | f"Labels = {self.labels}\n" 86 | f"Metadata = {self.metadata}\n" 87 | ) 88 | 89 | res += f"IDs of aggregated Evaluation Overviews = {[evaluation_overview.id for evaluation_overview in self.evaluation_overviews]}\n" 90 | res += f"IDs of aggregated Run Overviews = {self.run_ids}\n" 91 | 92 | res += "Statistics = {\n" 93 | res += f"{self.statistics}\n" 94 | res += "}\n" 95 | 96 | return res 97 | 98 | def __hash__(self) -> int: 99 | return hash(self.id) 100 | -------------------------------------------------------------------------------- /src/intelligence_layer/evaluation/aggregation/file_aggregation_repository.py: -------------------------------------------------------------------------------- 1 | from collections.abc import Sequence 2 | from pathlib import Path 3 | from typing import Optional 4 | 5 | from fsspec.implementations.local import LocalFileSystem # type: ignore 6 | 7 | from intelligence_layer.evaluation.aggregation.aggregation_repository import ( 8 | AggregationRepository, 9 | ) 10 | from intelligence_layer.evaluation.aggregation.domain import ( 11 | AggregatedEvaluation, 12 | AggregationOverview, 13 | ) 14 | from intelligence_layer.evaluation.infrastructure.file_system_based_repository import ( 15 | FileSystemBasedRepository, 16 | ) 17 | 18 | 19 | class FileSystemAggregationRepository(AggregationRepository, FileSystemBasedRepository): 20 | _SUB_DIRECTORY = "aggregations" 21 | 22 | def store_aggregation_overview( 23 | self, aggregation_overview: AggregationOverview[AggregatedEvaluation] 24 | ) -> None: 25 | self.write_utf8( 26 | self._aggregation_overview_path(aggregation_overview.id), 27 | aggregation_overview.model_dump_json(indent=2), 28 | create_parents=True, 29 | ) 30 | 31 | def aggregation_overview( 32 | self, aggregation_id: str, aggregation_type: type[AggregatedEvaluation] 33 | ) -> Optional[AggregationOverview[AggregatedEvaluation]]: 34 | file_path = self._aggregation_overview_path(aggregation_id) 35 | 36 | if not self.exists(file_path): 37 | return None 38 | 39 | content = 
self.read_utf8(file_path) 40 | return AggregationOverview[aggregation_type].model_validate_json( # type:ignore 41 | content 42 | ) 43 | 44 | def aggregation_overview_ids(self) -> Sequence[str]: 45 | return sorted(self.file_names(self._aggregation_root_directory())) 46 | 47 | def _aggregation_root_directory(self) -> Path: 48 | return self._root_directory / self._SUB_DIRECTORY 49 | 50 | def _aggregation_directory(self, evaluation_id: str) -> Path: 51 | return self._aggregation_root_directory() / evaluation_id 52 | 53 | def _aggregation_overview_path(self, aggregation_id: str) -> Path: 54 | return self._aggregation_directory(aggregation_id).with_suffix(".json") 55 | 56 | 57 | class FileAggregationRepository(FileSystemAggregationRepository): 58 | def __init__(self, root_directory: Path) -> None: 59 | super().__init__(LocalFileSystem(), root_directory) 60 | 61 | @staticmethod 62 | def path_to_str(path: Path) -> str: 63 | return str(path) 64 | -------------------------------------------------------------------------------- /src/intelligence_layer/evaluation/aggregation/hugging_face_aggregation_repository.py: -------------------------------------------------------------------------------- 1 | from intelligence_layer.evaluation.aggregation.file_aggregation_repository import ( 2 | FileSystemAggregationRepository, 3 | ) 4 | from intelligence_layer.evaluation.infrastructure.hugging_face_repository import ( 5 | HuggingFaceRepository, 6 | ) 7 | 8 | 9 | class HuggingFaceAggregationRepository( 10 | FileSystemAggregationRepository, HuggingFaceRepository 11 | ): 12 | # this class inherits all its behavior from its parents 13 | pass 14 | -------------------------------------------------------------------------------- /src/intelligence_layer/evaluation/aggregation/in_memory_aggregation_repository.py: -------------------------------------------------------------------------------- 1 | from collections.abc import Sequence 2 | from typing import Any, Optional 3 | 4 | from intelligence_layer.evaluation.aggregation.aggregation_repository import ( 5 | AggregationRepository, 6 | ) 7 | from intelligence_layer.evaluation.aggregation.domain import ( 8 | AggregatedEvaluation, 9 | AggregationOverview, 10 | ) 11 | 12 | 13 | class InMemoryAggregationRepository(AggregationRepository): 14 | def __init__(self) -> None: 15 | super().__init__() 16 | self._aggregation_overviews: dict[str, AggregationOverview[Any]] = dict() 17 | 18 | def store_aggregation_overview( 19 | self, aggregation_overview: AggregationOverview[AggregatedEvaluation] 20 | ) -> None: 21 | self._aggregation_overviews[aggregation_overview.id] = aggregation_overview 22 | 23 | def aggregation_overview( 24 | self, aggregation_id: str, aggregation_type: type[AggregatedEvaluation] 25 | ) -> Optional[AggregationOverview[AggregatedEvaluation]]: 26 | overview = self._aggregation_overviews.get(aggregation_id, None) 27 | if overview is None or type(overview.statistics) is aggregation_type: 28 | return overview 29 | return AggregationOverview[AggregatedEvaluation]( 30 | evaluation_overviews=overview.evaluation_overviews, 31 | id=overview.id, 32 | start=overview.start, 33 | end=overview.end, 34 | successful_evaluation_count=overview.successful_evaluation_count, 35 | crashed_during_evaluation_count=overview.crashed_during_evaluation_count, 36 | description=overview.description, 37 | statistics=aggregation_type.model_validate(overview.statistics), 38 | labels=overview.labels, 39 | metadata=overview.metadata, 40 | ) 41 | 42 | def aggregation_overview_ids(self) -> 
Sequence[str]: 43 | return sorted(list(self._aggregation_overviews.keys())) 44 | -------------------------------------------------------------------------------- /src/intelligence_layer/evaluation/benchmark/trace_information.py: -------------------------------------------------------------------------------- 1 | from collections.abc import Sequence 2 | from datetime import timedelta 3 | from typing import cast 4 | 5 | from aleph_alpha_client import CompletionResponse 6 | 7 | from intelligence_layer.core import ExportedSpan 8 | from intelligence_layer.core.model import _Complete 9 | from intelligence_layer.core.tracer.tracer import SpanType 10 | 11 | 12 | def _get_root(trace: Sequence[ExportedSpan]) -> ExportedSpan | None: 13 | root_spans = [span for span in trace if span.parent_id is None] 14 | if len(root_spans) != 1: 15 | return None 16 | return root_spans[0] 17 | 18 | 19 | def extract_latency_from_trace(trace: Sequence[ExportedSpan]) -> int: 20 | """Extract the total duration of a given trace based on its root trace. 21 | 22 | Args: 23 | trace: trace to analyze 24 | 25 | Returns: 26 | The duration of the trace in microseconds 27 | """ 28 | root_span = _get_root(trace) 29 | if root_span is None: 30 | raise ValueError("No root span found in the trace") 31 | latency = (root_span.end_time - root_span.start_time) / timedelta(microseconds=1) 32 | return int(latency) 33 | 34 | 35 | def _is_complete_request(span: ExportedSpan) -> bool: 36 | # Assuming that LLM requests have a specific name or attribute 37 | return span.name == _Complete.__name__ 38 | 39 | 40 | def _extract_tokens_from_complete_request(span: ExportedSpan) -> int: 41 | if not hasattr(span.attributes, "output"): 42 | raise ValueError( 43 | "Function expects a complete span with attributes.output. Output was not present." 44 | ) 45 | completion_output = cast(CompletionResponse, span.attributes.output) 46 | return completion_output.num_tokens_generated 47 | 48 | 49 | def extract_token_count_from_trace(trace: Sequence[ExportedSpan]) -> int: 50 | """Extract the number of tokens generated in a trace based on its completion requests. 51 | 52 | Note: Does not support traces of streamed responses. 53 | 54 | Args: 55 | trace: trace to analyze. 56 | 57 | Returns: 58 | The sum of newly generated tokens across all spans in the given trace. 59 | """ 60 | token_count = 0 61 | for span in trace: 62 | if span.attributes.type != SpanType.TASK_SPAN: 63 | continue 64 | if _is_complete_request(span): 65 | token_count += _extract_tokens_from_complete_request(span) 66 | return token_count 67 | -------------------------------------------------------------------------------- /src/intelligence_layer/evaluation/dataset/domain.py: -------------------------------------------------------------------------------- 1 | from typing import Generic, Optional, TypeVar 2 | from uuid import uuid4 3 | 4 | from pydantic import BaseModel, Field 5 | from rich.tree import Tree 6 | 7 | from intelligence_layer.connectors.base.json_serializable import ( 8 | SerializableDict, 9 | ) 10 | from intelligence_layer.core.task import Input 11 | from intelligence_layer.core.tracer.tracer import PydanticSerializable 12 | 13 | ExpectedOutput = TypeVar("ExpectedOutput", bound=PydanticSerializable) 14 | """Dataset-specific type that defines characteristics that an :class:`Output` can be checked against. 
15 | 16 | Traditional names for this are `label` or `y` in classification.""" 17 | 18 | 19 | class Example(BaseModel, Generic[Input, ExpectedOutput]): 20 | """Example case used for evaluations. 21 | 22 | Attributes: 23 | input: Input for the :class:`Task`. Has to be same type as the input for the task used. 24 | expected_output: The expected output from a given example run. 25 | This will be used by the evaluator to compare the received output with. 26 | id: Identifier for the example, defaults to uuid. 27 | metadata: Optional dictionary of custom key-value pairs. 28 | 29 | Generics: 30 | Input: Interface to be passed to the :class:`Task` that shall be evaluated. 31 | ExpectedOutput: Output that is expected from the run with the supplied input. 32 | """ 33 | 34 | input: Input 35 | expected_output: ExpectedOutput 36 | id: str = Field(default_factory=lambda: str(uuid4())) 37 | metadata: Optional[SerializableDict] = None 38 | 39 | def __repr__(self) -> str: 40 | return self.__str__() 41 | 42 | def __str__(self) -> str: 43 | return ( 44 | f"Example ID = {self.id}\n" 45 | f"Input = {self.input}\n" 46 | f"Expected output = {self.expected_output}\n" 47 | f"Metadata = {self.metadata}\n" 48 | ) 49 | 50 | def _rich_render(self) -> Tree: 51 | example_tree = Tree(f"Example: {self.id}") 52 | example_tree.add("Input").add(str(self.input)) 53 | example_tree.add("Expected Output").add(str(self.expected_output)) 54 | if self.metadata: 55 | example_tree.add("Metadata").add(str(self.metadata)) 56 | return example_tree 57 | 58 | 59 | class Dataset(BaseModel): 60 | """Represents a dataset linked to multiple examples. 61 | 62 | Attributes: 63 | id: Dataset ID. 64 | name: A short name of the dataset. 65 | label: Labels for filtering datasets. Defaults to empty list. 66 | metadata: Additional information about the dataset. Defaults to empty dict. 67 | """ 68 | 69 | id: str = Field(default_factory=lambda: str(uuid4())) 70 | name: str 71 | labels: set[str] = set() 72 | metadata: SerializableDict = dict() 73 | 74 | def __repr__(self) -> str: 75 | return self.__str__() 76 | 77 | def __str__(self) -> str: 78 | return ( 79 | f"Dataset ID = {self.id}\n" 80 | f"Name = {self.name}\n" 81 | f"Labels = {self.labels}\n" 82 | f"Metadata = {self.metadata}" 83 | ) 84 | -------------------------------------------------------------------------------- /src/intelligence_layer/evaluation/dataset/hugging_face_dataset_repository.py: -------------------------------------------------------------------------------- 1 | from functools import _lru_cache_wrapper, lru_cache 2 | from typing import Optional 3 | 4 | from intelligence_layer.evaluation.dataset.domain import Dataset 5 | from intelligence_layer.evaluation.dataset.file_dataset_repository import ( 6 | FileSystemDatasetRepository, 7 | ) 8 | from intelligence_layer.evaluation.infrastructure.hugging_face_repository import ( 9 | HuggingFaceRepository, 10 | ) 11 | 12 | 13 | class HuggingFaceDatasetRepository(HuggingFaceRepository, FileSystemDatasetRepository): 14 | def __init__( 15 | self, repository_id: str, token: str, private: bool, caching: bool = True 16 | ) -> None: 17 | """Initializes a :class:`HuggingFaceDatasetRepository` to be ready for dataset storage and access. 18 | 19 | Args: 20 | repository_id: The HuggingFace namespace and repository name, separated by a "/". 21 | token: The HuggingFace authentication token. 22 | private: Whether the dataset repository should be private on HuggingFace. 23 | caching: If set, datasets are cached in memory once retrieved. 
24 | This means external updates to datasets will be missed. Defaults to `True`. 25 | """ 26 | super().__init__(repository_id, token, private) 27 | if caching: 28 | self.examples = lru_cache(maxsize=2)(self.examples) # type: ignore 29 | 30 | def delete_dataset(self, dataset_id: str) -> None: 31 | """Deletes a dataset identified by the given dataset ID. 32 | 33 | This implementation should be backwards compatible to datasets 34 | created without a dataset object (i.e., there is no dataset file 35 | with dataset metadata). 36 | 37 | Note, that HuggingFace API does not seem to support deleting not-existing files. 38 | 39 | Args: 40 | dataset_id: Dataset ID of the dataset to delete. 41 | """ 42 | if self.exists(self._dataset_examples_path(dataset_id)): 43 | self._file_system.rm( 44 | self.path_to_str(self._dataset_examples_path(dataset_id)) 45 | ) 46 | 47 | if self.exists(self._dataset_path(dataset_id)): 48 | self._file_system.rm(self.path_to_str(self._dataset_path(dataset_id))) 49 | # this resets the complete cache if a dataset gets deleted. 50 | if isinstance(self.examples, _lru_cache_wrapper): 51 | self.examples.cache_clear() 52 | 53 | def dataset(self, dataset_id: str) -> Optional[Dataset]: 54 | """Returns a dataset identified by the given dataset ID. 55 | 56 | This implementation should be backwards compatible to datasets 57 | created without a dataset object (i.e., there is no dataset file 58 | with dataset metadata). 59 | 60 | Args: 61 | dataset_id: Dataset ID of the dataset to delete. 62 | 63 | Returns: 64 | :class:`Dataset` if it was not, `None` otherwise. 65 | """ 66 | dataset_file_path = self._dataset_path(dataset_id) 67 | examples_file_path = self._dataset_examples_path(dataset_id) 68 | if not self.exists(dataset_file_path): 69 | if not self.exists(examples_file_path): 70 | return None 71 | else: 72 | return Dataset(id=dataset_id, name=f"HuggingFace dataset {dataset_id}") 73 | 74 | return super().dataset(dataset_id) 75 | -------------------------------------------------------------------------------- /src/intelligence_layer/evaluation/dataset/single_huggingface_dataset_repository.py: -------------------------------------------------------------------------------- 1 | from collections.abc import Iterable, Sequence 2 | from typing import Optional, cast 3 | 4 | from datasets import Dataset as HFDataset # type: ignore 5 | from datasets import DatasetDict, IterableDataset, IterableDatasetDict 6 | from pydantic import BaseModel 7 | 8 | from intelligence_layer.connectors.base.json_serializable import SerializableDict 9 | from intelligence_layer.core.task import Input 10 | from intelligence_layer.evaluation.dataset.dataset_repository import DatasetRepository 11 | from intelligence_layer.evaluation.dataset.domain import ( 12 | Dataset, 13 | Example, 14 | ExpectedOutput, 15 | ) 16 | 17 | 18 | class MultipleChoiceInput(BaseModel): 19 | question: str 20 | choices: Sequence[str] 21 | 22 | 23 | class SingleHuggingfaceDatasetRepository(DatasetRepository): 24 | def __init__( 25 | self, 26 | huggingface_dataset: ( 27 | DatasetDict | HFDataset | IterableDatasetDict | IterableDataset 28 | ), 29 | ) -> None: 30 | self._huggingface_dataset = huggingface_dataset 31 | 32 | def create_dataset( 33 | self, 34 | examples: Iterable[Example[Input, ExpectedOutput]], 35 | dataset_name: str, 36 | id: str | None = None, 37 | labels: set[str] | None = None, 38 | metadata: SerializableDict | None = None, 39 | ) -> Dataset: 40 | raise NotImplementedError 41 | 42 | def dataset(self, dataset_id: str) -> Dataset | None: 
43 | raise NotImplementedError 44 | 45 | def dataset_ids(self) -> Iterable[str]: 46 | raise NotImplementedError 47 | 48 | def delete_dataset(self, dataset_id: str) -> None: 49 | raise NotImplementedError 50 | 51 | def example( 52 | self, 53 | dataset_id: str, 54 | example_id: str, 55 | input_type: type[Input], 56 | expected_output_type: type[ExpectedOutput], 57 | ) -> Example[Input, ExpectedOutput] | None: 58 | examples = self.examples( 59 | dataset_id=dataset_id, 60 | input_type=input_type, 61 | expected_output_type=expected_output_type, 62 | ) 63 | 64 | for example in examples: 65 | if example.id == example_id: 66 | return example 67 | return None 68 | 69 | def examples( 70 | self, 71 | dataset_id: str, 72 | input_type: type[Input], 73 | expected_output_type: type[ExpectedOutput], 74 | examples_to_skip: Optional[frozenset[str]] = None, 75 | ) -> Iterable[Example[Input, ExpectedOutput]]: 76 | examples_to_skip = examples_to_skip or frozenset() 77 | answers = "ABCD" 78 | assert input_type == MultipleChoiceInput 79 | assert expected_output_type is str 80 | for index, sample in enumerate(self._huggingface_dataset["test"]): 81 | if str(index) not in examples_to_skip: 82 | yield Example( 83 | input=cast( 84 | Input, 85 | MultipleChoiceInput( 86 | question=sample["question"], choices=sample["choices"] 87 | ), 88 | ), 89 | expected_output=cast(ExpectedOutput, answers[sample["answer"]]), 90 | id=str(index), 91 | ) 92 | -------------------------------------------------------------------------------- /src/intelligence_layer/evaluation/evaluation/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Aleph-Alpha/intelligence-layer-sdk/261ab36a28977d91e2d0c8ca16039edcbab81925/src/intelligence_layer/evaluation/evaluation/__init__.py -------------------------------------------------------------------------------- /src/intelligence_layer/evaluation/evaluation/evaluator/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Aleph-Alpha/intelligence-layer-sdk/261ab36a28977d91e2d0c8ca16039edcbab81925/src/intelligence_layer/evaluation/evaluation/evaluator/__init__.py -------------------------------------------------------------------------------- /src/intelligence_layer/evaluation/infrastructure/file_system_based_repository.py: -------------------------------------------------------------------------------- 1 | from collections.abc import Sequence 2 | from pathlib import Path 3 | from typing import cast 4 | 5 | from fsspec import AbstractFileSystem # type: ignore 6 | 7 | 8 | class FileSystemBasedRepository: 9 | """An :class:`FileBasedRepository` that stores evaluation results in files. 10 | 11 | Args: 12 | file_system: The specific file system to use from fsspec. 13 | root_directory: The folder where the files are stored. The folder 14 | (along with its parents) will be created if it does not exist yet. 
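    Example:
        A minimal usage sketch (illustrative only; the local fsspec filesystem
        and the temporary path are placeholder choices)::

            from pathlib import Path

            from fsspec.implementations.local import LocalFileSystem

            repo = FileSystemBasedRepository(LocalFileSystem(), Path("/tmp/il_demo"))
            repo.write_utf8(
                Path("/tmp/il_demo/overview.json"), '{"id": "1"}', create_parents=True
            )
            assert repo.read_utf8(Path("/tmp/il_demo/overview.json")) == '{"id": "1"}'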
15 | """ 16 | 17 | def __init__(self, file_system: AbstractFileSystem, root_directory: Path) -> None: 18 | self._root_directory = root_directory 19 | self._file_system = file_system 20 | self.mkdir(root_directory) 21 | 22 | def write_utf8( 23 | self, path: Path, content: str, create_parents: bool = False 24 | ) -> None: 25 | if create_parents: 26 | self.mkdir(path.parent) 27 | self._file_system.write_text(self.path_to_str(path), content, encoding="utf-8") 28 | 29 | def read_utf8(self, path: Path) -> str: 30 | return cast( 31 | str, self._file_system.read_text(self.path_to_str(path), encoding="utf-8") 32 | ) 33 | 34 | def remove_file(self, path: Path) -> None: 35 | self._file_system.rm_file(path) 36 | 37 | def exists(self, path: Path) -> bool: 38 | return cast(bool, self._file_system.exists(self.path_to_str(path))) 39 | 40 | def mkdir(self, path: Path) -> None: 41 | if self.exists(path): 42 | return 43 | try: 44 | self._file_system.makedir(self.path_to_str(path), create_parents=True) 45 | except FileExistsError: 46 | return 47 | 48 | def file_names(self, path: Path, file_type: str = "json") -> Sequence[str]: 49 | files = [ 50 | Path(file) 51 | for file in self._file_system.ls(self.path_to_str(path), detail=False) 52 | ] 53 | return [file.stem for file in files if file.suffix == "." + file_type] 54 | 55 | @staticmethod 56 | def path_to_str(path: Path) -> str: 57 | """Returns a string for the given Path so that it's readable for the respective file system. 58 | 59 | Args: 60 | path: Given Path that should be converted. 61 | 62 | Returns: 63 | String representation of the given Path. 64 | """ 65 | return str(path) 66 | -------------------------------------------------------------------------------- /src/intelligence_layer/evaluation/infrastructure/hugging_face_repository.py: -------------------------------------------------------------------------------- 1 | from pathlib import Path 2 | 3 | import huggingface_hub 4 | 5 | from intelligence_layer.evaluation.infrastructure.file_system_based_repository import ( 6 | FileSystemBasedRepository, 7 | ) 8 | 9 | 10 | class HuggingFaceRepository(FileSystemBasedRepository): 11 | """HuggingFace base repository.""" 12 | 13 | _REPO_TYPE = "dataset" 14 | _ROOT_DIRECTORY_PREFIX_ = "datasets" # HuggingFace API root directory 15 | 16 | @staticmethod 17 | def path_to_str(path: Path) -> str: 18 | return path.as_posix() 19 | 20 | def __init__(self, repository_id: str, token: str, private: bool) -> None: 21 | """Create a HuggingFace repository. 22 | 23 | Creates a corresponding repository and initializes the file system. 24 | 25 | Args: 26 | repository_id: The HuggingFace namespace and repository name, separated by a "/". 27 | token: The HuggingFace authentication token. 28 | private: Whether the dataset repository should be private. 
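        Example:
            A minimal construction sketch (the repository id and token are
            placeholders for your own HuggingFace namespace and credentials)::

                repo = HuggingFaceRepository(
                    repository_id="my-org/my-eval-artifacts",
                    token="hf_...",
                    private=True,
                )
                # The constructor creates the hub repository if it does not
                # exist yet and addresses its files below
                # "datasets/my-org/my-eval-artifacts".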
29 | """ 30 | assert repository_id[-1] != "/" 31 | self.create_repository(repository_id, token, private) 32 | 33 | file_system = huggingface_hub.HfFileSystem(token=token) 34 | root_directory = Path(f"{self._ROOT_DIRECTORY_PREFIX_}/{repository_id}") 35 | 36 | super().__init__(file_system, root_directory) 37 | self._repository_id = repository_id 38 | # the file system is assigned in super init but this fixes the typing 39 | self._file_system: huggingface_hub.HfFileSystem 40 | 41 | def create_repository(self, repository_id: str, token: str, private: bool) -> None: 42 | huggingface_hub.create_repo( 43 | repo_id=repository_id, 44 | token=token, 45 | repo_type=self._REPO_TYPE, 46 | private=private, 47 | exist_ok=True, 48 | ) 49 | 50 | def delete_repository(self) -> None: 51 | huggingface_hub.delete_repo( 52 | repo_id=self._repository_id, 53 | token=self._file_system.token, 54 | repo_type=self._REPO_TYPE, 55 | missing_ok=True, 56 | ) 57 | -------------------------------------------------------------------------------- /src/intelligence_layer/examples/classify/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Aleph-Alpha/intelligence-layer-sdk/261ab36a28977d91e2d0c8ca16039edcbab81925/src/intelligence_layer/examples/classify/__init__.py -------------------------------------------------------------------------------- /src/intelligence_layer/examples/classify/keyword_extract.py: -------------------------------------------------------------------------------- 1 | from collections.abc import Mapping 2 | 3 | from pydantic import BaseModel 4 | 5 | from intelligence_layer.core import ( 6 | CompleteInput, 7 | ControlModel, 8 | Language, 9 | LuminousControlModel, 10 | Task, 11 | TaskSpan, 12 | TextChunk, 13 | ) 14 | 15 | INSTRUCT_CONFIGS = { 16 | Language( 17 | "de" 18 | ): "Worum geht es in dem Text? Extrahiere ein paar Stichwörter in Form einer Komma-separierten Liste.", 19 | Language( 20 | "en" 21 | ): "What is the text about? Extract a few keywords in form of a comma-separated list.", 22 | Language( 23 | "es" 24 | ): "¿De qué trata el texto? Extrae algunas palabras clave en forma de una lista separada por comas.", 25 | Language( 26 | "fr" 27 | ): "De quoi parle le texte? Extraire quelques mots-clés sous forme d'une liste séparée par des virgules.", 28 | Language( 29 | "it" 30 | ): "Di cosa tratta il testo? 
Estrai alcune parole chiave sotto forma di una lista separata da virgole.", 31 | } 32 | 33 | 34 | class KeywordExtractInput(BaseModel): 35 | chunk: TextChunk 36 | language: Language 37 | 38 | 39 | class KeywordExtractOutput(BaseModel): 40 | keywords: frozenset[str] 41 | 42 | 43 | class KeywordExtract(Task[KeywordExtractInput, KeywordExtractOutput]): 44 | def __init__( 45 | self, 46 | model: ControlModel | None = None, 47 | instruct_configs: Mapping[Language, str] = INSTRUCT_CONFIGS, 48 | maximum_tokens: int = 32, 49 | ) -> None: 50 | self._instruct_configs = instruct_configs 51 | self._model = model or LuminousControlModel("luminous-base-control") 52 | self._maximum_tokens = maximum_tokens 53 | 54 | def do_run( 55 | self, input: KeywordExtractInput, task_span: TaskSpan 56 | ) -> KeywordExtractOutput: 57 | instruction = input.language.language_config(self._instruct_configs) 58 | result = self._model.complete( 59 | CompleteInput( 60 | prompt=self._model.to_instruct_prompt( 61 | instruction=instruction, input=str(input.chunk) 62 | ), 63 | maximum_tokens=self._maximum_tokens, 64 | ), 65 | task_span, 66 | ) 67 | return KeywordExtractOutput( 68 | keywords=frozenset(s.strip() for s in result.completion.split(",")) 69 | ) 70 | -------------------------------------------------------------------------------- /src/intelligence_layer/examples/classify/prompt_based_classify_with_definitions.py: -------------------------------------------------------------------------------- 1 | import warnings 2 | from collections.abc import Sequence 3 | from math import exp 4 | 5 | from aleph_alpha_client import Prompt 6 | from pydantic import BaseModel 7 | 8 | from intelligence_layer.core import ( 9 | CompleteInput, 10 | CompleteOutput, 11 | ControlModel, 12 | LuminousControlModel, 13 | Task, 14 | TaskSpan, 15 | TextChunk, 16 | ) 17 | 18 | from .classify import ClassifyInput, Probability, SingleLabelClassifyOutput 19 | 20 | 21 | class LabelWithDefinition(BaseModel): 22 | """Defines a label with a definition. 23 | 24 | Attributes: 25 | name: Name of the label. 26 | definition: A definition or description of the label. 27 | """ 28 | 29 | name: str 30 | definition: str 31 | 32 | def to_string(self) -> str: 33 | return f"{self.name}: {self.definition}" 34 | 35 | 36 | class PromptBasedClassifyWithDefinitions( 37 | Task[ClassifyInput, SingleLabelClassifyOutput] 38 | ): 39 | INSTRUCTION: str = """Identify a class that describes the text adequately. 40 | Reply with only the class label.""" 41 | 42 | def __init__( 43 | self, 44 | labels_with_definitions: Sequence[LabelWithDefinition], 45 | model: ControlModel | None = None, 46 | instruction: str = INSTRUCTION, 47 | ) -> None: 48 | super().__init__() 49 | self._labels_with_definitions = labels_with_definitions 50 | self._model = model or LuminousControlModel("luminous-base-control") 51 | if not isinstance(self._model, LuminousControlModel): 52 | warnings.warn( 53 | "PromptBasedClassifyWithDefinitions was build for luminous models. LLama models may not work correctly. 
" 54 | "Proceed with caution and testing.", 55 | UserWarning, 56 | ) 57 | self._instruction = instruction 58 | 59 | def do_run( 60 | self, input: ClassifyInput, task_span: TaskSpan 61 | ) -> SingleLabelClassifyOutput: 62 | complete_output = self._model.complete( 63 | CompleteInput( 64 | prompt=self._get_prompt(input.chunk, input.labels), 65 | completion_bias_inclusion=list(input.labels), 66 | log_probs=len(input.labels) * 2, 67 | ), 68 | task_span, 69 | ) 70 | return SingleLabelClassifyOutput(scores=self._build_scores(complete_output)) 71 | 72 | def _get_prompt(self, chunk: TextChunk, labels: frozenset[str]) -> Prompt: 73 | def format_input(text: str, labels: frozenset[str]) -> str: 74 | definitions = "\n".join( 75 | label.to_string() 76 | for label in self._labels_with_definitions 77 | if label.name in labels 78 | ) 79 | return f"""Labels: 80 | {', '.join(label.name for label in self._labels_with_definitions if label.name in labels)} 81 | 82 | Definitions: 83 | {definitions} 84 | 85 | Text: {text}""" 86 | 87 | unexpected_labels = labels - set( 88 | label.name for label in self._labels_with_definitions 89 | ) 90 | if unexpected_labels: 91 | raise ValueError(f"Got unexpected labels: {', '.join(unexpected_labels)}") 92 | 93 | return self._model.to_instruct_prompt( 94 | instruction=self._instruction, 95 | input=format_input(text=str(chunk), labels=labels), 96 | ) 97 | 98 | def _build_scores(self, complete_output: CompleteOutput) -> dict[str, Probability]: 99 | raw_probs: dict[str, float] = {} 100 | for label in self._labels_with_definitions: 101 | label_prob = 0.0 102 | assert complete_output.completions[0].log_probs 103 | for token, prob in complete_output.completions[0].log_probs[0].items(): 104 | if label.name.startswith(token.strip()) and prob: 105 | label_prob += exp(prob) 106 | raw_probs[label.name] = label_prob 107 | 108 | total = sum(raw_probs.values()) 109 | return {key: Probability(value / total) for key, value in raw_probs.items()} 110 | -------------------------------------------------------------------------------- /src/intelligence_layer/examples/qa/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Aleph-Alpha/intelligence-layer-sdk/261ab36a28977d91e2d0c8ca16039edcbab81925/src/intelligence_layer/examples/qa/__init__.py -------------------------------------------------------------------------------- /src/intelligence_layer/examples/search/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Aleph-Alpha/intelligence-layer-sdk/261ab36a28977d91e2d0c8ca16039edcbab81925/src/intelligence_layer/examples/search/__init__.py -------------------------------------------------------------------------------- /src/intelligence_layer/examples/summarize/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Aleph-Alpha/intelligence-layer-sdk/261ab36a28977d91e2d0c8ca16039edcbab81925/src/intelligence_layer/examples/summarize/__init__.py -------------------------------------------------------------------------------- /src/intelligence_layer/examples/summarize/recursive_summarize.py: -------------------------------------------------------------------------------- 1 | from pydantic import BaseModel 2 | 3 | from intelligence_layer.core import Task, TaskSpan 4 | from intelligence_layer.core.detect_language import Language 5 | from 
intelligence_layer.examples.summarize.steerable_long_context_summarize import ( 6 | SteerableLongContextSummarize, 7 | ) 8 | from intelligence_layer.examples.summarize.summarize import ( 9 | LongContextSummarizeInput, 10 | LongContextSummarizeOutput, 11 | SummarizeOutput, 12 | ) 13 | 14 | 15 | class RecursiveSummarizeInput(BaseModel): 16 | """The input for a recursive summarize-task for a text of any length. 17 | 18 | Attributes: 19 | text: A text of any length. 20 | language: The desired language of the summary. ISO 619 str with language e.g. en, fr, etc. 21 | max_tokens: The maximum desired length of the summary in tokens. 22 | """ 23 | 24 | text: str 25 | language: Language = Language("en") 26 | max_tokens: int = 512 27 | 28 | 29 | class RecursiveSummarize(Task[RecursiveSummarizeInput, SummarizeOutput]): 30 | """This task will summarize the input text recursively until the desired length is reached. 31 | 32 | It uses any long-context summarize task to go over text recursively and condense it even further. 33 | 34 | Args: 35 | long_context_summarize_task: Any task that satifies the interface Input: LongContextSummarizeInput and Output: LongContextSummarizeOutput. 36 | Defaults to :class:`SteerableLongContextSummarize` 37 | """ 38 | 39 | def __init__( 40 | self, 41 | long_context_summarize_task: ( 42 | Task[LongContextSummarizeInput, LongContextSummarizeOutput] | None 43 | ) = None, 44 | ) -> None: 45 | self.long_context_summarize_task = ( 46 | long_context_summarize_task or SteerableLongContextSummarize() 47 | ) 48 | 49 | def do_run( 50 | self, input: RecursiveSummarizeInput, task_span: TaskSpan 51 | ) -> SummarizeOutput: 52 | num_partial_summaries = 0 53 | text_to_summarize = input.text 54 | summary = "" 55 | num_generated_tokens = 0 56 | while True: 57 | summarize_output = self.long_context_summarize_task.run( 58 | LongContextSummarizeInput( 59 | text=text_to_summarize, language=input.language 60 | ), 61 | task_span, 62 | ) 63 | # If the number of chunks stayed the same, we assume that no further summarization has taken place and we return the previous summary 64 | if num_partial_summaries == len(summarize_output.partial_summaries): 65 | break 66 | num_partial_summaries = len(summarize_output.partial_summaries) 67 | 68 | partial_summaries = summarize_output.partial_summaries 69 | num_generated_tokens = sum( 70 | partial_summary.generated_tokens 71 | for partial_summary in partial_summaries 72 | ) 73 | summary = "\n".join( 74 | partial_summary.summary for partial_summary in partial_summaries 75 | ) 76 | # If the number of chunks is 1 we want to return the new summary since we assume that no further summarization will take place with our prompt 77 | if ( 78 | len(summarize_output.partial_summaries) == 1 79 | or num_generated_tokens < input.max_tokens 80 | ): 81 | break 82 | text_to_summarize = summary 83 | 84 | return SummarizeOutput( 85 | summary=summary.strip(), generated_tokens=num_generated_tokens 86 | ) 87 | -------------------------------------------------------------------------------- /src/intelligence_layer/examples/summarize/steerable_long_context_summarize.py: -------------------------------------------------------------------------------- 1 | from intelligence_layer.core import ( 2 | Chunk, 3 | ChunkInput, 4 | ChunkOutput, 5 | ControlModel, 6 | LuminousControlModel, 7 | Task, 8 | TaskSpan, 9 | ) 10 | from intelligence_layer.examples.summarize.steerable_single_chunk_summarize import ( 11 | SteerableSingleChunkSummarize, 12 | ) 13 | from 
intelligence_layer.examples.summarize.summarize import ( 14 | LongContextSummarizeInput, 15 | LongContextSummarizeOutput, 16 | PartialSummary, 17 | SingleChunkSummarizeInput, 18 | SummarizeOutput, 19 | ) 20 | 21 | 22 | class SteerableLongContextSummarize( 23 | Task[LongContextSummarizeInput, LongContextSummarizeOutput] 24 | ): 25 | """Condenses a long text into a summary. 26 | 27 | Generate a summary given an instruction setup. 28 | 29 | Args: 30 | summarize: The summarize task that is used to summarize a single chunk. 31 | Make sure that this and the chunk task use the same model. 32 | Defaults to :class:`SteerableSingleChunkSummarize` . 33 | chunk: The chunk task that is used to chunk the long text into smaller pieces 34 | such that a single chunk fits into the context of the model. 35 | Make sure that this and the summarize task use the same model. 36 | Defaults to :class:`Chunk` . 37 | model: A valid Aleph Alpha control model. This is passed on to the 38 | default summarize and chunk tasks. So it is ignored when the 39 | defaults for both tasks are overwritten. 40 | Defaults to luminous-base-control. 41 | """ 42 | 43 | def __init__( 44 | self, 45 | summarize: Task[SingleChunkSummarizeInput, SummarizeOutput] | None = None, 46 | chunk: Task[ChunkInput, ChunkOutput] | None = None, 47 | model: ControlModel | None = None, 48 | ) -> None: 49 | super().__init__() 50 | model = model or LuminousControlModel("luminous-base-control") 51 | self._summarize = summarize or SteerableSingleChunkSummarize( 52 | model, max_generated_tokens=512 53 | ) 54 | self._chunk_task = chunk or Chunk(model, max_tokens_per_chunk=1024) 55 | 56 | def do_run( 57 | self, input: LongContextSummarizeInput, task_span: TaskSpan 58 | ) -> LongContextSummarizeOutput: 59 | chunk_output = self._chunk_task.run(ChunkInput(text=input.text), task_span) 60 | summary_outputs = self._summarize.run_concurrently( 61 | [ 62 | SingleChunkSummarizeInput(chunk=chunk, language=input.language) 63 | for chunk in chunk_output.chunks 64 | ], 65 | task_span, 66 | ) 67 | return LongContextSummarizeOutput( 68 | partial_summaries=[ 69 | PartialSummary( 70 | summary=summary_output.summary, 71 | chunk=chunk, 72 | generated_tokens=summary_output.generated_tokens, 73 | ) 74 | for summary_output, chunk in zip( 75 | summary_outputs, chunk_output.chunks, strict=True 76 | ) 77 | ] 78 | ) 79 | -------------------------------------------------------------------------------- /src/intelligence_layer/examples/summarize/steerable_single_chunk_summarize.py: -------------------------------------------------------------------------------- 1 | from collections.abc import Mapping 2 | 3 | from intelligence_layer.core import ( 4 | CompleteInput, 5 | ControlModel, 6 | Language, 7 | LuminousControlModel, 8 | Task, 9 | TaskSpan, 10 | ) 11 | from intelligence_layer.examples.summarize.summarize import ( 12 | SingleChunkSummarizeInput, 13 | SummarizeOutput, 14 | ) 15 | 16 | INSTRUCTION_CONFIGS = { 17 | Language("en"): "Summarize the text in a single paragraph.", 18 | Language("de"): "Fasse den Text in einem Paragraphen zusammen.", 19 | } 20 | 21 | 22 | class SteerableSingleChunkSummarize(Task[SingleChunkSummarizeInput, SummarizeOutput]): 23 | """Summarises a text given an instruction. 24 | 25 | Args: 26 | model: A valid Aleph Alpha control model. 27 | max_generated_tokens: The maximum number of tokens to be generated by the model. 28 | This is not intended to steer the generation length, but instead will cut off the generation at the specified limit. 
29 | Note that maximum tokens + chunk size + prompt length should not exceed the context size of the model. 30 | instruction_configs: A mapping of valid `Language` to `str` for each 31 | supported language. 32 | """ 33 | 34 | def __init__( 35 | self, 36 | model: ControlModel | None = None, 37 | max_generated_tokens: int = 256, 38 | instruction_configs: Mapping[Language, str] = INSTRUCTION_CONFIGS, 39 | ) -> None: 40 | self._model = model or LuminousControlModel("luminous-base-control") 41 | self._max_generated_tokens = max_generated_tokens 42 | self._instruction_configs = instruction_configs 43 | 44 | def do_run( 45 | self, input: SingleChunkSummarizeInput, task_span: TaskSpan 46 | ) -> SummarizeOutput: 47 | instruction = self._instruction_configs.get(input.language) 48 | if not instruction: 49 | raise ValueError(f"Could not find `prompt_config` for {input.language}.") 50 | completion = self._model.complete( 51 | CompleteInput( 52 | prompt=self._model.to_instruct_prompt(instruction, input.chunk), 53 | maximum_tokens=self._max_generated_tokens, 54 | ), 55 | task_span, 56 | ) 57 | return SummarizeOutput( 58 | summary=completion.completion.strip(), 59 | generated_tokens=completion.generated_tokens, 60 | ) 61 | -------------------------------------------------------------------------------- /src/intelligence_layer/learning/__init__.py: -------------------------------------------------------------------------------- 1 | from .enrich import EnrichDomain as EnrichDomain 2 | from .enrich import EnrichQuality as EnrichQuality 3 | from .file_instruction_finetuning_data_repository import ( 4 | FileInstructionFinetuningDataRepository as FileInstructionFinetuningDataRepository, 5 | ) 6 | from .instruction_finetuning_data_handler import EnrichAction as EnrichAction 7 | from .instruction_finetuning_data_handler import ( 8 | InstructionFinetuningDataHandler as InstructionFinetuningDataHandler, 9 | ) 10 | from .instruction_finetuning_data_handler import ( 11 | instruction_finetuning_handler_builder as instruction_finetuning_handler_builder, 12 | ) 13 | from .instruction_finetuning_data_repository import ( 14 | InstructionFinetuningDataRepository as InstructionFinetuningDataRepository, 15 | ) 16 | from .models import InstructionFinetuningSample as InstructionFinetuningSample 17 | from .models import ( 18 | InstructionFinetuningSample_ as InstructionFinetuningSample_, 19 | ) 20 | from .models import ( 21 | InstructionFinetuningSampleAttributes as InstructionFinetuningSampleAttributes, 22 | ) 23 | from .models import InvalidSampleError as InvalidSampleError 24 | from .models import RawInstructionFinetuningSample as RawInstructionFinetuningSample 25 | from .models import TripletTransformation as TripletTransformation 26 | from .postgres_instruction_finetuning_data_repository import ( 27 | PostgresInstructionFinetuningDataRepository as PostgresInstructionFinetuningDataRepository, 28 | ) 29 | 30 | __all__ = [symbol for symbol in dir()] 31 | -------------------------------------------------------------------------------- /src/intelligence_layer/learning/instruction_finetuning_data_repository.py: -------------------------------------------------------------------------------- 1 | from abc import ABC, abstractmethod 2 | from collections.abc import Iterable 3 | from typing import Optional 4 | 5 | from sqlalchemy import ColumnElement 6 | 7 | from intelligence_layer.learning.models import ( 8 | InstructionFinetuningSample, 9 | ) 10 | 11 | 12 | class InstructionFinetuningDataRepository(ABC): 13 | @abstractmethod 14 | 
def store_sample(self, sample: InstructionFinetuningSample) -> str: 15 | """Stores a finetuning sample and returns its ID. 16 | 17 | Args: 18 | sample: The sample to store. 19 | 20 | Returns: 21 | The ID of the stored sample. 22 | """ 23 | pass 24 | 25 | @abstractmethod 26 | def store_samples( 27 | self, samples: Iterable[InstructionFinetuningSample] 28 | ) -> list[str]: 29 | """Stores multiple finetuning samples and returns their IDs. 30 | 31 | Args: 32 | samples: The samples to store. 33 | 34 | Returns: 35 | The IDs of the stored samples. 36 | """ 37 | pass 38 | 39 | @abstractmethod 40 | def head(self, limit: Optional[int] = 100) -> Iterable[InstructionFinetuningSample]: 41 | """Returns the first `limit` samples. 42 | 43 | Args: 44 | limit: The number of samples to return. Defaults to 100. 45 | 46 | Returns: 47 | Iterable[InstructionFinetuningSample]: The first `limit` samples. 48 | """ 49 | pass 50 | 51 | @abstractmethod 52 | def sample(self, id: str) -> Optional[InstructionFinetuningSample]: 53 | """Gets a finetuning sample by its ID. 54 | 55 | Args: 56 | id: The ID of the sample. 57 | 58 | Returns: 59 | The sample with the given ID, or None if not found. 60 | """ 61 | pass 62 | 63 | @abstractmethod 64 | def samples(self, ids: Iterable[str]) -> Iterable[InstructionFinetuningSample]: 65 | """Gets multiple finetuning samples by their IDs. 66 | 67 | Args: 68 | ids: The IDs of the samples. 69 | 70 | Returns: 71 | The samples with the given IDs. 72 | """ 73 | pass 74 | 75 | @abstractmethod 76 | def samples_with_filter( 77 | self, filter_expression: ColumnElement[bool], limit: Optional[int] = 100 78 | ) -> Iterable[InstructionFinetuningSample]: 79 | """Gets samples that match the given filter. 80 | 81 | Args: 82 | filter_expression: The filter expression. 83 | limit: The number of samples to return. Defaults to 100. 84 | 85 | Returns: 86 | The samples that match the filter. 87 | """ 88 | pass 89 | 90 | @abstractmethod 91 | def delete_sample(self, id: str) -> None: 92 | """Deletes a finetuning sample by its ID. 93 | 94 | Args: 95 | id: The ID of the sample. 96 | """ 97 | pass 98 | 99 | @abstractmethod 100 | def delete_samples(self, ids: Iterable[str]) -> None: 101 | """Deletes multiple finetuning samples by their IDs. 102 | 103 | Args: 104 | ids: The IDs of the samples. 
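        Example:
            A usage sketch against any concrete implementation of this
            interface (``repo`` and ``samples`` are assumed to exist already)::

                ids = repo.store_samples(samples)
                repo.delete_samples(ids)
                assert all(repo.sample(sample_id) is None for sample_id in ids)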
105 | """ 106 | pass 107 | -------------------------------------------------------------------------------- /src/intelligence_layer/py.typed: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Aleph-Alpha/intelligence-layer-sdk/261ab36a28977d91e2d0c8ca16039edcbab81925/src/intelligence_layer/py.typed -------------------------------------------------------------------------------- /tests/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Aleph-Alpha/intelligence-layer-sdk/261ab36a28977d91e2d0c8ca16039edcbab81925/tests/__init__.py -------------------------------------------------------------------------------- /tests/connectors/kernel/test_kernel.py: -------------------------------------------------------------------------------- 1 | from dotenv import load_dotenv 2 | from pydantic import BaseModel 3 | 4 | from intelligence_layer.connectors.kernel.kernel import KernelTask 5 | from intelligence_layer.core.tracer.tracer import NoOpTracer 6 | 7 | 8 | def test_kernel_connector() -> None: 9 | load_dotenv() 10 | tracer = NoOpTracer() 11 | 12 | class Input(BaseModel): 13 | question: str 14 | 15 | class Output(BaseModel): 16 | answer: str | None 17 | 18 | task = KernelTask( 19 | skill="playground/super_rag", 20 | input_model=Input, 21 | output_model=Output, 22 | ) 23 | 24 | output = task.run( 25 | Input(question="What is a transformer?"), 26 | tracer, 27 | ) 28 | assert output.answer and "transformer" in output.answer 29 | -------------------------------------------------------------------------------- /tests/connectors/retrievers/test_document_index_retriever.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | from intelligence_layer.connectors.retrievers.document_index_retriever import ( 4 | AsyncDocumentIndexRetriever, 5 | DocumentIndexRetriever, 6 | ) 7 | 8 | pytestmark = pytest.mark.document_index 9 | 10 | 11 | @pytest.mark.internal 12 | def test_document_index_retriever( 13 | document_index_retriever: DocumentIndexRetriever, 14 | ) -> None: 15 | documents = document_index_retriever.get_relevant_documents_with_scores("Coca-Cola") 16 | assert len(documents) > 0 17 | 18 | 19 | @pytest.mark.internal 20 | async def test_async_document_index_retriever( 21 | async_document_index_retriever: AsyncDocumentIndexRetriever, 22 | ) -> None: 23 | documents = await async_document_index_retriever.get_relevant_documents_with_scores( 24 | "Coca-Cola" 25 | ) 26 | assert len(documents) > 0 27 | -------------------------------------------------------------------------------- /tests/connectors/retrievers/test_hybrid_qdrant_in_memory_retriever.py: -------------------------------------------------------------------------------- 1 | from collections.abc import Sequence 2 | 3 | from pytest import fixture 4 | 5 | from intelligence_layer.connectors import AlephAlphaClientProtocol, RetrieverType 6 | from intelligence_layer.connectors.retrievers.base_retriever import Document 7 | from intelligence_layer.connectors.retrievers.hybrid_qdrant_in_memory_retriever import ( 8 | HybridQdrantInMemoryRetriever, 9 | ) 10 | from tests.conftest import to_document 11 | 12 | 13 | @fixture 14 | def in_memory_retriever_documents() -> Sequence[Document]: 15 | return [ 16 | Document(text="Summer is warm but I like it"), 17 | Document(text="I do not like rain"), 18 | Document(text="We are so back"), 19 | Document(text="Summer rain is 
rejuvenating"), 20 | ] 21 | 22 | 23 | @fixture 24 | def hybrid_asymmetric_in_memory_retriever( 25 | client: AlephAlphaClientProtocol, 26 | in_memory_retriever_documents: Sequence[Document], 27 | ) -> HybridQdrantInMemoryRetriever: 28 | return HybridQdrantInMemoryRetriever( 29 | in_memory_retriever_documents, 30 | client=client, 31 | k=2, 32 | retriever_type=RetrieverType.ASYMMETRIC, 33 | ) 34 | 35 | 36 | @fixture 37 | def hybrid_symmetric_in_memory_retriever( 38 | client: AlephAlphaClientProtocol, 39 | in_memory_retriever_documents: Sequence[Document], 40 | ) -> HybridQdrantInMemoryRetriever: 41 | return HybridQdrantInMemoryRetriever( 42 | in_memory_retriever_documents, 43 | client=client, 44 | k=2, 45 | retriever_type=RetrieverType.SYMMETRIC, 46 | ) 47 | 48 | 49 | def test_asymmetric_in_memory_retriever( 50 | hybrid_asymmetric_in_memory_retriever: HybridQdrantInMemoryRetriever, 51 | in_memory_retriever_documents: Sequence[Document], 52 | ) -> None: 53 | query = "Do you like hot weather?" 54 | documents = ( 55 | hybrid_asymmetric_in_memory_retriever.get_relevant_documents_with_scores(query) 56 | ) 57 | assert in_memory_retriever_documents[0] == to_document(documents[0].document_chunk) 58 | assert len(documents) <= 2 59 | 60 | 61 | def test_symmetric_in_memory_retriever( 62 | hybrid_symmetric_in_memory_retriever: HybridQdrantInMemoryRetriever, 63 | in_memory_retriever_documents: Sequence[Document], 64 | ) -> None: 65 | query = "I hate drizzle" 66 | documents = hybrid_symmetric_in_memory_retriever.get_relevant_documents_with_scores( 67 | query 68 | ) 69 | assert in_memory_retriever_documents[1] == to_document(documents[0].document_chunk) 70 | assert len(documents) <= 2 71 | 72 | 73 | def test_hybrid_in_memory_retriever( 74 | hybrid_asymmetric_in_memory_retriever: HybridQdrantInMemoryRetriever, 75 | in_memory_retriever_documents: Sequence[Document], 76 | ) -> None: 77 | query = "Summer rain" 78 | documents = ( 79 | hybrid_asymmetric_in_memory_retriever.get_relevant_documents_with_scores(query) 80 | ) 81 | assert in_memory_retriever_documents[3] == to_document(documents[0].document_chunk) 82 | assert len(documents) <= 2 83 | -------------------------------------------------------------------------------- /tests/connectors/retrievers/test_qdrant_in_memory_retriever.py: -------------------------------------------------------------------------------- 1 | from collections.abc import Sequence 2 | 3 | from pytest import fixture 4 | 5 | from intelligence_layer.connectors.retrievers.base_retriever import Document 6 | from intelligence_layer.connectors.retrievers.qdrant_in_memory_retriever import ( 7 | QdrantInMemoryRetriever, 8 | ) 9 | from tests.conftest_document_index import to_document 10 | 11 | 12 | @fixture 13 | def in_memory_retriever_documents() -> Sequence[Document]: 14 | return [ 15 | Document(text="I do not like rain"), 16 | Document(text="Summer is warm"), 17 | Document(text="We are so back"), 18 | ] 19 | 20 | 21 | def test_asymmetric_in_memory_retriever( 22 | asymmetric_in_memory_retriever: QdrantInMemoryRetriever, 23 | in_memory_retriever_documents: Sequence[Document], 24 | ) -> None: 25 | query = "Do you like summer?" 
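    # The asymmetric retriever is expected to embed the query and the stored
    # documents with different (query vs. document) representations, so the
    # summer document at index 1 should rank first for this weather question.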
26 | documents = asymmetric_in_memory_retriever.get_relevant_documents_with_scores(query) 27 | assert in_memory_retriever_documents[1] == to_document(documents[0].document_chunk) 28 | assert len(documents) <= 2 29 | 30 | 31 | def test_symmetric_in_memory_retriever( 32 | symmetric_in_memory_retriever: QdrantInMemoryRetriever, 33 | in_memory_retriever_documents: Sequence[Document], 34 | ) -> None: 35 | query = "I hate drizzle" 36 | documents = symmetric_in_memory_retriever.get_relevant_documents_with_scores(query) 37 | assert in_memory_retriever_documents[0] == to_document(documents[0].document_chunk) 38 | assert len(documents) <= 2 39 | -------------------------------------------------------------------------------- /tests/connectors/studio/conftest.py: -------------------------------------------------------------------------------- 1 | from collections.abc import Sequence 2 | from unittest.mock import Mock 3 | from uuid import uuid4 4 | 5 | from dotenv import load_dotenv 6 | from pydantic import BaseModel 7 | from pytest import fixture 8 | 9 | from intelligence_layer.connectors.studio.studio import StudioClient, StudioExample 10 | 11 | 12 | @fixture 13 | def studio_client() -> StudioClient: 14 | load_dotenv() 15 | project_name = str(uuid4()) 16 | client = StudioClient(project_name) 17 | client.create_project(project_name) 18 | return client 19 | 20 | 21 | @fixture 22 | def mock_studio_client() -> Mock: 23 | return Mock(spec=StudioClient) 24 | 25 | 26 | class PydanticType(BaseModel): 27 | data: int 28 | 29 | 30 | @fixture 31 | def examples() -> Sequence[StudioExample[PydanticType, PydanticType]]: 32 | return [ 33 | StudioExample[PydanticType, PydanticType]( 34 | input=PydanticType(data=i), expected_output=PydanticType(data=i) 35 | ) 36 | for i in range(2) 37 | ] 38 | 39 | 40 | @fixture 41 | def many_examples() -> Sequence[StudioExample[PydanticType, PydanticType]]: 42 | examples = [ 43 | StudioExample[PydanticType, PydanticType]( 44 | input=PydanticType(data=i), expected_output=PydanticType(data=i) 45 | ) 46 | for i in range(15) 47 | ] 48 | return examples 49 | -------------------------------------------------------------------------------- /tests/connectors/studio/test_studio_dataset.py: -------------------------------------------------------------------------------- 1 | from collections.abc import Iterable, Sequence 2 | from typing import Any 3 | from uuid import UUID 4 | 5 | from pytest import fixture 6 | 7 | from intelligence_layer.connectors import StudioClient 8 | from intelligence_layer.connectors.studio.studio import StudioDataset 9 | from intelligence_layer.evaluation.dataset.domain import Example 10 | from intelligence_layer.evaluation.dataset.in_memory_dataset_repository import ( 11 | InMemoryDatasetRepository, 12 | ) 13 | from intelligence_layer.evaluation.dataset.studio_dataset_repository import ( 14 | StudioDatasetRepository, 15 | ) 16 | from tests.connectors.studio.conftest import PydanticType 17 | 18 | 19 | @fixture 20 | def labels() -> set[str]: 21 | return {"label1", "label2"} 22 | 23 | 24 | @fixture 25 | def metadata() -> dict[str, Any]: 26 | return {"key": "value"} 27 | 28 | 29 | @fixture 30 | def with_uploaded_dataset( 31 | studio_client: StudioClient, many_examples: Sequence[Example] 32 | ): 33 | dataset_repo = StudioDatasetRepository(studio_client) 34 | dataset = dataset_repo.create_dataset(many_examples, "my_dataset") 35 | 36 | return dataset 37 | 38 | 39 | def test_can_upload_dataset_with_minimal_request_body( 40 | studio_client: StudioClient, 41 | examples: 
Sequence[Example], 42 | ) -> None: 43 | dataset_repo = InMemoryDatasetRepository() 44 | dataset = dataset_repo.create_dataset(examples, "my_dataset") 45 | 46 | studio_dataset = StudioDatasetRepository.map_to_studio_dataset(dataset) 47 | studio_examples = StudioDatasetRepository.map_to_many_studio_example(examples) 48 | 49 | result = studio_client.submit_dataset( 50 | dataset=studio_dataset, examples=studio_examples 51 | ) 52 | uuid = UUID(result) 53 | assert uuid 54 | 55 | 56 | def test_can_upload_dataset_with_complete_request_body( 57 | studio_client: StudioClient, 58 | examples: Sequence[Example[PydanticType, PydanticType]], 59 | labels: set[str], 60 | metadata: dict[str, Any], 61 | ) -> None: 62 | dataset_repo = InMemoryDatasetRepository() 63 | dataset = dataset_repo.create_dataset( 64 | examples, "my_dataset", labels=labels, metadata=metadata 65 | ) 66 | 67 | studio_dataset = StudioDatasetRepository.map_to_studio_dataset(dataset) 68 | studio_examples = StudioDatasetRepository.map_to_many_studio_example(examples) 69 | 70 | result = studio_client.submit_dataset( 71 | dataset=studio_dataset, examples=studio_examples 72 | ) 73 | assert result 74 | 75 | 76 | def test_get_many_dataset_examples( 77 | studio_client: StudioClient, 78 | many_examples: Iterable[Example[PydanticType, PydanticType]], 79 | with_uploaded_dataset: StudioDataset, 80 | ) -> None: 81 | received_examples = studio_client.get_dataset_examples( 82 | with_uploaded_dataset.id, 83 | input_type=PydanticType, 84 | expected_output_type=PydanticType, 85 | ) 86 | 87 | for received_example, given_example in zip( 88 | received_examples, many_examples, strict=True 89 | ): 90 | # These models appear equal, but somehow are not -> we need to check the specific values, not the models 91 | assert received_example.model_dump() == given_example.model_dump() 92 | assert received_example.input.data == given_example.input.data 93 | assert ( 94 | received_example.expected_output.data == given_example.expected_output.data 95 | ) 96 | -------------------------------------------------------------------------------- /tests/core/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Aleph-Alpha/intelligence-layer-sdk/261ab36a28977d91e2d0c8ca16039edcbab81925/tests/core/__init__.py -------------------------------------------------------------------------------- /tests/core/test_detect_language.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | from lingua import Language as LinguaLanguage 3 | 4 | from intelligence_layer.core import ( 5 | DetectLanguage, 6 | DetectLanguageInput, 7 | Language, 8 | NoOpTracer, 9 | ) 10 | 11 | 12 | @pytest.mark.parametrize( 13 | "text_input,expected_language", 14 | [ 15 | ( 16 | "Hello, my name is Niklas. I am working with Pit on this language detection piece.", 17 | Language("en"), 18 | ), 19 | ( 20 | "Hola, mi nombre es Niklas. Estoy trabajando con Pit en esta pieza de detección de idioma.", 21 | Language("es"), 22 | ), 23 | ( 24 | "Ciao, mi chiamo Niklas. Sto lavorando con Pit su questo pezzo di rilevamento della lingua.", 25 | Language("it"), 26 | ), 27 | ( 28 | "Hallo, mein Name ist Niklas. Ich arbeite mit Pit an diesem Stück zur Spracherkennung.", 29 | Language("de"), 30 | ), 31 | ( 32 | "Bonjour, je m'appelle Niklas. Je travaille avec Pit sur cette pièce de détection de langue.", 33 | Language("fr"), 34 | ), 35 | ( 36 | "Hola, em dic Niklas. 
Estic treballant amb Pit en aquesta peça de detecció d'idiomes.", 37 | Language("ca"), 38 | ), 39 | ( 40 | "Cześć, nazywam się Niklas. Pracuję z Pitem nad tym kawałkiem wykrywania języka.", 41 | Language("pl"), 42 | ), 43 | ], 44 | ) 45 | def test_detect_language_returns_correct_language( 46 | text_input: str, expected_language: Language 47 | ) -> None: 48 | task = DetectLanguage() 49 | input = DetectLanguageInput( 50 | text=text_input, 51 | possible_languages=[ 52 | Language(lang) for lang in ["en", "de", "fr", "it", "es", "pl", "ca"] 53 | ], 54 | ) 55 | tracer = NoOpTracer() 56 | output = task.run(input, tracer) 57 | 58 | assert output.best_fit == expected_language 59 | 60 | 61 | def test_detect_language_returns_none_if_no_language_can_be_detected() -> None: 62 | text = "Je m’appelle Jessica. Je suis une fille, je suis française et j’ai treize ans." # codespell:ignore 63 | task = DetectLanguage() 64 | input = DetectLanguageInput( 65 | text=text, 66 | possible_languages=[Language(lang) for lang in ["en", "de"]], 67 | ) 68 | tracer = NoOpTracer() 69 | output = task.run(input, tracer) 70 | 71 | assert output.best_fit is None 72 | 73 | 74 | def test_conversion_to_lingua_works() -> None: 75 | language: Language = Language("de") 76 | expected_language: LinguaLanguage = LinguaLanguage.GERMAN 77 | 78 | converted_language = language.to_lingua_language() 79 | 80 | assert converted_language == expected_language 81 | -------------------------------------------------------------------------------- /tests/core/test_task.py: -------------------------------------------------------------------------------- 1 | from collections.abc import Callable 2 | from concurrent.futures import ThreadPoolExecutor 3 | from functools import wraps 4 | from threading import Lock 5 | from time import sleep 6 | 7 | from intelligence_layer.core import ( 8 | MAX_CONCURRENCY, 9 | InMemoryTracer, 10 | NoOpTracer, 11 | Task, 12 | TaskSpan, 13 | ) 14 | 15 | 16 | class ConcurrencyCounter(Task[None, None]): 17 | max_concurrency_counter: int = 0 18 | concurrency_counter: int = 0 19 | 20 | def __init__(self) -> None: 21 | self.lock = Lock() 22 | 23 | def do_run(self, input: None, task_span: TaskSpan) -> None: 24 | with self.lock: 25 | self.concurrency_counter += 1 26 | self.max_concurrency_counter = max( 27 | self.max_concurrency_counter, self.concurrency_counter 28 | ) 29 | 30 | sleep(0.01) 31 | with self.lock: 32 | self.concurrency_counter -= 1 33 | 34 | 35 | class DeadlockDetector(Task[None, None]): 36 | def __init__(self) -> None: 37 | super().__init__() 38 | self.inner_task = InnerTask() 39 | 40 | def do_run(self, input: None, task_span: TaskSpan) -> None: 41 | # wait a bit so all DeadlockDetector tasks run before the first InnerTask is submitted 42 | sleep(0.01) 43 | with ThreadPoolExecutor(max_workers=1) as executor: 44 | future = executor.submit( 45 | self.inner_task.run_concurrently, [input], task_span 46 | ) 47 | # wait a bit to ensure the future has finished 48 | # (even if the InnerTasks of all DeadlockDetector tasks are scheduled sequentially) 49 | for _ in range(20): 50 | if future.done(): 51 | break 52 | sleep(0.1) 53 | if not future.done(): 54 | executor.shutdown(wait=False) 55 | raise RuntimeError("Deadlock detected") 56 | 57 | 58 | class InnerTask(Task[None, None]): 59 | def do_run(self, input: None, task_span: TaskSpan) -> None: 60 | pass 61 | 62 | 63 | def dummy_decorator( 64 | f: Callable[["BaseTask", None, TaskSpan], None], 65 | ) -> Callable[["BaseTask", None, TaskSpan], None]: 66 | @wraps(f) 67 | def wrap( 68 
| self: "BaseTask", 69 | input: None, 70 | task_span: TaskSpan, 71 | ) -> None: 72 | return f(self, input, task_span) 73 | 74 | return wrap 75 | 76 | 77 | class BaseTask(Task[None, None]): 78 | @dummy_decorator 79 | def do_run(self, input: None, task_span: TaskSpan) -> None: 80 | task_span.log("Plain", "Entry") 81 | 82 | 83 | class SubTask(BaseTask): 84 | pass 85 | 86 | 87 | class NestedTask(Task[None, None]): 88 | def do_run(self, input: None, task_span: TaskSpan) -> None: 89 | BaseTask().run(input, task_span) 90 | 91 | 92 | def test_run_concurrently() -> None: 93 | task = ConcurrencyCounter() 94 | task.run_concurrently([None] * MAX_CONCURRENCY * 10, NoOpTracer()) 95 | assert task.max_concurrency_counter == MAX_CONCURRENCY 96 | 97 | 98 | def test_run_concurrently_limited() -> None: 99 | task = ConcurrencyCounter() 100 | limit_concurrency = MAX_CONCURRENCY // 2 101 | task.run_concurrently([None] * MAX_CONCURRENCY * 3, NoOpTracer(), limit_concurrency) 102 | assert task.max_concurrency_counter == limit_concurrency 103 | 104 | 105 | def test_run_concurrently_does_not_deadlock_if_nested() -> None: 106 | task = DeadlockDetector() 107 | task.run_concurrently([None] * MAX_CONCURRENCY, NoOpTracer()) 108 | 109 | 110 | def test_sub_tasks_do_not_introduce_multiple_task_spans() -> None: 111 | tracer = InMemoryTracer() 112 | 113 | SubTask().run(None, tracer) 114 | 115 | assert tracer.entries 116 | assert isinstance(tracer.entries[0], TaskSpan) 117 | assert tracer.entries[0].entries 118 | assert not isinstance(tracer.entries[0].entries[0], TaskSpan) 119 | -------------------------------------------------------------------------------- /tests/core/tracer/conftest.py: -------------------------------------------------------------------------------- 1 | import time 2 | from pathlib import Path 3 | 4 | from pytest import fixture 5 | 6 | from intelligence_layer.core import FileTracer, InMemoryTracer, Task, TaskSpan 7 | 8 | 9 | class TracerTestSubTask(Task[None, None]): 10 | def do_run(self, input: None, task_span: TaskSpan) -> None: 11 | task_span.log("subtask", "value") 12 | 13 | 14 | class TracerTestTask(Task[str, str]): 15 | sub_task = TracerTestSubTask() 16 | 17 | def do_run(self, input: str, task_span: TaskSpan) -> str: 18 | time.sleep(0.001) 19 | with task_span.span("span") as sub_span: 20 | time.sleep(0.001) 21 | sub_span.log("message", "a value") 22 | time.sleep(0.001) 23 | self.sub_task.run(None, sub_span) 24 | time.sleep(0.001) 25 | self.sub_task.run(None, task_span) 26 | time.sleep(0.001) 27 | return "output" 28 | 29 | 30 | class SpecificTestException(Exception): 31 | pass 32 | 33 | 34 | @fixture 35 | def tracer_test_task() -> Task[str, str]: 36 | return TracerTestTask() 37 | 38 | 39 | @fixture 40 | def file_tracer(tmp_path: Path) -> FileTracer: 41 | return FileTracer(tmp_path / "log.log") 42 | 43 | 44 | @fixture 45 | def in_memory_tracer() -> InMemoryTracer: 46 | return InMemoryTracer() 47 | -------------------------------------------------------------------------------- /tests/core/tracer/fixtures/old_file_trace_format.jsonl: -------------------------------------------------------------------------------- 1 | {"trace_id":"00b96d92-d1d1-454a-b8e9-f67e8541759a","entry_type":"StartTask","entry":{"uuid":"41528209-1b78-4785-a00d-7f65af1bb09c","parent":"75e79a11-1a26-4731-8b49-ef8634c352ed","name":"TestTask","start":"2024-05-22T09:43:37.428758Z","input":"input","trace_id":"00b96d92-d1d1-454a-b8e9-f67e8541759a"}} 2 | 
{"trace_id":"00b96d92-d1d1-454a-b8e9-f67e8541759a","entry_type":"StartSpan","entry":{"uuid":"ad1ed79b-6ad6-4ea5-8ee8-26be4055e228","parent":"41528209-1b78-4785-a00d-7f65af1bb09c","name":"span","start":"2024-05-22T09:43:37.429448Z","trace_id":"00b96d92-d1d1-454a-b8e9-f67e8541759a"}} 3 | {"trace_id":"00b96d92-d1d1-454a-b8e9-f67e8541759a","entry_type":"PlainEntry","entry":{"message":"message","value":"a value","timestamp":"2024-05-22T09:43:37.429503Z","parent":"ad1ed79b-6ad6-4ea5-8ee8-26be4055e228","trace_id":"00b96d92-d1d1-454a-b8e9-f67e8541759a"}} 4 | {"trace_id":"00b96d92-d1d1-454a-b8e9-f67e8541759a","entry_type":"StartTask","entry":{"uuid":"e8cca541-57a8-440a-b848-7c3b33a97f52","parent":"ad1ed79b-6ad6-4ea5-8ee8-26be4055e228","name":"TestSubTask","start":"2024-05-22T09:43:37.429561Z","input":null,"trace_id":"00b96d92-d1d1-454a-b8e9-f67e8541759a"}} 5 | {"trace_id":"00b96d92-d1d1-454a-b8e9-f67e8541759a","entry_type":"PlainEntry","entry":{"message":"subtask","value":"value","timestamp":"2024-05-22T09:43:37.429605Z","parent":"e8cca541-57a8-440a-b848-7c3b33a97f52","trace_id":"00b96d92-d1d1-454a-b8e9-f67e8541759a"}} 6 | {"trace_id":"00b96d92-d1d1-454a-b8e9-f67e8541759a","entry_type":"EndTask","entry":{"uuid":"e8cca541-57a8-440a-b848-7c3b33a97f52","end":"2024-05-22T09:43:37.429647Z","output":null}} 7 | {"trace_id":"00b96d92-d1d1-454a-b8e9-f67e8541759a","entry_type":"EndSpan","entry":{"uuid":"ad1ed79b-6ad6-4ea5-8ee8-26be4055e228","end":"2024-05-22T09:43:37.429687Z"}} 8 | {"trace_id":"00b96d92-d1d1-454a-b8e9-f67e8541759a","entry_type":"StartTask","entry":{"uuid":"8840185c-2019-4105-9178-1b0e20ab6388","parent":"41528209-1b78-4785-a00d-7f65af1bb09c","name":"TestSubTask","start":"2024-05-22T09:43:37.429728Z","input":null,"trace_id":"00b96d92-d1d1-454a-b8e9-f67e8541759a"}} 9 | {"trace_id":"00b96d92-d1d1-454a-b8e9-f67e8541759a","entry_type":"PlainEntry","entry":{"message":"subtask","value":"value","timestamp":"2024-05-22T09:43:37.429768Z","parent":"8840185c-2019-4105-9178-1b0e20ab6388","trace_id":"00b96d92-d1d1-454a-b8e9-f67e8541759a"}} 10 | {"trace_id":"00b96d92-d1d1-454a-b8e9-f67e8541759a","entry_type":"EndTask","entry":{"uuid":"8840185c-2019-4105-9178-1b0e20ab6388","end":"2024-05-22T09:43:37.429806Z","output":null}} 11 | {"trace_id":"00b96d92-d1d1-454a-b8e9-f67e8541759a","entry_type":"EndTask","entry":{"uuid":"41528209-1b78-4785-a00d-7f65af1bb09c","end":"2024-05-22T09:43:37.429842Z","output":"output"}} 12 | -------------------------------------------------------------------------------- /tests/core/tracer/test_composite_tracer.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | from intelligence_layer.core import CompositeTracer, InMemoryTracer, SpanStatus, Task 4 | from tests.core.tracer.conftest import SpecificTestException 5 | 6 | 7 | def test_composite_tracer(tracer_test_task: Task[str, str]) -> None: 8 | tracer_1 = InMemoryTracer() 9 | tracer_2 = InMemoryTracer() 10 | tracer_test_task.run(input="input", tracer=CompositeTracer([tracer_1, tracer_2])) 11 | 12 | trace_1 = tracer_1.export_for_viewing()[0] 13 | trace_2 = tracer_2.export_for_viewing()[0] 14 | assert trace_1.name == trace_2.name 15 | assert trace_1.attributes == trace_2.attributes 16 | assert trace_1.status == trace_2.status 17 | assert trace_1.context.trace_id != trace_2.context.trace_id 18 | assert trace_1.context.span_id != trace_2.context.span_id 19 | 20 | 21 | def test_composite_tracer_can_get_span_status( 22 | tracer_test_task: Task[str, str], 23 | ) -> None: 24 | tracer_1 
= InMemoryTracer() 25 | tracer_2 = InMemoryTracer() 26 | 27 | composite_tracer = CompositeTracer([tracer_1, tracer_2]) 28 | 29 | with composite_tracer.span("test_name") as composite_span: 30 | assert composite_span.status_code == SpanStatus.OK 31 | 32 | 33 | def test_composite_tracer_raises_for_inconsistent_span_status( 34 | tracer_test_task: Task[str, str], 35 | ) -> None: 36 | tracer_1 = InMemoryTracer() 37 | tracer_2 = InMemoryTracer() 38 | 39 | composite_tracer = CompositeTracer([tracer_1, tracer_2]) 40 | 41 | with composite_tracer.span("test_name") as composite_span: 42 | spans = composite_span.tracers 43 | single_span = spans[0] 44 | try: 45 | with single_span: 46 | raise SpecificTestException 47 | except SpecificTestException: 48 | pass 49 | 50 | with pytest.raises(ValueError): 51 | composite_span.status_code # noqa: B018 52 | -------------------------------------------------------------------------------- /tests/core/tracer/test_file_tracer.py: -------------------------------------------------------------------------------- 1 | from pathlib import Path 2 | from unittest.mock import Mock 3 | 4 | import pytest 5 | from pytest import fixture 6 | 7 | from intelligence_layer.core import ( 8 | FileTracer, 9 | InMemoryTaskSpan, 10 | Task, 11 | TracerLogEntryFailed, 12 | ) 13 | from tests.core.tracer.conftest import SpecificTestException 14 | 15 | 16 | @fixture 17 | def file_tracer(tmp_path: Path) -> FileTracer: 18 | return FileTracer(tmp_path / "log.log") 19 | 20 | 21 | def test_file_tracer_retrieves_all_file_traces( 22 | file_tracer: FileTracer, tracer_test_task: Task[str, str] 23 | ) -> None: 24 | input = "input" 25 | 26 | tracer_test_task.run(input, file_tracer) 27 | tracer_test_task.run(input, file_tracer) 28 | traces = file_tracer.traces() 29 | assert len(traces.entries) == 2 30 | assert isinstance(traces.entries[0], InMemoryTaskSpan) 31 | assert isinstance(traces.entries[1], InMemoryTaskSpan) 32 | assert traces.entries[0].context.trace_id != traces.entries[1].context.trace_id 33 | 34 | 35 | def test_file_tracer_handles_tracer_log_entry_failed_exception( 36 | file_tracer: FileTracer, 37 | ) -> None: 38 | file_tracer._log_entry = Mock( # type: ignore[method-assign] 39 | side_effect=[TracerLogEntryFailed("Hi I am an error", "21"), None] 40 | ) 41 | 42 | try: 43 | file_tracer.task_span(task_name="mock_task_name", input="42", timestamp=None) 44 | except Exception as exception: 45 | raise AssertionError(f"Unexpected exception: {exception}") from None 46 | 47 | 48 | def test_file_tracer_raises_non_log_entry_failed_exceptions( 49 | file_tracer: FileTracer, 50 | ) -> None: 51 | file_tracer._log_entry = Mock( # type: ignore[method-assign] 52 | side_effect=[SpecificTestException("Hi I am an error", "21")] 53 | ) 54 | with pytest.raises(SpecificTestException): 55 | file_tracer.task_span(task_name="mock_task_name", input="42", timestamp=None) 56 | 57 | 58 | def test_file_tracer_is_backwards_compatible() -> None: 59 | current_file_location = Path(__file__) 60 | file_tracer = FileTracer( 61 | current_file_location.parent / "fixtures/old_file_trace_format.jsonl" 62 | ) 63 | tracer = file_tracer.traces() 64 | 65 | assert len(tracer.entries) == 1 66 | task_span = tracer.entries[0] 67 | assert isinstance(task_span, InMemoryTaskSpan) 68 | assert task_span.input == "input" 69 | assert task_span.start_timestamp and task_span.end_timestamp 70 | assert task_span.start_timestamp < task_span.end_timestamp 71 | --------------------------------------------------------------------------------
/tests/dog-and-cat-cover.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Aleph-Alpha/intelligence-layer-sdk/261ab36a28977d91e2d0c8ca16039edcbab81925/tests/dog-and-cat-cover.jpg -------------------------------------------------------------------------------- /tests/evaluation/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Aleph-Alpha/intelligence-layer-sdk/261ab36a28977d91e2d0c8ca16039edcbab81925/tests/evaluation/__init__.py -------------------------------------------------------------------------------- /tests/evaluation/aggregation/conftest.py: -------------------------------------------------------------------------------- 1 | from pytest import fixture 2 | 3 | from intelligence_layer.core import utc_now 4 | from intelligence_layer.evaluation import AggregationOverview, EvaluationOverview 5 | from tests.evaluation.conftest import DummyAggregatedEvaluation 6 | 7 | 8 | @fixture 9 | def dummy_aggregated_evaluation() -> DummyAggregatedEvaluation: 10 | return DummyAggregatedEvaluation(score=0.5) 11 | 12 | 13 | @fixture 14 | def aggregation_overview( 15 | evaluation_overview: EvaluationOverview, 16 | dummy_aggregated_evaluation: DummyAggregatedEvaluation, 17 | ) -> AggregationOverview[DummyAggregatedEvaluation]: 18 | return AggregationOverview( 19 | evaluation_overviews=frozenset([evaluation_overview]), 20 | id="aggregation-id", 21 | start=utc_now(), 22 | end=utc_now(), 23 | successful_evaluation_count=5, 24 | crashed_during_evaluation_count=3, 25 | description="dummy-evaluator", 26 | statistics=dummy_aggregated_evaluation, 27 | ) 28 | -------------------------------------------------------------------------------- /tests/evaluation/aggregation/test_accumulator.py: -------------------------------------------------------------------------------- 1 | from intelligence_layer.evaluation import MeanAccumulator 2 | 3 | 4 | def test_mean_accumulator_returns_mean() -> None: 5 | acc = MeanAccumulator() 6 | assert acc.extract() == 0.0 7 | acc.add(1) 8 | assert acc.extract() == 1.0 9 | acc.add(0) 10 | assert acc.extract() == 0.5 11 | 12 | 13 | def test_mean_accumulator_returns_stdev_and_se() -> None: 14 | acc = MeanAccumulator() 15 | assert acc.standard_deviation() == 0.0 16 | assert acc.standard_error() == 0.0 17 | acc.add(1) 18 | assert acc.standard_deviation() == 0.0 19 | assert acc.standard_error() == 0.0 20 | acc.add(0) 21 | assert acc.standard_deviation() == 0.5 22 | assert round(acc.standard_error(), 3) == 0.354 23 | -------------------------------------------------------------------------------- /tests/evaluation/aggregation/test_domain.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | from intelligence_layer.evaluation.aggregation.domain import AggregationOverview 4 | from intelligence_layer.evaluation.evaluation.domain import EvaluationFailed 5 | from tests.evaluation.conftest import DummyAggregatedEvaluation 6 | 7 | 8 | def test_raise_on_exception_for_evaluation_run_overview( 9 | aggregation_overview: AggregationOverview[DummyAggregatedEvaluation], 10 | ) -> None: 11 | with pytest.raises(EvaluationFailed): 12 | aggregation_overview.raise_on_evaluation_failure() 13 | -------------------------------------------------------------------------------- /tests/evaluation/aggregation/test_elo_calculator.py: -------------------------------------------------------------------------------- 1 | 
from collections.abc import Sequence 2 | from itertools import combinations 3 | 4 | from pydantic import BaseModel 5 | from pytest import fixture 6 | 7 | from intelligence_layer.evaluation import EloCalculator, MatchOutcome, WinRateCalculator 8 | from intelligence_layer.evaluation.evaluation.evaluator.incremental_evaluator import ( 9 | ComparisonEvaluation, 10 | ) 11 | 12 | 13 | @fixture 14 | def players() -> Sequence[str]: 15 | return [str(i + 1) for i in range(10)] 16 | 17 | 18 | @fixture 19 | def matches(players: Sequence[str]) -> Sequence[ComparisonEvaluation]: 20 | return [ 21 | ComparisonEvaluation( 22 | first_player=player_a, second_player=player_b, outcome=MatchOutcome.A_WINS 23 | ) 24 | for player_a, player_b in combinations(players, 2) 25 | ] 26 | 27 | 28 | class MatchOutcomeModel(BaseModel): 29 | match_outcome: MatchOutcome 30 | 31 | 32 | def test_match_outcome_serializes() -> None: 33 | match_outcome_model = MatchOutcomeModel(match_outcome=MatchOutcome.A_WINS) 34 | dumped = match_outcome_model.model_dump_json() 35 | loaded = MatchOutcomeModel.model_validate_json(dumped) 36 | 37 | assert loaded == match_outcome_model 38 | 39 | 40 | def test_elo_calculator_works( 41 | players: Sequence[str], matches: Sequence[ComparisonEvaluation] 42 | ) -> None: 43 | elo_calculator = EloCalculator(players) 44 | elo_calculator.calculate(matches) 45 | 46 | sorted_scores = { 47 | k: v 48 | for k, v in sorted( 49 | elo_calculator.ratings.items(), key=lambda item: item[1], reverse=True 50 | ) 51 | } 52 | assert [int(i) for i in players] == [int(i) for i in sorted_scores] 53 | assert ( 54 | round(sum(score for score in sorted_scores.values()) / len(sorted_scores), 0) 55 | == 1500 56 | ) 57 | 58 | 59 | def test_win_rate_calculator_works( 60 | players: Sequence[str], matches: Sequence[ComparisonEvaluation] 61 | ) -> None: 62 | win_rate_calculator = WinRateCalculator(players) 63 | scores = win_rate_calculator.calculate(matches) 64 | 65 | sorted_scores = { 66 | k: v for k, v in sorted(scores.items(), key=lambda item: item[1], reverse=True) 67 | } 68 | assert [int(i) for i in players] == [int(i) for i in sorted_scores] 69 | assert ( 70 | round( 71 | sum(score for score in sorted_scores.values()) / len(sorted_scores), 72 | 5, 73 | ) 74 | == 0.5 75 | ) 76 | -------------------------------------------------------------------------------- /tests/evaluation/aggregation/test_hugging_face_aggregation_repository.py: -------------------------------------------------------------------------------- 1 | from collections.abc import Iterable 2 | from uuid import uuid4 3 | 4 | import huggingface_hub 5 | from _pytest.fixtures import fixture 6 | 7 | from intelligence_layer.core import utc_now 8 | from intelligence_layer.evaluation import ( 9 | AggregationOverview, 10 | HuggingFaceAggregationRepository, 11 | ) 12 | from tests.evaluation.conftest import DummyAggregatedEvaluation 13 | 14 | 15 | @fixture 16 | def dummy_aggregated_evaluation() -> DummyAggregatedEvaluation: 17 | return DummyAggregatedEvaluation(score=0.5) 18 | 19 | 20 | # these fixtures should only be used once and are here for readable tests 21 | # because creating/deleting HuggingFace repositories can be rate-limited 22 | @fixture(scope="session") 23 | def hugging_face_aggregation_repository( 24 | hugging_face_token: str, hugging_face_test_repository_id: str 25 | ) -> Iterable[HuggingFaceAggregationRepository]: 26 | try: 27 | yield HuggingFaceAggregationRepository( 28 | hugging_face_test_repository_id, 29 | token=hugging_face_token, 30 | private=True, 31 | ) 
32 | finally: 33 | huggingface_hub.delete_repo( 34 | repo_id=hugging_face_test_repository_id, 35 | token=hugging_face_token, 36 | repo_type="dataset", 37 | missing_ok=True, 38 | ) 39 | 40 | 41 | @fixture 42 | def aggregation_overview( 43 | dummy_aggregated_evaluation: DummyAggregatedEvaluation, 44 | ) -> AggregationOverview[DummyAggregatedEvaluation]: 45 | return AggregationOverview( 46 | evaluation_overviews=frozenset([]), 47 | id=str(uuid4()), 48 | start=utc_now(), 49 | end=utc_now(), 50 | successful_evaluation_count=0, 51 | crashed_during_evaluation_count=0, 52 | description="", 53 | statistics=dummy_aggregated_evaluation, 54 | ) 55 | 56 | 57 | def test_repository_operations( 58 | hugging_face_aggregation_repository: HuggingFaceAggregationRepository, 59 | aggregation_overview: AggregationOverview[DummyAggregatedEvaluation], 60 | ) -> None: 61 | hugging_face_aggregation_repository.store_aggregation_overview(aggregation_overview) 62 | overview = hugging_face_aggregation_repository.aggregation_overview( 63 | aggregation_overview.id, DummyAggregatedEvaluation 64 | ) 65 | 66 | assert ( 67 | aggregation_overview.id 68 | in hugging_face_aggregation_repository.aggregation_overview_ids() 69 | ) 70 | assert overview is not None 71 | -------------------------------------------------------------------------------- /tests/evaluation/dataset/test_dataset_domain.py: -------------------------------------------------------------------------------- 1 | from intelligence_layer.evaluation import Dataset 2 | 3 | 4 | def test_default_values_are_set() -> None: 5 | dataset = Dataset(name="Test") 6 | 7 | assert dataset.id is not None 8 | assert len(dataset.metadata) == 0 9 | assert len(dataset.labels) == 0 10 | 11 | 12 | def test_default_values_are_not_changed() -> None: 13 | modified_dataset = Dataset(name="Modified Dataset") 14 | modified_dataset.labels.add("test_label") 15 | modified_dataset.metadata.update({"key": "value"}) 16 | 17 | default_dataset = Dataset(name="Default Dataset") 18 | 19 | assert modified_dataset.labels != default_dataset.labels 20 | assert modified_dataset.metadata != default_dataset.metadata 21 | -------------------------------------------------------------------------------- /tests/evaluation/evaluation/conftest.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Aleph-Alpha/intelligence-layer-sdk/261ab36a28977d91e2d0c8ca16039edcbab81925/tests/evaluation/evaluation/conftest.py -------------------------------------------------------------------------------- /tests/evaluation/evaluation/test_file_evaluation_repository.py: -------------------------------------------------------------------------------- 1 | import contextlib 2 | 3 | from pydantic import BaseModel 4 | 5 | from intelligence_layer.evaluation.evaluation.file_evaluation_repository import ( 6 | FileEvaluationRepository, 7 | ) 8 | 9 | """Contains specific test for the FileEvaluationRepository. 
For more generic 10 | tests, check the test_evaluation_repository file.""" 11 | 12 | 13 | class DummyType(BaseModel): 14 | pass 15 | 16 | 17 | def test_evaluation_overview_ids_does_not_create_a_folder( 18 | file_evaluation_repository: FileEvaluationRepository, 19 | ) -> None: 20 | assert not file_evaluation_repository._eval_root_directory().exists() 21 | with contextlib.suppress(FileNotFoundError): 22 | file_evaluation_repository.evaluation_overview_ids() 23 | assert not file_evaluation_repository._eval_root_directory().exists() 24 | 25 | 26 | def test_evaluation_overview_does_not_create_a_folder( 27 | file_evaluation_repository: FileEvaluationRepository, 28 | ) -> None: 29 | assert not file_evaluation_repository._eval_root_directory().exists() 30 | assert not file_evaluation_repository._eval_directory("Non-existent").exists() 31 | 32 | file_evaluation_repository.evaluation_overview("Non-existent") 33 | assert not file_evaluation_repository._eval_root_directory().exists() 34 | 35 | 36 | def test_example_evaluations_does_not_create_a_folder( 37 | file_evaluation_repository: FileEvaluationRepository, 38 | ) -> None: 39 | assert not file_evaluation_repository._eval_root_directory().exists() 40 | assert not file_evaluation_repository._eval_directory("Non-existent").exists() 41 | 42 | with contextlib.suppress(ValueError): 43 | file_evaluation_repository.example_evaluations("Non-existent", DummyType) 44 | assert not file_evaluation_repository._eval_root_directory().exists() 45 | 46 | 47 | def test_example_evaluation_does_not_create_a_folder( 48 | file_evaluation_repository: FileEvaluationRepository, 49 | ) -> None: 50 | assert not file_evaluation_repository._eval_root_directory().exists() 51 | assert not file_evaluation_repository._eval_directory("Non-existent").exists() 52 | 53 | with contextlib.suppress(ValueError): 54 | file_evaluation_repository.example_evaluation( 55 | "Non-existent", "Non-existent", DummyType 56 | ) 57 | assert not file_evaluation_repository._eval_root_directory().exists() 58 | -------------------------------------------------------------------------------- /tests/evaluation/infrastructure/test_hugging_face_repository.py: -------------------------------------------------------------------------------- 1 | import huggingface_hub 2 | 3 | from intelligence_layer.evaluation.infrastructure.hugging_face_repository import ( 4 | HuggingFaceRepository, 5 | ) 6 | 7 | 8 | def test_hugging_face_repository_can_create_and_delete_a_repository( 9 | hugging_face_token: str, hugging_face_test_repository_id: str 10 | ) -> None: 11 | repository_id = hugging_face_test_repository_id + "unused-suffix" 12 | 13 | assert not huggingface_hub.repo_exists( 14 | repo_id=repository_id, 15 | token=hugging_face_token, 16 | repo_type="dataset", 17 | ), f"The repository with the ID {repository_id} already exists. Try to run the clean_hf script." 
18 | 19 | created_repository = HuggingFaceRepository( 20 | repository_id=repository_id, 21 | token=hugging_face_token, 22 | private=True, 23 | ) 24 | 25 | try: 26 | assert huggingface_hub.repo_exists( 27 | repo_id=repository_id, 28 | token=hugging_face_token, 29 | repo_type="dataset", 30 | ) 31 | created_repository.delete_repository() 32 | assert not huggingface_hub.repo_exists( 33 | repo_id=repository_id, 34 | token=hugging_face_token, 35 | repo_type="dataset", 36 | ) 37 | finally: 38 | huggingface_hub.delete_repo( 39 | repo_id=repository_id, 40 | token=hugging_face_token, 41 | repo_type="dataset", 42 | missing_ok=True, 43 | ) 44 | -------------------------------------------------------------------------------- /tests/evaluation/run/test_file_run_repository.py: -------------------------------------------------------------------------------- 1 | import contextlib 2 | 3 | import pytest 4 | from pydantic import BaseModel 5 | 6 | from intelligence_layer.evaluation.run.file_run_repository import FileRunRepository 7 | 8 | """Contains specific test for the FileRunRepository. For more generic 9 | tests, check the test_run_repository file.""" 10 | 11 | 12 | class DummyType(BaseModel): 13 | pass 14 | 15 | 16 | def test_run_overview_ids_does_not_create_a_folder( 17 | file_run_repository: FileRunRepository, 18 | ) -> None: 19 | assert not file_run_repository._run_root_directory().exists() 20 | with contextlib.suppress(FileNotFoundError): 21 | file_run_repository.run_overview_ids() 22 | assert not file_run_repository._run_root_directory().exists() 23 | 24 | 25 | def test_run_overview_does_not_create_a_folder( 26 | file_run_repository: FileRunRepository, 27 | ) -> None: 28 | assert not file_run_repository._run_root_directory().exists() 29 | assert not file_run_repository._run_directory("Non-existent").exists() 30 | 31 | file_run_repository.run_overview("Non-existent") 32 | assert not file_run_repository._run_root_directory().exists() 33 | 34 | 35 | @pytest.mark.filterwarnings("ignore::UserWarning") 36 | def test_example_runs_does_not_create_a_folder( 37 | file_run_repository: FileRunRepository, 38 | ) -> None: 39 | assert not file_run_repository._run_root_directory().exists() 40 | assert not file_run_repository._run_directory("Non-existent").exists() 41 | 42 | with contextlib.suppress(ValueError): 43 | file_run_repository.example_outputs("Non-existent", DummyType) 44 | assert not file_run_repository._run_root_directory().exists() 45 | 46 | 47 | @pytest.mark.filterwarnings("ignore::UserWarning") 48 | def test_example_run_does_not_create_a_folder( 49 | file_run_repository: FileRunRepository, 50 | ) -> None: 51 | assert not file_run_repository._run_root_directory().exists() 52 | assert not file_run_repository._run_directory("Non-existent").exists() 53 | 54 | with contextlib.suppress(ValueError): 55 | file_run_repository.example_output("Non-existent", "Non-existent", DummyType) 56 | assert not file_run_repository._run_root_directory().exists() 57 | -------------------------------------------------------------------------------- /tests/evaluation/run/test_run.py: -------------------------------------------------------------------------------- 1 | from collections.abc import Iterable, Sequence 2 | from pathlib import Path 3 | 4 | from dotenv import load_dotenv 5 | from pydantic import BaseModel 6 | from pytest import fixture 7 | 8 | from intelligence_layer.connectors import AlephAlphaClientProtocol 9 | from intelligence_layer.core import Task, TaskSpan 10 | from intelligence_layer.evaluation import ( 11 | 
AggregationLogic, 12 | EvaluationLogic, 13 | Example, 14 | FileAggregationRepository, 15 | FileDatasetRepository, 16 | SuccessfulExampleOutput, 17 | ) 18 | from intelligence_layer.evaluation.run_evaluation import main 19 | 20 | load_dotenv() 21 | 22 | 23 | @fixture 24 | def examples() -> Sequence[Example[None, None]]: 25 | return [Example(input=None, expected_output=None)] 26 | 27 | 28 | class DummyEvaluation(BaseModel): 29 | correct: bool 30 | 31 | 32 | class DummyAggregation(BaseModel): 33 | correct_rate: float 34 | 35 | 36 | class DummyTask(Task[None, None]): 37 | def __init__(self) -> None: 38 | pass 39 | 40 | def do_run(self, input: None, task_span: TaskSpan) -> None: 41 | return input 42 | 43 | 44 | class DummyTaskWithClient(DummyTask): 45 | def __init__(self, client: AlephAlphaClientProtocol) -> None: 46 | pass 47 | 48 | 49 | class DummyAggregationLogic(AggregationLogic[DummyEvaluation, DummyAggregation]): 50 | def aggregate(self, evaluations: Iterable[DummyEvaluation]) -> DummyAggregation: 51 | list(evaluations) 52 | return DummyAggregation(correct_rate=1.0) 53 | 54 | 55 | class DummyEvaluationLogic(EvaluationLogic[None, None, None, DummyEvaluation]): 56 | def do_evaluate( 57 | self, example: Example[None, None], *output: SuccessfulExampleOutput[None] 58 | ) -> DummyEvaluation: 59 | return DummyEvaluation(correct=True) 60 | 61 | 62 | def test_run_evaluation( 63 | tmp_path: Path, examples: Sequence[Example[None, None]] 64 | ) -> None: 65 | dataset_path = tmp_path / "dataset" 66 | dataset_repository = FileDatasetRepository(dataset_path) 67 | dataset_id = dataset_repository.create_dataset( 68 | examples=examples, dataset_name="test-dataset" 69 | ).id 70 | 71 | aggregation_path = tmp_path / "eval" 72 | aggregation_repository = FileAggregationRepository(aggregation_path) 73 | 74 | main( 75 | [ 76 | "", 77 | "--eval-logic", 78 | "tests.evaluation.run.test_run.DummyEvaluationLogic", 79 | "--aggregation-logic", 80 | "tests.evaluation.run.test_run.DummyAggregationLogic", 81 | "--task", 82 | "tests.evaluation.run.test_run.DummyTask", 83 | "--dataset-repository-path", 84 | str(dataset_path), 85 | "--dataset-id", 86 | dataset_id, 87 | "--target-dir", 88 | str(aggregation_path), 89 | "--description", 90 | "dummy-evaluator", 91 | ] 92 | ) 93 | ids = aggregation_repository.aggregation_overview_ids() 94 | assert len(ids) == 1 95 | overview = aggregation_repository.aggregation_overview(ids[0], DummyAggregation) 96 | assert overview 97 | assert overview.successful_evaluation_count == 1 98 | 99 | 100 | def test_run_evaluation_with_task_with_client( 101 | tmp_path: Path, examples: Sequence[Example[None, None]] 102 | ) -> None: 103 | dataset_path = tmp_path / "dataset" 104 | dataset_repository = FileDatasetRepository(dataset_path) 105 | dataset_id = dataset_repository.create_dataset( 106 | examples=examples, dataset_name="test-dataset" 107 | ).id 108 | 109 | eval_path = tmp_path / "eval" 110 | 111 | main( 112 | [ 113 | "", 114 | "--eval-logic", 115 | "tests.evaluation.run.test_run.DummyEvaluationLogic", 116 | "--aggregation-logic", 117 | "tests.evaluation.run.test_run.DummyAggregationLogic", 118 | "--task", 119 | "tests.evaluation.run.test_run.DummyTaskWithClient", 120 | "--dataset-repository-path", 121 | str(dataset_path), 122 | "--dataset-id", 123 | dataset_id, 124 | "--target-dir", 125 | str(eval_path), 126 | "--description", 127 | "dummy-evaluator", 128 | ] 129 | ) 130 | -------------------------------------------------------------------------------- 
/tests/examples/classify/test_keyword_extract.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | from intelligence_layer.core import NoOpTracer 4 | from intelligence_layer.core.chunk import TextChunk 5 | from intelligence_layer.core.detect_language import Language, LanguageNotSupportedError 6 | from intelligence_layer.examples.classify.keyword_extract import ( 7 | KeywordExtract, 8 | KeywordExtractInput, 9 | ) 10 | 11 | 12 | @pytest.fixture() 13 | def keyword_extract() -> KeywordExtract: 14 | return KeywordExtract() 15 | 16 | 17 | def test_keyword_extract_works(keyword_extract: KeywordExtract) -> None: 18 | input = KeywordExtractInput( 19 | chunk=TextChunk("I really like my computer"), language=Language("en") 20 | ) 21 | 22 | result = keyword_extract.run(input, NoOpTracer()) 23 | assert "computer" in [keyword.lower() for keyword in result.keywords] 24 | 25 | 26 | def test_keyword_extract_raises_for_unsupported_language( 27 | keyword_extract: KeywordExtract, 28 | ) -> None: 29 | input = KeywordExtractInput( 30 | chunk=TextChunk("text about computers"), language=Language("pt") 31 | ) 32 | with pytest.raises(LanguageNotSupportedError) as _: 33 | keyword_extract.run(input, NoOpTracer()) 34 | -------------------------------------------------------------------------------- /tests/examples/classify/test_prompt_based_classify_with_definitions.py: -------------------------------------------------------------------------------- 1 | from collections.abc import Sequence 2 | 3 | from pytest import fixture 4 | 5 | from intelligence_layer.core import Llama3InstructModel, NoOpTracer, TextChunk 6 | from intelligence_layer.examples import ( 7 | ClassifyInput, 8 | LabelWithDefinition, 9 | PromptBasedClassifyWithDefinitions, 10 | SingleLabelClassifyOutput, 11 | ) 12 | 13 | 14 | @fixture 15 | def labels_with_definitions() -> Sequence[LabelWithDefinition]: 16 | return [ 17 | LabelWithDefinition( 18 | name="Dinosaur", definition="Any text that is about dinosaurs." 19 | ), 20 | LabelWithDefinition(name="Plant", definition="Any text that is about plants."), 21 | LabelWithDefinition( 22 | name="Toy", definition="Everything that has something to do with toys." 
23 | ), 24 | ] 25 | 26 | 27 | @fixture 28 | def prompt_based_classify_with_definitions( 29 | llama_control_model: Llama3InstructModel, 30 | labels_with_definitions: Sequence[LabelWithDefinition], 31 | ) -> PromptBasedClassifyWithDefinitions: 32 | return PromptBasedClassifyWithDefinitions( 33 | labels_with_definitions, llama_control_model 34 | ) 35 | 36 | 37 | def test_prompt_based_classify_with_definitions_returns_score_for_all_labels( 38 | prompt_based_classify_with_definitions: PromptBasedClassifyWithDefinitions, 39 | labels_with_definitions: Sequence[LabelWithDefinition], 40 | ) -> None: 41 | classify_input = ClassifyInput( 42 | chunk=TextChunk("I love my cactus!"), 43 | labels=frozenset(label.name for label in labels_with_definitions), 44 | ) 45 | 46 | classify_output = prompt_based_classify_with_definitions.run( 47 | classify_input, NoOpTracer() 48 | ) 49 | 50 | # Output contains everything we expect 51 | assert isinstance(classify_output, SingleLabelClassifyOutput) 52 | assert classify_input.labels == set(r for r in classify_output.scores) 53 | -------------------------------------------------------------------------------- /tests/examples/qa/test_multiple_chunk_qa.py: -------------------------------------------------------------------------------- 1 | from collections.abc import Sequence 2 | 3 | from intelligence_layer.core import NoOpTracer 4 | from intelligence_layer.core.chunk import TextChunk 5 | from intelligence_layer.core.detect_language import Language 6 | from intelligence_layer.examples.qa.multiple_chunk_qa import ( 7 | MultipleChunkQa, 8 | MultipleChunkQaInput, 9 | ) 10 | 11 | CHUNK_CONTAINING_ANSWER = TextChunk( 12 | "Paul Nicolas lost his mother at the age of 3, and then his father in 1914.[3] He was raised by his mother-in-law together with his brother Henri. " 13 | "He began his football career with Saint-Mandé Club in 1916. Initially, he played as a defender, but he quickly realized that his destiny laid at the " 14 | "forefront since he scored many goals.[3] In addition to his goal-scoring instinct, Nicolas also stood out for his strong character on the pitch, " 15 | "and these two qualities combined eventually drew the attention of Mr. Fort, the then president of the Gallia Club, who signed him as a centre-forward in 1916." 16 | ) 17 | RELATED_CHUNK_WITHOUT_ANSWER = TextChunk( 18 | "In addition to his goal-scoring instinct, Nicolas also stood out for his strong character on the pitch, and these two qualities combined eventually drew the " 19 | "attention of Mr. Fort, the then president of the Gallia Club, who signed him as a centre-forward in 1916. " 20 | ) 21 | RELATED_QUESTION = "What is the name of Paul Nicolas' brother?" 22 | IMPORTANT_PART_OF_CORRECT_ANSWER = "Henri" 23 | UNRELATED_QUESTION = "What is the capital of Germany?"
24 | 25 | 26 | def test_multiple_chunk_qa_with_multiple_chunks( 27 | multiple_chunk_qa: MultipleChunkQa, 28 | ) -> None: 29 | chunks: Sequence[TextChunk] = [ 30 | CHUNK_CONTAINING_ANSWER, 31 | RELATED_CHUNK_WITHOUT_ANSWER, 32 | ] 33 | 34 | input = MultipleChunkQaInput( 35 | chunks=chunks, question=RELATED_QUESTION, generate_highlights=True 36 | ) 37 | output = multiple_chunk_qa.run(input, NoOpTracer()) 38 | 39 | assert output.answer 40 | assert IMPORTANT_PART_OF_CORRECT_ANSWER in output.answer 41 | assert len(output.subanswers) == 1 42 | assert output.subanswers[0].chunk == chunks[0] 43 | assert any( 44 | IMPORTANT_PART_OF_CORRECT_ANSWER 45 | in CHUNK_CONTAINING_ANSWER[highlight.start : highlight.end] 46 | for highlight in output.subanswers[0].highlights 47 | ) 48 | 49 | 50 | def test_multiple_chunk_qa_with_multiple_chunks_explainability_disabled( 51 | multiple_chunk_qa: MultipleChunkQa, 52 | ) -> None: 53 | chunks: Sequence[TextChunk] = [ 54 | CHUNK_CONTAINING_ANSWER, 55 | RELATED_CHUNK_WITHOUT_ANSWER, 56 | ] 57 | 58 | input = MultipleChunkQaInput( 59 | chunks=chunks, question=RELATED_QUESTION, generate_highlights=False 60 | ) 61 | output = multiple_chunk_qa.run(input, NoOpTracer()) 62 | 63 | assert output.answer 64 | assert IMPORTANT_PART_OF_CORRECT_ANSWER in output.answer 65 | assert len(output.subanswers) == 1 66 | assert output.subanswers[0].chunk == chunks[0] 67 | assert all(not subanswer.highlights for subanswer in output.subanswers) 68 | 69 | 70 | def test_multiple_chunk_qa_without_answer(multiple_chunk_qa: MultipleChunkQa) -> None: 71 | chunks: Sequence[TextChunk] = [CHUNK_CONTAINING_ANSWER] 72 | 73 | input = MultipleChunkQaInput(chunks=chunks, question=UNRELATED_QUESTION) 74 | output = multiple_chunk_qa.run(input, NoOpTracer()) 75 | 76 | assert output.answer is None 77 | 78 | 79 | def test_multiple_chunk_qa_with_spanish_question( 80 | multiple_chunk_qa: MultipleChunkQa, 81 | ) -> None: 82 | question = "¿Cómo se llama el hermano de Paul Nicolas?"
83 | chunks = [CHUNK_CONTAINING_ANSWER, CHUNK_CONTAINING_ANSWER] 84 | 85 | input = MultipleChunkQaInput( 86 | chunks=chunks, question=question, language=Language("es") 87 | ) 88 | output = multiple_chunk_qa.run(input, NoOpTracer()) 89 | 90 | assert len(output.subanswers) == len(chunks) 91 | assert output.answer 92 | assert "hermano" in output.answer 93 | -------------------------------------------------------------------------------- /tests/examples/qa/test_multiple_chunk_retriever_qa.py: -------------------------------------------------------------------------------- 1 | from pytest import fixture 2 | 3 | from intelligence_layer.connectors import QdrantInMemoryRetriever 4 | from intelligence_layer.core import LuminousControlModel, NoOpTracer 5 | from intelligence_layer.core.tracer.in_memory_tracer import InMemoryTracer 6 | from intelligence_layer.examples import ( 7 | ExpandChunks, 8 | MultipleChunkRetrieverQa, 9 | RetrieverBasedQaInput, 10 | ) 11 | 12 | 13 | @fixture 14 | def multiple_chunk_retriever_qa( 15 | luminous_control_model: LuminousControlModel, 16 | asymmetric_in_memory_retriever: QdrantInMemoryRetriever, 17 | ) -> MultipleChunkRetrieverQa[int]: 18 | return MultipleChunkRetrieverQa( 19 | retriever=asymmetric_in_memory_retriever, 20 | model=luminous_control_model, 21 | expand_chunks=ExpandChunks( 22 | asymmetric_in_memory_retriever, luminous_control_model, 256 23 | ), 24 | ) 25 | 26 | 27 | def test_multiple_chunk_retriever_qa_using_in_memory_retriever( 28 | multiple_chunk_retriever_qa: MultipleChunkRetrieverQa[int], 29 | no_op_tracer: NoOpTracer, 30 | ) -> None: 31 | question = "When was Robert Moses born?" 32 | input = RetrieverBasedQaInput(question=question) 33 | tracer = InMemoryTracer() 34 | output = multiple_chunk_retriever_qa.run(input, tracer) 35 | assert output.answer 36 | assert "1888" in output.answer 37 | assert len(output.sources) == 5 38 | -------------------------------------------------------------------------------- /tests/examples/qa/test_retriever_based_qa.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | from pytest import fixture 3 | 4 | from intelligence_layer.connectors.document_index.document_index import DocumentPath 5 | from intelligence_layer.connectors.retrievers.document_index_retriever import ( 6 | DocumentIndexRetriever, 7 | ) 8 | from intelligence_layer.connectors.retrievers.qdrant_in_memory_retriever import ( 9 | QdrantInMemoryRetriever, 10 | ) 11 | from intelligence_layer.core import NoOpTracer 12 | from intelligence_layer.examples import ( 13 | MultipleChunkQa, 14 | RetrieverBasedQa, 15 | RetrieverBasedQaInput, 16 | ) 17 | 18 | 19 | @fixture 20 | def retriever_based_qa_with_in_memory_retriever( 21 | multiple_chunk_qa: MultipleChunkQa, 22 | asymmetric_in_memory_retriever: QdrantInMemoryRetriever, 23 | ) -> RetrieverBasedQa[int]: 24 | return RetrieverBasedQa( 25 | retriever=asymmetric_in_memory_retriever, multi_chunk_qa=multiple_chunk_qa 26 | ) 27 | 28 | 29 | @fixture 30 | def retriever_based_qa_with_document_index( 31 | multiple_chunk_qa: MultipleChunkQa, document_index_retriever: DocumentIndexRetriever 32 | ) -> RetrieverBasedQa[DocumentPath]: 33 | return RetrieverBasedQa( 34 | retriever=document_index_retriever, multi_chunk_qa=multiple_chunk_qa 35 | ) 36 | 37 | 38 | @pytest.mark.filterwarnings("ignore::DeprecationWarning") 39 | def test_retriever_based_qa_using_in_memory_retriever( 40 | retriever_based_qa_with_in_memory_retriever: RetrieverBasedQa[int], 41 | no_op_tracer: NoOpTracer, 42 | 
) -> None: 43 | question = "When was Robert Moses born?" 44 | input = RetrieverBasedQaInput(question=question) 45 | output = retriever_based_qa_with_in_memory_retriever.run(input, no_op_tracer) 46 | assert output.answer 47 | assert "1888" in output.answer 48 | assert output.subanswers[0].id == 3 49 | -------------------------------------------------------------------------------- /tests/examples/summarize/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Aleph-Alpha/intelligence-layer-sdk/261ab36a28977d91e2d0c8ca16039edcbab81925/tests/examples/summarize/__init__.py -------------------------------------------------------------------------------- /tests/examples/summarize/test_recursive_summarize.py: -------------------------------------------------------------------------------- 1 | from pathlib import Path 2 | 3 | from aleph_alpha_client import Client, CompletionRequest, CompletionResponse 4 | from pytest import fixture 5 | 6 | from intelligence_layer.core import Chunk, Llama3InstructModel, NoOpTracer 7 | from intelligence_layer.examples import RecursiveSummarize 8 | from intelligence_layer.examples.summarize.recursive_summarize import ( 9 | RecursiveSummarizeInput, 10 | ) 11 | from intelligence_layer.examples.summarize.steerable_long_context_summarize import ( 12 | SteerableLongContextSummarize, 13 | ) 14 | from intelligence_layer.examples.summarize.steerable_single_chunk_summarize import ( 15 | SteerableSingleChunkSummarize, 16 | ) 17 | 18 | 19 | class RecursiveCountingClient(Client): 20 | recursive_counter: int = 0 21 | 22 | def complete(self, request: CompletionRequest, model: str) -> CompletionResponse: 23 | self.recursive_counter += 1 24 | return super().complete(request, model) 25 | 26 | 27 | short_text = """The brown bear (Ursus arctos) is a large bear species found across Eurasia and North America.[1][3] In North America, the populations of brown bears are called grizzly bears, while the subspecies that inhabits the Kodiak Islands of Alaska is known as the Kodiak bear. 
It is one of the largest living terrestrial members of the order Carnivora, rivaled in size only by its closest relative, the polar bear (Ursus maritimus), which is much less variable in size and slightly bigger on average.[4][5][6][7][8] The brown bear's range includes parts of Russia, Central Asia, the Himalayas, China, Canada, the United States, Hokkaido, Scandinavia, Finland, the Balkans, the Picos de Europa and the Carpathian region (especially Romania), Iran, Anatolia, and the Caucasus.[1][9] The brown bear is recognized as a national and state animal in several European countries.[10]""" 28 | 29 | 30 | @fixture 31 | def recursive_counting_client( 32 | token: str, inference_url: str 33 | ) -> RecursiveCountingClient: 34 | return RecursiveCountingClient(token, host=inference_url) 35 | 36 | 37 | @fixture 38 | def very_long_text() -> str: 39 | with (Path(__file__).parent / "very_long_text.txt").open( 40 | mode="r", encoding="utf-8" 41 | ) as file: 42 | return file.read() 43 | 44 | 45 | def test_recursive_summarize_stops_when_hitting_max_tokens( 46 | very_long_text: str, 47 | steerable_long_context_summarize: SteerableLongContextSummarize, 48 | ) -> None: 49 | max_tokens = 1000 50 | input = RecursiveSummarizeInput(text=very_long_text, max_tokens=max_tokens) 51 | task = RecursiveSummarize(steerable_long_context_summarize) 52 | output = task.run(input, NoOpTracer()) 53 | 54 | assert len(output.summary) < len(very_long_text) 55 | assert output.generated_tokens < max_tokens 56 | assert "new orleans" in output.summary.lower() 57 | 58 | 59 | def test_recursive_summarize_stops_when_num_partial_summaries_stays_same( 60 | steerable_long_context_summarize: SteerableLongContextSummarize, 61 | ) -> None: 62 | max_tokens = 2048 63 | input = RecursiveSummarizeInput(text=short_text, max_tokens=max_tokens) 64 | task = RecursiveSummarize(steerable_long_context_summarize) 65 | output = task.run(input, NoOpTracer()) 66 | 67 | assert output.generated_tokens > 50 68 | 69 | 70 | def test_recursive_summarize_stops_when_num_partial_summaries_stays_same_with_empty_text( 71 | steerable_long_context_summarize: SteerableLongContextSummarize, 72 | ) -> None: 73 | max_tokens = 2048 74 | input = RecursiveSummarizeInput(text="", max_tokens=max_tokens) 75 | task = RecursiveSummarize(steerable_long_context_summarize) 76 | output = task.run(input, NoOpTracer()) 77 | 78 | assert output.generated_tokens == 0 79 | 80 | 81 | def test_recursive_summarize_stops_after_one_chunk( 82 | recursive_counting_client: RecursiveCountingClient, 83 | ) -> None: 84 | model = Llama3InstructModel( 85 | name="llama-3.1-8b-instruct", client=recursive_counting_client 86 | ) 87 | 88 | long_context_high_compression_summarize = SteerableLongContextSummarize( 89 | summarize=SteerableSingleChunkSummarize(model, max_generated_tokens=100), 90 | chunk=Chunk(model, max_tokens_per_chunk=1500), 91 | ) 92 | input = RecursiveSummarizeInput(text=short_text) 93 | task = RecursiveSummarize(long_context_high_compression_summarize) 94 | task.run(input, NoOpTracer()) 95 | 96 | assert recursive_counting_client.recursive_counter == 1 97 | -------------------------------------------------------------------------------- /tests/examples/summarize/test_steerable_long_context_summarize.py: -------------------------------------------------------------------------------- 1 | from intelligence_layer.core import Chunk, Language, LuminousControlModel, NoOpTracer 2 | from intelligence_layer.examples import ( 3 | LongContextSummarizeInput, 4 | SteerableLongContextSummarize, 5 | 
) 6 | from intelligence_layer.examples.summarize.steerable_single_chunk_summarize import ( 7 | SteerableSingleChunkSummarize, 8 | ) 9 | 10 | 11 | def test_steerable_long_context_summarize_en( 12 | steerable_long_context_summarize: SteerableLongContextSummarize, 13 | long_text: str, 14 | ) -> None: 15 | input = LongContextSummarizeInput(text=long_text) 16 | output = steerable_long_context_summarize.run(input, NoOpTracer()) 17 | 18 | assert output.partial_summaries 19 | assert any( 20 | "bear" in partial_summary.summary 21 | for partial_summary in output.partial_summaries 22 | ) 23 | assert len( 24 | " ".join( 25 | partial_summary.summary for partial_summary in output.partial_summaries 26 | ) 27 | ) < len(long_text) 28 | 29 | 30 | def test_steerable_long_context_summarize_adapts_to_instruction( 31 | luminous_control_model: LuminousControlModel, 32 | long_text: str, 33 | ) -> None: 34 | input = LongContextSummarizeInput(text=long_text) 35 | steerable_long_context_summarize_keyword = SteerableLongContextSummarize( 36 | summarize=SteerableSingleChunkSummarize( 37 | luminous_control_model, 38 | max_generated_tokens=128, 39 | instruction_configs={Language("en"): "Summarize using bullet points."}, 40 | ), 41 | chunk=Chunk(luminous_control_model, max_tokens_per_chunk=512), 42 | ) 43 | 44 | output = steerable_long_context_summarize_keyword.run(input, NoOpTracer()) 45 | 46 | assert output.partial_summaries 47 | assert any( 48 | "bear" in partial_summary.summary 49 | for partial_summary in output.partial_summaries 50 | ) 51 | assert all( 52 | partial_summary.summary.startswith("- ") 53 | for partial_summary in output.partial_summaries 54 | ) 55 | assert len( 56 | " ".join( 57 | partial_summary.summary for partial_summary in output.partial_summaries 58 | ) 59 | ) < len(long_text) 60 | -------------------------------------------------------------------------------- /tests/image_example.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Aleph-Alpha/intelligence-layer-sdk/261ab36a28977d91e2d0c8ca16039edcbab81925/tests/image_example.jpg -------------------------------------------------------------------------------- /tests/learning/conftest.py: -------------------------------------------------------------------------------- 1 | import os 2 | from pathlib import Path 3 | 4 | from pytest import fixture 5 | 6 | from intelligence_layer.core import DetectLanguage, Language, NoOpTracer 7 | from intelligence_layer.core.model import Message 8 | from intelligence_layer.learning import ( 9 | EnrichDomain, 10 | EnrichQuality, 11 | FileInstructionFinetuningDataRepository, 12 | InstructionFinetuningDataHandler, 13 | InstructionFinetuningSample, 14 | InstructionFinetuningSampleAttributes, 15 | PostgresInstructionFinetuningDataRepository, 16 | RawInstructionFinetuningSample, 17 | ) 18 | 19 | 20 | @fixture 21 | def raw_instruction_finetuning_sample() -> RawInstructionFinetuningSample: 22 | return RawInstructionFinetuningSample( 23 | messages=[ 24 | Message(role="user", content="Hi."), 25 | Message(role="assistant", content="Hello, how can I help you?"), 26 | ], 27 | attributes=InstructionFinetuningSampleAttributes( 28 | source="example", domain="general", quality=5, languages=[Language("en")] 29 | ), 30 | external_id="example_1", 31 | ) 32 | 33 | 34 | @fixture(scope="function") 35 | def instruction_finetuning_sample( 36 | raw_instruction_finetuning_sample: RawInstructionFinetuningSample, 37 | ) -> InstructionFinetuningSample: 38 | return 
InstructionFinetuningSample.from_raw_sample( 39 | raw_instruction_finetuning_sample 40 | ) 41 | 42 | 43 | @fixture 44 | def postgres_instruction_finetuning_data_repository() -> ( 45 | PostgresInstructionFinetuningDataRepository 46 | ): 47 | db_user = os.getenv("POSTGRES_USER") 48 | db_pw = os.getenv("POSTGRES_PASSWORD") 49 | db_host = os.getenv("POSTGRES_HOST") 50 | db_port = os.getenv("POSTGRES_PORT") 51 | 52 | db_name = os.getenv("POSTGRES_DB") 53 | db_url = f"postgresql://{db_user}:{db_pw}@{db_host}:{db_port}/{db_name}" 54 | 55 | return PostgresInstructionFinetuningDataRepository(db_url) 56 | 57 | 58 | @fixture 59 | def file_instruction_finetuning_data_repository( 60 | tmp_path: Path, 61 | ) -> FileInstructionFinetuningDataRepository: 62 | return FileInstructionFinetuningDataRepository(tmp_path) 63 | 64 | 65 | @fixture(scope="function") 66 | def instruction_finetuning_data_handler( 67 | postgres_instruction_finetuning_data_repository: PostgresInstructionFinetuningDataRepository, 68 | ) -> InstructionFinetuningDataHandler: 69 | return InstructionFinetuningDataHandler( 70 | postgres_instruction_finetuning_data_repository, 71 | EnrichDomain(["smalltalk", "weather", "gossip"]), 72 | EnrichQuality(), 73 | DetectLanguage(), 74 | [Language("de"), Language("en")], 75 | Language("en"), 76 | NoOpTracer(), 77 | ) 78 | -------------------------------------------------------------------------------- /tests/learning/test_postgres_instruction_finetuning_data_repository.py: -------------------------------------------------------------------------------- 1 | from intelligence_layer.learning import ( 2 | InstructionFinetuningSample, 3 | PostgresInstructionFinetuningDataRepository, 4 | RawInstructionFinetuningSample, 5 | ) 6 | 7 | 8 | def test_postgres_instruction_finetuning_data_repository_can_store_load_and_delete_sample( 9 | postgres_instruction_finetuning_data_repository: PostgresInstructionFinetuningDataRepository, 10 | instruction_finetuning_sample: InstructionFinetuningSample, 11 | ) -> None: 12 | postgres_instruction_finetuning_data_repository.store_sample( 13 | instruction_finetuning_sample 14 | ) 15 | loaded_sample = postgres_instruction_finetuning_data_repository.sample( 16 | instruction_finetuning_sample.id 17 | ) 18 | 19 | assert instruction_finetuning_sample == loaded_sample 20 | 21 | postgres_instruction_finetuning_data_repository.delete_sample( 22 | instruction_finetuning_sample.id 23 | ) 24 | no_sample_expected = postgres_instruction_finetuning_data_repository.sample( 25 | instruction_finetuning_sample.id 26 | ) 27 | 28 | assert no_sample_expected is None 29 | 30 | 31 | def test_postgres_instruction_finetuning_data_repository_can_store_load_and_delete_samples( 32 | postgres_instruction_finetuning_data_repository: PostgresInstructionFinetuningDataRepository, 33 | raw_instruction_finetuning_sample: RawInstructionFinetuningSample, 34 | ) -> None: 35 | samples = [ 36 | InstructionFinetuningSample.from_raw_sample(raw_instruction_finetuning_sample) 37 | for _ in range(10) 38 | ] 39 | ids = [sample.id for sample in samples] 40 | id_iter = (id for id in ids) 41 | 42 | postgres_instruction_finetuning_data_repository.store_samples(samples) 43 | loaded_samples = postgres_instruction_finetuning_data_repository.samples(id_iter) 44 | 45 | assert set(ids) == set(loaded_sample.id for loaded_sample in loaded_samples) 46 | 47 | postgres_instruction_finetuning_data_repository.delete_samples(ids) 48 | no_samples_expected = postgres_instruction_finetuning_data_repository.samples(ids) 49 | 50 | assert 
list(no_samples_expected) == [] 51 | 52 | 53 | def test_postgres_instruction_finetuning_data_repository_can_show_first_n_samples( 54 | postgres_instruction_finetuning_data_repository: PostgresInstructionFinetuningDataRepository, 55 | raw_instruction_finetuning_sample: RawInstructionFinetuningSample, 56 | ) -> None: 57 | n = 10 58 | samples = [ 59 | InstructionFinetuningSample.from_raw_sample(raw_instruction_finetuning_sample) 60 | for _ in range(n) 61 | ] 62 | 63 | postgres_instruction_finetuning_data_repository.store_samples(samples) 64 | head = list(postgres_instruction_finetuning_data_repository.head(n // 2)) 65 | 66 | assert len(head) == n // 2 67 | --------------------------------------------------------------------------------