├── .env.example ├── .github ├── ISSUE_TEMPLATE │ ├── bug_report.md │ └── feature_request.md ├── composites │ ├── get_jfrog_access_token.sh │ ├── get_jfrog_access_token_subject.sh │ └── python-setup │ │ └── action.yml ├── dependabot.yml ├── pull_request_template.md └── workflows │ ├── artifactory.yml │ ├── document-index-execution.yml │ ├── document-index-tests.yml │ ├── sdk-tests.yml │ └── test-execution.yml ├── .gitignore ├── .pre-commit-config.yaml ├── .readthedocs.yaml ├── CHANGELOG.md ├── Concepts.md ├── LICENSE.md ├── README.md ├── RELEASE.md ├── assets ├── AbsoluteEvaluation.drawio.svg ├── RecursiveSummary.drawio.svg ├── RelativeEvaluation.drawio.svg ├── TraceViewer.png ├── Tracer.drawio.svg ├── Tracing.drawio.svg ├── argilla_interface.png ├── argilla_splits.png └── fork.png ├── docker-compose.yaml ├── docs ├── Makefile ├── conf.py ├── index.rst ├── intelligence_layer.connectors.rst ├── intelligence_layer.core.rst ├── intelligence_layer.evaluation.rst ├── intelligence_layer.examples.rst └── make.bat ├── mypy.ini ├── poetry.lock ├── pyproject.toml ├── scripts ├── all.sh ├── clean_hf.py ├── doctest.sh ├── fastapi_example_test.sh ├── lint.sh ├── notebook_runner.sh ├── notebook_runner_document_index.sh └── test.sh ├── src ├── documentation │ ├── attention_manipulation_with_text_controls.ipynb │ ├── classification.ipynb │ ├── data │ │ ├── classify_examples.json │ │ └── classify_examples_multilabel.json │ ├── document_index.ipynb │ ├── elo_qa_eval.ipynb │ ├── evaluate_with_studio.ipynb │ ├── evaluation.ipynb │ ├── fastapi_example.py │ ├── fastapi_tutorial.ipynb │ ├── how_tos │ │ ├── __init__.py │ │ ├── example_data.py │ │ ├── how_to_aggregate_evaluations.ipynb │ │ ├── how_to_create_a_dataset.ipynb │ │ ├── how_to_define_a_task.ipynb │ │ ├── how_to_evaluate_runs.ipynb │ │ ├── how_to_human_evaluation_via_argilla.ipynb │ │ ├── how_to_implement_a_simple_evaluation_and_aggregation_logic.ipynb │ │ ├── how_to_implement_a_task.ipynb │ │ ├── how_to_implement_elo_evaluations.ipynb │ │ ├── how_to_implement_incremental_evaluation.ipynb │ │ ├── how_to_log_and_debug_a_task.ipynb │ │ ├── how_to_resume_a_run_after_a_crash.ipynb │ │ ├── how_to_retrieve_data_for_analysis.ipynb │ │ ├── how_to_run_a_task_on_a_dataset.ipynb │ │ └── studio │ │ │ ├── how_to_execute_a_benchmark.ipynb │ │ │ ├── how_to_upload_existing_datasets_to_studio.ipynb │ │ │ └── how_to_use_studio_with_traces.ipynb │ ├── human_evaluation.ipynb │ ├── parameter_optimization.ipynb │ ├── performance_tips.ipynb │ ├── qa.ipynb │ ├── quickstart_task.ipynb │ ├── summarization.ipynb │ └── task_dependencies.drawio.svg └── intelligence_layer │ ├── __init__.py │ ├── connectors │ ├── __init__.py │ ├── argilla │ │ ├── argilla_client.py │ │ └── argilla_wrapper_client.py │ ├── base │ │ └── json_serializable.py │ ├── data │ │ ├── __init__.py │ │ ├── data.py │ │ ├── exceptions.py │ │ └── models.py │ ├── document_index │ │ └── document_index.py │ ├── kernel │ │ └── kernel.py │ ├── limited_concurrency_client.py │ ├── retrievers │ │ ├── __init__.py │ │ ├── base_retriever.py │ │ ├── document_index_retriever.py │ │ ├── hybrid_qdrant_in_memory_retriever.py │ │ └── qdrant_in_memory_retriever.py │ └── studio │ │ └── studio.py │ ├── core │ ├── __init__.py │ ├── chunk.py │ ├── detect_language.py │ ├── echo.py │ ├── instruct.py │ ├── model.py │ ├── prompt_template.py │ ├── task.py │ ├── text_highlight.py │ └── tracer │ │ ├── __init__.py │ │ ├── composite_tracer.py │ │ ├── file_tracer.py │ │ ├── in_memory_tracer.py │ │ ├── open_telemetry_tracer.py │ │ ├── 
persistent_tracer.py │ │ └── tracer.py │ ├── evaluation │ ├── __init__.py │ ├── aggregation │ │ ├── accumulator.py │ │ ├── aggregation_repository.py │ │ ├── aggregator.py │ │ ├── domain.py │ │ ├── elo_aggregation.py │ │ ├── file_aggregation_repository.py │ │ ├── hugging_face_aggregation_repository.py │ │ └── in_memory_aggregation_repository.py │ ├── benchmark │ │ ├── benchmark.py │ │ ├── get_code.py │ │ ├── studio_benchmark.py │ │ └── trace_information.py │ ├── dataset │ │ ├── dataset_repository.py │ │ ├── domain.py │ │ ├── file_dataset_repository.py │ │ ├── hugging_face_dataset_repository.py │ │ ├── in_memory_dataset_repository.py │ │ ├── single_huggingface_dataset_repository.py │ │ └── studio_dataset_repository.py │ ├── evaluation │ │ ├── __init__.py │ │ ├── domain.py │ │ ├── evaluation_repository.py │ │ ├── evaluator │ │ │ ├── __init__.py │ │ │ ├── argilla_evaluator.py │ │ │ ├── async_evaluator.py │ │ │ ├── base_evaluator.py │ │ │ ├── evaluator.py │ │ │ └── incremental_evaluator.py │ │ ├── file_evaluation_repository.py │ │ ├── graders.py │ │ └── in_memory_evaluation_repository.py │ ├── infrastructure │ │ ├── file_system_based_repository.py │ │ ├── hugging_face_repository.py │ │ └── repository_navigator.py │ ├── run │ │ ├── domain.py │ │ ├── file_run_repository.py │ │ ├── in_memory_run_repository.py │ │ ├── run_repository.py │ │ └── runner.py │ └── run_evaluation.py │ ├── examples │ ├── __init__.py │ ├── classify │ │ ├── __init__.py │ │ ├── classify.py │ │ ├── embedding_based_classify.py │ │ ├── keyword_extract.py │ │ ├── prompt_based_classify.py │ │ └── prompt_based_classify_with_definitions.py │ ├── qa │ │ ├── __init__.py │ │ ├── elo_qa_evaluation_logic.py │ │ ├── long_context_qa.py │ │ ├── multiple_chunk_qa.py │ │ ├── multiple_chunk_retriever_qa.py │ │ ├── retriever_based_qa.py │ │ └── single_chunk_qa.py │ ├── search │ │ ├── __init__.py │ │ ├── expand_chunks.py │ │ └── search.py │ └── summarize │ │ ├── __init__.py │ │ ├── recursive_summarize.py │ │ ├── steerable_long_context_summarize.py │ │ ├── steerable_single_chunk_summarize.py │ │ └── summarize.py │ ├── learning │ ├── __init__.py │ ├── enrich.py │ ├── file_instruction_finetuning_data_repository.py │ ├── instruction_finetuning_data_handler.py │ ├── instruction_finetuning_data_repository.py │ ├── models.py │ └── postgres_instruction_finetuning_data_repository.py │ └── py.typed ├── style_guide.md └── tests ├── __init__.py ├── conftest.py ├── conftest_document_index.py ├── connectors ├── argilla │ └── test_argilla_wrapper_client.py ├── data │ └── test_data.py ├── document_index │ ├── test_async_document_index.py │ └── test_document_index.py ├── kernel │ └── test_kernel.py ├── retrievers │ ├── test_document_index_retriever.py │ ├── test_hybrid_qdrant_in_memory_retriever.py │ └── test_qdrant_in_memory_retriever.py ├── studio │ ├── conftest.py │ ├── test_studio.py │ ├── test_studio_benchmark.py │ └── test_studio_dataset.py └── test_limited_concurrency_client.py ├── core ├── __init__.py ├── test_chunk.py ├── test_detect_language.py ├── test_echo.py ├── test_model.py ├── test_prompt_template.py ├── test_task.py ├── test_text_highlight.py └── tracer │ ├── conftest.py │ ├── fixtures │ └── old_file_trace_format.jsonl │ ├── test_composite_tracer.py │ ├── test_file_tracer.py │ ├── test_in_memory_tracer.py │ ├── test_open_telemetry_tracer.py │ └── test_tracer.py ├── dog-and-cat-cover.jpg ├── evaluation ├── __init__.py ├── aggregation │ ├── conftest.py │ ├── test_accumulator.py │ ├── test_aggregation_repository.py │ ├── test_domain.py │ ├── 
test_elo_calculator.py │ └── test_hugging_face_aggregation_repository.py ├── benchmark │ ├── test_benchmark.py │ └── test_trace_information.py ├── conftest.py ├── dataset │ ├── test_dataset_domain.py │ ├── test_dataset_repository.py │ ├── test_hugging_face_dataset_repository.py │ ├── test_single_huggingface_dataset_repository.py │ └── test_studio_data_repository.py ├── evaluation │ ├── conftest.py │ ├── test_argilla_evaluator.py │ ├── test_async_evaluation_repository.py │ ├── test_elo_evaluation_logic.py │ ├── test_evaluation_repository.py │ ├── test_evaluator_and_aggregator.py │ ├── test_file_evaluation_repository.py │ ├── test_graders.py │ ├── test_incremental_evaluator.py │ └── test_instruct_comparison_argilla_evaluator.py ├── infrastructure │ ├── test_hugging_face_repository.py │ └── test_repository_navigator.py └── run │ ├── test_file_run_repository.py │ ├── test_run.py │ ├── test_run_repository.py │ └── test_runner.py ├── examples ├── classify │ ├── test_classify.py │ ├── test_embedding_based_classify.py │ ├── test_keyword_extract.py │ ├── test_prompt_based_classify.py │ └── test_prompt_based_classify_with_definitions.py ├── qa │ ├── conftest.py │ ├── test_long_context_qa.py │ ├── test_multiple_chunk_qa.py │ ├── test_multiple_chunk_retriever_qa.py │ ├── test_retriever_based_qa.py │ └── test_single_chunk_qa.py ├── search │ ├── test_expand_chunk.py │ └── test_search.py └── summarize │ ├── __init__.py │ ├── conftest.py │ ├── test_recursive_summarize.py │ ├── test_steerable_long_context_summarize.py │ ├── test_summarize.py │ └── very_long_text.txt ├── image_example.jpg └── learning ├── conftest.py ├── test_file_instruction_finetuning_data_repository.py ├── test_instruction_finetuning_data_handler.py └── test_postgres_instruction_finetuning_data_repository.py /.env.example: -------------------------------------------------------------------------------- 1 | ARGILLA_API_URL="http://localhost:6900/" 2 | ARGILLA_API_KEY="argilla.apikey" 3 | 4 | # Your URL for your Studio deployment 5 | STUDIO_URL="https://pharia-studio.*.com" 6 | 7 | # DB Variables 8 | POSTGRES_HOST=localhost 9 | POSTGRES_PORT=5434 10 | POSTGRES_DB=il_sdk 11 | POSTGRES_USER=il_sdk 12 | POSTGRES_PASSWORD=test 13 | 14 | # ---- Things to adapt ---- 15 | CLIENT_URL=... 16 | AA_TOKEN=token 17 | DOCUMENT_INDEX_URL=... 18 | 19 | # needed for hugging face integration 20 | HUGGING_FACE_TOKEN=token 21 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/bug_report.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Bug report 3 | about: Create a report to help us improve 4 | title: '' 5 | labels: '' 6 | assignees: '' 7 | 8 | --- 9 | 10 | 11 | 12 | **Describe the bug** 13 | A clear and concise description of what the bug is. 14 | 15 | **To Reproduce** 16 | Steps to reproduce the behavior: 17 | 1. Go to '...' 18 | 2. Click on '....' 19 | 3. Scroll down to '....' 20 | 4. See error 21 | 22 | **Expected behavior** 23 | A clear and concise description of what you expected to happen. 24 | 25 | **Screenshots** 26 | If applicable, add screenshots to help explain your problem. 27 | 28 | **System (please complete the following information):** 29 | - OS: [e.g. Mac] 30 | - Version [e.g. 10.14] 31 | - Intelligence Layer Version [e.g. 0.1.0] 32 | 33 | **Additional context** 34 | Add any other context about the problem here. 
35 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/feature_request.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Feature request 3 | about: Suggest an idea for this project 4 | title: '' 5 | labels: '' 6 | assignees: '' 7 | 8 | --- 9 | 10 | 11 | 12 | **Is your feature request related to a problem? Please describe.** 13 | A clear and concise description of what the problem is. Ex. I'm always frustrated when [...] 14 | 15 | **Describe the solution you'd like** 16 | A clear and concise description of what you want to happen. 17 | 18 | **Describe alternatives you've considered** 19 | A clear and concise description of any alternative solutions or features you've considered. 20 | 21 | **Additional context** 22 | Add any other context or screenshots about the feature request here. 23 | -------------------------------------------------------------------------------- /.github/composites/get_jfrog_access_token.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | set -euo pipefail 4 | 5 | ID_TOKEN=$(curl -sLS -H "User-Agent: actions/oidc-client" -H "Authorization: Bearer $ACTIONS_ID_TOKEN_REQUEST_TOKEN" \ 6 | "${ACTIONS_ID_TOKEN_REQUEST_URL}&audience=https://alephalpha.jfrog.io" | jq .value | tr -d '"') 7 | 8 | JFROG_ACCESS_TOKEN=$(curl -v \ 9 | -X POST \ 10 | -H "Content-type: application/json" \ 11 | https://alephalpha.jfrog.io/access/api/v1/oidc/token \ 12 | -d \ 13 | "{\"grant_type\": \"urn:ietf:params:oauth:grant-type:token-exchange\", \"subject_token_type\":\"urn:ietf:params:oauth:token-type:id_token\", \"subject_token\": \"$ID_TOKEN\", \"provider_name\": \"github\"}" | jq .access_token -r) 14 | 15 | echo -n $JFROG_ACCESS_TOKEN 16 | -------------------------------------------------------------------------------- /.github/composites/get_jfrog_access_token_subject.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | set -euo pipefail 4 | 5 | JFROG_ACCESS_TOKEN=$1 6 | echo $JFROG_ACCESS_TOKEN | awk -F'.' 
'{print $2}' | sed 's/.\{1,3\}$/&==/' | base64 -d | jq '.sub' -r 7 | -------------------------------------------------------------------------------- /.github/composites/python-setup/action.yml: -------------------------------------------------------------------------------- 1 | name: Checkout and set up python 2 | description: "Installs python, dependencies and handles venv caching" 3 | runs: 4 | using: composite 5 | steps: 6 | - uses: actions/setup-python@v5 7 | with: 8 | python-version: "3.10" 9 | 10 | - name: Install and configure Poetry 11 | uses: snok/install-poetry@v1 12 | with: 13 | virtualenvs-create: true 14 | virtualenvs-in-project: true 15 | installer-parallel: true 16 | virtualenvs-path: .venv 17 | 18 | - name: Load cached venv 19 | id: cached-poetry-dependencies 20 | uses: actions/cache@v4 21 | with: 22 | path: .venv 23 | key: venv-${{ runner.os }}-${{ steps.setup-python.outputs.python-version }}-${{ hashFiles('**/poetry.lock') }} 24 | 25 | - name: Install dependencies 26 | shell: bash 27 | if: steps.cached-poetry-dependencies.outputs.cache-hit != 'true' 28 | run: | 29 | poetry config installer.max-workers 10 30 | poetry install --no-interaction 31 | -------------------------------------------------------------------------------- /.github/dependabot.yml: -------------------------------------------------------------------------------- 1 | # To get started with Dependabot version updates, you'll need to specify which 2 | # package ecosystems to update and where the package manifests are located. 3 | # Please see the documentation for all configuration options: 4 | # https://docs.github.com/github/administering-a-repository/configuration-options-for-dependency-updates 5 | 6 | version: 2 7 | updates: 8 | - package-ecosystem: "pip" 9 | directory: "/" 10 | schedule: 11 | interval: "daily" 12 | groups: 13 | minor: 14 | update-types: 15 | - minor 16 | - patch 17 | patterns: 18 | - "types*" 19 | 20 | - package-ecosystem: "github-actions" 21 | # Workflow files stored in the default location of `.github/workflows`. (You don't need to specify `/.github/workflows` for `directory`. You can use `directory: "/"`.) 22 | directory: "/" 23 | schedule: 24 | interval: "daily" 25 | -------------------------------------------------------------------------------- /.github/pull_request_template.md: -------------------------------------------------------------------------------- 1 | # Description 2 | No description. 3 | 4 | ## Before Merging 5 | - [ ] Review the code changes 6 | - Unused print / comments / TODOs 7 | - Missing docstrings for functions that should have them 8 | - Consistent variable names 9 | - ... 
10 | - [ ] Update `changelog.md` if necessary 11 | - [ ] Commit messages should contain a semantic [label](https://gist.github.com/joshbuchea/6f47e86d2510bce28f8e7f42ae84c716) and the ticket number 12 | - Consider squashing if this is not the case 13 | -------------------------------------------------------------------------------- /.github/workflows/artifactory.yml: -------------------------------------------------------------------------------- 1 | name: Artifactory Deployment of PyPi 2 | 3 | on: 4 | workflow_dispatch: {} 5 | release: 6 | types: [published] 7 | 8 | env: 9 | ARTIFACTORY_URL: https://alephalpha.jfrog.io 10 | ARTIFACTORY_PYPI_REPOSITORY: "intelligence-layer" 11 | ARTIFACTORY_DOCKER_REGISTRY: alephalpha.jfrog.io/intelligence-layer-images 12 | 13 | jobs: 14 | build-and-push-pypi: 15 | permissions: 16 | contents: read 17 | id-token: write 18 | runs-on: ubuntu-latest 19 | steps: 20 | - name: Checkout 21 | uses: actions/checkout@v4 22 | - uses: actions/setup-python@v5 23 | with: 24 | python-version: "3.10" 25 | - name: Install and configure Poetry 26 | uses: snok/install-poetry@v1 27 | with: 28 | virtualenvs-create: true 29 | virtualenvs-in-project: true 30 | installer-parallel: true 31 | - name: Build package and push to Artifactory 32 | run: | 33 | poetry build 34 | poetry config repositories.artifactory $ARTIFACTORY_URL/artifactory/api/pypi/$ARTIFACTORY_PYPI_REPOSITORY 35 | export POETRY_HTTP_BASIC_ARTIFACTORY_PASSWORD=$(.github/composites/get_jfrog_access_token.sh) 36 | export POETRY_HTTP_BASIC_ARTIFACTORY_USERNAME=$(.github/composites/get_jfrog_access_token_subject.sh $POETRY_HTTP_BASIC_ARTIFACTORY_PASSWORD) 37 | poetry publish -r artifactory 38 | -------------------------------------------------------------------------------- /.github/workflows/document-index-execution.yml: -------------------------------------------------------------------------------- 1 | name: Document Index Tests 2 | 3 | on: 4 | schedule: 5 | - cron: '0 5 * * *' # Runs daily at 5:00 UTC 6 | workflow_dispatch: # Allows manual trigger 7 | push: 8 | branches: 9 | - main 10 | paths: 11 | - "src/intelligence_layer/connectors/document_index/**" 12 | - "tests/connectors/document_index/**" 13 | pull_request: 14 | paths: 15 | - "src/intelligence_layer/connectors/document_index/**" 16 | - "tests/connectors/document_index/**" 17 | 18 | concurrency: 19 | group: ${{ github.workflow }}-${{ github.ref }} 20 | cancel-in-progress: true 21 | 22 | jobs: 23 | python-tests: 24 | uses: ./.github/workflows/document-index-tests.yml 25 | secrets: inherit 26 | -------------------------------------------------------------------------------- /.github/workflows/document-index-tests.yml: -------------------------------------------------------------------------------- 1 | on: 2 | workflow_call: 3 | inputs: 4 | runner: 5 | type: string 6 | default: "ubuntu-latest" 7 | timeout: 8 | type: number 9 | default: 15 # mins 10 | 11 | defaults: 12 | run: 13 | shell: bash 14 | 15 | jobs: 16 | document-index-notebooks: 17 | timeout-minutes: ${{inputs.timeout}} 18 | runs-on: ${{inputs.runner}} 19 | 20 | steps: 21 | - name: Checkout repository 22 | uses: actions/checkout@v4 23 | - uses: ./.github/composites/python-setup 24 | 25 | - name: Run Notebooks 26 | env: 27 | AA_TOKEN: ${{ secrets.AA_TOKEN }} 28 | CLIENT_URL: ${{ secrets.CLIENT_URL }} 29 | DOCUMENT_INDEX_URL: ${{secrets.DOCUMENT_INDEX_URL}} 30 | run: | 31 | ./scripts/notebook_runner_document_index.sh 32 | document-index-tests: 33 | timeout-minutes: ${{inputs.timeout}} 34 | runs-on: 
${{inputs.runner}} 35 | continue-on-error: true 36 | env: 37 | DOCUMENT_INDEX_URL: ${{secrets.DOCUMENT_INDEX_URL}} 38 | AA_TOKEN: ${{ secrets.AA_TOKEN }} 39 | steps: 40 | - name: Checkout repository 41 | uses: actions/checkout@v4 42 | - uses: ./.github/composites/python-setup 43 | - name: Run client tests 44 | run: | 45 | TQDM_DISABLE=1 poetry run pytest -m "document_index and not asyncio" 46 | - name: Run async client tests 47 | run: | 48 | TQDM_DISABLE=1 poetry run pytest -m "document_index and asyncio" 49 | -------------------------------------------------------------------------------- /.github/workflows/test-execution.yml: -------------------------------------------------------------------------------- 1 | name: Intelligence Layer SDK Tests 2 | 3 | on: 4 | push: 5 | branches: 6 | - main 7 | # By default, a workflow only runs when a pull_request event's activity type is opened, synchronize, or reopened 8 | pull_request: 9 | # manual trigger 10 | workflow_dispatch: 11 | 12 | concurrency: 13 | group: ${{ github.workflow }}-${{ github.ref }} 14 | cancel-in-progress: true 15 | jobs: 16 | python-tests: 17 | uses: ./.github/workflows/sdk-tests.yml 18 | secrets: inherit 19 | -------------------------------------------------------------------------------- /.pre-commit-config.yaml: -------------------------------------------------------------------------------- 1 | repos: 2 | - repo: https://github.com/pre-commit/pre-commit-hooks 3 | rev: v5.0.0 4 | hooks: 5 | - id: check-json 6 | - id: pretty-format-json 7 | files: .json 8 | args: 9 | - --autofix 10 | - repo: https://github.com/astral-sh/ruff-pre-commit 11 | # Ruff version. 12 | rev: v0.7.3 13 | hooks: 14 | # Run the linter. 15 | - id: ruff 16 | name: ruff-lint 17 | args: [--fix] 18 | types_or: [python, pyi, jupyter] 19 | # Run the formatter. 
20 | - id: ruff-format 21 | types_or: [python, pyi, jupyter] 22 | - repo: https://github.com/kynan/nbstripout 23 | rev: 0.8.1 24 | hooks: 25 | - id: nbstripout 26 | files: ".ipynb" 27 | args: 28 | [ 29 | --drop-empty-cells, 30 | --extra-keys=metadata.kernelspec metadata.language_info.codemirror_mode.version metadata.language_info.pygments_lexer metadata.language_info.version, 31 | ] 32 | 33 | - repo: https://github.com/codespell-project/codespell 34 | rev: v2.3.0 35 | hooks: 36 | - id: codespell 37 | args: 38 | [ 39 | "-L", 40 | "newyorker,te,responde,ist,als,oder,technik,sie,rouge,unter,juli,fiel,couldn,mke, vor,fille,ans", 41 | ] 42 | exclude: '^(poetry\.lock|tests/.*|src/intelligence_layer/examples/qa/multiple_chunk_qa.py|src/intelligence_layer/examples/summarize/.*|src/intelligence_layer/examples/classify/keyword_extract.py|src/intelligence_layer/learning/enrich.py)$' 43 | - repo: https://github.com/jsh9/pydoclint 44 | rev: 0.5.9 45 | hooks: 46 | - id: pydoclint 47 | -------------------------------------------------------------------------------- /.readthedocs.yaml: -------------------------------------------------------------------------------- 1 | # .readthedocs.yaml 2 | # Read the Docs configuration file 3 | # See https://docs.readthedocs.io/en/stable/config-file/v2.html for details 4 | 5 | # Required 6 | version: 2 7 | 8 | # Set the OS, Python version and other tools you might need 9 | build: 10 | os: ubuntu-22.04 11 | tools: 12 | python: "3.11" 13 | jobs: 14 | # according to https://github.com/readthedocs/readthedocs.org/issues/4912#issuecomment-1992286540 15 | post_create_environment: 16 | - python -m pip install poetry 17 | post_install: 18 | - VIRTUAL_ENV=$READTHEDOCS_VIRTUALENV_PATH poetry install 19 | 20 | # Build documentation in the "docs/" directory with Sphinx 21 | sphinx: 22 | configuration: docs/conf.py 23 | # Optionally build your docs in additional formats such as PDF and ePub 24 | # formats: 25 | # - pdf 26 | # - epub 27 | -------------------------------------------------------------------------------- /LICENSE.md: -------------------------------------------------------------------------------- 1 | Copyright (C) Aleph Alpha GmbH - All Rights Reserved 2 | 3 | This source code, databases, and other material is protected under international copyright law. All rights reserved and protected by the copyright holders. This file is confidential and only available to authorized individuals with the permission of the copyright holders. If you encounter this file and do not have permission, please contact the copyright holder. 4 | -------------------------------------------------------------------------------- /RELEASE.md: -------------------------------------------------------------------------------- 1 | # Release cycle TODOs 2 | 3 | - Update CHANGELOG.md 4 | - We committed to updating the changelog with every relevant merge into main. Check the new entries of the changelog and perform adjustments where necessary. 5 | - Update the "version" field of the project in `pyproject.toml` 6 | - We use [semantic versioning](https://semver.org/) 7 | - Commit the changes and merge to main 8 | - Tag the latest commit on main with the new release number (e.g. v0.6.0) 9 | - `git checkout main, git tag , git push origin ` 10 | - Create a new release draft in GitHub (Tags -> Releases -> Draft a new release) and save it as draft 11 | - Copy the changelog into the release description. Also add a link to the commits since the last release at the bottom of the description. 
12 | - Make sure the changes have been merged into the main branch. 13 | - Publish the release. 14 | - Consider updating the changelog of the [docs](https://gitlab.aleph-alpha.de/engineering/docs). The repository for the docs can be found [here](https://gitlab.aleph-alpha.de/engineering/docs). 15 | - Update it when we have big new features we want to communicate or in preparation of the sprint review. 16 | -------------------------------------------------------------------------------- /assets/TraceViewer.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Aleph-Alpha/intelligence-layer-sdk/261ab36a28977d91e2d0c8ca16039edcbab81925/assets/TraceViewer.png -------------------------------------------------------------------------------- /assets/argilla_interface.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Aleph-Alpha/intelligence-layer-sdk/261ab36a28977d91e2d0c8ca16039edcbab81925/assets/argilla_interface.png -------------------------------------------------------------------------------- /assets/argilla_splits.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Aleph-Alpha/intelligence-layer-sdk/261ab36a28977d91e2d0c8ca16039edcbab81925/assets/argilla_splits.png -------------------------------------------------------------------------------- /assets/fork.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Aleph-Alpha/intelligence-layer-sdk/261ab36a28977d91e2d0c8ca16039edcbab81925/assets/fork.png -------------------------------------------------------------------------------- /docker-compose.yaml: -------------------------------------------------------------------------------- 1 | services: 2 | # taken from https://docs.argilla.io/latest/getting_started/how-to-deploy-argilla-with-docker/ 3 | argilla-elastic-search: 4 | image: docker.elastic.co/elasticsearch/elasticsearch:8.12.2 5 | environment: 6 | - node.name=elasticsearch 7 | - cluster.name=es-argilla-local 8 | - discovery.type=single-node 9 | - "ES_JAVA_OPTS=-Xms512m -Xmx512m" 10 | - cluster.routing.allocation.disk.threshold_enabled=false 11 | - xpack.security.enabled=false 12 | ulimits: 13 | memlock: 14 | soft: -1 15 | hard: -1 16 | ports: 17 | - "9200:9200" 18 | - "9300:9300" 19 | healthcheck: 20 | test: 21 | [ 22 | "CMD-SHELL", 23 | "curl --silent --fail localhost:9200/_cluster/health || exit 1", 24 | ] 25 | interval: 5s 26 | timeout: 5s 27 | retries: 3 28 | argilla: 29 | depends_on: 30 | argilla-elastic-search: 31 | condition: service_healthy 32 | image: argilla/argilla-server:v1.29.1 33 | ports: 34 | - "6900:6900" 35 | environment: 36 | ARGILLA_ELASTICSEARCH: "http://argilla-elastic-search:9200" 37 | ARGILLA_ENABLE_TELEMETRY: 0 38 | 39 | USERNAME: argilla 40 | PASSWORD: 12345678 41 | API_KEY: argilla.apikey 42 | open-telemetry-trace-service: 43 | container_name: jaeger_1_35 44 | environment: 45 | COLLECTOR_OTLP_ENABLED: "true" 46 | ports: 47 | - "4317:4317" 48 | - "4318:4318" 49 | - "16686:16686" 50 | image: jaegertracing/all-in-one:1.35 51 | # export GITHUB_TOKEN=... 52 | # echo $GITHUB_TOKEN | docker login ghcr.io -u your_email@for_github --password-stdin 53 | # docker compose pull to update containers 54 | 55 | # export GITLAB_TOKEN=... 
56 | # (optional) export GITLAB_TOKEN=$(op item get YOUR_TOKEN --format json --fields password | jq .value | tr -d '"') 57 | # echo $GITLAB_TOKEN | docker login registry.gitlab.aleph-alpha.de -u your_email@for_gitlab --password-stdin 58 | # docker compose pull to update containers 59 | postgres: 60 | image: postgres:15 61 | ports: 62 | - ${POSTGRES_PORT}:${POSTGRES_PORT} 63 | env_file: ".env" 64 | command: -p ${POSTGRES_PORT} 65 | -------------------------------------------------------------------------------- /docs/Makefile: -------------------------------------------------------------------------------- 1 | # Minimal makefile for Sphinx documentation 2 | # 3 | 4 | # You can set these variables from the command line, and also 5 | # from the environment for the first two. 6 | SPHINXOPTS ?= # -nvT 7 | SPHINXBUILD ?= sphinx-build 8 | SOURCEDIR = . 9 | BUILDDIR = _build 10 | 11 | export SPHINX_APIDOC_OPTIONS=members,show-inheritance 12 | 13 | # Put it first so that "make" without argument is like "make help". 14 | help: 15 | @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 16 | 17 | .PHONY: help Makefile 18 | 19 | # Catch-all target: route all unknown targets to Sphinx using the new 20 | # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). 21 | %: Makefile 22 | # sphinx-apidoc -o . ../src 23 | @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 24 | # rm --force `ls *.rst | grep --fixed-strings --invert-match index.rst` 25 | -------------------------------------------------------------------------------- /docs/conf.py: -------------------------------------------------------------------------------- 1 | # Configuration file for the Sphinx documentation builder. 2 | # 3 | # For the full list of built-in configuration values, see the documentation: 4 | # https://www.sphinx-doc.org/en/master/usage/configuration.html 5 | 6 | # -- Project information ----------------------------------------------------- 7 | # https://www.sphinx-doc.org/en/master/usage/configuration.html#project-information 8 | 9 | project = "Intelligence Layer" 10 | copyright = "2023, Aleph Alpha" 11 | author = "Aleph Alpha" 12 | 13 | # -- General configuration --------------------------------------------------- 14 | # https://www.sphinx-doc.org/en/master/usage/configuration.html#general-configuration 15 | 16 | extensions = [ 17 | "sphinx.ext.autodoc", 18 | "sphinx.ext.napoleon", 19 | "sphinx.ext.viewcode", 20 | "sphinx.ext.doctest", 21 | ] 22 | 23 | autodoc_default_options = { 24 | "members": True, 25 | "show-inheritance": True, 26 | "inherited-members": "BaseModel,RuntimeError", 27 | # BaseModel attributes where the documentation does not add a lot of value 28 | "exclude-members": "model_config,model_fields,model_computed_fields", 29 | } 30 | 31 | templates_path = ["_templates"] 32 | exclude_patterns = ["_build", "Thumbs.db", ".DS_Store"] 33 | 34 | 35 | # -- Options for HTML output ------------------------------------------------- 36 | # https://www.sphinx-doc.org/en/master/usage/configuration.html#options-for-html-output 37 | 38 | html_theme = "sphinx_rtd_theme" 39 | -------------------------------------------------------------------------------- /docs/index.rst: -------------------------------------------------------------------------------- 1 | .. Intelligence Layer documentation master file, created by 2 | sphinx-quickstart on Fri Oct 27 14:17:00 2023. 3 | You can adapt this file completely to your liking, but it should at least 4 | contain the root `toctree` directive. 
5 | 6 | Welcome to Intelligence Layer's documentation! 7 | ============================================== 8 | 9 | This documentation provides API-level code documentation for the Intelligence Layer, covering modules such as connectors, core, evaluation, and examples. 10 | 11 | A comprehensive overview of the Intelligence Layer can be found on GitHub, including `tutorials `_ and `how-tos `_. 12 | This code documentation is intended to serve as a reference companion to the main documentation, providing detailed information on the Intelligence Layer's API and implementation. 13 | 14 | .. toctree:: 15 | :maxdepth: 1 16 | :caption: Contents: 17 | 18 | intelligence_layer.connectors 19 | intelligence_layer.core 20 | intelligence_layer.evaluation 21 | intelligence_layer.examples 22 | 23 | 24 | Indices and tables 25 | ================== 26 | 27 | * :ref:`genindex` 28 | -------------------------------------------------------------------------------- /docs/intelligence_layer.connectors.rst: -------------------------------------------------------------------------------- 1 | intelligence\_layer.connectors 2 | ====================================== 3 | 4 | Module contents 5 | --------------- 6 | 7 | .. automodule:: intelligence_layer.connectors 8 | -------------------------------------------------------------------------------- /docs/intelligence_layer.core.rst: -------------------------------------------------------------------------------- 1 | intelligence\_layer.core 2 | ================================ 3 | 4 | Module contents 5 | --------------- 6 | 7 | .. automodule:: intelligence_layer.core 8 | 9 | .. autoclass:: TextChunk 10 | -------------------------------------------------------------------------------- /docs/intelligence_layer.evaluation.rst: -------------------------------------------------------------------------------- 1 | intelligence\_layer.evaluation 2 | ====================================== 3 | 4 | 5 | Module contents 6 | --------------- 7 | 8 | .. automodule:: intelligence_layer.evaluation 9 | -------------------------------------------------------------------------------- /docs/intelligence_layer.examples.rst: -------------------------------------------------------------------------------- 1 | intelligence\_layer.examples 2 | ====================================== 3 | 4 | 5 | Module contents 6 | --------------- 7 | 8 | .. automodule:: intelligence_layer.examples 9 | -------------------------------------------------------------------------------- /docs/make.bat: -------------------------------------------------------------------------------- 1 | @ECHO OFF 2 | 3 | pushd %~dp0 4 | 5 | REM Command file for Sphinx documentation 6 | 7 | if "%SPHINXBUILD%" == "" ( 8 | set SPHINXBUILD=sphinx-build 9 | ) 10 | set SOURCEDIR=. 11 | set BUILDDIR=_build 12 | 13 | %SPHINXBUILD% >NUL 2>NUL 14 | if errorlevel 9009 ( 15 | echo. 16 | echo.The 'sphinx-build' command was not found. Make sure you have Sphinx 17 | echo.installed, then set the SPHINXBUILD environment variable to point 18 | echo.to the full path of the 'sphinx-build' executable. Alternatively you 19 | echo.may add the Sphinx directory to PATH. 20 | echo. 
21 | echo.If you don't have Sphinx installed, grab it from 22 | echo.https://www.sphinx-doc.org/ 23 | exit /b 1 24 | ) 25 | 26 | if "%1" == "" goto help 27 | 28 | %SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% 29 | goto end 30 | 31 | :help 32 | %SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% 33 | 34 | :end 35 | popd 36 | -------------------------------------------------------------------------------- /mypy.ini: -------------------------------------------------------------------------------- 1 | [mypy] 2 | 3 | warn_unused_ignores = True 4 | -------------------------------------------------------------------------------- /scripts/all.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env -S bash -eu -o pipefail 2 | 3 | ProjectRoot="$(cd $(dirname "$0")/.. && pwd -P)" 4 | 5 | cd "$ProjectRoot" 6 | 7 | # see https://stackoverflow.com/questions/43267413/how-to-set-environment-variables-from-env-file 8 | set -a # automatically export all variables 9 | source .env 10 | set +a 11 | 12 | ./scripts/lint.sh 13 | ./scripts/doctest.sh 14 | ./scripts/notebook_runner.sh 15 | ./scripts/test.sh 16 | python "$(dirname "$0")/clean_hf.py" 17 | -------------------------------------------------------------------------------- /scripts/clean_hf.py: -------------------------------------------------------------------------------- 1 | import os 2 | import warnings 3 | 4 | from dotenv import load_dotenv 5 | from huggingface_hub import HfApi 6 | 7 | 8 | def clean_up_dangling_hf_repos(hugging_face_token: str) -> None: 9 | api = HfApi(token=hugging_face_token) 10 | datasets = list( 11 | api.list_datasets(author="Aleph-Alpha", dataset_name="IL-temp-tests") 12 | ) 13 | if len(datasets) > 0: 14 | warnings.warn("dangling hf datasets found, attempting to delete", stacklevel=2) 15 | for dataset in datasets: 16 | api.delete_repo(dataset.id, repo_type="dataset", missing_ok=True) 17 | 18 | 19 | if __name__ == "__main__": 20 | load_dotenv() 21 | token = os.getenv("HUGGING_FACE_TOKEN") 22 | assert isinstance(token, str) 23 | clean_up_dangling_hf_repos(token) 24 | -------------------------------------------------------------------------------- /scripts/doctest.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env -S bash -eu -o pipefail 2 | 3 | ProjectRoot="$(cd $(dirname "$0")/.. && pwd -P)" 4 | 5 | cd "$ProjectRoot" 6 | 7 | if [ -f .env ]; then 8 | # Export environment variables from .env file 9 | set -a # automatically export all variables 10 | source .env 11 | set +a 12 | fi 13 | (cd docs && poetry run make doctest) 14 | -------------------------------------------------------------------------------- /scripts/fastapi_example_test.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env -S bash -eu -o pipefail 2 | echo "Testing the fastapi app" 3 | # start the server in the background 4 | hypercorn src/documentation/fastapi_example:app --bind localhost:8000 & 5 | server_pid=$! 6 | 7 | attempt_counter=0 8 | max_attempts=10 9 | 10 | trap 'kill $server_pid' EXIT SIGINT 11 | # waiting for server startup 12 | until $(curl -X GET http://localhost:8000 --fail-with-body --output /dev/null --silent --head); do 13 | if [ ${attempt_counter} -eq ${max_attempts} ];then 14 | echo "Max attempts reached" 15 | exit 1 16 | fi 17 | 18 | printf '.' 
19 | attempt_counter=$(($attempt_counter+1)) 20 | sleep 1 21 | done 22 | 23 | curl -X GET http://localhost:8000 --fail-with-body 24 | curl -X POST http://localhost:8000/summary --fail-with-body -H "Content-Type: application/json" -d '{"chunk": "", "language": {"iso_639_1": "en"}}' 25 | 26 | # kill happens at the end with the trap command 27 | exit 0 28 | -------------------------------------------------------------------------------- /scripts/lint.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env -S bash -eu -o pipefail 2 | 3 | cd $(dirname $0)/.. 4 | 5 | poetry run pre-commit run --all-files 6 | poetry run mypy . 7 | -------------------------------------------------------------------------------- /scripts/notebook_runner.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env -S bash -eu -o pipefail 2 | # next line loads AA_TOKEN from .env file when running bash script locally. In CI this is not necessary since AA_TOKEN is environment variable. 3 | [ -f .env ] && source .env 4 | export AA_TOKEN 5 | # Find all .ipynb files in the directory and pass them to xargs for parallel execution 6 | rm -rf src/documentation/.ipynb_checkpoints 7 | rm -rf src/documentation/how_tos/.ipynb_checkpoints 8 | 9 | find src/documentation -name "*.nbconvert.ipynb" -type f -delete 10 | find src/documentation -name "*.ipynb" ! -name "performance_tips.ipynb" ! -name "document_index.ipynb" | xargs --max-args 1 --max-procs 6 poetry run jupyter nbconvert --to notebook --execute 11 | find src/documentation -name "*.nbconvert.ipynb" -type f -delete 12 | 13 | poetry run ./scripts/fastapi_example_test.sh 14 | -------------------------------------------------------------------------------- /scripts/notebook_runner_document_index.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env -S bash -eu -o pipefail 2 | 3 | # Load environment variables if running locally 4 | [ -f .env ] && source .env 5 | export AA_TOKEN 6 | 7 | # Remove Jupyter Notebook checkpoints 8 | rm -rf src/documentation/.ipynb_checkpoints 9 | rm -rf src/documentation/how_tos/.ipynb_checkpoints 10 | 11 | # Remove any previously executed version of the notebook 12 | find src/documentation -name "document_index.nbconvert.ipynb" -type f -delete 13 | 14 | # Execute only document_index.ipynb 15 | poetry run jupyter nbconvert --to notebook --execute src/documentation/document_index.ipynb 16 | 17 | # Remove the execution-generated file 18 | find src/documentation -name "document_index.nbconvert.ipynb" -type f -delete 19 | 20 | -------------------------------------------------------------------------------- /scripts/test.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env -S bash -eu -o pipefail 2 | 3 | TQDM_DISABLE=1 poetry run pytest -n 10 -m "not document_index" 4 | -------------------------------------------------------------------------------- /src/documentation/data/classify_examples.json: -------------------------------------------------------------------------------- 1 | [ 2 | { 3 | "label": "Finance", 4 | "message": "I just traveled to Paris for a conference, where can I get the train ride refunded?" 5 | }, 6 | { 7 | "label": "Sales", 8 | "message": "Hello, we would like to get in contact with your sales team, because we are interested in your solution." 
9 | }, 10 | { 11 | "label": "Communications", 12 | "message": "We are working on a documentation on AI and would like to film a piece about you. Would you be interested?" 13 | }, 14 | { 15 | "label": "Research", 16 | "message": "I am working with Stanford and was hoping to win you over for a research collaboration." 17 | }, 18 | { 19 | "label": "IT Support", 20 | "message": "My laptop is broken" 21 | }, 22 | { 23 | "label": "Communications", 24 | "message": "Can you send your models via email?" 25 | }, 26 | { 27 | "label": "Research", 28 | "message": "We should do a research collaboration." 29 | }, 30 | { 31 | "label": "Research", 32 | "message": "My company has been working on time series and signal processing for a long time. It would make sense to define a joint go to market and research strategy." 33 | }, 34 | { 35 | "label": "Human Resources", 36 | "message": "Full stack developer in your area available now." 37 | }, 38 | { 39 | "label": "Product", 40 | "message": "Hi,\n\nI recently bought your offering. I am having trouble running your docker container in my environment. It fails to start. Can you help?" 41 | }, 42 | { 43 | "label": "Product", 44 | "message": "Hello,\n\nI am getting strange errors from your API. It is saying the queue is full, but I am only sending one task at a time. Why is this happening?" 45 | }, 46 | { 47 | "label": "Product", 48 | "message": "Can you show me a demo of different use cases your offering can solve?" 49 | }, 50 | { 51 | "label": "Human Resources", 52 | "message": "Hey, I did not get a t-shirt in the onboarding. Could I still get one?" 53 | }, 54 | { 55 | "label": "Customer", 56 | "message": "Hi, can you name me a couple of timeslots for a first call? Would be really interested in learning more about the product?" 57 | }, 58 | { 59 | "label": "Product", 60 | "message": "Hi Jan, is your product ISO 37301 compliant?" 61 | }, 62 | { 63 | "label": "IT Support", 64 | "message": "I can\u2019t login to Mattermost or Sharepoint, how can I gain access?" 65 | }, 66 | { 67 | "label": "Finance", 68 | "message": "I did not get paid last month, when do I get paid? What is going on?" 69 | }, 70 | { 71 | "label": "Security", 72 | "message": "Hi, I want to get a new badge, the photo of me looks ugly and I just got new glasses so it does not look like me. " 73 | }, 74 | { 75 | "label": "Marketing", 76 | "message": "I have a question concerning your marketing strategy, would you have time to hop on a call?" 77 | }, 78 | { 79 | "label": "CEO Office", 80 | "message": "Dear Jonas Andrulis,\n\nWe have met each other at the event in N\u00fcrnberg, can we meet for a follow up in your Office in Heidelberg?" 81 | }, 82 | { 83 | "label": "Security", 84 | "message": "Your hTTPs Certificate is not valid on your www.aleph-alpha.de" 85 | }, 86 | { 87 | "label": "Human Resources", 88 | "message": "I want to take a week off immediately" 89 | }, 90 | { 91 | "label": "Human Resources", 92 | "message": "I want to take a sabbatical" 93 | }, 94 | { 95 | "label": "Human Resources", 96 | "message": "How can I work more, I want to work weekends, can I get paid overtime?" 
97 | } 98 | ] 99 | -------------------------------------------------------------------------------- /src/documentation/fastapi_example.py: -------------------------------------------------------------------------------- 1 | import http 2 | import os 3 | from collections.abc import Sequence 4 | from http import HTTPStatus 5 | from typing import Annotated 6 | 7 | from aleph_alpha_client import Client 8 | from dotenv import load_dotenv 9 | from fastapi import Depends, FastAPI, HTTPException, Request, Response 10 | from fastapi.datastructures import URL 11 | 12 | from intelligence_layer.connectors import AlephAlphaClientProtocol 13 | from intelligence_layer.core import Llama3InstructModel, NoOpTracer, Task 14 | from intelligence_layer.examples import ( 15 | SingleChunkSummarizeInput, 16 | SteerableSingleChunkSummarize, 17 | SummarizeOutput, 18 | ) 19 | 20 | # Minimal FastAPI app ########################################################## 21 | 22 | app = FastAPI() 23 | 24 | 25 | @app.get("/") 26 | def root() -> Response: 27 | return Response(content="Hello World", status_code=HTTPStatus.OK) 28 | 29 | 30 | # Authentication ############################################################### 31 | 32 | 33 | class AuthService: 34 | def is_valid_token(self, token: str, permissions: Sequence[str], url: URL) -> bool: 35 | # Add your authentication logic here 36 | print(f"Checking permission for route: {url.path}") 37 | return True 38 | 39 | 40 | class PermissionChecker: 41 | def __init__(self, permissions: Sequence[str] = []): 42 | self.permissions = permissions 43 | 44 | def __call__( 45 | self, 46 | request: Request, 47 | auth_service: Annotated[AuthService, Depends(AuthService)], 48 | ) -> None: 49 | token = request.headers.get("Authorization") or "" 50 | try: 51 | if not auth_service.is_valid_token(token, self.permissions, request.url): 52 | raise HTTPException(HTTPStatus.UNAUTHORIZED) 53 | except RuntimeError as e: 54 | raise HTTPException(HTTPStatus.INTERNAL_SERVER_ERROR) from e 55 | 56 | 57 | permission_checker_for_user = PermissionChecker(["User"]) 58 | 59 | 60 | # Intelligence Layer Task ###################################################### 61 | 62 | load_dotenv() 63 | 64 | 65 | def client() -> Client: 66 | return Client( 67 | token=os.environ["AA_TOKEN"], 68 | host=os.environ["CLIENT_URL"], 69 | ) 70 | 71 | 72 | def default_model( 73 | app_client: Annotated[AlephAlphaClientProtocol, Depends(client)], 74 | ) -> Llama3InstructModel: 75 | return Llama3InstructModel(client=app_client) 76 | 77 | 78 | def summary_task( 79 | model: Annotated[Llama3InstructModel, Depends(default_model)], 80 | ) -> SteerableSingleChunkSummarize: 81 | return SteerableSingleChunkSummarize(model=model) 82 | 83 | 84 | @app.post( 85 | "/summary", 86 | dependencies=[Depends(PermissionChecker(["User"]))], 87 | status_code=http.HTTPStatus.OK, 88 | ) 89 | def summary_task_route( 90 | input: SingleChunkSummarizeInput, 91 | task: Annotated[ 92 | Task[SingleChunkSummarizeInput, SummarizeOutput], Depends(summary_task) 93 | ], 94 | ) -> SummarizeOutput: 95 | return task.run(input, NoOpTracer()) 96 | -------------------------------------------------------------------------------- /src/documentation/how_tos/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Aleph-Alpha/intelligence-layer-sdk/261ab36a28977d91e2d0c8ca16039edcbab81925/src/documentation/how_tos/__init__.py -------------------------------------------------------------------------------- 
/src/documentation/how_tos/how_to_aggregate_evaluations.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "from example_data import DummyAggregationLogic, example_data\n", 10 | "\n", 11 | "from intelligence_layer.evaluation.aggregation.aggregator import Aggregator\n", 12 | "from intelligence_layer.evaluation.aggregation.in_memory_aggregation_repository import (\n", 13 | " InMemoryAggregationRepository,\n", 14 | ")" 15 | ] 16 | }, 17 | { 18 | "cell_type": "markdown", 19 | "metadata": {}, 20 | "source": [ 21 | "# How to aggregate evaluations\n", 22 | "0. Run the evaluations of all your tasks and datasets (see [here](./how_to_evaluate_runs.ipynb)).\n", 23 | " - When aggregating multiple evaluations, all of them need the same data types \n", 24 | "1. Initialize all necessary repositories for the `Aggregator`, and an `AggregationLogic`\n", 25 | "2. Run the `Aggregator` to aggregate all examples and create a single `AggregationOverview`\n", 26 | "3. (Optional) Save the `AggregationOverview.id` for later retrieval\n", 27 | "\n", 28 | "### Example" 29 | ] 30 | }, 31 | { 32 | "cell_type": "code", 33 | "execution_count": null, 34 | "metadata": {}, 35 | "outputs": [], 36 | "source": [ 37 | "# Step 0\n", 38 | "\n", 39 | "\n", 40 | "my_example_data = example_data()\n", 41 | "print()\n", 42 | "\n", 43 | "evaluation_ids = [\n", 44 | " my_example_data.evaluation_overview_1.id,\n", 45 | " my_example_data.evaluation_overview_2.id,\n", 46 | "]\n", 47 | "\n", 48 | "# Step 1\n", 49 | "evaluation_repository = my_example_data.evaluation_repository\n", 50 | "aggregation_repository = InMemoryAggregationRepository()\n", 51 | "aggregation_logic = DummyAggregationLogic()\n", 52 | "\n", 53 | "# Step 2\n", 54 | "aggregator = Aggregator(\n", 55 | " evaluation_repository,\n", 56 | " aggregation_repository,\n", 57 | " \"MyAggregationDescription\",\n", 58 | " aggregation_logic,\n", 59 | ")\n", 60 | "aggregation_overview = aggregator.aggregate_evaluation(\n", 61 | " *evaluation_ids, labels=set([\"label_a\"]), metadata=dict({\"key\": \"value\"})\n", 62 | ")\n", 63 | "\n", 64 | "# Step 3\n", 65 | "print(aggregation_overview.id)\n", 66 | "print(aggregation_overview.labels)\n", 67 | "print(aggregation_overview.metadata)" 68 | ] 69 | } 70 | ], 71 | "metadata": { 72 | "language_info": { 73 | "codemirror_mode": { 74 | "name": "ipython" 75 | }, 76 | "file_extension": ".py", 77 | "mimetype": "text/x-python", 78 | "name": "python", 79 | "nbconvert_exporter": "python" 80 | } 81 | }, 82 | "nbformat": 4, 83 | "nbformat_minor": 2 84 | } 85 | -------------------------------------------------------------------------------- /src/documentation/how_tos/how_to_create_a_dataset.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "from collections.abc import Sequence\n", 10 | "\n", 11 | "from dotenv import load_dotenv\n", 12 | "from pydantic import BaseModel\n", 13 | "\n", 14 | "from intelligence_layer.evaluation import Example, InMemoryDatasetRepository\n", 15 | "\n", 16 | "load_dotenv()" 17 | ] 18 | }, 19 | { 20 | "cell_type": "markdown", 21 | "metadata": {}, 22 | "source": [ 23 | "# How to create a dataset\n", 24 | "\n", 25 | "0. Collect data for examples.\n", 26 | "1. 
Convert data to `Example`s.\n", 27 | "1. Create a `DatasetRepository`.\n", 28 | "2. Store `Example`s to `DatasetRepository`.\n", 29 | "3. Remember the dataset id." 30 | ] 31 | }, 32 | { 33 | "cell_type": "markdown", 34 | "metadata": {}, 35 | "source": [ 36 | "### Example" 37 | ] 38 | }, 39 | { 40 | "cell_type": "code", 41 | "execution_count": null, 42 | "metadata": {}, 43 | "outputs": [], 44 | "source": [ 45 | "class StoryTaskInput(BaseModel): # Should already be implemented in your task\n", 46 | " topic: str\n", 47 | " targeted_word_count: int\n", 48 | "\n", 49 | "\n", 50 | "class StoryTaskExpectedOutput(BaseModel): # Should already be implemented in your task\n", 51 | " keywords: Sequence[str]\n", 52 | "\n", 53 | "\n", 54 | "# Step 1\n", 55 | "examples = [\n", 56 | " Example(\n", 57 | " input=StoryTaskInput(topic=\"rain\", targeted_word_count=42),\n", 58 | " expected_output=StoryTaskExpectedOutput(keywords=[\"wet\"]),\n", 59 | " metadata={\n", 60 | " \"author\": \"Shakespeare\"\n", 61 | " }, # the metadata is optional and can contain custom information\n", 62 | " ),\n", 63 | " # ...\n", 64 | "]\n", 65 | "\n", 66 | "# Step 2 - Use FileDatasetRepository or HuggingFaceDatasetRepository for persistence\n", 67 | "dataset_repository = InMemoryDatasetRepository()\n", 68 | "\n", 69 | "# Step 3\n", 70 | "dataset = dataset_repository.create_dataset(\n", 71 | " examples=examples,\n", 72 | " dataset_name=\"StoryDataset\",\n", 73 | " labels=set([\"label1\", \"label2\"]),\n", 74 | " metadata=dict({\"key_a\": [\"a\", \"b\"], \"key_b\": \"value\"}),\n", 75 | ")\n", 76 | "\n", 77 | "# Step 4\n", 78 | "print(dataset.id)\n", 79 | "print(dataset.labels)\n", 80 | "print(dataset.metadata)" 81 | ] 82 | } 83 | ], 84 | "metadata": { 85 | "language_info": { 86 | "codemirror_mode": { 87 | "name": "ipython" 88 | }, 89 | "file_extension": ".py", 90 | "mimetype": "text/x-python", 91 | "name": "python", 92 | "nbconvert_exporter": "python" 93 | } 94 | }, 95 | "nbformat": 4, 96 | "nbformat_minor": 2 97 | } 98 | -------------------------------------------------------------------------------- /src/documentation/how_tos/how_to_define_a_task.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# How to define a task\n", 8 | "\n", 9 | "1. Think about what you want to do and define the requirements for your task\n", 10 | "2. Define the corresponding input and output in the form of Python classes\n", 11 | "3. Check if any existing task can be used to fulfill these requirements (see the [Use-case index](../../../README.md#use-case-index))\n", 12 | "4. Implement the task with the defined input and output types, see [How to implement a task](how_to_implement_a_task.ipynb) \n" 13 | ] 14 | }, 15 | { 16 | "cell_type": "markdown", 17 | "metadata": {}, 18 | "source": [ 19 | "### Example\n", 20 | "\n", 21 | "**Step 1**\n", 22 | "- I want the LLM to tell a joke about a specific topic\n", 23 | "- It should work for any topic\n", 24 | "- It should fail if there is no topic given by the user\n", 25 | "\n", 26 | "\n", 27 | "**Step 2**\n", 28 | "\n", 29 | "```python\n", 30 | "class TellAJokeTaskInput(BaseModel):\n", 31 | " topic: str\n", 32 | "\n", 33 | "class TellAJokeTaskOutput(BaseModel):\n", 34 | " joke: str\n", 35 | "```\n", 36 | "\n", 37 | "**Step 3**\n", 38 | "On first glance any of the QA tasks seem to fulfill the requirements. 
However, here only the topic for the joke should be specified by the user and the request to tell a joke should be handled by the task itself. \n", 39 | "\n", 40 | "\n", 41 | "**Step 4**\n", 42 | "```python\n", 43 | "class TellAJokeTask(Task[TellAJokeTaskInput, TellAJokeTaskOutput]):\n", 44 | " ...\n", 45 | "```" 46 | ] 47 | } 48 | ], 49 | "metadata": { 50 | "language_info": { 51 | "codemirror_mode": { 52 | "name": "ipython" 53 | }, 54 | "file_extension": ".py", 55 | "mimetype": "text/x-python", 56 | "name": "python", 57 | "nbconvert_exporter": "python" 58 | } 59 | }, 60 | "nbformat": 4, 61 | "nbformat_minor": 2 62 | } 63 | -------------------------------------------------------------------------------- /src/documentation/how_tos/how_to_evaluate_runs.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "from example_data import DummyEvaluationLogic, example_data\n", 10 | "\n", 11 | "from intelligence_layer.evaluation import Evaluator, InMemoryEvaluationRepository" 12 | ] 13 | }, 14 | { 15 | "cell_type": "markdown", 16 | "metadata": {}, 17 | "source": [ 18 | "# How to evaluate runs\n", 19 | "0. Run your tasks on the datasets where you want to evaluate them on (see [here](./how_to_run_a_task_on_a_dataset.ipynb))\n", 20 | " - When evaluating multiple runs, all of them need the same data types \n", 21 | "2. Initialize all necessary repositories for the `Evaluator`, and an `EvaluationLogic`.\n", 22 | "3. Run the evaluator to evaluate all examples and create a single `EvaluationOverview`\n", 23 | "4. (Optional) Save the evaluation id for later use" 24 | ] 25 | }, 26 | { 27 | "cell_type": "markdown", 28 | "metadata": {}, 29 | "source": [ 30 | "### Example" 31 | ] 32 | }, 33 | { 34 | "cell_type": "code", 35 | "execution_count": null, 36 | "metadata": {}, 37 | "outputs": [], 38 | "source": [ 39 | "# Step 0\n", 40 | "\n", 41 | "my_example_data = example_data()\n", 42 | "print()\n", 43 | "run_ids = [my_example_data.run_overview_1.id, my_example_data.run_overview_2.id]\n", 44 | "\n", 45 | "# Step 1\n", 46 | "dataset_repository = my_example_data.dataset_repository\n", 47 | "run_repository = my_example_data.run_repository\n", 48 | "evaluation_repository = InMemoryEvaluationRepository()\n", 49 | "evaluation_logic = DummyEvaluationLogic()\n", 50 | "\n", 51 | "# Step 3\n", 52 | "evaluator = Evaluator(\n", 53 | " dataset_repository,\n", 54 | " run_repository,\n", 55 | " evaluation_repository,\n", 56 | " \"My dummy evaluation\",\n", 57 | " evaluation_logic,\n", 58 | ")\n", 59 | "\n", 60 | "evaluation_overview = evaluator.evaluate_runs(\n", 61 | " *run_ids, labels=set({\"label\"}), metadata=dict({\"key\": \"value\"})\n", 62 | ")\n", 63 | "\n", 64 | "# Step 4\n", 65 | "print(evaluation_overview.id)\n", 66 | "print(evaluation_overview.metadata)\n", 67 | "print(evaluation_overview.labels)" 68 | ] 69 | } 70 | ], 71 | "metadata": { 72 | "language_info": { 73 | "codemirror_mode": { 74 | "name": "ipython" 75 | }, 76 | "file_extension": ".py", 77 | "mimetype": "text/x-python", 78 | "name": "python", 79 | "nbconvert_exporter": "python" 80 | } 81 | }, 82 | "nbformat": 4, 83 | "nbformat_minor": 2 84 | } 85 | -------------------------------------------------------------------------------- /src/documentation/how_tos/how_to_implement_elo_evaluations.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | 
"cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "from documentation.how_tos.example_data import DummyEloEvaluationLogic, example_data\n", 10 | "from intelligence_layer.evaluation import (\n", 11 | " IncrementalEvaluator,\n", 12 | " InMemoryEvaluationRepository,\n", 13 | ")" 14 | ] 15 | }, 16 | { 17 | "cell_type": "markdown", 18 | "metadata": {}, 19 | "source": [ 20 | "# How to implement elo evaluations\n", 21 | "0. Run your tasks on the datasets you want to evaluate (see [here](./how_to_run_a_task_on_a_dataset.ipynb))\n", 22 | " - When evaluating multiple runs, all of them need the same data types \n", 23 | "2. Initialize all necessary repositories for the `IncrementalEvaluator`, and an `EloEvaluationLogic` that is specific to your use case. \n", 24 | "3. Run the evaluator to evaluate all examples and create a single `EvaluationOverview`\n", 25 | "4. (Optional) Save the evaluation id for later use" 26 | ] 27 | }, 28 | { 29 | "cell_type": "markdown", 30 | "metadata": {}, 31 | "source": [ 32 | "### Example" 33 | ] 34 | }, 35 | { 36 | "cell_type": "code", 37 | "execution_count": null, 38 | "metadata": {}, 39 | "outputs": [], 40 | "source": [ 41 | "# Step 0\n", 42 | "\n", 43 | "\n", 44 | "my_example_data = example_data()\n", 45 | "print()\n", 46 | "run_ids = [my_example_data.run_overview_1.id, my_example_data.run_overview_2.id]\n", 47 | "\n", 48 | "# Step 1\n", 49 | "dataset_repository = my_example_data.dataset_repository\n", 50 | "run_repository = my_example_data.run_repository\n", 51 | "evaluation_repository = InMemoryEvaluationRepository()\n", 52 | "evaluation_logic = DummyEloEvaluationLogic()\n", 53 | "\n", 54 | "# Step 2\n", 55 | "evaluator = IncrementalEvaluator(\n", 56 | " dataset_repository,\n", 57 | " run_repository,\n", 58 | " evaluation_repository,\n", 59 | " \"My dummy evaluation\",\n", 60 | " evaluation_logic,\n", 61 | ")\n", 62 | "\n", 63 | "evaluation_overview = evaluator.evaluate_runs(*run_ids)\n", 64 | "\n", 65 | "# Step 3\n", 66 | "print(evaluation_overview.id)" 67 | ] 68 | } 69 | ], 70 | "metadata": { 71 | "language_info": { 72 | "codemirror_mode": { 73 | "name": "ipython" 74 | }, 75 | "file_extension": ".py", 76 | "mimetype": "text/x-python", 77 | "name": "python", 78 | "nbconvert_exporter": "python" 79 | } 80 | }, 81 | "nbformat": 4, 82 | "nbformat_minor": 2 83 | } 84 | -------------------------------------------------------------------------------- /src/documentation/how_tos/how_to_log_and_debug_a_task.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "import random\n", 10 | "from uuid import uuid4\n", 11 | "\n", 12 | "from aleph_alpha_client import Prompt\n", 13 | "from dotenv import load_dotenv\n", 14 | "\n", 15 | "from intelligence_layer.connectors import StudioClient\n", 16 | "from intelligence_layer.core import (\n", 17 | " CompleteInput,\n", 18 | " InMemoryTracer,\n", 19 | " LuminousControlModel,\n", 20 | " Task,\n", 21 | " TaskSpan,\n", 22 | ")\n", 23 | "\n", 24 | "load_dotenv()" 25 | ] 26 | }, 27 | { 28 | "cell_type": "markdown", 29 | "metadata": {}, 30 | "source": [ 31 | "# How to log and debug a task\n", 32 | "The Intelligence Layer offers logging and debugging via a `Tracer`. 
\n", 33 | "Here are several steps you can use to debug tasks with the trace feature:\n", 34 | "\n", 35 | "-----\n", 36 | "Most logging of a task (input, output, time) is done simply by inheriting from `Task`. This logs to a trace.\n", 37 | "\n", 38 | " - If you don't care about logging and tracing, use the `NoOpTracer`.\n", 39 | " - To create custom logging messages in a trace use `task_span.log()`.\n", 40 | " - To map a complex execution flow of a task into a single trace, pass the `task_span` of the `do_run` to other execution methods (e.g. `Task.run()` or `model.complete()`). \n", 41 | " - If the execution method is not provided by the intelligence layer, the tracing of input and output has to happen manually. See the implementation of `Task.run()` for an example.\n", 42 | " - Use the [submit trace functionality of the `StudioClient`](./how_to_use_studio_with_traces.ipynb) to view and inspect a trace in Studio" 43 | ] 44 | }, 45 | { 46 | "cell_type": "markdown", 47 | "metadata": {}, 48 | "source": [ 49 | "### Example" 50 | ] 51 | }, 52 | { 53 | "cell_type": "code", 54 | "execution_count": null, 55 | "metadata": {}, 56 | "outputs": [], 57 | "source": [ 58 | "class DummyTask(Task[str, str]):\n", 59 | " def __init__(self, model: LuminousControlModel | None = None) -> None:\n", 60 | " self._model = model if model else LuminousControlModel()\n", 61 | "\n", 62 | " def do_run(self, input: str, task_span: TaskSpan) -> str:\n", 63 | " should_output = random.random()\n", 64 | " # log a custom message and value\n", 65 | " task_span.log(\n", 66 | " \"My very important log message that logs a random value\", should_output\n", 67 | " )\n", 68 | " if should_output > 0.5:\n", 69 | " model_input = CompleteInput(prompt=Prompt.from_text(input), temperature=0.2)\n", 70 | " # Create a trace tree by passing task_span to .run or .complete methods.\n", 71 | " completion = self._model.complete(model_input, task_span)\n", 72 | " return completion.completions[0].completion\n", 73 | " else:\n", 74 | " return \"Nope!\"\n", 75 | "\n", 76 | "\n", 77 | "tracer = InMemoryTracer()\n", 78 | "DummyTask().run(\"\", tracer)\n", 79 | "\n", 80 | "project_name = str(uuid4())\n", 81 | "studio_client = StudioClient(project=project_name)\n", 82 | "my_project = studio_client.create_project(project=project_name)\n", 83 | "\n", 84 | "submitted_trace_id = studio_client.submit_from_tracer(tracer)\n", 85 | "\n", 86 | "\n", 87 | "pass" 88 | ] 89 | } 90 | ], 91 | "metadata": { 92 | "language_info": { 93 | "codemirror_mode": { 94 | "name": "ipython" 95 | }, 96 | "file_extension": ".py", 97 | "mimetype": "text/x-python", 98 | "name": "python", 99 | "nbconvert_exporter": "python" 100 | } 101 | }, 102 | "nbformat": 4, 103 | "nbformat_minor": 2 104 | } 105 | -------------------------------------------------------------------------------- /src/documentation/how_tos/how_to_resume_a_run_after_a_crash.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "import pytest\n", 10 | "from example_data import DummyTaskCanFail, example_data\n", 11 | "\n", 12 | "from intelligence_layer.evaluation.run.in_memory_run_repository import (\n", 13 | " InMemoryRunRepository,\n", 14 | ")\n", 15 | "from intelligence_layer.evaluation.run.runner import Runner\n", 16 | "\n", 17 | "my_example_data = example_data()\n", 18 | "\n", 19 | "dataset_repository = my_example_data.dataset_repository\n", 20 
| "run_repository = InMemoryRunRepository()\n", 21 | "task = DummyTaskCanFail()\n", 22 | "\n", 23 | "runner = Runner(task, dataset_repository, run_repository, \"MyRunDescription\")" 24 | ] 25 | }, 26 | { 27 | "cell_type": "markdown", 28 | "metadata": {}, 29 | "source": [ 30 | "# How to resume a run after a crash\n", 31 | "\n", 32 | "0. Run task on a dataset, see [here](./how_to_run_a_task_on_a_dataset.ipynb).\n", 33 | "1. A crash occurs.\n", 34 | "2. Re-run task on the same dataset with `resume_from_recovery_data` set to `True`." 35 | ] 36 | }, 37 | { 38 | "cell_type": "code", 39 | "execution_count": null, 40 | "metadata": {}, 41 | "outputs": [], 42 | "source": [ 43 | "# Steps 0 & 1: Run task for dataset\n", 44 | "with pytest.raises(Exception): # noqa: B017\n", 45 | " run_overview = runner.run_dataset(my_example_data.dataset.id, abort_on_error=True)" 46 | ] 47 | }, 48 | { 49 | "cell_type": "markdown", 50 | "metadata": {}, 51 | "source": [ 52 | "A failure has occurred. Note, this might be a crash of the computer or an unexpected uncaught exception. \n", 53 | "\n", 54 | "For demonstration purposes, we set `abort_on_error=True`, such that an exception is raised. Further, we catch the exception for purely technical reasons of our CI. Feel free to remove the pytest scope on your local setup when running this notebook.\n", 55 | "\n", 56 | "Even though the run crashed, the `RunRepository` stores recovery data and is able to continue `run_dataset` by setting `resume_from_recovery_data` to `True`. This way, the already successfully calculated outputs do not have to be re-calculated again, and only the missing examples are processed:" 57 | ] 58 | }, 59 | { 60 | "cell_type": "code", 61 | "execution_count": null, 62 | "metadata": {}, 63 | "outputs": [], 64 | "source": [ 65 | "# Step 2: Re-run the same run with `resume_from_recovery_data` enabled\n", 66 | "run_overview = runner.run_dataset(\n", 67 | " my_example_data.dataset.id, abort_on_error=True, resume_from_recovery_data=True\n", 68 | ")" 69 | ] 70 | }, 71 | { 72 | "cell_type": "code", 73 | "execution_count": null, 74 | "metadata": {}, 75 | "outputs": [], 76 | "source": [ 77 | "print(run_overview)" 78 | ] 79 | }, 80 | { 81 | "cell_type": "markdown", 82 | "metadata": {}, 83 | "source": [ 84 | "Note: The `FileSystemRepository` persists the recovery data in the file system. The run can therefore be resumed even in case of a complete program or even computer crash. \n", 85 | "\n", 86 | "On the other hand, the `InMemoryRunRepository` retains the recovery data only as long as the repository resides in computer memory. A crash of the process will lead to the loss of the recovery data. In that case, all examples will have to be recalculated." 
87 | ] 88 | } 89 | ], 90 | "metadata": { 91 | "language_info": { 92 | "codemirror_mode": { 93 | "name": "ipython" 94 | }, 95 | "file_extension": ".py", 96 | "mimetype": "text/x-python", 97 | "name": "python", 98 | "nbconvert_exporter": "python" 99 | } 100 | }, 101 | "nbformat": 4, 102 | "nbformat_minor": 2 103 | } 104 | -------------------------------------------------------------------------------- /src/documentation/how_tos/how_to_run_a_task_on_a_dataset.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "from example_data import DummyTask, example_data\n", 10 | "\n", 11 | "from intelligence_layer.evaluation.run.in_memory_run_repository import (\n", 12 | " InMemoryRunRepository,\n", 13 | ")\n", 14 | "from intelligence_layer.evaluation.run.runner import Runner" 15 | ] 16 | }, 17 | { 18 | "cell_type": "markdown", 19 | "metadata": {}, 20 | "source": [ 21 | "# How to run a task on a dataset\n", 22 | "0. Create a suitable dataset (see [here](./how_to_create_a_dataset.ipynb)) and a task (see [here](./how_to_implement_a_task.ipynb)).\n", 23 | "1. Initialize the task and a `RunRepository`, and open the correct `DatasetRepository`\n", 24 | " - The `DatasetRepository` needs to contain the dataset.\n", 25 | " - The `RunRepository` stores results.\n", 26 | "2. Use the `Runner` to run the task on the given dataset via `run_dataset`\n", 27 | "3. Save the id of the resulting `RunOverview`\n", 28 | "\n", 29 | "### Example" 30 | ] 31 | }, 32 | { 33 | "cell_type": "code", 34 | "execution_count": null, 35 | "metadata": {}, 36 | "outputs": [], 37 | "source": [ 38 | "# Step 0\n", 39 | "my_example_data = example_data()\n", 40 | "print()\n", 41 | "\n", 42 | "# Step 1\n", 43 | "dataset_repository = my_example_data.dataset_repository\n", 44 | "run_repository = InMemoryRunRepository()\n", 45 | "task = DummyTask()\n", 46 | "\n", 47 | "# Step 2\n", 48 | "runner = Runner(task, dataset_repository, run_repository, \"MyRunDescription\")\n", 49 | "run_overview = runner.run_dataset(my_example_data.dataset.id)\n", 50 | "\n", 51 | "# Step 3\n", 52 | "print(run_overview.id)" 53 | ] 54 | } 55 | ], 56 | "metadata": { 57 | "language_info": { 58 | "codemirror_mode": { 59 | "name": "ipython" 60 | }, 61 | "file_extension": ".py", 62 | "mimetype": "text/x-python", 63 | "name": "python", 64 | "nbconvert_exporter": "python" 65 | } 66 | }, 67 | "nbformat": 4, 68 | "nbformat_minor": 2 69 | } 70 | -------------------------------------------------------------------------------- /src/documentation/how_tos/studio/how_to_execute_a_benchmark.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "from uuid import uuid4\n", 10 | "\n", 11 | "from dotenv import load_dotenv\n", 12 | "\n", 13 | "from documentation.how_tos.example_data import (\n", 14 | " ComplexDummyAggregationLogic,\n", 15 | " ComplexDummyEvaluationLogic,\n", 16 | " ComplexDummyTask,\n", 17 | " example_data,\n", 18 | ")\n", 19 | "from intelligence_layer.connectors import StudioClient\n", 20 | "from intelligence_layer.evaluation import (\n", 21 | " StudioBenchmarkRepository,\n", 22 | " StudioDatasetRepository,\n", 23 | ")\n", 24 | "\n", 25 | "load_dotenv()\n", 26 | "my_example_data = example_data()\n", 27 | "examples = 
my_example_data.complex_examples" 28 | ] 29 | }, 30 | { 31 | "cell_type": "markdown", 32 | "metadata": {}, 33 | "source": [ 34 | "# How to execute Benchmarks\n", 35 | "
\n", 36 | "\n", 37 | "Make sure your account has permissions to use the Studio application.\n", 38 | "\n", 39 | "For an on-prem or local installation, please contact the corresponding team.\n", 40 | "
" 41 | ] 42 | }, 43 | { 44 | "cell_type": "markdown", 45 | "metadata": {}, 46 | "source": [ 47 | "0. Initialize a `StudioClient` with a project.\n", 48 | " - Use an existing project or create a new one with the `StudioClient.create_project` function.\n", 49 | " \n", 50 | "1. Create a `StudioDatasetRepository` and create a new `Dataset` via `StudioDatasetRepository.create_dataset`, which will automatically upload this new `Dataset` to Studio.\n", 51 | "\n", 52 | "2. Create a `StudioBenchmarkRepository` and instantiate a benchmark with your `evaluation_logic` and `aggregation_logic` using the `create_benchmark` function.\n", 53 | "\n", 54 | "3. Execute the `Benchmark` with your initialized `Task`\n", 55 | "\n", 56 | "### Example" 57 | ] 58 | }, 59 | { 60 | "cell_type": "code", 61 | "execution_count": null, 62 | "metadata": {}, 63 | "outputs": [], 64 | "source": [ 65 | "# Step 0\n", 66 | "\n", 67 | "studio_client = StudioClient(\n", 68 | " project=my_example_data.studio_project_name, create_project=True\n", 69 | ")\n", 70 | "\n", 71 | "# Step 1\n", 72 | "studio_dataset_repository = StudioDatasetRepository(studio_client)\n", 73 | "dataset = studio_dataset_repository.create_dataset(examples, \"my_dataset\")\n", 74 | "\n", 75 | "# Step 2\n", 76 | "studio_benchmark_repository = StudioBenchmarkRepository(studio_client)\n", 77 | "evaluation_logic = ComplexDummyEvaluationLogic()\n", 78 | "aggregation_logic = ComplexDummyAggregationLogic()\n", 79 | "benchmark = studio_benchmark_repository.create_benchmark(\n", 80 | " dataset.id, evaluation_logic, aggregation_logic, f\"my_benchmark-{uuid4()}\"\n", 81 | ")\n", 82 | "\n", 83 | "# Step 3\n", 84 | "\n", 85 | "task = ComplexDummyTask()\n", 86 | "benchmark.execute(task, \"my_task\")" 87 | ] 88 | } 89 | ], 90 | "metadata": { 91 | "language_info": { 92 | "codemirror_mode": { 93 | "name": "ipython" 94 | }, 95 | "file_extension": ".py", 96 | "mimetype": "text/x-python", 97 | "name": "python", 98 | "nbconvert_exporter": "python" 99 | } 100 | }, 101 | "nbformat": 4, 102 | "nbformat_minor": 2 103 | } 104 | -------------------------------------------------------------------------------- /src/documentation/how_tos/studio/how_to_upload_existing_datasets_to_studio.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "from dotenv import load_dotenv\n", 10 | "\n", 11 | "from documentation.how_tos.example_data import example_data\n", 12 | "from intelligence_layer.connectors import StudioClient\n", 13 | "from intelligence_layer.evaluation.dataset.studio_dataset_repository import (\n", 14 | " StudioDatasetRepository,\n", 15 | ")\n", 16 | "\n", 17 | "load_dotenv()\n", 18 | "\n", 19 | "my_example_data = example_data()" 20 | ] 21 | }, 22 | { 23 | "cell_type": "markdown", 24 | "metadata": {}, 25 | "source": [ 26 | "# How to upload (existing) datasets to Studio\n", 27 | "
\n", 28 | "\n", 29 | "Make sure your account has permissions to use the Studio application.\n", 30 | "\n", 31 | "For an on-prem or local installation, please contact the corresponding team.\n", 32 | "
" 33 | ] 34 | }, 35 | { 36 | "cell_type": "markdown", 37 | "metadata": {}, 38 | "source": [ 39 | "\n", 40 | "0. Extract `Dataset` and `Examples` from your `DatasetRepository`.\n", 41 | "\n", 42 | "1. Initialize a `StudioClient` with a project.\n", 43 | " - Use an existing project or create a new one with the `StudioClient.create_project` function.\n", 44 | " \n", 45 | "2. Create a `StudioDatasetRepository` and create a new `Dataset` via `StudioDatasetRepository.create_dataset`, which will automatically upload this new `Dataset` to Studio.\n", 46 | "\n", 47 | "### Example" 48 | ] 49 | }, 50 | { 51 | "cell_type": "code", 52 | "execution_count": null, 53 | "metadata": {}, 54 | "outputs": [], 55 | "source": [ 56 | "# Step 0\n", 57 | "existing_dataset_repo = my_example_data.dataset_repository\n", 58 | "\n", 59 | "existing_dataset = existing_dataset_repo.dataset(dataset_id=my_example_data.dataset.id)\n", 60 | "assert existing_dataset, \"Make sure your dataset still exists.\"\n", 61 | "\n", 62 | "existing_examples = existing_dataset_repo.examples(\n", 63 | " existing_dataset.id, input_type=str, expected_output_type=str\n", 64 | ")\n", 65 | "\n", 66 | "# Step 1\n", 67 | "studio_client = StudioClient(\n", 68 | " project=my_example_data.studio_project_name,\n", 69 | " create_project=True,\n", 70 | ")\n", 71 | "\n", 72 | "# Step 2\n", 73 | "studio_dataset_repo = StudioDatasetRepository(studio_client=studio_client)\n", 74 | "\n", 75 | "studio_dataset = studio_dataset_repo.create_dataset(\n", 76 | " examples=existing_examples,\n", 77 | " dataset_name=existing_dataset.name,\n", 78 | " labels=existing_dataset.labels,\n", 79 | " metadata=existing_dataset.metadata,\n", 80 | ")" 81 | ] 82 | } 83 | ], 84 | "metadata": { 85 | "language_info": { 86 | "codemirror_mode": { 87 | "name": "ipython" 88 | }, 89 | "file_extension": ".py", 90 | "mimetype": "text/x-python", 91 | "name": "python", 92 | "nbconvert_exporter": "python" 93 | } 94 | }, 95 | "nbformat": 4, 96 | "nbformat_minor": 2 97 | } 98 | -------------------------------------------------------------------------------- /src/documentation/how_tos/studio/how_to_use_studio_with_traces.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "from dotenv import load_dotenv\n", 10 | "\n", 11 | "from documentation.how_tos.example_data import DummyTask, example_data\n", 12 | "from intelligence_layer.connectors import StudioClient\n", 13 | "from intelligence_layer.core import InMemoryTracer\n", 14 | "\n", 15 | "load_dotenv()\n", 16 | "\n", 17 | "my_example_data = example_data()" 18 | ] 19 | }, 20 | { 21 | "cell_type": "markdown", 22 | "metadata": {}, 23 | "source": [ 24 | "# How to use Studio for Debugging in a SaaS Configuration\n", 25 | "
\n", 26 | "\n", 27 | "Make sure your account has permissions to use the Studio application.\n", 28 | "\n", 29 | "For an on-prem or local installation, please contact the corresponding team.\n", 30 | "
" 31 | ] 32 | }, 33 | { 34 | "cell_type": "markdown", 35 | "metadata": {}, 36 | "source": [ 37 | "\n", 38 | "0. Generate a trace of your `Task` of interest.\n", 39 | "1. Initialize a `StudioClient` with a project.\n", 40 | " - Use an existing project or create a new one with the `StudioClient.create_project` function.\n", 41 | "2. Submit your traces with the client\n", 42 | " 1. Submit a single trace via `Tracer.export_for_viewing` and `StudioClient.submit_trace`\n", 43 | " 2. [Recommended] submit multiple traces via `StudioClient.submit_from_tracer`. \n", 44 | "\n", 45 | "### Example" 46 | ] 47 | }, 48 | { 49 | "cell_type": "code", 50 | "execution_count": null, 51 | "metadata": {}, 52 | "outputs": [], 53 | "source": [ 54 | "# Step 0\n", 55 | "tracer = InMemoryTracer()\n", 56 | "DummyTask().run(\"My Dummy Run\", tracer=tracer)\n", 57 | "\n", 58 | "# Step 1\n", 59 | "studio_client = StudioClient(\n", 60 | " project=my_example_data.studio_project_name, create_project=True\n", 61 | ")\n", 62 | "\n", 63 | "# Step 2.1\n", 64 | "trace_to_submit = tracer.export_for_viewing()\n", 65 | "trace_id = studio_client.submit_trace(trace_to_submit) # only works for single traces\n", 66 | "\n", 67 | "# Step 2.2\n", 68 | "tracer2 = InMemoryTracer()\n", 69 | "DummyTask().run(\"My Dummy Run2\", tracer=tracer2)\n", 70 | "DummyTask().run(\"My Dummy Run3\", tracer=tracer2)\n", 71 | "ids_of_submitted_traces = studio_client.submit_from_tracer(tracer2)" 72 | ] 73 | } 74 | ], 75 | "metadata": { 76 | "language_info": { 77 | "codemirror_mode": { 78 | "name": "ipython" 79 | }, 80 | "file_extension": ".py", 81 | "mimetype": "text/x-python", 82 | "name": "python", 83 | "nbconvert_exporter": "python" 84 | } 85 | }, 86 | "nbformat": 4, 87 | "nbformat_minor": 2 88 | } 89 | -------------------------------------------------------------------------------- /src/intelligence_layer/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Aleph-Alpha/intelligence-layer-sdk/261ab36a28977d91e2d0c8ca16039edcbab81925/src/intelligence_layer/__init__.py -------------------------------------------------------------------------------- /src/intelligence_layer/connectors/base/json_serializable.py: -------------------------------------------------------------------------------- 1 | from collections.abc import Mapping, Sequence 2 | from typing import TYPE_CHECKING 3 | 4 | from typing_extensions import TypeAliasType 5 | 6 | if TYPE_CHECKING: 7 | JsonSerializable = ( 8 | int 9 | | float 10 | | str 11 | | None 12 | | bool 13 | | Sequence["JsonSerializable"] 14 | | Mapping[str, "JsonSerializable"] 15 | ) 16 | else: 17 | JsonSerializable = TypeAliasType( 18 | "JsonSerializable", 19 | int 20 | | float 21 | | str 22 | | None 23 | | bool 24 | | Sequence["JsonSerializable"] 25 | | Mapping[str, "JsonSerializable"], 26 | ) 27 | 28 | SerializableDict = dict[str, JsonSerializable] 29 | -------------------------------------------------------------------------------- /src/intelligence_layer/connectors/data/__init__.py: -------------------------------------------------------------------------------- 1 | from .data import DataClient 2 | from .exceptions import ( 3 | DataExternalServiceUnavailable, 4 | DataForbiddenError, 5 | DataInternalError, 6 | DataInvalidInput, 7 | DataResourceNotFound, 8 | ) 9 | from .models import ( 10 | DataDataset, 11 | DataFile, 12 | DataFileCreate, 13 | DataRepository, 14 | DataRepositoryCreate, 15 | DatasetCreate, 16 | DataStage, 17 | DataStageCreate, 18 | ) 19 | 
20 | __all__ = [ 21 | "DataClient", 22 | "DataInternalError", 23 | "DataExternalServiceUnavailable", 24 | "DataForbiddenError", 25 | "DataInvalidInput", 26 | "DataResourceNotFound", 27 | "DataRepository", 28 | "DataRepositoryCreate", 29 | "DataDataset", 30 | "DatasetCreate", 31 | "DataStage", 32 | "DataStageCreate", 33 | "DataFile", 34 | "DataFileCreate", 35 | ] 36 | -------------------------------------------------------------------------------- /src/intelligence_layer/connectors/data/exceptions.py: -------------------------------------------------------------------------------- 1 | class DataError(Exception): 2 | """Base class for exceptions in this module.""" 3 | 4 | def __init__(self, *args: object) -> None: 5 | default_message = getattr(self, "DEFAULT_MESSAGE", "") 6 | super().__init__(default_message, *args) 7 | 8 | 9 | class DataInternalError(DataError): 10 | """Exception raised when an internal error occurs.""" 11 | 12 | DEFAULT_MESSAGE = "Internal error: An unexpected error occurred. " 13 | 14 | 15 | class DataResourceNotFound(DataError): 16 | """Exception raised when a resource is not found.""" 17 | 18 | DEFAULT_MESSAGE = "Resource not found: The requested resource was not found. " 19 | 20 | 21 | class DataInvalidInput(DataError): 22 | """Exception raised when the input is invalid.""" 23 | 24 | DEFAULT_MESSAGE = "Invalid input: The input provided is invalid. " 25 | 26 | 27 | class DataExternalServiceUnavailable(DataError): 28 | """Exception raised when an external service is unavailable.""" 29 | 30 | DEFAULT_MESSAGE = ( 31 | "External service unavailable: The external service is unavailable. " 32 | ) 33 | 34 | 35 | class DataForbiddenError(DataError): 36 | """Exception raised when a forbidden error occurs.""" 37 | 38 | DEFAULT_MESSAGE = ( 39 | "Forbidden error: Client does not have permission to access the resource. " 40 | ) 41 | -------------------------------------------------------------------------------- /src/intelligence_layer/connectors/kernel/kernel.py: -------------------------------------------------------------------------------- 1 | from os import getenv 2 | from typing import TypeVar 3 | 4 | import requests 5 | from pydantic import BaseModel 6 | 7 | from intelligence_layer.core import Task, TaskSpan 8 | 9 | Input = TypeVar("Input", bound=BaseModel) 10 | """Interface to be passed to the task with all data needed to run the process. 11 | Ideally, these are specified in terms related to the use-case, rather than lower-level 12 | configuration options.""" 13 | Output = TypeVar("Output", bound=BaseModel) 14 | """Interface of the output returned by the task.""" 15 | 16 | 17 | class KernelTask(Task[Input, Output]): 18 | """A Task that can call a Skill within the Kernel. 19 | 20 | Note: this will not support full tracing in the Intelligence Layer, 21 | but it will allow passing a Kernel Skill as a subtask to a larger 22 | workflow, or allow for passing it to the Evaluation tooling. 23 | 24 | Args: 25 | skill: The name of the skill deployed in Pharia Kernel that should be called. 26 | input_model: The type for the Pydantic model that should be used for serializing the input. 27 | output_model: The type for the Pydantic model that should be used for deserializing the output. 28 | host: The URL to use for accessing Pharia Kernel. Defaults to the env variable `PHARIA_KERNEL_URL` if not provided. 29 | token: The auth token to use for accessing Pharia Kernel. Defaults to the env variable `AA_TOKEN` if not provided. 
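        Example:
            A minimal sketch; the skill name and the Pydantic models below are
            illustrative placeholders, not part of the SDK or of any deployed
            Kernel skill:

                class GreetInput(BaseModel):
                    name: str

                class GreetOutput(BaseModel):
                    greeting: str

                # Calls the (hypothetical) skill "playground/greet" via the Kernel.
                greet_task = KernelTask(
                    skill="playground/greet",
                    input_model=GreetInput,
                    output_model=GreetOutput,
                )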
30 | """ 31 | 32 | def __init__( 33 | self, 34 | skill: str, 35 | input_model: type[Input], 36 | output_model: type[Output], 37 | host: str | None = None, 38 | token: str | None = None, 39 | ): 40 | if host is None: 41 | host = getenv("PHARIA_KERNEL_URL") 42 | assert host, "Define PHARIA_KERNEL_URL with a valid url pointing towards your Pharia Kernel API." 43 | if token is None: 44 | token = getenv("AA_TOKEN") 45 | assert token, "Define environment variable AA_TOKEN with a valid token for the Aleph Alpha API" 46 | 47 | self.skill = skill 48 | self.input_model = input_model 49 | self.output_model = output_model 50 | self.host = host 51 | self.session = requests.Session() 52 | self.session.headers = {"Authorization": f"Bearer {token}"} 53 | 54 | def __del__(self): 55 | if self.session: 56 | self.session.close() 57 | 58 | def do_run(self, input: Input, task_span: TaskSpan) -> Output: 59 | response = self.session.post( 60 | f"{self.host}/v1/skills/{self.skill}/run", 61 | json=input.model_dump(), 62 | ) 63 | 64 | if response.status_code != 200: 65 | raise Exception(f"{response.status_code}: {response.text}") 66 | 67 | return self.output_model(**response.json()) 68 | -------------------------------------------------------------------------------- /src/intelligence_layer/connectors/retrievers/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Aleph-Alpha/intelligence-layer-sdk/261ab36a28977d91e2d0c8ca16039edcbab81925/src/intelligence_layer/connectors/retrievers/__init__.py -------------------------------------------------------------------------------- /src/intelligence_layer/connectors/retrievers/base_retriever.py: -------------------------------------------------------------------------------- 1 | from abc import ABC, abstractmethod 2 | from collections.abc import Sequence 3 | from typing import Any, Generic, Optional, TypeVar 4 | 5 | from pydantic import BaseModel 6 | 7 | 8 | class Document(BaseModel): 9 | """A document. 10 | 11 | Attributes: 12 | text: The document's text. 13 | metadata: Any metadata added to the document. 14 | """ 15 | 16 | text: str 17 | metadata: Any = None 18 | 19 | 20 | class DocumentChunk(BaseModel): 21 | """Part of a :class:`Document`, specifically for retrieval use cases. 22 | 23 | Attributes: 24 | text: Chunk of the document that matched the search query. 25 | metadata: Any metadata added to the document. 26 | start: Start index of the chunk within the document 27 | end: End index of the chunk within the document 28 | """ 29 | 30 | text: str 31 | start: int 32 | end: int 33 | metadata: Any = None 34 | 35 | 36 | ID = TypeVar("ID") 37 | 38 | 39 | class SearchResult(BaseModel, Generic[ID]): 40 | """Contains a text alongside its search score. 41 | 42 | Attributes: 43 | id: Unique identifier of the document 44 | score: The similarity score between the text and the query that was searched with. 45 | Will be between 0 and 1, where 0 means no similarity and 1 perfect similarity. 46 | document_chunk: The document chunk found by search. 47 | """ 48 | 49 | id: ID 50 | score: float 51 | document_chunk: DocumentChunk 52 | 53 | 54 | class BaseRetriever(ABC, Generic[ID]): 55 | """General interface for any retriever. 56 | 57 | Retrievers are used to find texts given a user query. 58 | Each Retriever implementation owns its own logic for retrieval. 59 | For comparison purposes, we assume scores in the `SearchResult` instances to be between 0 and 1. 
60 | """ 61 | 62 | @abstractmethod 63 | def get_relevant_documents_with_scores( 64 | self, query: str 65 | ) -> Sequence[SearchResult[ID]]: 66 | pass 67 | 68 | @abstractmethod 69 | def get_full_document(self, id: ID) -> Optional[Document]: 70 | pass 71 | 72 | 73 | class AsyncBaseRetriever(ABC, Generic[ID]): 74 | """General interface for any asynchronous retriever. 75 | 76 | Asynchronous retrievers are used to find texts given a user query. 77 | Each Retriever implementation owns its own logic for retrieval. 78 | For comparison purposes, we assume scores in the `SearchResult` instances to be between 0 and 1. 79 | """ 80 | 81 | @abstractmethod 82 | async def get_relevant_documents_with_scores( 83 | self, query: str 84 | ) -> Sequence[SearchResult[ID]]: 85 | pass 86 | 87 | @abstractmethod 88 | async def get_full_document(self, id: ID) -> Optional[Document]: 89 | pass 90 | -------------------------------------------------------------------------------- /src/intelligence_layer/core/chunk.py: -------------------------------------------------------------------------------- 1 | from collections.abc import Sequence 2 | from typing import NewType 3 | 4 | from pydantic import BaseModel 5 | from semantic_text_splitter import TextSplitter 6 | 7 | from intelligence_layer.core.model import AlephAlphaModel 8 | from intelligence_layer.core.task import Task 9 | from intelligence_layer.core.tracer.tracer import TaskSpan 10 | 11 | TextChunk = NewType("TextChunk", str) 12 | """Segment of a larger text. 13 | 14 | This type infers that the string is smaller than the context size of the model where it is used. 15 | 16 | LLMs can't process documents larger than their context size. 17 | To handle this, documents have to be split up into smaller segments that fit within their context size. 18 | These smaller segments are referred to as chunks. 19 | """ 20 | 21 | 22 | class ChunkInput(BaseModel): 23 | """The input for a `Chunk`-task. 24 | 25 | Attributes: 26 | text: A text of arbitrary length. 27 | """ 28 | 29 | text: str 30 | 31 | 32 | class ChunkOutput(BaseModel): 33 | """The output of a `ChunkTask`. 34 | 35 | Attributes: 36 | chunks: A list of smaller sections of the input text. 37 | """ 38 | 39 | chunks: Sequence[TextChunk] 40 | 41 | 42 | class Chunk(Task[ChunkInput, ChunkOutput]): 43 | """Splits a longer text into smaller text chunks. 44 | 45 | Provide a text of any length and chunk it into smaller pieces using a 46 | tokenizer that is available within the Aleph Alpha client. 47 | 48 | Args: 49 | model: A valid Aleph Alpha model. 50 | max_tokens_per_chunk: The maximum number of tokens to fit into one chunk. 51 | """ 52 | 53 | def __init__(self, model: AlephAlphaModel, max_tokens_per_chunk: int = 512): 54 | super().__init__() 55 | self._splitter = TextSplitter.from_huggingface_tokenizer( 56 | model.get_tokenizer(), capacity=max_tokens_per_chunk 57 | ) 58 | 59 | def do_run(self, input: ChunkInput, task_span: TaskSpan) -> ChunkOutput: 60 | chunks = [TextChunk(t) for t in self._splitter.chunks(input.text)] 61 | return ChunkOutput(chunks=chunks) 62 | 63 | 64 | class ChunkWithStartEndIndices(BaseModel, frozen=True): 65 | """A `TextChunk` and its `start_index` and `end_index` within the given text. 66 | 67 | Attributes: 68 | chunk: The actual text. 69 | start_index: The character start index of the chunk within the given text. 70 | end_index: The character end index of the chunk within the given text. 
71 | """ 72 | 73 | chunk: TextChunk 74 | start_index: int 75 | end_index: int 76 | 77 | 78 | class ChunkWithIndicesOutput(BaseModel): 79 | """The output of a `ChunkWithIndices`-task. 80 | 81 | Attributes: 82 | chunks_with_indices: A list of smaller sections of the input text with the respective start_index. 83 | """ 84 | 85 | chunks_with_indices: Sequence[ChunkWithStartEndIndices] 86 | 87 | 88 | class ChunkWithIndices(Task[ChunkInput, ChunkWithIndicesOutput]): 89 | """Splits a longer text into smaller text chunks and returns the chunks' start indices. 90 | 91 | Provide a text of any length and chunk it into smaller pieces using a 92 | tokenizer that is available within the Aleph Alpha client. For each chunk, the respective 93 | start index relative to the document is also returned. 94 | 95 | Args: 96 | model: A valid Aleph Alpha model. 97 | max_tokens_per_chunk: The maximum number of tokens to fit into one chunk. 98 | """ 99 | 100 | def __init__(self, model: AlephAlphaModel, max_tokens_per_chunk: int = 512): 101 | super().__init__() 102 | self._splitter = TextSplitter.from_huggingface_tokenizer( 103 | model.get_tokenizer(), capacity=max_tokens_per_chunk, trim=False 104 | ) 105 | 106 | def do_run(self, input: ChunkInput, task_span: TaskSpan) -> ChunkWithIndicesOutput: 107 | chunks_with_indices = [ 108 | ChunkWithStartEndIndices( 109 | chunk=TextChunk(chunk), 110 | start_index=start_index, 111 | end_index=start_index + len(chunk), 112 | ) 113 | for (start_index, chunk) in self._splitter.chunk_indices(input.text) 114 | ] 115 | return ChunkWithIndicesOutput(chunks_with_indices=chunks_with_indices) 116 | -------------------------------------------------------------------------------- /src/intelligence_layer/core/echo.py: -------------------------------------------------------------------------------- 1 | from collections.abc import Sequence 2 | from typing import NewType 3 | 4 | from aleph_alpha_client import Prompt, Text 5 | from pydantic import BaseModel 6 | 7 | from intelligence_layer.core.model import AlephAlphaModel 8 | from intelligence_layer.core.task import Task, Token 9 | from intelligence_layer.core.tracer.tracer import TaskSpan 10 | 11 | LogProb = NewType("LogProb", float) 12 | 13 | 14 | class TokenWithLogProb(BaseModel): 15 | token: Token 16 | prob: LogProb 17 | 18 | 19 | class EchoInput(BaseModel): 20 | """The input for an `Echo` task. 21 | 22 | Attributes: 23 | prompt: The input text that serves as the starting point for the LLM. 24 | expected_completion: The desired completion based on the prompt. 25 | The likelihood of the tokens in this will be examined. 26 | """ 27 | 28 | prompt: Prompt 29 | expected_completion: str 30 | 31 | 32 | class EchoOutput(BaseModel): 33 | """The output of an `Echo` task. 34 | 35 | Attributes: 36 | tokens_with_log_probs: Every token of the `expected_completion` of the 37 | `EchoInput` accompanied by its probability of having been generated 38 | in a completion scenario. 39 | """ 40 | 41 | tokens_with_log_probs: Sequence[TokenWithLogProb] 42 | 43 | 44 | class Echo(Task[EchoInput, EchoOutput]): 45 | """Task that returns probabilities of a completion given a prompt. 46 | 47 | Analyzes the likelihood of generating tokens in the expected completion based on 48 | a given prompt and model. Does not generate any tokens. 49 | 50 | Args: 51 | model: A model to use in the task. 
52 | 53 | Example: 54 | >>> from aleph_alpha_client import Prompt 55 | >>> from intelligence_layer.core import Echo, EchoInput, InMemoryTracer, LuminousControlModel 56 | 57 | >>> model = LuminousControlModel(name="luminous-base-control") 58 | >>> task = Echo(model) 59 | >>> input = EchoInput( 60 | ... prompt=Prompt.from_text("This is a "), 61 | ... expected_completion="happy text", 62 | ... ) 63 | >>> tracer = InMemoryTracer() 64 | >>> output = task.run(input, tracer) 65 | """ 66 | 67 | PROMPT_TEMPLATE_STR: str = "{{prompt}}{{expected_completion}}" 68 | 69 | def __init__(self, model: AlephAlphaModel) -> None: 70 | super().__init__() 71 | self._model = model 72 | 73 | def do_run(self, input: EchoInput, task_span: TaskSpan) -> EchoOutput: 74 | if len(input.prompt.items) != 1: 75 | raise NotImplementedError( 76 | "`Echo` currently only supports prompts with one item." 77 | ) 78 | 79 | if not isinstance(input.prompt.items[0], Text): 80 | raise NotImplementedError( 81 | "`Echo` currently only supports prompts that are of type `Text`." 82 | ) 83 | 84 | echo_output = self._model.echo( 85 | input.prompt.items[0].text, input.expected_completion, task_span 86 | ) 87 | 88 | tokens_with_prob = [ 89 | TokenWithLogProb( 90 | token=token, 91 | prob=LogProb(log_prob or 0.0), 92 | ) 93 | for token, log_prob in echo_output 94 | ] 95 | return EchoOutput(tokens_with_log_probs=tokens_with_prob) 96 | -------------------------------------------------------------------------------- /src/intelligence_layer/core/instruct.py: -------------------------------------------------------------------------------- 1 | from typing import Optional 2 | 3 | from pydantic import BaseModel 4 | 5 | from intelligence_layer.core.model import CompleteInput, CompleteOutput, ControlModel 6 | from intelligence_layer.core.task import Task 7 | from intelligence_layer.core.tracer.tracer import TaskSpan 8 | 9 | 10 | class InstructInput(BaseModel): 11 | instruction: str 12 | input: Optional[str] = None 13 | response_prefix: Optional[str] = None 14 | maximum_tokens: int = 128 15 | 16 | 17 | class Instruct(Task[InstructInput, CompleteOutput]): 18 | def __init__(self, model: ControlModel) -> None: 19 | super().__init__() 20 | self._model = model 21 | 22 | def do_run(self, input: InstructInput, task_span: TaskSpan) -> CompleteOutput: 23 | prompt = self._model.to_instruct_prompt( 24 | instruction=input.instruction, 25 | input=input.input, 26 | response_prefix=input.response_prefix, 27 | ) 28 | return self._model.complete( 29 | CompleteInput(prompt=prompt, maximum_tokens=input.maximum_tokens), task_span 30 | ) 31 | -------------------------------------------------------------------------------- /src/intelligence_layer/core/tracer/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Aleph-Alpha/intelligence-layer-sdk/261ab36a28977d91e2d0c8ca16039edcbab81925/src/intelligence_layer/core/tracer/__init__.py -------------------------------------------------------------------------------- /src/intelligence_layer/core/tracer/file_tracer.py: -------------------------------------------------------------------------------- 1 | from datetime import datetime 2 | from json import loads 3 | from pathlib import Path 4 | from typing import Optional 5 | from uuid import UUID 6 | 7 | from pydantic import BaseModel 8 | 9 | from intelligence_layer.core.tracer.in_memory_tracer import InMemoryTracer 10 | from intelligence_layer.core.tracer.persistent_tracer import ( 11 | LogLine, 12 | 
PersistentSpan, 13 | PersistentTaskSpan, 14 | PersistentTracer, 15 | ) 16 | from intelligence_layer.core.tracer.tracer import Context, PydanticSerializable 17 | 18 | 19 | class FileTracer(PersistentTracer): 20 | """A `Tracer` that logs to a file. 21 | 22 | Each log-entry is represented by a JSON object. The information logged allows 23 | to reconstruct the hierarchical nature of the logs, i.e. all entries have a 24 | _pointer_ to its parent element in form of a parent attribute containing 25 | the uuid of the parent. 26 | 27 | Args: 28 | log_file_path: Denotes the file to log to. 29 | 30 | Attributes: 31 | uuid: a uuid for the tracer. If multiple :class:`FileTracer` instances log to the same file 32 | the child-elements for a tracer can be identified by referring to this id as parent. 33 | """ 34 | 35 | def __init__(self, log_file_path: Path | str) -> None: 36 | super().__init__() 37 | self._log_file_path = Path(log_file_path) 38 | 39 | def _log_entry(self, id: UUID, entry: BaseModel) -> None: 40 | self._log_file_path.parent.mkdir(parents=True, exist_ok=True) 41 | with self._log_file_path.open(mode="a", encoding="utf-8") as f: 42 | f.write( 43 | LogLine( 44 | trace_id=id, entry_type=type(entry).__name__, entry=entry 45 | ).model_dump_json() 46 | + "\n" 47 | ) 48 | 49 | def span( 50 | self, 51 | name: str, 52 | timestamp: Optional[datetime] = None, 53 | ) -> "FileSpan": 54 | span = FileSpan(self._log_file_path, context=self.context) 55 | self._log_span(span, name, timestamp) 56 | return span 57 | 58 | def task_span( 59 | self, 60 | task_name: str, 61 | input: PydanticSerializable, 62 | timestamp: Optional[datetime] = None, 63 | ) -> "FileTaskSpan": 64 | task = FileTaskSpan( 65 | self._log_file_path, 66 | context=self.context, 67 | ) 68 | self._log_task(task, task_name, input, timestamp) 69 | return task 70 | 71 | def traces(self, trace_id: Optional[str] = None) -> InMemoryTracer: 72 | with self._log_file_path.open("r", encoding="utf-8") as f: 73 | traces = (LogLine.model_validate(loads(line)) for line in f) 74 | filtered_traces = ( 75 | (line for line in traces if line.trace_id == trace_id) 76 | if trace_id is not None 77 | else traces 78 | ) 79 | return self._parse_log(filtered_traces) 80 | 81 | def convert_file_for_viewing(self, file_path: Path | str) -> None: 82 | in_memory_tracer = self.traces() 83 | traces = in_memory_tracer.export_for_viewing() 84 | path_to_file = Path(file_path) 85 | with path_to_file.open(mode="w", encoding="utf-8") as file: 86 | for exportedSpan in traces: 87 | file.write(exportedSpan.model_dump_json() + "\n") 88 | 89 | 90 | class FileSpan(PersistentSpan, FileTracer): 91 | """A `Span` created by `FileTracer.span`.""" 92 | 93 | def __init__(self, log_file_path: Path, context: Optional[Context] = None) -> None: 94 | PersistentSpan.__init__(self, context=context) 95 | FileTracer.__init__(self, log_file_path=log_file_path) 96 | 97 | 98 | class FileTaskSpan(PersistentTaskSpan, FileSpan): 99 | """A `TaskSpan` created by `FileTracer.task_span`.""" 100 | 101 | pass 102 | -------------------------------------------------------------------------------- /src/intelligence_layer/core/tracer/open_telemetry_tracer.py: -------------------------------------------------------------------------------- 1 | from collections.abc import Sequence 2 | from datetime import datetime 3 | from typing import Optional 4 | 5 | from opentelemetry.context import attach, detach 6 | from opentelemetry.trace import Span as OpenTSpan 7 | from opentelemetry.trace import StatusCode, set_span_in_context 8 
| from opentelemetry.trace import Tracer as OpenTTracer 9 | from pydantic import BaseModel, SerializeAsAny 10 | 11 | from intelligence_layer.core.tracer.tracer import ( 12 | Context, 13 | ExportedSpan, 14 | JsonSerializer, 15 | PydanticSerializable, 16 | Span, 17 | SpanStatus, 18 | SpanType, 19 | TaskSpan, 20 | Tracer, 21 | ) 22 | 23 | 24 | class OpenTelemetryTracer(Tracer): 25 | """A `Tracer` that uses open telemetry.""" 26 | 27 | def __init__(self, tracer: OpenTTracer) -> None: 28 | self._tracer = tracer 29 | 30 | def span( 31 | self, 32 | name: str, 33 | timestamp: Optional[datetime] = None, 34 | ) -> "OpenTelemetrySpan": 35 | tracer_span = self._tracer.start_span( 36 | name, 37 | attributes={"type": SpanType.SPAN.value}, 38 | start_time=None if not timestamp else _open_telemetry_timestamp(timestamp), 39 | ) 40 | token = attach(set_span_in_context(tracer_span)) 41 | return OpenTelemetrySpan(tracer_span, self._tracer, token, self.context) 42 | 43 | def task_span( 44 | self, 45 | task_name: str, 46 | input: PydanticSerializable, 47 | timestamp: Optional[datetime] = None, 48 | ) -> "OpenTelemetryTaskSpan": 49 | tracer_span = self._tracer.start_span( 50 | task_name, 51 | attributes={"input": _serialize(input), "type": SpanType.TASK_SPAN.value}, 52 | start_time=None if not timestamp else _open_telemetry_timestamp(timestamp), 53 | ) 54 | token = attach(set_span_in_context(tracer_span)) 55 | return OpenTelemetryTaskSpan(tracer_span, self._tracer, token, self.context) 56 | 57 | def export_for_viewing(self) -> Sequence[ExportedSpan]: 58 | raise NotImplementedError( 59 | "The OpenTelemetryTracer does not support export for viewing, as it can not access its own traces." 60 | ) 61 | 62 | 63 | class OpenTelemetrySpan(Span, OpenTelemetryTracer): 64 | """A `Span` created by `OpenTelemetryTracer.span`.""" 65 | 66 | end_timestamp: Optional[datetime] = None 67 | 68 | def __init__( 69 | self, 70 | span: OpenTSpan, 71 | tracer: OpenTTracer, 72 | token: object, 73 | context: Optional[Context] = None, 74 | ) -> None: 75 | OpenTelemetryTracer.__init__(self, tracer) 76 | Span.__init__(self, context=context) 77 | self.open_ts_span = span 78 | self._token = token 79 | 80 | def log( 81 | self, 82 | message: str, 83 | value: PydanticSerializable, 84 | timestamp: Optional[datetime] = None, 85 | ) -> None: 86 | self.open_ts_span.add_event( 87 | message, 88 | {"value": _serialize(value)}, 89 | None if not timestamp else _open_telemetry_timestamp(timestamp), 90 | ) 91 | 92 | def end(self, timestamp: Optional[datetime] = None) -> None: 93 | super().end(timestamp) 94 | self.open_ts_span.set_status( 95 | StatusCode.OK if self.status_code == SpanStatus.OK else StatusCode.ERROR 96 | ) 97 | detach(self._token) 98 | self.open_ts_span.end( 99 | _open_telemetry_timestamp(timestamp) if timestamp is not None else None 100 | ) 101 | 102 | 103 | class OpenTelemetryTaskSpan(TaskSpan, OpenTelemetrySpan): 104 | """A `TaskSpan` created by `OpenTelemetryTracer.task_span`.""" 105 | 106 | output: Optional[PydanticSerializable] = None 107 | 108 | def record_output(self, output: PydanticSerializable) -> None: 109 | self.open_ts_span.set_attribute("output", _serialize(output)) 110 | 111 | 112 | def _open_telemetry_timestamp(t: datetime) -> int: 113 | # Open telemetry expects *nanoseconds* since epoch 114 | t_float = t.timestamp() * 1e9 115 | return int(t_float) 116 | 117 | 118 | def _serialize(s: SerializeAsAny[PydanticSerializable]) -> str: 119 | value = s if isinstance(s, BaseModel) else JsonSerializer(root=s) 120 | return 
value.model_dump_json() 121 | -------------------------------------------------------------------------------- /src/intelligence_layer/evaluation/aggregation/accumulator.py: -------------------------------------------------------------------------------- 1 | from abc import ABC, abstractmethod 2 | from typing import Generic, TypeVar 3 | 4 | T = TypeVar("T") 5 | Output = TypeVar("Output") 6 | 7 | 8 | class Accumulator(ABC, Generic[T, Output]): 9 | """Used for incremental computation. 10 | 11 | For use cases with large amount of data where you don't want to have every value in memory at once, e.g. evaluation. 12 | """ 13 | 14 | @abstractmethod 15 | def add(self, value: T) -> None: 16 | """Responsible for accumulating values. 17 | 18 | Args: 19 | value: the value to add 20 | Returns: 21 | nothing 22 | """ 23 | ... 24 | 25 | @abstractmethod 26 | def extract(self) -> Output: 27 | """Accumulates the final result. 28 | 29 | Returns: 30 | float: 0.0 if no values were added before, else the mean 31 | """ 32 | ... 33 | 34 | 35 | class MeanAccumulator(Accumulator[float, float]): 36 | def __init__(self) -> None: 37 | self._n = 0 38 | self._acc = 0.0 39 | self._squares_acc = 0.0 # Sum of squares of the values 40 | 41 | def add(self, value: float) -> None: 42 | self._n += 1 43 | self._acc += value 44 | self._squares_acc += value**2 45 | 46 | def extract(self) -> float: 47 | """Accumulates the mean. 48 | 49 | :return: 0.0 if no values were added before, else the mean 50 | """ 51 | return 0.0 if self._n == 0 else self._acc / self._n 52 | 53 | def standard_deviation(self) -> float: 54 | """Calculates the standard deviation.""" 55 | if self._n == 0: 56 | return 0.0 57 | mean = self.extract() 58 | variance = (self._squares_acc / self._n) - (mean**2) 59 | return variance**0.5 60 | 61 | def standard_error(self) -> float: 62 | """Calculates the standard error of the mean.""" 63 | if self._n <= 1: 64 | return 0.0 65 | return self.standard_deviation() / (self._n**0.5) 66 | -------------------------------------------------------------------------------- /src/intelligence_layer/evaluation/aggregation/aggregation_repository.py: -------------------------------------------------------------------------------- 1 | from abc import ABC, abstractmethod 2 | from collections.abc import Iterable, Sequence 3 | from typing import Optional 4 | 5 | from intelligence_layer.evaluation.aggregation.domain import ( 6 | AggregatedEvaluation, 7 | AggregationOverview, 8 | ) 9 | 10 | 11 | class AggregationRepository(ABC): 12 | """Base aggregation repository interface. 13 | 14 | Provides methods to store and load aggregated evaluation results: :class:`AggregationOverview`. 15 | """ 16 | 17 | @abstractmethod 18 | def store_aggregation_overview( 19 | self, aggregation_overview: AggregationOverview[AggregatedEvaluation] 20 | ) -> None: 21 | """Stores an :class:`AggregationOverview`. 22 | 23 | Args: 24 | aggregation_overview: The aggregated results to be persisted. 25 | """ 26 | ... 27 | 28 | @abstractmethod 29 | def aggregation_overview( 30 | self, aggregation_id: str, aggregation_type: type[AggregatedEvaluation] 31 | ) -> Optional[AggregationOverview[AggregatedEvaluation]]: 32 | """Returns an :class:`AggregationOverview` for the given ID. 33 | 34 | Args: 35 | aggregation_id: ID of the aggregation overview to retrieve. 36 | aggregation_type: Type of the aggregation. 37 | 38 | Returns: 39 | :class:`EvaluationOverview` if it was found, `None` otherwise. 40 | """ 41 | ... 
42 | 43 | def aggregation_overviews( 44 | self, aggregation_type: type[AggregatedEvaluation] 45 | ) -> Iterable[AggregationOverview[AggregatedEvaluation]]: 46 | """Returns all :class:`AggregationOverview`s sorted by their ID. 47 | 48 | Args: 49 | aggregation_type: Type of the aggregation. 50 | 51 | Yields: 52 | :class:`AggregationOverview`s. 53 | """ 54 | for aggregation_id in self.aggregation_overview_ids(): 55 | aggregation_overview = self.aggregation_overview( 56 | aggregation_id, aggregation_type 57 | ) 58 | if aggregation_overview is not None: 59 | yield aggregation_overview 60 | 61 | @abstractmethod 62 | def aggregation_overview_ids(self) -> Sequence[str]: 63 | """Returns sorted IDs of all stored :class:`AggregationOverview`s. 64 | 65 | Returns: 66 | A :class:`Sequence` of the :class:`AggregationOverview` IDs. 67 | """ 68 | pass 69 | -------------------------------------------------------------------------------- /src/intelligence_layer/evaluation/aggregation/domain.py: -------------------------------------------------------------------------------- 1 | from collections.abc import Iterable, Sequence 2 | from datetime import datetime 3 | from typing import Generic, TypeVar 4 | 5 | from pydantic import BaseModel, SerializeAsAny 6 | 7 | from intelligence_layer.connectors.base.json_serializable import ( 8 | SerializableDict, 9 | ) 10 | from intelligence_layer.evaluation.evaluation.domain import ( 11 | EvaluationFailed, 12 | EvaluationOverview, 13 | ) 14 | from intelligence_layer.evaluation.run.domain import RunOverview 15 | 16 | AggregatedEvaluation = TypeVar("AggregatedEvaluation", bound=BaseModel, covariant=True) 17 | 18 | 19 | class AggregationOverview(BaseModel, Generic[AggregatedEvaluation], frozen=True): 20 | """Complete overview of the results of evaluating a :class:`Task` on a dataset. 21 | 22 | Created when running :meth:`Evaluator.eval_and_aggregate_runs`. Contains high-level information and statistics. 23 | 24 | Attributes: 25 | evaluation_overviews: :class:`EvaluationOverview`s used for aggregation. 26 | id: Aggregation overview ID. 27 | start: Start timestamp of the aggregation. 28 | end: End timestamp of the aggregation. 29 | end: The time when the evaluation run ended 30 | successful_evaluation_count: The number of examples that where successfully evaluated. 31 | crashed_during_evaluation_count: The number of examples that crashed during evaluation. 32 | failed_evaluation_count: The number of examples that crashed during evaluation 33 | plus the number of examples that failed to produce an output for evaluation. 34 | run_ids: IDs of all :class:`RunOverview`s from all linked :class:`EvaluationOverview`s. 35 | description: A short description. 36 | statistics: Aggregated statistics of the run. Whatever is returned by :meth:`Evaluator.aggregate` 37 | labels: Labels for filtering aggregation. Defaults to empty list. 38 | metadata: Additional information about the aggregation. Defaults to empty dict. 
39 | 40 | """ 41 | 42 | evaluation_overviews: frozenset[EvaluationOverview] 43 | id: str 44 | start: datetime 45 | end: datetime 46 | successful_evaluation_count: int 47 | crashed_during_evaluation_count: int 48 | description: str 49 | statistics: SerializeAsAny[AggregatedEvaluation] 50 | labels: set[str] = set() 51 | metadata: SerializableDict = dict() 52 | 53 | @property 54 | def run_ids(self) -> Sequence[str]: 55 | return [overview.id for overview in self.run_overviews()] 56 | 57 | def run_overviews(self) -> Iterable[RunOverview]: 58 | return set( 59 | run_overview 60 | for evaluation_overview in self.evaluation_overviews 61 | for run_overview in evaluation_overview.run_overviews 62 | ) 63 | 64 | @property 65 | def failed_evaluation_count(self) -> int: 66 | return self.crashed_during_evaluation_count + sum( 67 | run_overview.failed_example_count for run_overview in self.run_overviews() 68 | ) 69 | 70 | def raise_on_evaluation_failure(self) -> None: 71 | if self.crashed_during_evaluation_count > 0: 72 | raise EvaluationFailed(self.id, self.crashed_during_evaluation_count) 73 | 74 | def __repr__(self) -> str: 75 | return self.__str__() 76 | 77 | def __str__(self) -> str: 78 | res = ( 79 | f"Aggregation Overview ID = {self.id}\n" 80 | f"Start time = {self.start}\n" 81 | f"End time = {self.end}\n" 82 | f"Successful example count = {self.successful_evaluation_count}\n" 83 | f"Count of examples crashed during evaluation = {self.failed_evaluation_count}\n" 84 | f'Description = "{self.description}"\n' 85 | f"Labels = {self.labels}\n" 86 | f"Metadata = {self.metadata}\n" 87 | ) 88 | 89 | res += f"IDs of aggregated Evaluation Overviews = {[evaluation_overview.id for evaluation_overview in self.evaluation_overviews]}\n" 90 | res += f"IDs of aggregated Run Overviews = {self.run_ids}\n" 91 | 92 | res += "Statistics = {\n" 93 | res += f"{self.statistics}\n" 94 | res += "}\n" 95 | 96 | return res 97 | 98 | def __hash__(self) -> int: 99 | return hash(self.id) 100 | -------------------------------------------------------------------------------- /src/intelligence_layer/evaluation/aggregation/file_aggregation_repository.py: -------------------------------------------------------------------------------- 1 | from collections.abc import Sequence 2 | from pathlib import Path 3 | from typing import Optional 4 | 5 | from fsspec.implementations.local import LocalFileSystem # type: ignore 6 | 7 | from intelligence_layer.evaluation.aggregation.aggregation_repository import ( 8 | AggregationRepository, 9 | ) 10 | from intelligence_layer.evaluation.aggregation.domain import ( 11 | AggregatedEvaluation, 12 | AggregationOverview, 13 | ) 14 | from intelligence_layer.evaluation.infrastructure.file_system_based_repository import ( 15 | FileSystemBasedRepository, 16 | ) 17 | 18 | 19 | class FileSystemAggregationRepository(AggregationRepository, FileSystemBasedRepository): 20 | _SUB_DIRECTORY = "aggregations" 21 | 22 | def store_aggregation_overview( 23 | self, aggregation_overview: AggregationOverview[AggregatedEvaluation] 24 | ) -> None: 25 | self.write_utf8( 26 | self._aggregation_overview_path(aggregation_overview.id), 27 | aggregation_overview.model_dump_json(indent=2), 28 | create_parents=True, 29 | ) 30 | 31 | def aggregation_overview( 32 | self, aggregation_id: str, aggregation_type: type[AggregatedEvaluation] 33 | ) -> Optional[AggregationOverview[AggregatedEvaluation]]: 34 | file_path = self._aggregation_overview_path(aggregation_id) 35 | 36 | if not self.exists(file_path): 37 | return None 38 | 39 | content = 
self.read_utf8(file_path) 40 | return AggregationOverview[aggregation_type].model_validate_json( # type:ignore 41 | content 42 | ) 43 | 44 | def aggregation_overview_ids(self) -> Sequence[str]: 45 | return sorted(self.file_names(self._aggregation_root_directory())) 46 | 47 | def _aggregation_root_directory(self) -> Path: 48 | return self._root_directory / self._SUB_DIRECTORY 49 | 50 | def _aggregation_directory(self, evaluation_id: str) -> Path: 51 | return self._aggregation_root_directory() / evaluation_id 52 | 53 | def _aggregation_overview_path(self, aggregation_id: str) -> Path: 54 | return self._aggregation_directory(aggregation_id).with_suffix(".json") 55 | 56 | 57 | class FileAggregationRepository(FileSystemAggregationRepository): 58 | def __init__(self, root_directory: Path) -> None: 59 | super().__init__(LocalFileSystem(), root_directory) 60 | 61 | @staticmethod 62 | def path_to_str(path: Path) -> str: 63 | return str(path) 64 | -------------------------------------------------------------------------------- /src/intelligence_layer/evaluation/aggregation/hugging_face_aggregation_repository.py: -------------------------------------------------------------------------------- 1 | from intelligence_layer.evaluation.aggregation.file_aggregation_repository import ( 2 | FileSystemAggregationRepository, 3 | ) 4 | from intelligence_layer.evaluation.infrastructure.hugging_face_repository import ( 5 | HuggingFaceRepository, 6 | ) 7 | 8 | 9 | class HuggingFaceAggregationRepository( 10 | FileSystemAggregationRepository, HuggingFaceRepository 11 | ): 12 | # this class inherits all its behavior from its parents 13 | pass 14 | -------------------------------------------------------------------------------- /src/intelligence_layer/evaluation/aggregation/in_memory_aggregation_repository.py: -------------------------------------------------------------------------------- 1 | from collections.abc import Sequence 2 | from typing import Any, Optional 3 | 4 | from intelligence_layer.evaluation.aggregation.aggregation_repository import ( 5 | AggregationRepository, 6 | ) 7 | from intelligence_layer.evaluation.aggregation.domain import ( 8 | AggregatedEvaluation, 9 | AggregationOverview, 10 | ) 11 | 12 | 13 | class InMemoryAggregationRepository(AggregationRepository): 14 | def __init__(self) -> None: 15 | super().__init__() 16 | self._aggregation_overviews: dict[str, AggregationOverview[Any]] = dict() 17 | 18 | def store_aggregation_overview( 19 | self, aggregation_overview: AggregationOverview[AggregatedEvaluation] 20 | ) -> None: 21 | self._aggregation_overviews[aggregation_overview.id] = aggregation_overview 22 | 23 | def aggregation_overview( 24 | self, aggregation_id: str, aggregation_type: type[AggregatedEvaluation] 25 | ) -> Optional[AggregationOverview[AggregatedEvaluation]]: 26 | overview = self._aggregation_overviews.get(aggregation_id, None) 27 | if overview is None or type(overview.statistics) is aggregation_type: 28 | return overview 29 | return AggregationOverview[AggregatedEvaluation]( 30 | evaluation_overviews=overview.evaluation_overviews, 31 | id=overview.id, 32 | start=overview.start, 33 | end=overview.end, 34 | successful_evaluation_count=overview.successful_evaluation_count, 35 | crashed_during_evaluation_count=overview.crashed_during_evaluation_count, 36 | description=overview.description, 37 | statistics=aggregation_type.model_validate(overview.statistics), 38 | labels=overview.labels, 39 | metadata=overview.metadata, 40 | ) 41 | 42 | def aggregation_overview_ids(self) -> 
Sequence[str]: 43 | return sorted(list(self._aggregation_overviews.keys())) 44 | -------------------------------------------------------------------------------- /src/intelligence_layer/evaluation/benchmark/trace_information.py: -------------------------------------------------------------------------------- 1 | from collections.abc import Sequence 2 | from datetime import timedelta 3 | from typing import cast 4 | 5 | from aleph_alpha_client import CompletionResponse 6 | 7 | from intelligence_layer.core import ExportedSpan 8 | from intelligence_layer.core.model import _Complete 9 | from intelligence_layer.core.tracer.tracer import SpanType 10 | 11 | 12 | def _get_root(trace: Sequence[ExportedSpan]) -> ExportedSpan | None: 13 | root_spans = [span for span in trace if span.parent_id is None] 14 | if len(root_spans) != 1: 15 | return None 16 | return root_spans[0] 17 | 18 | 19 | def extract_latency_from_trace(trace: Sequence[ExportedSpan]) -> int: 20 | """Extract the total duration of a given trace based on its root trace. 21 | 22 | Args: 23 | trace: trace to analyze 24 | 25 | Returns: 26 | The duration of the trace in microseconds 27 | """ 28 | root_span = _get_root(trace) 29 | if root_span is None: 30 | raise ValueError("No root span found in the trace") 31 | latency = (root_span.end_time - root_span.start_time) / timedelta(microseconds=1) 32 | return int(latency) 33 | 34 | 35 | def _is_complete_request(span: ExportedSpan) -> bool: 36 | # Assuming that LLM requests have a specific name or attribute 37 | return span.name == _Complete.__name__ 38 | 39 | 40 | def _extract_tokens_from_complete_request(span: ExportedSpan) -> int: 41 | if not hasattr(span.attributes, "output"): 42 | raise ValueError( 43 | "Function expects a complete span with attributes.output. Output was not present." 44 | ) 45 | completion_output = cast(CompletionResponse, span.attributes.output) 46 | return completion_output.num_tokens_generated 47 | 48 | 49 | def extract_token_count_from_trace(trace: Sequence[ExportedSpan]) -> int: 50 | """Extract the number of tokens generated in a trace based on its completion requests. 51 | 52 | Note: Does not support traces of streamed responses. 53 | 54 | Args: 55 | trace: trace to analyze. 56 | 57 | Returns: 58 | The sum of newly generated tokens across all spans in the given trace. 59 | """ 60 | token_count = 0 61 | for span in trace: 62 | if span.attributes.type != SpanType.TASK_SPAN: 63 | continue 64 | if _is_complete_request(span): 65 | token_count += _extract_tokens_from_complete_request(span) 66 | return token_count 67 | -------------------------------------------------------------------------------- /src/intelligence_layer/evaluation/dataset/domain.py: -------------------------------------------------------------------------------- 1 | from typing import Generic, Optional, TypeVar 2 | from uuid import uuid4 3 | 4 | from pydantic import BaseModel, Field 5 | from rich.tree import Tree 6 | 7 | from intelligence_layer.connectors.base.json_serializable import ( 8 | SerializableDict, 9 | ) 10 | from intelligence_layer.core.task import Input 11 | from intelligence_layer.core.tracer.tracer import PydanticSerializable 12 | 13 | ExpectedOutput = TypeVar("ExpectedOutput", bound=PydanticSerializable) 14 | """Dataset-specific type that defines characteristics that an :class:`Output` can be checked against. 
15 | 16 | Traditional names for this are `label` or `y` in classification.""" 17 | 18 | 19 | class Example(BaseModel, Generic[Input, ExpectedOutput]): 20 | """Example case used for evaluations. 21 | 22 | Attributes: 23 | input: Input for the :class:`Task`. Has to be same type as the input for the task used. 24 | expected_output: The expected output from a given example run. 25 | This will be used by the evaluator to compare the received output with. 26 | id: Identifier for the example, defaults to uuid. 27 | metadata: Optional dictionary of custom key-value pairs. 28 | 29 | Generics: 30 | Input: Interface to be passed to the :class:`Task` that shall be evaluated. 31 | ExpectedOutput: Output that is expected from the run with the supplied input. 32 | """ 33 | 34 | input: Input 35 | expected_output: ExpectedOutput 36 | id: str = Field(default_factory=lambda: str(uuid4())) 37 | metadata: Optional[SerializableDict] = None 38 | 39 | def __repr__(self) -> str: 40 | return self.__str__() 41 | 42 | def __str__(self) -> str: 43 | return ( 44 | f"Example ID = {self.id}\n" 45 | f"Input = {self.input}\n" 46 | f"Expected output = {self.expected_output}\n" 47 | f"Metadata = {self.metadata}\n" 48 | ) 49 | 50 | def _rich_render(self) -> Tree: 51 | example_tree = Tree(f"Example: {self.id}") 52 | example_tree.add("Input").add(str(self.input)) 53 | example_tree.add("Expected Output").add(str(self.expected_output)) 54 | if self.metadata: 55 | example_tree.add("Metadata").add(str(self.metadata)) 56 | return example_tree 57 | 58 | 59 | class Dataset(BaseModel): 60 | """Represents a dataset linked to multiple examples. 61 | 62 | Attributes: 63 | id: Dataset ID. 64 | name: A short name of the dataset. 65 | label: Labels for filtering datasets. Defaults to empty list. 66 | metadata: Additional information about the dataset. Defaults to empty dict. 67 | """ 68 | 69 | id: str = Field(default_factory=lambda: str(uuid4())) 70 | name: str 71 | labels: set[str] = set() 72 | metadata: SerializableDict = dict() 73 | 74 | def __repr__(self) -> str: 75 | return self.__str__() 76 | 77 | def __str__(self) -> str: 78 | return ( 79 | f"Dataset ID = {self.id}\n" 80 | f"Name = {self.name}\n" 81 | f"Labels = {self.labels}\n" 82 | f"Metadata = {self.metadata}" 83 | ) 84 | -------------------------------------------------------------------------------- /src/intelligence_layer/evaluation/dataset/hugging_face_dataset_repository.py: -------------------------------------------------------------------------------- 1 | from functools import _lru_cache_wrapper, lru_cache 2 | from typing import Optional 3 | 4 | from intelligence_layer.evaluation.dataset.domain import Dataset 5 | from intelligence_layer.evaluation.dataset.file_dataset_repository import ( 6 | FileSystemDatasetRepository, 7 | ) 8 | from intelligence_layer.evaluation.infrastructure.hugging_face_repository import ( 9 | HuggingFaceRepository, 10 | ) 11 | 12 | 13 | class HuggingFaceDatasetRepository(HuggingFaceRepository, FileSystemDatasetRepository): 14 | def __init__( 15 | self, repository_id: str, token: str, private: bool, caching: bool = True 16 | ) -> None: 17 | """Initializes a :class:`HuggingFaceDatasetRepository` to be ready for dataset storage and access. 18 | 19 | Args: 20 | repository_id: The HuggingFace namespace and repository name, separated by a "/". 21 | token: The HuggingFace authentication token. 22 | private: Whether the dataset repository should be private on HuggingFace. 23 | caching: If set, datasets are cached in memory once retrieved. 
24 | This means external updates to datasets will be missed. Defaults to `True`. 25 | """ 26 | super().__init__(repository_id, token, private) 27 | if caching: 28 | self.examples = lru_cache(maxsize=2)(self.examples) # type: ignore 29 | 30 | def delete_dataset(self, dataset_id: str) -> None: 31 | """Deletes a dataset identified by the given dataset ID. 32 | 33 | This implementation should be backwards compatible to datasets 34 | created without a dataset object (i.e., there is no dataset file 35 | with dataset metadata). 36 | 37 | Note, that HuggingFace API does not seem to support deleting not-existing files. 38 | 39 | Args: 40 | dataset_id: Dataset ID of the dataset to delete. 41 | """ 42 | if self.exists(self._dataset_examples_path(dataset_id)): 43 | self._file_system.rm( 44 | self.path_to_str(self._dataset_examples_path(dataset_id)) 45 | ) 46 | 47 | if self.exists(self._dataset_path(dataset_id)): 48 | self._file_system.rm(self.path_to_str(self._dataset_path(dataset_id))) 49 | # this resets the complete cache if a dataset gets deleted. 50 | if isinstance(self.examples, _lru_cache_wrapper): 51 | self.examples.cache_clear() 52 | 53 | def dataset(self, dataset_id: str) -> Optional[Dataset]: 54 | """Returns a dataset identified by the given dataset ID. 55 | 56 | This implementation should be backwards compatible to datasets 57 | created without a dataset object (i.e., there is no dataset file 58 | with dataset metadata). 59 | 60 | Args: 61 | dataset_id: Dataset ID of the dataset to delete. 62 | 63 | Returns: 64 | :class:`Dataset` if it was not, `None` otherwise. 65 | """ 66 | dataset_file_path = self._dataset_path(dataset_id) 67 | examples_file_path = self._dataset_examples_path(dataset_id) 68 | if not self.exists(dataset_file_path): 69 | if not self.exists(examples_file_path): 70 | return None 71 | else: 72 | return Dataset(id=dataset_id, name=f"HuggingFace dataset {dataset_id}") 73 | 74 | return super().dataset(dataset_id) 75 | -------------------------------------------------------------------------------- /src/intelligence_layer/evaluation/dataset/single_huggingface_dataset_repository.py: -------------------------------------------------------------------------------- 1 | from collections.abc import Iterable, Sequence 2 | from typing import Optional, cast 3 | 4 | from datasets import Dataset as HFDataset # type: ignore 5 | from datasets import DatasetDict, IterableDataset, IterableDatasetDict 6 | from pydantic import BaseModel 7 | 8 | from intelligence_layer.connectors.base.json_serializable import SerializableDict 9 | from intelligence_layer.core.task import Input 10 | from intelligence_layer.evaluation.dataset.dataset_repository import DatasetRepository 11 | from intelligence_layer.evaluation.dataset.domain import ( 12 | Dataset, 13 | Example, 14 | ExpectedOutput, 15 | ) 16 | 17 | 18 | class MultipleChoiceInput(BaseModel): 19 | question: str 20 | choices: Sequence[str] 21 | 22 | 23 | class SingleHuggingfaceDatasetRepository(DatasetRepository): 24 | def __init__( 25 | self, 26 | huggingface_dataset: ( 27 | DatasetDict | HFDataset | IterableDatasetDict | IterableDataset 28 | ), 29 | ) -> None: 30 | self._huggingface_dataset = huggingface_dataset 31 | 32 | def create_dataset( 33 | self, 34 | examples: Iterable[Example[Input, ExpectedOutput]], 35 | dataset_name: str, 36 | id: str | None = None, 37 | labels: set[str] | None = None, 38 | metadata: SerializableDict | None = None, 39 | ) -> Dataset: 40 | raise NotImplementedError 41 | 42 | def dataset(self, dataset_id: str) -> Dataset | None: 
43 | raise NotImplementedError 44 | 45 | def dataset_ids(self) -> Iterable[str]: 46 | raise NotImplementedError 47 | 48 | def delete_dataset(self, dataset_id: str) -> None: 49 | raise NotImplementedError 50 | 51 | def example( 52 | self, 53 | dataset_id: str, 54 | example_id: str, 55 | input_type: type[Input], 56 | expected_output_type: type[ExpectedOutput], 57 | ) -> Example[Input, ExpectedOutput] | None: 58 | examples = self.examples( 59 | dataset_id=dataset_id, 60 | input_type=input_type, 61 | expected_output_type=expected_output_type, 62 | ) 63 | 64 | for example in examples: 65 | if example.id == example_id: 66 | return example 67 | return None 68 | 69 | def examples( 70 | self, 71 | dataset_id: str, 72 | input_type: type[Input], 73 | expected_output_type: type[ExpectedOutput], 74 | examples_to_skip: Optional[frozenset[str]] = None, 75 | ) -> Iterable[Example[Input, ExpectedOutput]]: 76 | examples_to_skip = examples_to_skip or frozenset() 77 | answers = "ABCD" 78 | assert input_type == MultipleChoiceInput 79 | assert expected_output_type is str 80 | for index, sample in enumerate(self._huggingface_dataset["test"]): 81 | if str(index) not in examples_to_skip: 82 | yield Example( 83 | input=cast( 84 | Input, 85 | MultipleChoiceInput( 86 | question=sample["question"], choices=sample["choices"] 87 | ), 88 | ), 89 | expected_output=cast(ExpectedOutput, answers[sample["answer"]]), 90 | id=str(index), 91 | ) 92 | -------------------------------------------------------------------------------- /src/intelligence_layer/evaluation/evaluation/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Aleph-Alpha/intelligence-layer-sdk/261ab36a28977d91e2d0c8ca16039edcbab81925/src/intelligence_layer/evaluation/evaluation/__init__.py -------------------------------------------------------------------------------- /src/intelligence_layer/evaluation/evaluation/evaluator/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Aleph-Alpha/intelligence-layer-sdk/261ab36a28977d91e2d0c8ca16039edcbab81925/src/intelligence_layer/evaluation/evaluation/evaluator/__init__.py -------------------------------------------------------------------------------- /src/intelligence_layer/evaluation/infrastructure/file_system_based_repository.py: -------------------------------------------------------------------------------- 1 | from collections.abc import Sequence 2 | from pathlib import Path 3 | from typing import cast 4 | 5 | from fsspec import AbstractFileSystem # type: ignore 6 | 7 | 8 | class FileSystemBasedRepository: 9 | """An :class:`FileBasedRepository` that stores evaluation results in files. 10 | 11 | Args: 12 | file_system: The specific file system to use from fsspec. 13 | root_directory: The folder where the files are stored. The folder 14 | (along with its parents) will be created if it does not exist yet. 
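    Example:
        A minimal usage sketch (illustrative only; the local fsspec filesystem
        and the temporary path are placeholder choices)::

            from pathlib import Path

            from fsspec.implementations.local import LocalFileSystem

            repo = FileSystemBasedRepository(LocalFileSystem(), Path("/tmp/il_demo"))
            repo.write_utf8(
                Path("/tmp/il_demo/overview.json"), '{"id": "1"}', create_parents=True
            )
            assert repo.read_utf8(Path("/tmp/il_demo/overview.json")) == '{"id": "1"}'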
15 | """ 16 | 17 | def __init__(self, file_system: AbstractFileSystem, root_directory: Path) -> None: 18 | self._root_directory = root_directory 19 | self._file_system = file_system 20 | self.mkdir(root_directory) 21 | 22 | def write_utf8( 23 | self, path: Path, content: str, create_parents: bool = False 24 | ) -> None: 25 | if create_parents: 26 | self.mkdir(path.parent) 27 | self._file_system.write_text(self.path_to_str(path), content, encoding="utf-8") 28 | 29 | def read_utf8(self, path: Path) -> str: 30 | return cast( 31 | str, self._file_system.read_text(self.path_to_str(path), encoding="utf-8") 32 | ) 33 | 34 | def remove_file(self, path: Path) -> None: 35 | self._file_system.rm_file(path) 36 | 37 | def exists(self, path: Path) -> bool: 38 | return cast(bool, self._file_system.exists(self.path_to_str(path))) 39 | 40 | def mkdir(self, path: Path) -> None: 41 | if self.exists(path): 42 | return 43 | try: 44 | self._file_system.makedir(self.path_to_str(path), create_parents=True) 45 | except FileExistsError: 46 | return 47 | 48 | def file_names(self, path: Path, file_type: str = "json") -> Sequence[str]: 49 | files = [ 50 | Path(file) 51 | for file in self._file_system.ls(self.path_to_str(path), detail=False) 52 | ] 53 | return [file.stem for file in files if file.suffix == "." + file_type] 54 | 55 | @staticmethod 56 | def path_to_str(path: Path) -> str: 57 | """Returns a string for the given Path so that it's readable for the respective file system. 58 | 59 | Args: 60 | path: Given Path that should be converted. 61 | 62 | Returns: 63 | String representation of the given Path. 64 | """ 65 | return str(path) 66 | -------------------------------------------------------------------------------- /src/intelligence_layer/evaluation/infrastructure/hugging_face_repository.py: -------------------------------------------------------------------------------- 1 | from pathlib import Path 2 | 3 | import huggingface_hub 4 | 5 | from intelligence_layer.evaluation.infrastructure.file_system_based_repository import ( 6 | FileSystemBasedRepository, 7 | ) 8 | 9 | 10 | class HuggingFaceRepository(FileSystemBasedRepository): 11 | """HuggingFace base repository.""" 12 | 13 | _REPO_TYPE = "dataset" 14 | _ROOT_DIRECTORY_PREFIX_ = "datasets" # HuggingFace API root directory 15 | 16 | @staticmethod 17 | def path_to_str(path: Path) -> str: 18 | return path.as_posix() 19 | 20 | def __init__(self, repository_id: str, token: str, private: bool) -> None: 21 | """Create a HuggingFace repository. 22 | 23 | Creates a corresponding repository and initializes the file system. 24 | 25 | Args: 26 | repository_id: The HuggingFace namespace and repository name, separated by a "/". 27 | token: The HuggingFace authentication token. 28 | private: Whether the dataset repository should be private. 
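        Example:
            A minimal construction sketch (the repository id and token are
            placeholders for your own HuggingFace namespace and credentials)::

                repo = HuggingFaceRepository(
                    repository_id="my-org/my-eval-artifacts",
                    token="hf_...",
                    private=True,
                )
                # The constructor creates the hub repository if it does not
                # exist yet and addresses its files below
                # "datasets/my-org/my-eval-artifacts".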
29 | """ 30 | assert repository_id[-1] != "/" 31 | self.create_repository(repository_id, token, private) 32 | 33 | file_system = huggingface_hub.HfFileSystem(token=token) 34 | root_directory = Path(f"{self._ROOT_DIRECTORY_PREFIX_}/{repository_id}") 35 | 36 | super().__init__(file_system, root_directory) 37 | self._repository_id = repository_id 38 | # the file system is assigned in super init but this fixes the typing 39 | self._file_system: huggingface_hub.HfFileSystem 40 | 41 | def create_repository(self, repository_id: str, token: str, private: bool) -> None: 42 | huggingface_hub.create_repo( 43 | repo_id=repository_id, 44 | token=token, 45 | repo_type=self._REPO_TYPE, 46 | private=private, 47 | exist_ok=True, 48 | ) 49 | 50 | def delete_repository(self) -> None: 51 | huggingface_hub.delete_repo( 52 | repo_id=self._repository_id, 53 | token=self._file_system.token, 54 | repo_type=self._REPO_TYPE, 55 | missing_ok=True, 56 | ) 57 | -------------------------------------------------------------------------------- /src/intelligence_layer/examples/classify/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Aleph-Alpha/intelligence-layer-sdk/261ab36a28977d91e2d0c8ca16039edcbab81925/src/intelligence_layer/examples/classify/__init__.py -------------------------------------------------------------------------------- /src/intelligence_layer/examples/classify/keyword_extract.py: -------------------------------------------------------------------------------- 1 | from collections.abc import Mapping 2 | 3 | from pydantic import BaseModel 4 | 5 | from intelligence_layer.core import ( 6 | CompleteInput, 7 | ControlModel, 8 | Language, 9 | LuminousControlModel, 10 | Task, 11 | TaskSpan, 12 | TextChunk, 13 | ) 14 | 15 | INSTRUCT_CONFIGS = { 16 | Language( 17 | "de" 18 | ): "Worum geht es in dem Text? Extrahiere ein paar Stichwörter in Form einer Komma-separierten Liste.", 19 | Language( 20 | "en" 21 | ): "What is the text about? Extract a few keywords in form of a comma-separated list.", 22 | Language( 23 | "es" 24 | ): "¿De qué trata el texto? Extrae algunas palabras clave en forma de una lista separada por comas.", 25 | Language( 26 | "fr" 27 | ): "De quoi parle le texte? Extraire quelques mots-clés sous forme d'une liste séparée par des virgules.", 28 | Language( 29 | "it" 30 | ): "Di cosa tratta il testo? 
Estrai alcune parole chiave sotto forma di una lista separata da virgole.", 31 | } 32 | 33 | 34 | class KeywordExtractInput(BaseModel): 35 | chunk: TextChunk 36 | language: Language 37 | 38 | 39 | class KeywordExtractOutput(BaseModel): 40 | keywords: frozenset[str] 41 | 42 | 43 | class KeywordExtract(Task[KeywordExtractInput, KeywordExtractOutput]): 44 | def __init__( 45 | self, 46 | model: ControlModel | None = None, 47 | instruct_configs: Mapping[Language, str] = INSTRUCT_CONFIGS, 48 | maximum_tokens: int = 32, 49 | ) -> None: 50 | self._instruct_configs = instruct_configs 51 | self._model = model or LuminousControlModel("luminous-base-control") 52 | self._maximum_tokens = maximum_tokens 53 | 54 | def do_run( 55 | self, input: KeywordExtractInput, task_span: TaskSpan 56 | ) -> KeywordExtractOutput: 57 | instruction = input.language.language_config(self._instruct_configs) 58 | result = self._model.complete( 59 | CompleteInput( 60 | prompt=self._model.to_instruct_prompt( 61 | instruction=instruction, input=str(input.chunk) 62 | ), 63 | maximum_tokens=self._maximum_tokens, 64 | ), 65 | task_span, 66 | ) 67 | return KeywordExtractOutput( 68 | keywords=frozenset(s.strip() for s in result.completion.split(",")) 69 | ) 70 | -------------------------------------------------------------------------------- /src/intelligence_layer/examples/classify/prompt_based_classify_with_definitions.py: -------------------------------------------------------------------------------- 1 | import warnings 2 | from collections.abc import Sequence 3 | from math import exp 4 | 5 | from aleph_alpha_client import Prompt 6 | from pydantic import BaseModel 7 | 8 | from intelligence_layer.core import ( 9 | CompleteInput, 10 | CompleteOutput, 11 | ControlModel, 12 | LuminousControlModel, 13 | Task, 14 | TaskSpan, 15 | TextChunk, 16 | ) 17 | 18 | from .classify import ClassifyInput, Probability, SingleLabelClassifyOutput 19 | 20 | 21 | class LabelWithDefinition(BaseModel): 22 | """Defines a label with a definition. 23 | 24 | Attributes: 25 | name: Name of the label. 26 | definition: A definition or description of the label. 27 | """ 28 | 29 | name: str 30 | definition: str 31 | 32 | def to_string(self) -> str: 33 | return f"{self.name}: {self.definition}" 34 | 35 | 36 | class PromptBasedClassifyWithDefinitions( 37 | Task[ClassifyInput, SingleLabelClassifyOutput] 38 | ): 39 | INSTRUCTION: str = """Identify a class that describes the text adequately. 40 | Reply with only the class label.""" 41 | 42 | def __init__( 43 | self, 44 | labels_with_definitions: Sequence[LabelWithDefinition], 45 | model: ControlModel | None = None, 46 | instruction: str = INSTRUCTION, 47 | ) -> None: 48 | super().__init__() 49 | self._labels_with_definitions = labels_with_definitions 50 | self._model = model or LuminousControlModel("luminous-base-control") 51 | if not isinstance(self._model, LuminousControlModel): 52 | warnings.warn( 53 | "PromptBasedClassifyWithDefinitions was build for luminous models. LLama models may not work correctly. 
" 54 | "Proceed with caution and testing.", 55 | UserWarning, 56 | ) 57 | self._instruction = instruction 58 | 59 | def do_run( 60 | self, input: ClassifyInput, task_span: TaskSpan 61 | ) -> SingleLabelClassifyOutput: 62 | complete_output = self._model.complete( 63 | CompleteInput( 64 | prompt=self._get_prompt(input.chunk, input.labels), 65 | completion_bias_inclusion=list(input.labels), 66 | log_probs=len(input.labels) * 2, 67 | ), 68 | task_span, 69 | ) 70 | return SingleLabelClassifyOutput(scores=self._build_scores(complete_output)) 71 | 72 | def _get_prompt(self, chunk: TextChunk, labels: frozenset[str]) -> Prompt: 73 | def format_input(text: str, labels: frozenset[str]) -> str: 74 | definitions = "\n".join( 75 | label.to_string() 76 | for label in self._labels_with_definitions 77 | if label.name in labels 78 | ) 79 | return f"""Labels: 80 | {', '.join(label.name for label in self._labels_with_definitions if label.name in labels)} 81 | 82 | Definitions: 83 | {definitions} 84 | 85 | Text: {text}""" 86 | 87 | unexpected_labels = labels - set( 88 | label.name for label in self._labels_with_definitions 89 | ) 90 | if unexpected_labels: 91 | raise ValueError(f"Got unexpected labels: {', '.join(unexpected_labels)}") 92 | 93 | return self._model.to_instruct_prompt( 94 | instruction=self._instruction, 95 | input=format_input(text=str(chunk), labels=labels), 96 | ) 97 | 98 | def _build_scores(self, complete_output: CompleteOutput) -> dict[str, Probability]: 99 | raw_probs: dict[str, float] = {} 100 | for label in self._labels_with_definitions: 101 | label_prob = 0.0 102 | assert complete_output.completions[0].log_probs 103 | for token, prob in complete_output.completions[0].log_probs[0].items(): 104 | if label.name.startswith(token.strip()) and prob: 105 | label_prob += exp(prob) 106 | raw_probs[label.name] = label_prob 107 | 108 | total = sum(raw_probs.values()) 109 | return {key: Probability(value / total) for key, value in raw_probs.items()} 110 | -------------------------------------------------------------------------------- /src/intelligence_layer/examples/qa/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Aleph-Alpha/intelligence-layer-sdk/261ab36a28977d91e2d0c8ca16039edcbab81925/src/intelligence_layer/examples/qa/__init__.py -------------------------------------------------------------------------------- /src/intelligence_layer/examples/search/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Aleph-Alpha/intelligence-layer-sdk/261ab36a28977d91e2d0c8ca16039edcbab81925/src/intelligence_layer/examples/search/__init__.py -------------------------------------------------------------------------------- /src/intelligence_layer/examples/summarize/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Aleph-Alpha/intelligence-layer-sdk/261ab36a28977d91e2d0c8ca16039edcbab81925/src/intelligence_layer/examples/summarize/__init__.py -------------------------------------------------------------------------------- /src/intelligence_layer/examples/summarize/recursive_summarize.py: -------------------------------------------------------------------------------- 1 | from pydantic import BaseModel 2 | 3 | from intelligence_layer.core import Task, TaskSpan 4 | from intelligence_layer.core.detect_language import Language 5 | from 
intelligence_layer.examples.summarize.steerable_long_context_summarize import ( 6 | SteerableLongContextSummarize, 7 | ) 8 | from intelligence_layer.examples.summarize.summarize import ( 9 | LongContextSummarizeInput, 10 | LongContextSummarizeOutput, 11 | SummarizeOutput, 12 | ) 13 | 14 | 15 | class RecursiveSummarizeInput(BaseModel): 16 | """The input for a recursive summarize-task for a text of any length. 17 | 18 | Attributes: 19 | text: A text of any length. 20 | language: The desired language of the summary. ISO 619 str with language e.g. en, fr, etc. 21 | max_tokens: The maximum desired length of the summary in tokens. 22 | """ 23 | 24 | text: str 25 | language: Language = Language("en") 26 | max_tokens: int = 512 27 | 28 | 29 | class RecursiveSummarize(Task[RecursiveSummarizeInput, SummarizeOutput]): 30 | """This task will summarize the input text recursively until the desired length is reached. 31 | 32 | It uses any long-context summarize task to go over text recursively and condense it even further. 33 | 34 | Args: 35 | long_context_summarize_task: Any task that satifies the interface Input: LongContextSummarizeInput and Output: LongContextSummarizeOutput. 36 | Defaults to :class:`SteerableLongContextSummarize` 37 | """ 38 | 39 | def __init__( 40 | self, 41 | long_context_summarize_task: ( 42 | Task[LongContextSummarizeInput, LongContextSummarizeOutput] | None 43 | ) = None, 44 | ) -> None: 45 | self.long_context_summarize_task = ( 46 | long_context_summarize_task or SteerableLongContextSummarize() 47 | ) 48 | 49 | def do_run( 50 | self, input: RecursiveSummarizeInput, task_span: TaskSpan 51 | ) -> SummarizeOutput: 52 | num_partial_summaries = 0 53 | text_to_summarize = input.text 54 | summary = "" 55 | num_generated_tokens = 0 56 | while True: 57 | summarize_output = self.long_context_summarize_task.run( 58 | LongContextSummarizeInput( 59 | text=text_to_summarize, language=input.language 60 | ), 61 | task_span, 62 | ) 63 | # If the number of chunks stayed the same, we assume that no further summarization has taken place and we return the previous summary 64 | if num_partial_summaries == len(summarize_output.partial_summaries): 65 | break 66 | num_partial_summaries = len(summarize_output.partial_summaries) 67 | 68 | partial_summaries = summarize_output.partial_summaries 69 | num_generated_tokens = sum( 70 | partial_summary.generated_tokens 71 | for partial_summary in partial_summaries 72 | ) 73 | summary = "\n".join( 74 | partial_summary.summary for partial_summary in partial_summaries 75 | ) 76 | # If the number of chunks is 1 we want to return the new summary since we assume that no further summarization will take place with our prompt 77 | if ( 78 | len(summarize_output.partial_summaries) == 1 79 | or num_generated_tokens < input.max_tokens 80 | ): 81 | break 82 | text_to_summarize = summary 83 | 84 | return SummarizeOutput( 85 | summary=summary.strip(), generated_tokens=num_generated_tokens 86 | ) 87 | -------------------------------------------------------------------------------- /src/intelligence_layer/examples/summarize/steerable_long_context_summarize.py: -------------------------------------------------------------------------------- 1 | from intelligence_layer.core import ( 2 | Chunk, 3 | ChunkInput, 4 | ChunkOutput, 5 | ControlModel, 6 | LuminousControlModel, 7 | Task, 8 | TaskSpan, 9 | ) 10 | from intelligence_layer.examples.summarize.steerable_single_chunk_summarize import ( 11 | SteerableSingleChunkSummarize, 12 | ) 13 | from 
intelligence_layer.examples.summarize.summarize import ( 14 | LongContextSummarizeInput, 15 | LongContextSummarizeOutput, 16 | PartialSummary, 17 | SingleChunkSummarizeInput, 18 | SummarizeOutput, 19 | ) 20 | 21 | 22 | class SteerableLongContextSummarize( 23 | Task[LongContextSummarizeInput, LongContextSummarizeOutput] 24 | ): 25 | """Condenses a long text into a summary. 26 | 27 | Generate a summary given an instruction setup. 28 | 29 | Args: 30 | summarize: The summarize task that is used to summarize a single chunk. 31 | Make sure that this and the chunk task use the same model. 32 | Defaults to :class:`SteerableSingleChunkSummarize` . 33 | chunk: The chunk task that is used to chunk the long text into smaller pieces 34 | such that a single chunk fits into the context of the model. 35 | Make sure that this and the summarize task use the same model. 36 | Defaults to :class:`Chunk` . 37 | model: A valid Aleph Alpha control model. This is passed on to the 38 | default summarize and chunk tasks. So it is ignored when the 39 | defaults for both tasks are overwritten. 40 | Defaults to luminous-base-control. 41 | """ 42 | 43 | def __init__( 44 | self, 45 | summarize: Task[SingleChunkSummarizeInput, SummarizeOutput] | None = None, 46 | chunk: Task[ChunkInput, ChunkOutput] | None = None, 47 | model: ControlModel | None = None, 48 | ) -> None: 49 | super().__init__() 50 | model = model or LuminousControlModel("luminous-base-control") 51 | self._summarize = summarize or SteerableSingleChunkSummarize( 52 | model, max_generated_tokens=512 53 | ) 54 | self._chunk_task = chunk or Chunk(model, max_tokens_per_chunk=1024) 55 | 56 | def do_run( 57 | self, input: LongContextSummarizeInput, task_span: TaskSpan 58 | ) -> LongContextSummarizeOutput: 59 | chunk_output = self._chunk_task.run(ChunkInput(text=input.text), task_span) 60 | summary_outputs = self._summarize.run_concurrently( 61 | [ 62 | SingleChunkSummarizeInput(chunk=chunk, language=input.language) 63 | for chunk in chunk_output.chunks 64 | ], 65 | task_span, 66 | ) 67 | return LongContextSummarizeOutput( 68 | partial_summaries=[ 69 | PartialSummary( 70 | summary=summary_output.summary, 71 | chunk=chunk, 72 | generated_tokens=summary_output.generated_tokens, 73 | ) 74 | for summary_output, chunk in zip( 75 | summary_outputs, chunk_output.chunks, strict=True 76 | ) 77 | ] 78 | ) 79 | -------------------------------------------------------------------------------- /src/intelligence_layer/examples/summarize/steerable_single_chunk_summarize.py: -------------------------------------------------------------------------------- 1 | from collections.abc import Mapping 2 | 3 | from intelligence_layer.core import ( 4 | CompleteInput, 5 | ControlModel, 6 | Language, 7 | LuminousControlModel, 8 | Task, 9 | TaskSpan, 10 | ) 11 | from intelligence_layer.examples.summarize.summarize import ( 12 | SingleChunkSummarizeInput, 13 | SummarizeOutput, 14 | ) 15 | 16 | INSTRUCTION_CONFIGS = { 17 | Language("en"): "Summarize the text in a single paragraph.", 18 | Language("de"): "Fasse den Text in einem Paragraphen zusammen.", 19 | } 20 | 21 | 22 | class SteerableSingleChunkSummarize(Task[SingleChunkSummarizeInput, SummarizeOutput]): 23 | """Summarises a text given an instruction. 24 | 25 | Args: 26 | model: A valid Aleph Alpha control model. 27 | max_generated_tokens: The maximum number of tokens to be generated by the model. 28 | This is not intended to steer the generation length, but instead will cut off the generation at the specified limit. 
29 | Note that maximum tokens + chunk size + prompt length should not exceed the context size of the model. 30 | instruction_configs: A mapping of valid `Language` to `str` for each 31 | supported language. 32 | """ 33 | 34 | def __init__( 35 | self, 36 | model: ControlModel | None = None, 37 | max_generated_tokens: int = 256, 38 | instruction_configs: Mapping[Language, str] = INSTRUCTION_CONFIGS, 39 | ) -> None: 40 | self._model = model or LuminousControlModel("luminous-base-control") 41 | self._max_generated_tokens = max_generated_tokens 42 | self._instruction_configs = instruction_configs 43 | 44 | def do_run( 45 | self, input: SingleChunkSummarizeInput, task_span: TaskSpan 46 | ) -> SummarizeOutput: 47 | instruction = self._instruction_configs.get(input.language) 48 | if not instruction: 49 | raise ValueError(f"Could not find `prompt_config` for {input.language}.") 50 | completion = self._model.complete( 51 | CompleteInput( 52 | prompt=self._model.to_instruct_prompt(instruction, input.chunk), 53 | maximum_tokens=self._max_generated_tokens, 54 | ), 55 | task_span, 56 | ) 57 | return SummarizeOutput( 58 | summary=completion.completion.strip(), 59 | generated_tokens=completion.generated_tokens, 60 | ) 61 | -------------------------------------------------------------------------------- /src/intelligence_layer/learning/__init__.py: -------------------------------------------------------------------------------- 1 | from .enrich import EnrichDomain as EnrichDomain 2 | from .enrich import EnrichQuality as EnrichQuality 3 | from .file_instruction_finetuning_data_repository import ( 4 | FileInstructionFinetuningDataRepository as FileInstructionFinetuningDataRepository, 5 | ) 6 | from .instruction_finetuning_data_handler import EnrichAction as EnrichAction 7 | from .instruction_finetuning_data_handler import ( 8 | InstructionFinetuningDataHandler as InstructionFinetuningDataHandler, 9 | ) 10 | from .instruction_finetuning_data_handler import ( 11 | instruction_finetuning_handler_builder as instruction_finetuning_handler_builder, 12 | ) 13 | from .instruction_finetuning_data_repository import ( 14 | InstructionFinetuningDataRepository as InstructionFinetuningDataRepository, 15 | ) 16 | from .models import InstructionFinetuningSample as InstructionFinetuningSample 17 | from .models import ( 18 | InstructionFinetuningSample_ as InstructionFinetuningSample_, 19 | ) 20 | from .models import ( 21 | InstructionFinetuningSampleAttributes as InstructionFinetuningSampleAttributes, 22 | ) 23 | from .models import InvalidSampleError as InvalidSampleError 24 | from .models import RawInstructionFinetuningSample as RawInstructionFinetuningSample 25 | from .models import TripletTransformation as TripletTransformation 26 | from .postgres_instruction_finetuning_data_repository import ( 27 | PostgresInstructionFinetuningDataRepository as PostgresInstructionFinetuningDataRepository, 28 | ) 29 | 30 | __all__ = [symbol for symbol in dir()] 31 | -------------------------------------------------------------------------------- /src/intelligence_layer/learning/instruction_finetuning_data_repository.py: -------------------------------------------------------------------------------- 1 | from abc import ABC, abstractmethod 2 | from collections.abc import Iterable 3 | from typing import Optional 4 | 5 | from sqlalchemy import ColumnElement 6 | 7 | from intelligence_layer.learning.models import ( 8 | InstructionFinetuningSample, 9 | ) 10 | 11 | 12 | class InstructionFinetuningDataRepository(ABC): 13 | @abstractmethod 14 | 
def store_sample(self, sample: InstructionFinetuningSample) -> str: 15 | """Stores a finetuning sample and returns its ID. 16 | 17 | Args: 18 | sample: The sample to store. 19 | 20 | Returns: 21 | The ID of the stored sample. 22 | """ 23 | pass 24 | 25 | @abstractmethod 26 | def store_samples( 27 | self, samples: Iterable[InstructionFinetuningSample] 28 | ) -> list[str]: 29 | """Stores multiple finetuning samples and returns their IDs. 30 | 31 | Args: 32 | samples: The samples to store. 33 | 34 | Returns: 35 | The IDs of the stored samples. 36 | """ 37 | pass 38 | 39 | @abstractmethod 40 | def head(self, limit: Optional[int] = 100) -> Iterable[InstructionFinetuningSample]: 41 | """Returns the first `limit` samples. 42 | 43 | Args: 44 | limit: The number of samples to return. Defaults to 100. 45 | 46 | Returns: 47 | Iterable[InstructionFinetuningSample]: The first `limit` samples. 48 | """ 49 | pass 50 | 51 | @abstractmethod 52 | def sample(self, id: str) -> Optional[InstructionFinetuningSample]: 53 | """Gets a finetuning sample by its ID. 54 | 55 | Args: 56 | id: The ID of the sample. 57 | 58 | Returns: 59 | The sample with the given ID, or None if not found. 60 | """ 61 | pass 62 | 63 | @abstractmethod 64 | def samples(self, ids: Iterable[str]) -> Iterable[InstructionFinetuningSample]: 65 | """Gets multiple finetuning samples by their IDs. 66 | 67 | Args: 68 | ids: The IDs of the samples. 69 | 70 | Returns: 71 | The samples with the given IDs. 72 | """ 73 | pass 74 | 75 | @abstractmethod 76 | def samples_with_filter( 77 | self, filter_expression: ColumnElement[bool], limit: Optional[int] = 100 78 | ) -> Iterable[InstructionFinetuningSample]: 79 | """Gets samples that match the given filter. 80 | 81 | Args: 82 | filter_expression: The filter expression. 83 | limit: The number of samples to return. Defaults to 100. 84 | 85 | Returns: 86 | The samples that match the filter. 87 | """ 88 | pass 89 | 90 | @abstractmethod 91 | def delete_sample(self, id: str) -> None: 92 | """Deletes a finetuning sample by its ID. 93 | 94 | Args: 95 | id: The ID of the sample. 96 | """ 97 | pass 98 | 99 | @abstractmethod 100 | def delete_samples(self, ids: Iterable[str]) -> None: 101 | """Deletes multiple finetuning samples by their IDs. 102 | 103 | Args: 104 | ids: The IDs of the samples. 
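        Example:
            A usage sketch against any concrete implementation of this
            interface (``repo`` and ``samples`` are assumed to exist already)::

                ids = repo.store_samples(samples)
                repo.delete_samples(ids)
                assert all(repo.sample(sample_id) is None for sample_id in ids)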
105 | """ 106 | pass 107 | -------------------------------------------------------------------------------- /src/intelligence_layer/py.typed: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Aleph-Alpha/intelligence-layer-sdk/261ab36a28977d91e2d0c8ca16039edcbab81925/src/intelligence_layer/py.typed -------------------------------------------------------------------------------- /tests/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Aleph-Alpha/intelligence-layer-sdk/261ab36a28977d91e2d0c8ca16039edcbab81925/tests/__init__.py -------------------------------------------------------------------------------- /tests/connectors/kernel/test_kernel.py: -------------------------------------------------------------------------------- 1 | from dotenv import load_dotenv 2 | from pydantic import BaseModel 3 | 4 | from intelligence_layer.connectors.kernel.kernel import KernelTask 5 | from intelligence_layer.core.tracer.tracer import NoOpTracer 6 | 7 | 8 | def test_kernel_connector() -> None: 9 | load_dotenv() 10 | tracer = NoOpTracer() 11 | 12 | class Input(BaseModel): 13 | question: str 14 | 15 | class Output(BaseModel): 16 | answer: str | None 17 | 18 | task = KernelTask( 19 | skill="playground/super_rag", 20 | input_model=Input, 21 | output_model=Output, 22 | ) 23 | 24 | output = task.run( 25 | Input(question="What is a transformer?"), 26 | tracer, 27 | ) 28 | assert output.answer and "transformer" in output.answer 29 | -------------------------------------------------------------------------------- /tests/connectors/retrievers/test_document_index_retriever.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | from intelligence_layer.connectors.retrievers.document_index_retriever import ( 4 | AsyncDocumentIndexRetriever, 5 | DocumentIndexRetriever, 6 | ) 7 | 8 | pytestmark = pytest.mark.document_index 9 | 10 | 11 | @pytest.mark.internal 12 | def test_document_index_retriever( 13 | document_index_retriever: DocumentIndexRetriever, 14 | ) -> None: 15 | documents = document_index_retriever.get_relevant_documents_with_scores("Coca-Cola") 16 | assert len(documents) > 0 17 | 18 | 19 | @pytest.mark.internal 20 | async def test_async_document_index_retriever( 21 | async_document_index_retriever: AsyncDocumentIndexRetriever, 22 | ) -> None: 23 | documents = await async_document_index_retriever.get_relevant_documents_with_scores( 24 | "Coca-Cola" 25 | ) 26 | assert len(documents) > 0 27 | -------------------------------------------------------------------------------- /tests/connectors/retrievers/test_hybrid_qdrant_in_memory_retriever.py: -------------------------------------------------------------------------------- 1 | from collections.abc import Sequence 2 | 3 | from pytest import fixture 4 | 5 | from intelligence_layer.connectors import AlephAlphaClientProtocol, RetrieverType 6 | from intelligence_layer.connectors.retrievers.base_retriever import Document 7 | from intelligence_layer.connectors.retrievers.hybrid_qdrant_in_memory_retriever import ( 8 | HybridQdrantInMemoryRetriever, 9 | ) 10 | from tests.conftest import to_document 11 | 12 | 13 | @fixture 14 | def in_memory_retriever_documents() -> Sequence[Document]: 15 | return [ 16 | Document(text="Summer is warm but I like it"), 17 | Document(text="I do not like rain"), 18 | Document(text="We are so back"), 19 | Document(text="Summer rain is 
rejuvenating"), 20 | ] 21 | 22 | 23 | @fixture 24 | def hybrid_asymmetric_in_memory_retriever( 25 | client: AlephAlphaClientProtocol, 26 | in_memory_retriever_documents: Sequence[Document], 27 | ) -> HybridQdrantInMemoryRetriever: 28 | return HybridQdrantInMemoryRetriever( 29 | in_memory_retriever_documents, 30 | client=client, 31 | k=2, 32 | retriever_type=RetrieverType.ASYMMETRIC, 33 | ) 34 | 35 | 36 | @fixture 37 | def hybrid_symmetric_in_memory_retriever( 38 | client: AlephAlphaClientProtocol, 39 | in_memory_retriever_documents: Sequence[Document], 40 | ) -> HybridQdrantInMemoryRetriever: 41 | return HybridQdrantInMemoryRetriever( 42 | in_memory_retriever_documents, 43 | client=client, 44 | k=2, 45 | retriever_type=RetrieverType.SYMMETRIC, 46 | ) 47 | 48 | 49 | def test_asymmetric_in_memory_retriever( 50 | hybrid_asymmetric_in_memory_retriever: HybridQdrantInMemoryRetriever, 51 | in_memory_retriever_documents: Sequence[Document], 52 | ) -> None: 53 | query = "Do you like hot weather?" 54 | documents = ( 55 | hybrid_asymmetric_in_memory_retriever.get_relevant_documents_with_scores(query) 56 | ) 57 | assert in_memory_retriever_documents[0] == to_document(documents[0].document_chunk) 58 | assert len(documents) <= 2 59 | 60 | 61 | def test_symmetric_in_memory_retriever( 62 | hybrid_symmetric_in_memory_retriever: HybridQdrantInMemoryRetriever, 63 | in_memory_retriever_documents: Sequence[Document], 64 | ) -> None: 65 | query = "I hate drizzle" 66 | documents = hybrid_symmetric_in_memory_retriever.get_relevant_documents_with_scores( 67 | query 68 | ) 69 | assert in_memory_retriever_documents[1] == to_document(documents[0].document_chunk) 70 | assert len(documents) <= 2 71 | 72 | 73 | def test_hybrid_in_memory_retriever( 74 | hybrid_asymmetric_in_memory_retriever: HybridQdrantInMemoryRetriever, 75 | in_memory_retriever_documents: Sequence[Document], 76 | ) -> None: 77 | query = "Summer rain" 78 | documents = ( 79 | hybrid_asymmetric_in_memory_retriever.get_relevant_documents_with_scores(query) 80 | ) 81 | assert in_memory_retriever_documents[3] == to_document(documents[0].document_chunk) 82 | assert len(documents) <= 2 83 | -------------------------------------------------------------------------------- /tests/connectors/retrievers/test_qdrant_in_memory_retriever.py: -------------------------------------------------------------------------------- 1 | from collections.abc import Sequence 2 | 3 | from pytest import fixture 4 | 5 | from intelligence_layer.connectors.retrievers.base_retriever import Document 6 | from intelligence_layer.connectors.retrievers.qdrant_in_memory_retriever import ( 7 | QdrantInMemoryRetriever, 8 | ) 9 | from tests.conftest_document_index import to_document 10 | 11 | 12 | @fixture 13 | def in_memory_retriever_documents() -> Sequence[Document]: 14 | return [ 15 | Document(text="I do not like rain"), 16 | Document(text="Summer is warm"), 17 | Document(text="We are so back"), 18 | ] 19 | 20 | 21 | def test_asymmetric_in_memory_retriever( 22 | asymmetric_in_memory_retriever: QdrantInMemoryRetriever, 23 | in_memory_retriever_documents: Sequence[Document], 24 | ) -> None: 25 | query = "Do you like summer?" 
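    # The asymmetric retriever is expected to embed the query and the stored
    # documents with different (query vs. document) representations, so the
    # summer document at index 1 should rank first for this weather question.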
26 | documents = asymmetric_in_memory_retriever.get_relevant_documents_with_scores(query) 27 | assert in_memory_retriever_documents[1] == to_document(documents[0].document_chunk) 28 | assert len(documents) <= 2 29 | 30 | 31 | def test_symmetric_in_memory_retriever( 32 | symmetric_in_memory_retriever: QdrantInMemoryRetriever, 33 | in_memory_retriever_documents: Sequence[Document], 34 | ) -> None: 35 | query = "I hate drizzle" 36 | documents = symmetric_in_memory_retriever.get_relevant_documents_with_scores(query) 37 | assert in_memory_retriever_documents[0] == to_document(documents[0].document_chunk) 38 | assert len(documents) <= 2 39 | -------------------------------------------------------------------------------- /tests/connectors/studio/conftest.py: -------------------------------------------------------------------------------- 1 | from collections.abc import Sequence 2 | from unittest.mock import Mock 3 | from uuid import uuid4 4 | 5 | from dotenv import load_dotenv 6 | from pydantic import BaseModel 7 | from pytest import fixture 8 | 9 | from intelligence_layer.connectors.studio.studio import StudioClient, StudioExample 10 | 11 | 12 | @fixture 13 | def studio_client() -> StudioClient: 14 | load_dotenv() 15 | project_name = str(uuid4()) 16 | client = StudioClient(project_name) 17 | client.create_project(project_name) 18 | return client 19 | 20 | 21 | @fixture 22 | def mock_studio_client() -> Mock: 23 | return Mock(spec=StudioClient) 24 | 25 | 26 | class PydanticType(BaseModel): 27 | data: int 28 | 29 | 30 | @fixture 31 | def examples() -> Sequence[StudioExample[PydanticType, PydanticType]]: 32 | return [ 33 | StudioExample[PydanticType, PydanticType]( 34 | input=PydanticType(data=i), expected_output=PydanticType(data=i) 35 | ) 36 | for i in range(2) 37 | ] 38 | 39 | 40 | @fixture 41 | def many_examples() -> Sequence[StudioExample[PydanticType, PydanticType]]: 42 | examples = [ 43 | StudioExample[PydanticType, PydanticType]( 44 | input=PydanticType(data=i), expected_output=PydanticType(data=i) 45 | ) 46 | for i in range(15) 47 | ] 48 | return examples 49 | -------------------------------------------------------------------------------- /tests/connectors/studio/test_studio_dataset.py: -------------------------------------------------------------------------------- 1 | from collections.abc import Iterable, Sequence 2 | from typing import Any 3 | from uuid import UUID 4 | 5 | from pytest import fixture 6 | 7 | from intelligence_layer.connectors import StudioClient 8 | from intelligence_layer.connectors.studio.studio import StudioDataset 9 | from intelligence_layer.evaluation.dataset.domain import Example 10 | from intelligence_layer.evaluation.dataset.in_memory_dataset_repository import ( 11 | InMemoryDatasetRepository, 12 | ) 13 | from intelligence_layer.evaluation.dataset.studio_dataset_repository import ( 14 | StudioDatasetRepository, 15 | ) 16 | from tests.connectors.studio.conftest import PydanticType 17 | 18 | 19 | @fixture 20 | def labels() -> set[str]: 21 | return {"label1", "label2"} 22 | 23 | 24 | @fixture 25 | def metadata() -> dict[str, Any]: 26 | return {"key": "value"} 27 | 28 | 29 | @fixture 30 | def with_uploaded_dataset( 31 | studio_client: StudioClient, many_examples: Sequence[Example] 32 | ): 33 | dataset_repo = StudioDatasetRepository(studio_client) 34 | dataset = dataset_repo.create_dataset(many_examples, "my_dataset") 35 | 36 | return dataset 37 | 38 | 39 | def test_can_upload_dataset_with_minimal_request_body( 40 | studio_client: StudioClient, 41 | examples: 
Sequence[Example], 42 | ) -> None: 43 | dataset_repo = InMemoryDatasetRepository() 44 | dataset = dataset_repo.create_dataset(examples, "my_dataset") 45 | 46 | studio_dataset = StudioDatasetRepository.map_to_studio_dataset(dataset) 47 | studio_examples = StudioDatasetRepository.map_to_many_studio_example(examples) 48 | 49 | result = studio_client.submit_dataset( 50 | dataset=studio_dataset, examples=studio_examples 51 | ) 52 | uuid = UUID(result) 53 | assert uuid 54 | 55 | 56 | def test_can_upload_dataset_with_complete_request_body( 57 | studio_client: StudioClient, 58 | examples: Sequence[Example[PydanticType, PydanticType]], 59 | labels: set[str], 60 | metadata: dict[str, Any], 61 | ) -> None: 62 | dataset_repo = InMemoryDatasetRepository() 63 | dataset = dataset_repo.create_dataset( 64 | examples, "my_dataset", labels=labels, metadata=metadata 65 | ) 66 | 67 | studio_dataset = StudioDatasetRepository.map_to_studio_dataset(dataset) 68 | studio_examples = StudioDatasetRepository.map_to_many_studio_example(examples) 69 | 70 | result = studio_client.submit_dataset( 71 | dataset=studio_dataset, examples=studio_examples 72 | ) 73 | assert result 74 | 75 | 76 | def test_get_many_dataset_examples( 77 | studio_client: StudioClient, 78 | many_examples: Iterable[Example[PydanticType, PydanticType]], 79 | with_uploaded_dataset: StudioDataset, 80 | ) -> None: 81 | received_examples = studio_client.get_dataset_examples( 82 | with_uploaded_dataset.id, 83 | input_type=PydanticType, 84 | expected_output_type=PydanticType, 85 | ) 86 | 87 | for received_example, given_example in zip( 88 | received_examples, many_examples, strict=True 89 | ): 90 | # These models appear equal, but somehow are not -> we need to check the specific values, not the models 91 | assert received_example.model_dump() == given_example.model_dump() 92 | assert received_example.input.data == given_example.input.data 93 | assert ( 94 | received_example.expected_output.data == given_example.expected_output.data 95 | ) 96 | -------------------------------------------------------------------------------- /tests/core/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Aleph-Alpha/intelligence-layer-sdk/261ab36a28977d91e2d0c8ca16039edcbab81925/tests/core/__init__.py -------------------------------------------------------------------------------- /tests/core/test_detect_language.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | from lingua import Language as LinguaLanguage 3 | 4 | from intelligence_layer.core import ( 5 | DetectLanguage, 6 | DetectLanguageInput, 7 | Language, 8 | NoOpTracer, 9 | ) 10 | 11 | 12 | @pytest.mark.parametrize( 13 | "text_input,expected_language", 14 | [ 15 | ( 16 | "Hello, my name is Niklas. I am working with Pit on this language detection piece.", 17 | Language("en"), 18 | ), 19 | ( 20 | "Hola, mi nombre es Niklas. Estoy trabajando con Pit en esta pieza de detección de idioma.", 21 | Language("es"), 22 | ), 23 | ( 24 | "Ciao, mi chiamo Niklas. Sto lavorando con Pit su questo pezzo di rilevamento della lingua.", 25 | Language("it"), 26 | ), 27 | ( 28 | "Hallo, mein Name ist Niklas. Ich arbeite mit Pit an diesem Stück zur Spracherkennung.", 29 | Language("de"), 30 | ), 31 | ( 32 | "Bonjour, je m'appelle Niklas. Je travaille avec Pit sur cette pièce de détection de langue.", 33 | Language("fr"), 34 | ), 35 | ( 36 | "Hola, em dic Niklas. 
Estic treballant amb Pit en aquesta peça de detecció d'idiomes.", 37 | Language("ca"), 38 | ), 39 | ( 40 | "Cześć, nazywam się Niklas. Pracuję z Pitem nad tym kawałkiem wykrywania języka.", 41 | Language("pl"), 42 | ), 43 | ], 44 | ) 45 | def test_detect_language_returns_correct_language( 46 | text_input: str, expected_language: Language 47 | ) -> None: 48 | task = DetectLanguage() 49 | input = DetectLanguageInput( 50 | text=text_input, 51 | possible_languages=[ 52 | Language(lang) for lang in ["en", "de", "fr", "it", "es", "pl", "ca"] 53 | ], 54 | ) 55 | tracer = NoOpTracer() 56 | output = task.run(input, tracer) 57 | 58 | assert output.best_fit == expected_language 59 | 60 | 61 | def test_detect_language_returns_none_if_no_language_can_be_detected() -> None: 62 | text = "Je m’appelle Jessica. Je suis une fille, je suis française et j’ai treize ans." # codespell:ignore 63 | task = DetectLanguage() 64 | input = DetectLanguageInput( 65 | text=text, 66 | possible_languages=[Language(lang) for lang in ["en", "de"]], 67 | ) 68 | tracer = NoOpTracer() 69 | output = task.run(input, tracer) 70 | 71 | assert output.best_fit is None 72 | 73 | 74 | def test_conversion_to_lingua_works() -> None: 75 | language: Language = Language("de") 76 | expected_language: LinguaLanguage = LinguaLanguage.GERMAN 77 | 78 | converted_language = language.to_lingua_language() 79 | 80 | assert converted_language == expected_language 81 | -------------------------------------------------------------------------------- /tests/core/test_task.py: -------------------------------------------------------------------------------- 1 | from collections.abc import Callable 2 | from concurrent.futures import ThreadPoolExecutor 3 | from functools import wraps 4 | from threading import Lock 5 | from time import sleep 6 | 7 | from intelligence_layer.core import ( 8 | MAX_CONCURRENCY, 9 | InMemoryTracer, 10 | NoOpTracer, 11 | Task, 12 | TaskSpan, 13 | ) 14 | 15 | 16 | class ConcurrencyCounter(Task[None, None]): 17 | max_concurrency_counter: int = 0 18 | concurrency_counter: int = 0 19 | 20 | def __init__(self) -> None: 21 | self.lock = Lock() 22 | 23 | def do_run(self, input: None, task_span: TaskSpan) -> None: 24 | with self.lock: 25 | self.concurrency_counter += 1 26 | self.max_concurrency_counter = max( 27 | self.max_concurrency_counter, self.concurrency_counter 28 | ) 29 | 30 | sleep(0.01) 31 | with self.lock: 32 | self.concurrency_counter -= 1 33 | 34 | 35 | class DeadlockDetector(Task[None, None]): 36 | def __init__(self) -> None: 37 | super().__init__() 38 | self.inner_task = InnerTask() 39 | 40 | def do_run(self, input: None, task_span: TaskSpan) -> None: 41 | # wait a bit so all DeadlockDetector tasks run before the first InnerTask is submitted 42 | sleep(0.01) 43 | with ThreadPoolExecutor(max_workers=1) as executor: 44 | future = executor.submit( 45 | self.inner_task.run_concurrently, [input], task_span 46 | ) 47 | # wait a bit to ensure the future has finished 48 | # (even if the InnerTasks of all DeadlockDetector tasks are scheduled sequentially) 49 | for _ in range(20): 50 | if future.done(): 51 | break 52 | sleep(0.1) 53 | if not future.done(): 54 | executor.shutdown(wait=False) 55 | raise RuntimeError("Deadlock detected") 56 | 57 | 58 | class InnerTask(Task[None, None]): 59 | def do_run(self, input: None, task_span: TaskSpan) -> None: 60 | pass 61 | 62 | 63 | def dummy_decorator( 64 | f: Callable[["BaseTask", None, TaskSpan], None], 65 | ) -> Callable[["BaseTask", None, TaskSpan], None]: 66 | @wraps(f) 67 | def wrap( 68 
| self: "BaseTask", 69 | input: None, 70 | task_span: TaskSpan, 71 | ) -> None: 72 | return f(self, input, task_span) 73 | 74 | return wrap 75 | 76 | 77 | class BaseTask(Task[None, None]): 78 | @dummy_decorator 79 | def do_run(self, input: None, task_span: TaskSpan) -> None: 80 | task_span.log("Plain", "Entry") 81 | 82 | 83 | class SubTask(BaseTask): 84 | pass 85 | 86 | 87 | class NestedTask(Task[None, None]): 88 | def do_run(self, input: None, task_span: TaskSpan) -> None: 89 | BaseTask().run(input, task_span) 90 | 91 | 92 | def test_run_concurrently() -> None: 93 | task = ConcurrencyCounter() 94 | task.run_concurrently([None] * MAX_CONCURRENCY * 10, NoOpTracer()) 95 | assert task.max_concurrency_counter == MAX_CONCURRENCY 96 | 97 | 98 | def test_run_concurrently_limited() -> None: 99 | task = ConcurrencyCounter() 100 | limit_concurrency = MAX_CONCURRENCY // 2 101 | task.run_concurrently([None] * MAX_CONCURRENCY * 3, NoOpTracer(), limit_concurrency) 102 | assert task.max_concurrency_counter == limit_concurrency 103 | 104 | 105 | def test_run_concurrently_does_not_deadlock_if_nested() -> None: 106 | task = DeadlockDetector() 107 | task.run_concurrently([None] * MAX_CONCURRENCY, NoOpTracer()) 108 | 109 | 110 | def test_sub_tasks_do_not_introduce_multiple_task_spans() -> None: 111 | tracer = InMemoryTracer() 112 | 113 | SubTask().run(None, tracer) 114 | 115 | assert tracer.entries 116 | assert isinstance(tracer.entries[0], TaskSpan) 117 | assert tracer.entries[0].entries 118 | assert not isinstance(tracer.entries[0].entries[0], TaskSpan) 119 | -------------------------------------------------------------------------------- /tests/core/tracer/conftest.py: -------------------------------------------------------------------------------- 1 | import time 2 | from pathlib import Path 3 | 4 | from pytest import fixture 5 | 6 | from intelligence_layer.core import FileTracer, InMemoryTracer, Task, TaskSpan 7 | 8 | 9 | class TracerTestSubTask(Task[None, None]): 10 | def do_run(self, input: None, task_span: TaskSpan) -> None: 11 | task_span.log("subtask", "value") 12 | 13 | 14 | class TracerTestTask(Task[str, str]): 15 | sub_task = TracerTestSubTask() 16 | 17 | def do_run(self, input: str, task_span: TaskSpan) -> str: 18 | time.sleep(0.001) 19 | with task_span.span("span") as sub_span: 20 | time.sleep(0.001) 21 | sub_span.log("message", "a value") 22 | time.sleep(0.001) 23 | self.sub_task.run(None, sub_span) 24 | time.sleep(0.001) 25 | self.sub_task.run(None, task_span) 26 | time.sleep(0.001) 27 | return "output" 28 | 29 | 30 | class SpecificTestException(Exception): 31 | pass 32 | 33 | 34 | @fixture 35 | def tracer_test_task() -> Task[str, str]: 36 | return TracerTestTask() 37 | 38 | 39 | @fixture 40 | def file_tracer(tmp_path: Path) -> FileTracer: 41 | return FileTracer(tmp_path / "log.log") 42 | 43 | 44 | @fixture 45 | def in_memory_tracer() -> InMemoryTracer: 46 | return InMemoryTracer() 47 | -------------------------------------------------------------------------------- /tests/core/tracer/fixtures/old_file_trace_format.jsonl: -------------------------------------------------------------------------------- 1 | {"trace_id":"00b96d92-d1d1-454a-b8e9-f67e8541759a","entry_type":"StartTask","entry":{"uuid":"41528209-1b78-4785-a00d-7f65af1bb09c","parent":"75e79a11-1a26-4731-8b49-ef8634c352ed","name":"TestTask","start":"2024-05-22T09:43:37.428758Z","input":"input","trace_id":"00b96d92-d1d1-454a-b8e9-f67e8541759a"}} 2 | 
{"trace_id":"00b96d92-d1d1-454a-b8e9-f67e8541759a","entry_type":"StartSpan","entry":{"uuid":"ad1ed79b-6ad6-4ea5-8ee8-26be4055e228","parent":"41528209-1b78-4785-a00d-7f65af1bb09c","name":"span","start":"2024-05-22T09:43:37.429448Z","trace_id":"00b96d92-d1d1-454a-b8e9-f67e8541759a"}} 3 | {"trace_id":"00b96d92-d1d1-454a-b8e9-f67e8541759a","entry_type":"PlainEntry","entry":{"message":"message","value":"a value","timestamp":"2024-05-22T09:43:37.429503Z","parent":"ad1ed79b-6ad6-4ea5-8ee8-26be4055e228","trace_id":"00b96d92-d1d1-454a-b8e9-f67e8541759a"}} 4 | {"trace_id":"00b96d92-d1d1-454a-b8e9-f67e8541759a","entry_type":"StartTask","entry":{"uuid":"e8cca541-57a8-440a-b848-7c3b33a97f52","parent":"ad1ed79b-6ad6-4ea5-8ee8-26be4055e228","name":"TestSubTask","start":"2024-05-22T09:43:37.429561Z","input":null,"trace_id":"00b96d92-d1d1-454a-b8e9-f67e8541759a"}} 5 | {"trace_id":"00b96d92-d1d1-454a-b8e9-f67e8541759a","entry_type":"PlainEntry","entry":{"message":"subtask","value":"value","timestamp":"2024-05-22T09:43:37.429605Z","parent":"e8cca541-57a8-440a-b848-7c3b33a97f52","trace_id":"00b96d92-d1d1-454a-b8e9-f67e8541759a"}} 6 | {"trace_id":"00b96d92-d1d1-454a-b8e9-f67e8541759a","entry_type":"EndTask","entry":{"uuid":"e8cca541-57a8-440a-b848-7c3b33a97f52","end":"2024-05-22T09:43:37.429647Z","output":null}} 7 | {"trace_id":"00b96d92-d1d1-454a-b8e9-f67e8541759a","entry_type":"EndSpan","entry":{"uuid":"ad1ed79b-6ad6-4ea5-8ee8-26be4055e228","end":"2024-05-22T09:43:37.429687Z"}} 8 | {"trace_id":"00b96d92-d1d1-454a-b8e9-f67e8541759a","entry_type":"StartTask","entry":{"uuid":"8840185c-2019-4105-9178-1b0e20ab6388","parent":"41528209-1b78-4785-a00d-7f65af1bb09c","name":"TestSubTask","start":"2024-05-22T09:43:37.429728Z","input":null,"trace_id":"00b96d92-d1d1-454a-b8e9-f67e8541759a"}} 9 | {"trace_id":"00b96d92-d1d1-454a-b8e9-f67e8541759a","entry_type":"PlainEntry","entry":{"message":"subtask","value":"value","timestamp":"2024-05-22T09:43:37.429768Z","parent":"8840185c-2019-4105-9178-1b0e20ab6388","trace_id":"00b96d92-d1d1-454a-b8e9-f67e8541759a"}} 10 | {"trace_id":"00b96d92-d1d1-454a-b8e9-f67e8541759a","entry_type":"EndTask","entry":{"uuid":"8840185c-2019-4105-9178-1b0e20ab6388","end":"2024-05-22T09:43:37.429806Z","output":null}} 11 | {"trace_id":"00b96d92-d1d1-454a-b8e9-f67e8541759a","entry_type":"EndTask","entry":{"uuid":"41528209-1b78-4785-a00d-7f65af1bb09c","end":"2024-05-22T09:43:37.429842Z","output":"output"}} 12 | -------------------------------------------------------------------------------- /tests/core/tracer/test_composite_tracer.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | from intelligence_layer.core import CompositeTracer, InMemoryTracer, SpanStatus, Task 4 | from tests.core.tracer.conftest import SpecificTestException 5 | 6 | 7 | def test_composite_tracer(tracer_test_task: Task[str, str]) -> None: 8 | tracer_1 = InMemoryTracer() 9 | tracer_2 = InMemoryTracer() 10 | tracer_test_task.run(input="input", tracer=CompositeTracer([tracer_1, tracer_2])) 11 | 12 | trace_1 = tracer_1.export_for_viewing()[0] 13 | trace_2 = tracer_2.export_for_viewing()[0] 14 | assert trace_1.name == trace_2.name 15 | assert trace_1.attributes == trace_2.attributes 16 | assert trace_1.status == trace_2.status 17 | assert trace_1.context.trace_id != trace_2.context.trace_id 18 | assert trace_1.context.span_id != trace_2.context.span_id 19 | 20 | 21 | def test_composite_tracer_can_get_span_status( 22 | tracer_test_task: Task[str, str], 23 | ) -> None: 24 | tracer_1 
= InMemoryTracer() 25 | tracer_2 = InMemoryTracer() 26 | 27 | composite_tracer = CompositeTracer([tracer_1, tracer_2]) 28 | 29 | with composite_tracer.span("test_name") as composite_span: 30 | assert composite_span.status_code == SpanStatus.OK 31 | 32 | 33 | def test_composite_tracer_raises_for_inconsistent_span_status( 34 | tracer_test_task: Task[str, str], 35 | ) -> None: 36 | tracer_1 = InMemoryTracer() 37 | tracer_2 = InMemoryTracer() 38 | 39 | composite_tracer = CompositeTracer([tracer_1, tracer_2]) 40 | 41 | with composite_tracer.span("test_name") as composite_span: 42 | spans = composite_span.tracers 43 | single_span = spans[0] 44 | try: 45 | with single_span: 46 | raise SpecificTestException 47 | except SpecificTestException: 48 | pass 49 | 50 | with pytest.raises(ValueError): 51 | composite_span.status_code # noqa: B018 52 | -------------------------------------------------------------------------------- /tests/core/tracer/test_file_tracer.py: -------------------------------------------------------------------------------- 1 | from pathlib import Path 2 | from unittest.mock import Mock 3 | 4 | import pytest 5 | from pytest import fixture 6 | 7 | from intelligence_layer.core import ( 8 | FileTracer, 9 | InMemoryTaskSpan, 10 | Task, 11 | TracerLogEntryFailed, 12 | ) 13 | from tests.core.tracer.conftest import SpecificTestException 14 | 15 | 16 | @fixture 17 | def file_tracer(tmp_path: Path) -> FileTracer: 18 | return FileTracer(tmp_path / "log.log") 19 | 20 | 21 | def test_file_tracer_retrieves_all_file_traces( 22 | file_tracer: FileTracer, tracer_test_task: Task[str, str] 23 | ) -> None: 24 | input = "input" 25 | 26 | tracer_test_task.run(input, file_tracer) 27 | tracer_test_task.run(input, file_tracer) 28 | traces = file_tracer.traces() 29 | assert len(traces.entries) == 2 30 | assert isinstance(traces.entries[0], InMemoryTaskSpan) 31 | assert isinstance(traces.entries[1], InMemoryTaskSpan) 32 | assert traces.entries[0].context.trace_id != traces.entries[1].context.trace_id 33 | 34 | 35 | def test_file_tracer_handles_tracer_log_entry_failed_exception( 36 | file_tracer: FileTracer, 37 | ) -> None: 38 | file_tracer._log_entry = Mock( # type: ignore[method-assign] 39 | side_effect=[TracerLogEntryFailed("Hi I am an error", "21"), None] 40 | ) 41 | 42 | try: 43 | file_tracer.task_span(task_name="mock_task_name", input="42", timestamp=None) 44 | except Exception as exception: 45 | raise AssertionError(f"Unexpected exception: {exception}") from None 46 | 47 | 48 | def test_file_tracer_raises_non_log_entry_failed_exceptions( 49 | file_tracer: FileTracer, 50 | ) -> None: 51 | file_tracer._log_entry = Mock( # type: ignore[method-assign] 52 | side_effect=[SpecificTestException("Hi I am an error", "21")] 53 | ) 54 | with pytest.raises(SpecificTestException): 55 | file_tracer.task_span(task_name="mock_task_name", input="42", timestamp=None) 56 | 57 | 58 | def test_file_tracer_is_backwards_compatible() -> None: 59 | current_file_location = Path(__file__) 60 | file_tracer = FileTracer( 61 | current_file_location.parent / "fixtures/old_file_trace_format.jsonl" 62 | ) 63 | tracer = file_tracer.traces() 64 | 65 | assert len(tracer.entries) == 1 66 | task_span = tracer.entries[0] 67 | assert isinstance(task_span, InMemoryTaskSpan) 68 | assert task_span.input == "input" 69 | assert task_span.start_timestamp and task_span.end_timestamp 70 | assert task_span.start_timestamp < task_span.end_timestamp 71 | --------------------------------------------------------------------------------
/tests/dog-and-cat-cover.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Aleph-Alpha/intelligence-layer-sdk/261ab36a28977d91e2d0c8ca16039edcbab81925/tests/dog-and-cat-cover.jpg -------------------------------------------------------------------------------- /tests/evaluation/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Aleph-Alpha/intelligence-layer-sdk/261ab36a28977d91e2d0c8ca16039edcbab81925/tests/evaluation/__init__.py -------------------------------------------------------------------------------- /tests/evaluation/aggregation/conftest.py: -------------------------------------------------------------------------------- 1 | from pytest import fixture 2 | 3 | from intelligence_layer.core import utc_now 4 | from intelligence_layer.evaluation import AggregationOverview, EvaluationOverview 5 | from tests.evaluation.conftest import DummyAggregatedEvaluation 6 | 7 | 8 | @fixture 9 | def dummy_aggregated_evaluation() -> DummyAggregatedEvaluation: 10 | return DummyAggregatedEvaluation(score=0.5) 11 | 12 | 13 | @fixture 14 | def aggregation_overview( 15 | evaluation_overview: EvaluationOverview, 16 | dummy_aggregated_evaluation: DummyAggregatedEvaluation, 17 | ) -> AggregationOverview[DummyAggregatedEvaluation]: 18 | return AggregationOverview( 19 | evaluation_overviews=frozenset([evaluation_overview]), 20 | id="aggregation-id", 21 | start=utc_now(), 22 | end=utc_now(), 23 | successful_evaluation_count=5, 24 | crashed_during_evaluation_count=3, 25 | description="dummy-evaluator", 26 | statistics=dummy_aggregated_evaluation, 27 | ) 28 | -------------------------------------------------------------------------------- /tests/evaluation/aggregation/test_accumulator.py: -------------------------------------------------------------------------------- 1 | from intelligence_layer.evaluation import MeanAccumulator 2 | 3 | 4 | def test_mean_accumulator_returns_mean() -> None: 5 | acc = MeanAccumulator() 6 | assert acc.extract() == 0.0 7 | acc.add(1) 8 | assert acc.extract() == 1.0 9 | acc.add(0) 10 | assert acc.extract() == 0.5 11 | 12 | 13 | def test_mean_accumulator_returns_stdev_and_se() -> None: 14 | acc = MeanAccumulator() 15 | assert acc.standard_deviation() == 0.0 16 | assert acc.standard_error() == 0.0 17 | acc.add(1) 18 | assert acc.standard_deviation() == 0.0 19 | assert acc.standard_error() == 0.0 20 | acc.add(0) 21 | assert acc.standard_deviation() == 0.5 22 | assert round(acc.standard_error(), 3) == 0.354 23 | -------------------------------------------------------------------------------- /tests/evaluation/aggregation/test_domain.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | from intelligence_layer.evaluation.aggregation.domain import AggregationOverview 4 | from intelligence_layer.evaluation.evaluation.domain import EvaluationFailed 5 | from tests.evaluation.conftest import DummyAggregatedEvaluation 6 | 7 | 8 | def test_raise_on_exception_for_evaluation_run_overview( 9 | aggregation_overview: AggregationOverview[DummyAggregatedEvaluation], 10 | ) -> None: 11 | with pytest.raises(EvaluationFailed): 12 | aggregation_overview.raise_on_evaluation_failure() 13 | -------------------------------------------------------------------------------- /tests/evaluation/aggregation/test_elo_calculator.py: -------------------------------------------------------------------------------- 1 | 
from collections.abc import Sequence 2 | from itertools import combinations 3 | 4 | from pydantic import BaseModel 5 | from pytest import fixture 6 | 7 | from intelligence_layer.evaluation import EloCalculator, MatchOutcome, WinRateCalculator 8 | from intelligence_layer.evaluation.evaluation.evaluator.incremental_evaluator import ( 9 | ComparisonEvaluation, 10 | ) 11 | 12 | 13 | @fixture 14 | def players() -> Sequence[str]: 15 | return [str(i + 1) for i in range(10)] 16 | 17 | 18 | @fixture 19 | def matches(players: Sequence[str]) -> Sequence[ComparisonEvaluation]: 20 | return [ 21 | ComparisonEvaluation( 22 | first_player=player_a, second_player=player_b, outcome=MatchOutcome.A_WINS 23 | ) 24 | for player_a, player_b in combinations(players, 2) 25 | ] 26 | 27 | 28 | class MatchOutcomeModel(BaseModel): 29 | match_outcome: MatchOutcome 30 | 31 | 32 | def test_match_outcome_serializes() -> None: 33 | match_outcome_model = MatchOutcomeModel(match_outcome=MatchOutcome.A_WINS) 34 | dumped = match_outcome_model.model_dump_json() 35 | loaded = MatchOutcomeModel.model_validate_json(dumped) 36 | 37 | assert loaded == match_outcome_model 38 | 39 | 40 | def test_elo_calculator_works( 41 | players: Sequence[str], matches: Sequence[ComparisonEvaluation] 42 | ) -> None: 43 | elo_calculator = EloCalculator(players) 44 | elo_calculator.calculate(matches) 45 | 46 | sorted_scores = { 47 | k: v 48 | for k, v in sorted( 49 | elo_calculator.ratings.items(), key=lambda item: item[1], reverse=True 50 | ) 51 | } 52 | assert [int(i) for i in players] == [int(i) for i in sorted_scores] 53 | assert ( 54 | round(sum(score for score in sorted_scores.values()) / len(sorted_scores), 0) 55 | == 1500 56 | ) 57 | 58 | 59 | def test_win_rate_calculator_works( 60 | players: Sequence[str], matches: Sequence[ComparisonEvaluation] 61 | ) -> None: 62 | win_rate_calculator = WinRateCalculator(players) 63 | scores = win_rate_calculator.calculate(matches) 64 | 65 | sorted_scores = { 66 | k: v for k, v in sorted(scores.items(), key=lambda item: item[1], reverse=True) 67 | } 68 | assert [int(i) for i in players] == [int(i) for i in sorted_scores] 69 | assert ( 70 | round( 71 | sum(score for score in sorted_scores.values()) / len(sorted_scores), 72 | 5, 73 | ) 74 | == 0.5 75 | ) 76 | -------------------------------------------------------------------------------- /tests/evaluation/aggregation/test_hugging_face_aggregation_repository.py: -------------------------------------------------------------------------------- 1 | from collections.abc import Iterable 2 | from uuid import uuid4 3 | 4 | import huggingface_hub 5 | from _pytest.fixtures import fixture 6 | 7 | from intelligence_layer.core import utc_now 8 | from intelligence_layer.evaluation import ( 9 | AggregationOverview, 10 | HuggingFaceAggregationRepository, 11 | ) 12 | from tests.evaluation.conftest import DummyAggregatedEvaluation 13 | 14 | 15 | @fixture 16 | def dummy_aggregated_evaluation() -> DummyAggregatedEvaluation: 17 | return DummyAggregatedEvaluation(score=0.5) 18 | 19 | 20 | # these fixtures should only be used once and are here for readable tests 21 | # because creating/deleting HuggingFace repositories can be rate-limited 22 | @fixture(scope="session") 23 | def hugging_face_aggregation_repository( 24 | hugging_face_token: str, hugging_face_test_repository_id: str 25 | ) -> Iterable[HuggingFaceAggregationRepository]: 26 | try: 27 | yield HuggingFaceAggregationRepository( 28 | hugging_face_test_repository_id, 29 | token=hugging_face_token, 30 | private=True, 31 | ) 
32 | finally: 33 | huggingface_hub.delete_repo( 34 | repo_id=hugging_face_test_repository_id, 35 | token=hugging_face_token, 36 | repo_type="dataset", 37 | missing_ok=True, 38 | ) 39 | 40 | 41 | @fixture 42 | def aggregation_overview( 43 | dummy_aggregated_evaluation: DummyAggregatedEvaluation, 44 | ) -> AggregationOverview[DummyAggregatedEvaluation]: 45 | return AggregationOverview( 46 | evaluation_overviews=frozenset([]), 47 | id=str(uuid4()), 48 | start=utc_now(), 49 | end=utc_now(), 50 | successful_evaluation_count=0, 51 | crashed_during_evaluation_count=0, 52 | description="", 53 | statistics=dummy_aggregated_evaluation, 54 | ) 55 | 56 | 57 | def test_repository_operations( 58 | hugging_face_aggregation_repository: HuggingFaceAggregationRepository, 59 | aggregation_overview: AggregationOverview[DummyAggregatedEvaluation], 60 | ) -> None: 61 | hugging_face_aggregation_repository.store_aggregation_overview(aggregation_overview) 62 | overview = hugging_face_aggregation_repository.aggregation_overview( 63 | aggregation_overview.id, DummyAggregatedEvaluation 64 | ) 65 | 66 | assert ( 67 | aggregation_overview.id 68 | in hugging_face_aggregation_repository.aggregation_overview_ids() 69 | ) 70 | assert overview is not None 71 | -------------------------------------------------------------------------------- /tests/evaluation/dataset/test_dataset_domain.py: -------------------------------------------------------------------------------- 1 | from intelligence_layer.evaluation import Dataset 2 | 3 | 4 | def test_default_values_are_set() -> None: 5 | dataset = Dataset(name="Test") 6 | 7 | assert dataset.id is not None 8 | assert len(dataset.metadata) == 0 9 | assert len(dataset.labels) == 0 10 | 11 | 12 | def test_default_values_are_not_changed() -> None: 13 | modified_dataset = Dataset(name="Modified Dataset") 14 | modified_dataset.labels.add("test_label") 15 | modified_dataset.metadata.update({"key": "value"}) 16 | 17 | default_dataset = Dataset(name="Default Dataset") 18 | 19 | assert modified_dataset.labels != default_dataset.labels 20 | assert modified_dataset.metadata != default_dataset.metadata 21 | -------------------------------------------------------------------------------- /tests/evaluation/evaluation/conftest.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Aleph-Alpha/intelligence-layer-sdk/261ab36a28977d91e2d0c8ca16039edcbab81925/tests/evaluation/evaluation/conftest.py -------------------------------------------------------------------------------- /tests/evaluation/evaluation/test_file_evaluation_repository.py: -------------------------------------------------------------------------------- 1 | import contextlib 2 | 3 | from pydantic import BaseModel 4 | 5 | from intelligence_layer.evaluation.evaluation.file_evaluation_repository import ( 6 | FileEvaluationRepository, 7 | ) 8 | 9 | """Contains specific test for the FileEvaluationRepository. 
For more generic 10 | tests, check the test_evaluation_repository file.""" 11 | 12 | 13 | class DummyType(BaseModel): 14 | pass 15 | 16 | 17 | def test_evaluation_overview_ids_does_not_create_a_folder( 18 | file_evaluation_repository: FileEvaluationRepository, 19 | ) -> None: 20 | assert not file_evaluation_repository._eval_root_directory().exists() 21 | with contextlib.suppress(FileNotFoundError): 22 | file_evaluation_repository.evaluation_overview_ids() 23 | assert not file_evaluation_repository._eval_root_directory().exists() 24 | 25 | 26 | def test_evaluation_overview_does_not_create_a_folder( 27 | file_evaluation_repository: FileEvaluationRepository, 28 | ) -> None: 29 | assert not file_evaluation_repository._eval_root_directory().exists() 30 | assert not file_evaluation_repository._eval_directory("Non-existent").exists() 31 | 32 | file_evaluation_repository.evaluation_overview("Non-existent") 33 | assert not file_evaluation_repository._eval_root_directory().exists() 34 | 35 | 36 | def test_example_evaluations_does_not_create_a_folder( 37 | file_evaluation_repository: FileEvaluationRepository, 38 | ) -> None: 39 | assert not file_evaluation_repository._eval_root_directory().exists() 40 | assert not file_evaluation_repository._eval_directory("Non-existent").exists() 41 | 42 | with contextlib.suppress(ValueError): 43 | file_evaluation_repository.example_evaluations("Non-existent", DummyType) 44 | assert not file_evaluation_repository._eval_root_directory().exists() 45 | 46 | 47 | def test_example_evaluation_does_not_create_a_folder( 48 | file_evaluation_repository: FileEvaluationRepository, 49 | ) -> None: 50 | assert not file_evaluation_repository._eval_root_directory().exists() 51 | assert not file_evaluation_repository._eval_directory("Non-existent").exists() 52 | 53 | with contextlib.suppress(ValueError): 54 | file_evaluation_repository.example_evaluation( 55 | "Non-existent", "Non-existent", DummyType 56 | ) 57 | assert not file_evaluation_repository._eval_root_directory().exists() 58 | -------------------------------------------------------------------------------- /tests/evaluation/infrastructure/test_hugging_face_repository.py: -------------------------------------------------------------------------------- 1 | import huggingface_hub 2 | 3 | from intelligence_layer.evaluation.infrastructure.hugging_face_repository import ( 4 | HuggingFaceRepository, 5 | ) 6 | 7 | 8 | def test_hugging_face_repository_can_create_and_delete_a_repository( 9 | hugging_face_token: str, hugging_face_test_repository_id: str 10 | ) -> None: 11 | repository_id = hugging_face_test_repository_id + "unused-suffix" 12 | 13 | assert not huggingface_hub.repo_exists( 14 | repo_id=repository_id, 15 | token=hugging_face_token, 16 | repo_type="dataset", 17 | ), f"The repository with the ID {repository_id} already exists. Try to run the clean_hf script." 
18 | 19 | created_repository = HuggingFaceRepository( 20 | repository_id=repository_id, 21 | token=hugging_face_token, 22 | private=True, 23 | ) 24 | 25 | try: 26 | assert huggingface_hub.repo_exists( 27 | repo_id=repository_id, 28 | token=hugging_face_token, 29 | repo_type="dataset", 30 | ) 31 | created_repository.delete_repository() 32 | assert not huggingface_hub.repo_exists( 33 | repo_id=repository_id, 34 | token=hugging_face_token, 35 | repo_type="dataset", 36 | ) 37 | finally: 38 | huggingface_hub.delete_repo( 39 | repo_id=repository_id, 40 | token=hugging_face_token, 41 | repo_type="dataset", 42 | missing_ok=True, 43 | ) 44 | -------------------------------------------------------------------------------- /tests/evaluation/run/test_file_run_repository.py: -------------------------------------------------------------------------------- 1 | import contextlib 2 | 3 | import pytest 4 | from pydantic import BaseModel 5 | 6 | from intelligence_layer.evaluation.run.file_run_repository import FileRunRepository 7 | 8 | """Contains specific test for the FileRunRepository. For more generic 9 | tests, check the test_run_repository file.""" 10 | 11 | 12 | class DummyType(BaseModel): 13 | pass 14 | 15 | 16 | def test_run_overview_ids_does_not_create_a_folder( 17 | file_run_repository: FileRunRepository, 18 | ) -> None: 19 | assert not file_run_repository._run_root_directory().exists() 20 | with contextlib.suppress(FileNotFoundError): 21 | file_run_repository.run_overview_ids() 22 | assert not file_run_repository._run_root_directory().exists() 23 | 24 | 25 | def test_run_overview_does_not_create_a_folder( 26 | file_run_repository: FileRunRepository, 27 | ) -> None: 28 | assert not file_run_repository._run_root_directory().exists() 29 | assert not file_run_repository._run_directory("Non-existent").exists() 30 | 31 | file_run_repository.run_overview("Non-existent") 32 | assert not file_run_repository._run_root_directory().exists() 33 | 34 | 35 | @pytest.mark.filterwarnings("ignore::UserWarning") 36 | def test_example_runs_does_not_create_a_folder( 37 | file_run_repository: FileRunRepository, 38 | ) -> None: 39 | assert not file_run_repository._run_root_directory().exists() 40 | assert not file_run_repository._run_directory("Non-existent").exists() 41 | 42 | with contextlib.suppress(ValueError): 43 | file_run_repository.example_outputs("Non-existent", DummyType) 44 | assert not file_run_repository._run_root_directory().exists() 45 | 46 | 47 | @pytest.mark.filterwarnings("ignore::UserWarning") 48 | def test_example_run_does_not_create_a_folder( 49 | file_run_repository: FileRunRepository, 50 | ) -> None: 51 | assert not file_run_repository._run_root_directory().exists() 52 | assert not file_run_repository._run_directory("Non-existent").exists() 53 | 54 | with contextlib.suppress(ValueError): 55 | file_run_repository.example_output("Non-existent", "Non-existent", DummyType) 56 | assert not file_run_repository._run_root_directory().exists() 57 | -------------------------------------------------------------------------------- /tests/evaluation/run/test_run.py: -------------------------------------------------------------------------------- 1 | from collections.abc import Iterable, Sequence 2 | from pathlib import Path 3 | 4 | from dotenv import load_dotenv 5 | from pydantic import BaseModel 6 | from pytest import fixture 7 | 8 | from intelligence_layer.connectors import AlephAlphaClientProtocol 9 | from intelligence_layer.core import Task, TaskSpan 10 | from intelligence_layer.evaluation import ( 11 | 
AggregationLogic, 12 | EvaluationLogic, 13 | Example, 14 | FileAggregationRepository, 15 | FileDatasetRepository, 16 | SuccessfulExampleOutput, 17 | ) 18 | from intelligence_layer.evaluation.run_evaluation import main 19 | 20 | load_dotenv() 21 | 22 | 23 | @fixture 24 | def examples() -> Sequence[Example[None, None]]: 25 | return [Example(input=None, expected_output=None)] 26 | 27 | 28 | class DummyEvaluation(BaseModel): 29 | correct: bool 30 | 31 | 32 | class DummyAggregation(BaseModel): 33 | correct_rate: float 34 | 35 | 36 | class DummyTask(Task[None, None]): 37 | def __init__(self) -> None: 38 | pass 39 | 40 | def do_run(self, input: None, task_span: TaskSpan) -> None: 41 | return input 42 | 43 | 44 | class DummyTaskWithClient(DummyTask): 45 | def __init__(self, client: AlephAlphaClientProtocol) -> None: 46 | pass 47 | 48 | 49 | class DummyAggregationLogic(AggregationLogic[DummyEvaluation, DummyAggregation]): 50 | def aggregate(self, evaluations: Iterable[DummyEvaluation]) -> DummyAggregation: 51 | list(evaluations) 52 | return DummyAggregation(correct_rate=1.0) 53 | 54 | 55 | class DummyEvaluationLogic(EvaluationLogic[None, None, None, DummyEvaluation]): 56 | def do_evaluate( 57 | self, example: Example[None, None], *output: SuccessfulExampleOutput[None] 58 | ) -> DummyEvaluation: 59 | return DummyEvaluation(correct=True) 60 | 61 | 62 | def test_run_evaluation( 63 | tmp_path: Path, examples: Sequence[Example[None, None]] 64 | ) -> None: 65 | dataset_path = tmp_path / "dataset" 66 | dataset_repository = FileDatasetRepository(dataset_path) 67 | dataset_id = dataset_repository.create_dataset( 68 | examples=examples, dataset_name="test-dataset" 69 | ).id 70 | 71 | aggregation_path = tmp_path / "eval" 72 | aggregation_repository = FileAggregationRepository(aggregation_path) 73 | 74 | main( 75 | [ 76 | "", 77 | "--eval-logic", 78 | "tests.evaluation.run.test_run.DummyEvaluationLogic", 79 | "--aggregation-logic", 80 | "tests.evaluation.run.test_run.DummyAggregationLogic", 81 | "--task", 82 | "tests.evaluation.run.test_run.DummyTask", 83 | "--dataset-repository-path", 84 | str(dataset_path), 85 | "--dataset-id", 86 | dataset_id, 87 | "--target-dir", 88 | str(aggregation_path), 89 | "--description", 90 | "dummy-evaluator", 91 | ] 92 | ) 93 | ids = aggregation_repository.aggregation_overview_ids() 94 | assert len(ids) == 1 95 | overview = aggregation_repository.aggregation_overview(ids[0], DummyAggregation) 96 | assert overview 97 | assert overview.successful_evaluation_count == 1 98 | 99 | 100 | def test_run_evaluation_with_task_with_client( 101 | tmp_path: Path, examples: Sequence[Example[None, None]] 102 | ) -> None: 103 | dataset_path = tmp_path / "dataset" 104 | dataset_repository = FileDatasetRepository(dataset_path) 105 | dataset_id = dataset_repository.create_dataset( 106 | examples=examples, dataset_name="test-dataset" 107 | ).id 108 | 109 | eval_path = tmp_path / "eval" 110 | 111 | main( 112 | [ 113 | "", 114 | "--eval-logic", 115 | "tests.evaluation.run.test_run.DummyEvaluationLogic", 116 | "--aggregation-logic", 117 | "tests.evaluation.run.test_run.DummyAggregationLogic", 118 | "--task", 119 | "tests.evaluation.run.test_run.DummyTaskWithClient", 120 | "--dataset-repository-path", 121 | str(dataset_path), 122 | "--dataset-id", 123 | dataset_id, 124 | "--target-dir", 125 | str(eval_path), 126 | "--description", 127 | "dummy-evaluator", 128 | ] 129 | ) 130 | -------------------------------------------------------------------------------- 
/tests/examples/classify/test_keyword_extract.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | from intelligence_layer.core import NoOpTracer 4 | from intelligence_layer.core.chunk import TextChunk 5 | from intelligence_layer.core.detect_language import Language, LanguageNotSupportedError 6 | from intelligence_layer.examples.classify.keyword_extract import ( 7 | KeywordExtract, 8 | KeywordExtractInput, 9 | ) 10 | 11 | 12 | @pytest.fixture() 13 | def keyword_extract() -> KeywordExtract: 14 | return KeywordExtract() 15 | 16 | 17 | def test_keyword_extract_works(keyword_extract: KeywordExtract) -> None: 18 | input = KeywordExtractInput( 19 | chunk=TextChunk("I really like my computer"), language=Language("en") 20 | ) 21 | 22 | result = keyword_extract.run(input, NoOpTracer()) 23 | assert "computer" in [keyword.lower() for keyword in result.keywords] 24 | 25 | 26 | def test_keyword_extract_raises_for_unsupported_language( 27 | keyword_extract: KeywordExtract, 28 | ) -> None: 29 | input = KeywordExtractInput( 30 | chunk=TextChunk("text about computers"), language=Language("pt") 31 | ) 32 | with pytest.raises(LanguageNotSupportedError) as _: 33 | keyword_extract.run(input, NoOpTracer()) 34 | -------------------------------------------------------------------------------- /tests/examples/classify/test_prompt_based_classify_with_definitions.py: -------------------------------------------------------------------------------- 1 | from collections.abc import Sequence 2 | 3 | from pytest import fixture 4 | 5 | from intelligence_layer.core import Llama3InstructModel, NoOpTracer, TextChunk 6 | from intelligence_layer.examples import ( 7 | ClassifyInput, 8 | LabelWithDefinition, 9 | PromptBasedClassifyWithDefinitions, 10 | SingleLabelClassifyOutput, 11 | ) 12 | 13 | 14 | @fixture 15 | def labels_with_definitions() -> Sequence[LabelWithDefinition]: 16 | return [ 17 | LabelWithDefinition( 18 | name="Dinosaur", definition="Any text that is about dinosaurs." 19 | ), 20 | LabelWithDefinition(name="Plant", definition="Any text that is about plants."), 21 | LabelWithDefinition( 22 | name="Toy", definition="Everything that has something to do with toys." 
23 | ), 24 | ] 25 | 26 | 27 | @fixture 28 | def prompt_based_classify_with_definitions( 29 | llama_control_model: Llama3InstructModel, 30 | labels_with_definitions: Sequence[LabelWithDefinition], 31 | ) -> PromptBasedClassifyWithDefinitions: 32 | return PromptBasedClassifyWithDefinitions( 33 | labels_with_definitions, llama_control_model 34 | ) 35 | 36 | 37 | def test_prompt_based_classify_with_definitions_returns_score_for_all_labels( 38 | prompt_based_classify_with_definitions: PromptBasedClassifyWithDefinitions, 39 | labels_with_definitions: Sequence[LabelWithDefinition], 40 | ) -> None: 41 | classify_input = ClassifyInput( 42 | chunk=TextChunk("I love my cactus!"), 43 | labels=frozenset(label.name for label in labels_with_definitions), 44 | ) 45 | 46 | classify_output = prompt_based_classify_with_definitions.run( 47 | classify_input, NoOpTracer() 48 | ) 49 | 50 | # Output contains everything we expect 51 | assert isinstance(classify_output, SingleLabelClassifyOutput) 52 | assert classify_input.labels == set(r for r in classify_output.scores) 53 | -------------------------------------------------------------------------------- /tests/examples/qa/test_multiple_chunk_qa.py: -------------------------------------------------------------------------------- 1 | from collections.abc import Sequence 2 | 3 | from intelligence_layer.core import NoOpTracer 4 | from intelligence_layer.core.chunk import TextChunk 5 | from intelligence_layer.core.detect_language import Language 6 | from intelligence_layer.examples.qa.multiple_chunk_qa import ( 7 | MultipleChunkQa, 8 | MultipleChunkQaInput, 9 | ) 10 | 11 | CHUNK_CONTAINING_ANSWER = TextChunk( 12 | "Paul Nicolas lost his mother at the age of 3, and then his father in 1914.[3] He was raised by his mother-in-law together with his brother Henri. " 13 | "He began his football career with Saint-Mandé Club in 1916. Initially, he played as a defender, but he quickly realized that his destiny laid at the " 14 | "forefront since he scored many goals.[3] In addition to his goal-scoring instinct, Nicolas also stood out for his strong character on the pitch, " 15 | "and these two qualities combined eventually drew the attention of Mr. Fort, the then president of the Gallia Club, who signed him as a centre-forward in 1916." 16 | ) 17 | RELATED_CHUNK_WITHOUT_ANSWER = TextChunk( 18 | "In addition to his goal-scoring instinct, Nicolas also stood out for his strong character on the pitch, and these two qualities combined eventually drew the " 19 | "attention of Mr. Fort, the then president of the Gallia Club, who signed him as a centre-forward in 1916. " 20 | ) 21 | RELATED_QUESTION = "What is the name of Paul Nicolas' brother?" 22 | IMPORTANT_PART_OF_CORRECT_ANSWER = "Henri" 23 | UNRELATED_QUESTION = "What is the capital of Germany?"
24 | 25 | 26 | def test_multiple_chunk_qa_with_multiple_chunks( 27 | multiple_chunk_qa: MultipleChunkQa, 28 | ) -> None: 29 | chunks: Sequence[TextChunk] = [ 30 | CHUNK_CONTAINING_ANSWER, 31 | RELATED_CHUNK_WITHOUT_ANSWER, 32 | ] 33 | 34 | input = MultipleChunkQaInput( 35 | chunks=chunks, question=RELATED_QUESTION, generate_highlights=True 36 | ) 37 | output = multiple_chunk_qa.run(input, NoOpTracer()) 38 | 39 | assert output.answer 40 | assert IMPORTANT_PART_OF_CORRECT_ANSWER in output.answer 41 | assert len(output.subanswers) == 1 42 | assert output.subanswers[0].chunk == chunks[0] 43 | assert any( 44 | IMPORTANT_PART_OF_CORRECT_ANSWER 45 | in CHUNK_CONTAINING_ANSWER[highlight.start : highlight.end] 46 | for highlight in output.subanswers[0].highlights 47 | ) 48 | 49 | 50 | def test_multiple_chunk_qa_with_multiple_chunks_explainability_disabled( 51 | multiple_chunk_qa: MultipleChunkQa, 52 | ) -> None: 53 | chunks: Sequence[TextChunk] = [ 54 | CHUNK_CONTAINING_ANSWER, 55 | RELATED_CHUNK_WITHOUT_ANSWER, 56 | ] 57 | 58 | input = MultipleChunkQaInput( 59 | chunks=chunks, question=RELATED_QUESTION, generate_highlights=False 60 | ) 61 | output = multiple_chunk_qa.run(input, NoOpTracer()) 62 | 63 | assert output.answer 64 | assert IMPORTANT_PART_OF_CORRECT_ANSWER in output.answer 65 | assert len(output.subanswers) == 1 66 | assert output.subanswers[0].chunk == chunks[0] 67 | assert all(not subanswer.highlights for subanswer in output.subanswers) 68 | 69 | 70 | def test_multiple_chunk_qa_without_answer(multiple_chunk_qa: MultipleChunkQa) -> None: 71 | chunks: Sequence[TextChunk] = [CHUNK_CONTAINING_ANSWER] 72 | 73 | input = MultipleChunkQaInput(chunks=chunks, question=UNRELATED_QUESTION) 74 | output = multiple_chunk_qa.run(input, NoOpTracer()) 75 | 76 | assert output.answer is None 77 | 78 | 79 | def test_multiple_chunk_qa_with_spanish_question( 80 | multiple_chunk_qa: MultipleChunkQa, 81 | ) -> None: 82 | question = "¿Cómo se llama el hermano de Paul Nicolas?"
83 | chunks = [CHUNK_CONTAINING_ANSWER, CHUNK_CONTAINING_ANSWER] 84 | 85 | input = MultipleChunkQaInput( 86 | chunks=chunks, question=question, language=Language("es") 87 | ) 88 | output = multiple_chunk_qa.run(input, NoOpTracer()) 89 | 90 | assert len(output.subanswers) == len(chunks) 91 | assert output.answer 92 | assert "hermano" in output.answer 93 | -------------------------------------------------------------------------------- /tests/examples/qa/test_multiple_chunk_retriever_qa.py: -------------------------------------------------------------------------------- 1 | from pytest import fixture 2 | 3 | from intelligence_layer.connectors import QdrantInMemoryRetriever 4 | from intelligence_layer.core import LuminousControlModel, NoOpTracer 5 | from intelligence_layer.core.tracer.in_memory_tracer import InMemoryTracer 6 | from intelligence_layer.examples import ( 7 | ExpandChunks, 8 | MultipleChunkRetrieverQa, 9 | RetrieverBasedQaInput, 10 | ) 11 | 12 | 13 | @fixture 14 | def multiple_chunk_retriever_qa( 15 | luminous_control_model: LuminousControlModel, 16 | asymmetric_in_memory_retriever: QdrantInMemoryRetriever, 17 | ) -> MultipleChunkRetrieverQa[int]: 18 | return MultipleChunkRetrieverQa( 19 | retriever=asymmetric_in_memory_retriever, 20 | model=luminous_control_model, 21 | expand_chunks=ExpandChunks( 22 | asymmetric_in_memory_retriever, luminous_control_model, 256 23 | ), 24 | ) 25 | 26 | 27 | def test_multiple_chunk_retriever_qa_using_in_memory_retriever( 28 | multiple_chunk_retriever_qa: MultipleChunkRetrieverQa[int], 29 | no_op_tracer: NoOpTracer, 30 | ) -> None: 31 | question = "When was Robert Moses born?" 32 | input = RetrieverBasedQaInput(question=question) 33 | tracer = InMemoryTracer() 34 | output = multiple_chunk_retriever_qa.run(input, tracer) 35 | assert output.answer 36 | assert "1888" in output.answer 37 | assert len(output.sources) == 5 38 | -------------------------------------------------------------------------------- /tests/examples/qa/test_retriever_based_qa.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | from pytest import fixture 3 | 4 | from intelligence_layer.connectors.document_index.document_index import DocumentPath 5 | from intelligence_layer.connectors.retrievers.document_index_retriever import ( 6 | DocumentIndexRetriever, 7 | ) 8 | from intelligence_layer.connectors.retrievers.qdrant_in_memory_retriever import ( 9 | QdrantInMemoryRetriever, 10 | ) 11 | from intelligence_layer.core import NoOpTracer 12 | from intelligence_layer.examples import ( 13 | MultipleChunkQa, 14 | RetrieverBasedQa, 15 | RetrieverBasedQaInput, 16 | ) 17 | 18 | 19 | @fixture 20 | def retriever_based_qa_with_in_memory_retriever( 21 | multiple_chunk_qa: MultipleChunkQa, 22 | asymmetric_in_memory_retriever: QdrantInMemoryRetriever, 23 | ) -> RetrieverBasedQa[int]: 24 | return RetrieverBasedQa( 25 | retriever=asymmetric_in_memory_retriever, multi_chunk_qa=multiple_chunk_qa 26 | ) 27 | 28 | 29 | @fixture 30 | def retriever_based_qa_with_document_index( 31 | multiple_chunk_qa: MultipleChunkQa, document_index_retriever: DocumentIndexRetriever 32 | ) -> RetrieverBasedQa[DocumentPath]: 33 | return RetrieverBasedQa( 34 | retriever=document_index_retriever, multi_chunk_qa=multiple_chunk_qa 35 | ) 36 | 37 | 38 | @pytest.mark.filterwarnings("ignore::DeprecationWarning") 39 | def test_retriever_based_qa_using_in_memory_retriever( 40 | retriever_based_qa_with_in_memory_retriever: RetrieverBasedQa[int], 41 | no_op_tracer: NoOpTracer, 42 | 
) -> None: 43 | question = "When was Robert Moses born?" 44 | input = RetrieverBasedQaInput(question=question) 45 | output = retriever_based_qa_with_in_memory_retriever.run(input, no_op_tracer) 46 | assert output.answer 47 | assert "1888" in output.answer 48 | assert output.subanswers[0].id == 3 49 | -------------------------------------------------------------------------------- /tests/examples/summarize/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Aleph-Alpha/intelligence-layer-sdk/261ab36a28977d91e2d0c8ca16039edcbab81925/tests/examples/summarize/__init__.py -------------------------------------------------------------------------------- /tests/examples/summarize/test_recursive_summarize.py: -------------------------------------------------------------------------------- 1 | from pathlib import Path 2 | 3 | from aleph_alpha_client import Client, CompletionRequest, CompletionResponse 4 | from pytest import fixture 5 | 6 | from intelligence_layer.core import Chunk, Llama3InstructModel, NoOpTracer 7 | from intelligence_layer.examples import RecursiveSummarize 8 | from intelligence_layer.examples.summarize.recursive_summarize import ( 9 | RecursiveSummarizeInput, 10 | ) 11 | from intelligence_layer.examples.summarize.steerable_long_context_summarize import ( 12 | SteerableLongContextSummarize, 13 | ) 14 | from intelligence_layer.examples.summarize.steerable_single_chunk_summarize import ( 15 | SteerableSingleChunkSummarize, 16 | ) 17 | 18 | 19 | class RecursiveCountingClient(Client): 20 | recursive_counter: int = 0 21 | 22 | def complete(self, request: CompletionRequest, model: str) -> CompletionResponse: 23 | self.recursive_counter += 1 24 | return super().complete(request, model) 25 | 26 | 27 | short_text = """The brown bear (Ursus arctos) is a large bear species found across Eurasia and North America.[1][3] In North America, the populations of brown bears are called grizzly bears, while the subspecies that inhabits the Kodiak Islands of Alaska is known as the Kodiak bear. 
It is one of the largest living terrestrial members of the order Carnivora, rivaled in size only by its closest relative, the polar bear (Ursus maritimus), which is much less variable in size and slightly bigger on average.[4][5][6][7][8] The brown bear's range includes parts of Russia, Central Asia, the Himalayas, China, Canada, the United States, Hokkaido, Scandinavia, Finland, the Balkans, the Picos de Europa and the Carpathian region (especially Romania), Iran, Anatolia, and the Caucasus.[1][9] The brown bear is recognized as a national and state animal in several European countries.[10]""" 28 | 29 | 30 | @fixture 31 | def recursive_counting_client( 32 | token: str, inference_url: str 33 | ) -> RecursiveCountingClient: 34 | return RecursiveCountingClient(token, host=inference_url) 35 | 36 | 37 | @fixture 38 | def very_long_text() -> str: 39 | with (Path(__file__).parent / "very_long_text.txt").open( 40 | mode="r", encoding="utf-8" 41 | ) as file: 42 | return file.read() 43 | 44 | 45 | def test_recursive_summarize_stops_when_hitting_max_tokens( 46 | very_long_text: str, 47 | steerable_long_context_summarize: SteerableLongContextSummarize, 48 | ) -> None: 49 | max_tokens = 1000 50 | input = RecursiveSummarizeInput(text=very_long_text, max_tokens=max_tokens) 51 | task = RecursiveSummarize(steerable_long_context_summarize) 52 | output = task.run(input, NoOpTracer()) 53 | 54 | assert len(output.summary) < len(very_long_text) 55 | assert output.generated_tokens < max_tokens 56 | assert "new orleans" in output.summary.lower() 57 | 58 | 59 | def test_recursive_summarize_stops_when_num_partial_summaries_stays_same( 60 | steerable_long_context_summarize: SteerableLongContextSummarize, 61 | ) -> None: 62 | max_tokens = 2048 63 | input = RecursiveSummarizeInput(text=short_text, max_tokens=max_tokens) 64 | task = RecursiveSummarize(steerable_long_context_summarize) 65 | output = task.run(input, NoOpTracer()) 66 | 67 | assert output.generated_tokens > 50 68 | 69 | 70 | def test_recursive_summarize_stops_when_num_partial_summaries_stays_same_with_empty_text( 71 | steerable_long_context_summarize: SteerableLongContextSummarize, 72 | ) -> None: 73 | max_tokens = 2048 74 | input = RecursiveSummarizeInput(text="", max_tokens=max_tokens) 75 | task = RecursiveSummarize(steerable_long_context_summarize) 76 | output = task.run(input, NoOpTracer()) 77 | 78 | assert output.generated_tokens == 0 79 | 80 | 81 | def test_recursive_summarize_stops_after_one_chunk( 82 | recursive_counting_client: RecursiveCountingClient, 83 | ) -> None: 84 | model = Llama3InstructModel( 85 | name="llama-3.1-8b-instruct", client=recursive_counting_client 86 | ) 87 | 88 | long_context_high_compression_summarize = SteerableLongContextSummarize( 89 | summarize=SteerableSingleChunkSummarize(model, max_generated_tokens=100), 90 | chunk=Chunk(model, max_tokens_per_chunk=1500), 91 | ) 92 | input = RecursiveSummarizeInput(text=short_text) 93 | task = RecursiveSummarize(long_context_high_compression_summarize) 94 | task.run(input, NoOpTracer()) 95 | 96 | assert recursive_counting_client.recursive_counter == 1 97 | -------------------------------------------------------------------------------- /tests/examples/summarize/test_steerable_long_context_summarize.py: -------------------------------------------------------------------------------- 1 | from intelligence_layer.core import Chunk, Language, LuminousControlModel, NoOpTracer 2 | from intelligence_layer.examples import ( 3 | LongContextSummarizeInput, 4 | SteerableLongContextSummarize, 5 | 
) 6 | from intelligence_layer.examples.summarize.steerable_single_chunk_summarize import ( 7 | SteerableSingleChunkSummarize, 8 | ) 9 | 10 | 11 | def test_steerable_long_context_summarize_en( 12 | steerable_long_context_summarize: SteerableLongContextSummarize, 13 | long_text: str, 14 | ) -> None: 15 | input = LongContextSummarizeInput(text=long_text) 16 | output = steerable_long_context_summarize.run(input, NoOpTracer()) 17 | 18 | assert output.partial_summaries 19 | assert any( 20 | "bear" in partial_summary.summary 21 | for partial_summary in output.partial_summaries 22 | ) 23 | assert len( 24 | " ".join( 25 | partial_summary.summary for partial_summary in output.partial_summaries 26 | ) 27 | ) < len(long_text) 28 | 29 | 30 | def test_steerable_long_context_summarize_adapts_to_instruction( 31 | luminous_control_model: LuminousControlModel, 32 | long_text: str, 33 | ) -> None: 34 | input = LongContextSummarizeInput(text=long_text) 35 | steerable_long_context_summarize_keyword = SteerableLongContextSummarize( 36 | summarize=SteerableSingleChunkSummarize( 37 | luminous_control_model, 38 | max_generated_tokens=128, 39 | instruction_configs={Language("en"): "Summarize using bullet points."}, 40 | ), 41 | chunk=Chunk(luminous_control_model, max_tokens_per_chunk=512), 42 | ) 43 | 44 | output = steerable_long_context_summarize_keyword.run(input, NoOpTracer()) 45 | 46 | assert output.partial_summaries 47 | assert any( 48 | "bear" in partial_summary.summary 49 | for partial_summary in output.partial_summaries 50 | ) 51 | assert all( 52 | partial_summary.summary.startswith("- ") 53 | for partial_summary in output.partial_summaries 54 | ) 55 | assert len( 56 | " ".join( 57 | partial_summary.summary for partial_summary in output.partial_summaries 58 | ) 59 | ) < len(long_text) 60 | -------------------------------------------------------------------------------- /tests/image_example.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Aleph-Alpha/intelligence-layer-sdk/261ab36a28977d91e2d0c8ca16039edcbab81925/tests/image_example.jpg -------------------------------------------------------------------------------- /tests/learning/conftest.py: -------------------------------------------------------------------------------- 1 | import os 2 | from pathlib import Path 3 | 4 | from pytest import fixture 5 | 6 | from intelligence_layer.core import DetectLanguage, Language, NoOpTracer 7 | from intelligence_layer.core.model import Message 8 | from intelligence_layer.learning import ( 9 | EnrichDomain, 10 | EnrichQuality, 11 | FileInstructionFinetuningDataRepository, 12 | InstructionFinetuningDataHandler, 13 | InstructionFinetuningSample, 14 | InstructionFinetuningSampleAttributes, 15 | PostgresInstructionFinetuningDataRepository, 16 | RawInstructionFinetuningSample, 17 | ) 18 | 19 | 20 | @fixture 21 | def raw_instruction_finetuning_sample() -> RawInstructionFinetuningSample: 22 | return RawInstructionFinetuningSample( 23 | messages=[ 24 | Message(role="user", content="Hi."), 25 | Message(role="assistant", content="Hello, how can I help you?"), 26 | ], 27 | attributes=InstructionFinetuningSampleAttributes( 28 | source="example", domain="general", quality=5, languages=[Language("en")] 29 | ), 30 | external_id="example_1", 31 | ) 32 | 33 | 34 | @fixture(scope="function") 35 | def instruction_finetuning_sample( 36 | raw_instruction_finetuning_sample: RawInstructionFinetuningSample, 37 | ) -> InstructionFinetuningSample: 38 | return 
InstructionFinetuningSample.from_raw_sample( 39 | raw_instruction_finetuning_sample 40 | ) 41 | 42 | 43 | @fixture 44 | def postgres_instruction_finetuning_data_repository() -> ( 45 | PostgresInstructionFinetuningDataRepository 46 | ): 47 | db_user = os.getenv("POSTGRES_USER") 48 | db_pw = os.getenv("POSTGRES_PASSWORD") 49 | db_host = os.getenv("POSTGRES_HOST") 50 | db_port = os.getenv("POSTGRES_PORT") 51 | 52 | db_name = os.getenv("POSTGRES_DB") 53 | db_url = f"postgresql://{db_user}:{db_pw}@{db_host}:{db_port}/{db_name}" 54 | 55 | return PostgresInstructionFinetuningDataRepository(db_url) 56 | 57 | 58 | @fixture 59 | def file_instruction_finetuning_data_repository( 60 | tmp_path: Path, 61 | ) -> FileInstructionFinetuningDataRepository: 62 | return FileInstructionFinetuningDataRepository(tmp_path) 63 | 64 | 65 | @fixture(scope="function") 66 | def instruction_finetuning_data_handler( 67 | postgres_instruction_finetuning_data_repository: PostgresInstructionFinetuningDataRepository, 68 | ) -> InstructionFinetuningDataHandler: 69 | return InstructionFinetuningDataHandler( 70 | postgres_instruction_finetuning_data_repository, 71 | EnrichDomain(["smalltalk", "weather", "gossip"]), 72 | EnrichQuality(), 73 | DetectLanguage(), 74 | [Language("de"), Language("en")], 75 | Language("en"), 76 | NoOpTracer(), 77 | ) 78 | -------------------------------------------------------------------------------- /tests/learning/test_postgres_instruction_finetuning_data_repository.py: -------------------------------------------------------------------------------- 1 | from intelligence_layer.learning import ( 2 | InstructionFinetuningSample, 3 | PostgresInstructionFinetuningDataRepository, 4 | RawInstructionFinetuningSample, 5 | ) 6 | 7 | 8 | def test_postgres_instruction_finetuning_data_repository_can_store_load_and_delete_sample( 9 | postgres_instruction_finetuning_data_repository: PostgresInstructionFinetuningDataRepository, 10 | instruction_finetuning_sample: InstructionFinetuningSample, 11 | ) -> None: 12 | postgres_instruction_finetuning_data_repository.store_sample( 13 | instruction_finetuning_sample 14 | ) 15 | loaded_sample = postgres_instruction_finetuning_data_repository.sample( 16 | instruction_finetuning_sample.id 17 | ) 18 | 19 | assert instruction_finetuning_sample == loaded_sample 20 | 21 | postgres_instruction_finetuning_data_repository.delete_sample( 22 | instruction_finetuning_sample.id 23 | ) 24 | no_sample_expected = postgres_instruction_finetuning_data_repository.sample( 25 | instruction_finetuning_sample.id 26 | ) 27 | 28 | assert no_sample_expected is None 29 | 30 | 31 | def test_postgres_instruction_finetuning_data_repository_can_store_load_and_delete_samples( 32 | postgres_instruction_finetuning_data_repository: PostgresInstructionFinetuningDataRepository, 33 | raw_instruction_finetuning_sample: RawInstructionFinetuningSample, 34 | ) -> None: 35 | samples = [ 36 | InstructionFinetuningSample.from_raw_sample(raw_instruction_finetuning_sample) 37 | for _ in range(10) 38 | ] 39 | ids = [sample.id for sample in samples] 40 | id_iter = (id for id in ids) 41 | 42 | postgres_instruction_finetuning_data_repository.store_samples(samples) 43 | loaded_samples = postgres_instruction_finetuning_data_repository.samples(id_iter) 44 | 45 | assert set(ids) == set(loaded_sample.id for loaded_sample in loaded_samples) 46 | 47 | postgres_instruction_finetuning_data_repository.delete_samples(ids) 48 | no_samples_expected = postgres_instruction_finetuning_data_repository.samples(ids) 49 | 50 | assert 
list(no_samples_expected) == [] 51 | 52 | 53 | def test_postgres_instruction_finetuning_data_repository_can_show_first_n_samples( 54 | postgres_instruction_finetuning_data_repository: PostgresInstructionFinetuningDataRepository, 55 | raw_instruction_finetuning_sample: RawInstructionFinetuningSample, 56 | ) -> None: 57 | n = 10 58 | samples = [ 59 | InstructionFinetuningSample.from_raw_sample(raw_instruction_finetuning_sample) 60 | for _ in range(n) 61 | ] 62 | 63 | postgres_instruction_finetuning_data_repository.store_samples(samples) 64 | head = list(postgres_instruction_finetuning_data_repository.head(n // 2)) 65 | 66 | assert len(head) == n // 2 67 | --------------------------------------------------------------------------------