├── .github ├── CODEOWNERS ├── ISSUE_TEMPLATE │ ├── bug_report.md │ └── user-story.md ├── actions │ └── integration_tests │ │ └── action.yml ├── dependabot.yml ├── pull_request_template.md ├── utils │ ├── add-category-id.py │ └── pydoc-markdown.sh └── workflows │ ├── api-docs.yaml │ ├── compliance.yml │ ├── continuous-deployment-dev.yml │ ├── continuous-deployment-prod.yml │ ├── continuous-integration.yml │ ├── deploy-prod.yml │ ├── deploy-test.yml │ ├── deploy.yml │ ├── high-prio-bug.yml │ └── merge-queue.yml ├── .gitignore ├── .pre-commit-config.yaml ├── CONTRIBUTING.md ├── LICENSE ├── README.md ├── assets ├── cli.gif └── logo.png ├── deepset_cloud_sdk ├── README.md ├── __about__.py ├── __init__.py ├── _api │ ├── config.py │ ├── deepset_cloud_api.py │ ├── files.py │ └── upload_sessions.py ├── _s3 │ ├── __init__.py │ └── upload.py ├── _service │ └── files_service.py ├── _utils │ ├── __init__.py │ └── datetime.py ├── cli.py ├── models.py └── workflows │ ├── __init__.py │ ├── async_client │ ├── __init__.py │ └── files.py │ ├── pipeline_client │ ├── __init__.py │ ├── models.py │ ├── pipeline_client.py │ └── pipeline_service.py │ └── sync_client │ ├── __init__.py │ ├── files.py │ └── utils.py ├── docs ├── _images │ ├── favicon.svg │ └── white-logo.svg ├── _pydoc │ ├── __init__.py │ ├── config │ │ ├── async_client.yml │ │ ├── cli.yml │ │ └── sync_client.yml │ ├── renderers.py │ └── requirements.txt ├── _stylesheets │ └── extra.css ├── examples │ ├── cli │ │ └── README.md │ ├── data │ │ ├── example.pdf │ │ ├── example.txt │ │ └── example.txt.meta.json │ └── sdk │ │ ├── README.md │ │ └── upload.py ├── index.md └── upload_files.md ├── mkdocs.yml ├── pyproject.toml ├── test-upload ├── example.txt ├── example.txt.meta.json ├── example2.txt └── example2.txt.meta.json └── tests ├── __init__.py ├── conftest.py ├── data ├── .fake-env ├── direct_upload │ ├── example.txt │ └── example.txt.meta.json ├── example.txt ├── upload_folder │ ├── example.csv │ ├── example.csv.meta.json │ ├── example.docx │ ├── example.html │ ├── example.jpg │ ├── example.json │ ├── example.md │ ├── example.pdf │ ├── example.pptx │ ├── example.txt │ ├── example.txt.meta.json │ ├── example.xlsx │ └── example.xml ├── upload_folder_nested │ ├── example.txt │ ├── meta │ │ └── example.txt.meta.json │ └── nested_folder │ │ └── second.txt └── upload_folder_with_duplicates │ ├── file1.txt │ ├── file2.txt │ └── old_files │ ├── file1.txt │ └── file2.txt ├── integration ├── api │ ├── test_integration_files.py │ └── test_integration_upload_sessions.py ├── service │ └── test_integration_files_service.py └── workflows │ └── test_integration_pipeline_client.py ├── test_data ├── basic.txt ├── msmarco.10 │ ├── 103275.txt │ ├── 103275.txt.meta.json │ ├── 103291.txt │ ├── 103291.txt.meta.json │ ├── 110580.txt │ ├── 110580.txt.meta.json │ ├── 117256.txt │ ├── 117256.txt.meta.json │ ├── 16675.txt │ ├── 16675.txt.meta.json │ ├── 22297.txt │ ├── 22297.txt.meta.json │ ├── 35887.txt │ ├── 35887.txt.meta.json │ ├── 61768.txt │ ├── 61768.txt.meta.json │ ├── 79388.txt │ ├── 79388.txt.meta.json │ ├── 87243.txt │ └── 87243.txt.meta.json ├── multiple_file_types │ ├── file00.txt │ ├── file00.txt.meta.json │ ├── file01.xml │ ├── file01.xml.meta.json │ ├── file02.pptx │ ├── file02.pptx.meta.json │ ├── file03.xlsx │ ├── file03.xlsx.meta.json │ ├── file04.json │ ├── file04.json.meta.json │ ├── file05.docx │ ├── file05.docx.meta.json │ ├── file06.md │ ├── file06.md.meta.json │ ├── file07.csv │ ├── file07.csv.meta.json │ ├── file08.pdf │ ├── file08.pdf.meta.json │ ├── 
file09.html │ └── file09.html.meta.json └── multiple_file_types_caps │ ├── File00.txt │ └── File00.txt.meta.json └── unit ├── api ├── test_deepset_cloud_api.py ├── test_files.py └── test_upload_sessions.py ├── s3 └── test_upload.py ├── service └── test_files_service.py ├── test_cli.py ├── utils ├── test_datetime_utils.py └── test_load_configuration.py └── workflows ├── async_client └── test_async_workflow_files.py ├── pipeline_client ├── test_models.py ├── test_pipeline_client.py └── test_pipeline_service.py └── sync_client ├── test_sync_workflow_files.py └── test_utils.py /.github/CODEOWNERS: -------------------------------------------------------------------------------- 1 | * @deepset-ai/grow-squad-backend 2 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/bug_report.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Bug report 3 | about: 'Create a report ' 4 | title: '' 5 | labels: '' 6 | assignees: '' 7 | 8 | --- 9 | 10 | **Describe the bug** 11 | A clear and concise description of what the bug is. 12 | 13 | **To Reproduce** 14 | Steps to reproduce the behavior: 15 | 1. Go to '...' 16 | 2. Click on '....' 17 | 3. Scroll down to '....' 18 | 4. See error 19 | 20 | **Expected behavior** 21 | A clear and concise description of what you expected to happen. 22 | 23 | **Screenshots** 24 | If applicable, add screenshots to help explain your problem. 25 | 26 | 27 | **Checklist** 28 | - [ ] I added a label for the level of urgency of this bug (see definitions [here](https://www.notion.so/deepsetai/DC-processes-Bugs-and-Issues-79f7250be94b450a934296afd987a29a#4139a178336c439b8f52a99dde5e6b87)) 29 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/user-story.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: User Story 3 | about: User Story 4 | title: '' 5 | labels: '' 6 | assignees: '' 7 | 8 | --- 9 | 10 | **User Story** 11 | As a ... 12 | I want ... 13 | So that ... 14 | 15 | **Relevant Links** 16 | 17 | **Acceptance Criteria** 18 | - Criteria 1 19 | - Criteria 2 20 | - Criteria 3 21 | 22 | **Task List** 23 | - [ ] #2 24 | - [ ] Task 2 25 | - [ ] Task 3 26 | - ... 
27 | -------------------------------------------------------------------------------- /.github/actions/integration_tests/action.yml: -------------------------------------------------------------------------------- 1 | name: "Integration Tests" 2 | description: "Runs the Integration tests " 3 | 4 | inputs: 5 | API_KEY: 6 | description: "The API_KEY for deepset Cloud" 7 | required: true 8 | API_URL: 9 | description: "The API_URL for deepset Cloud" 10 | required: true 11 | 12 | outputs: {} 13 | runs: 14 | using: "composite" 15 | steps: 16 | - uses: actions/setup-python@v4 17 | with: 18 | python-version: "3.10" 19 | - name: Install Hatch 20 | shell: bash 21 | run: pip install hatch==${{ env.HATCH_VERSION }} 22 | # we are using the "automated-tests" organization with predefined users and workspaces 23 | - name: Run SDK Tests 24 | shell: bash 25 | run: | 26 | API_KEY=${{inputs.API_KEY}} API_URL=${{inputs.API_URL}} hatch run test:integration 27 | -------------------------------------------------------------------------------- /.github/dependabot.yml: -------------------------------------------------------------------------------- 1 | # See all config options here https://docs.github.com/en/code-security/dependabot/dependabot-version-updates/configuration-options-for-the-dependabot.yml-file 2 | version: 2 3 | updates: 4 | - package-ecosystem: "pip" 5 | directory: "/" 6 | schedule: 7 | interval: "weekly" 8 | commit-message: 9 | prefix: "build: " 10 | groups: 11 | python-production-updates: 12 | dependency-type: "production" 13 | update-types: 14 | - "major" 15 | - "minor" 16 | - "patch" 17 | python-development-updates: 18 | dependency-type: "development" 19 | update-types: 20 | - "major" 21 | - "minor" 22 | - "patch" 23 | - package-ecosystem: "github-actions" 24 | directory: "/" 25 | schedule: 26 | interval: "weekly" 27 | commit-message: 28 | prefix: "ci: " 29 | groups: 30 | # Specify a name for the group, which will be used in pull request titles 31 | # and branch names 32 | ci-updates: 33 | update-types: 34 | - "minor" 35 | - "patch" 36 | - "major" 37 | -------------------------------------------------------------------------------- /.github/pull_request_template.md: -------------------------------------------------------------------------------- 1 | ### Related Issues 2 | 3 | - fixes #issue-number 4 | 5 | ### Proposed Changes? 6 | 7 | 8 | 9 | ### How did you test it? 
10 | 11 | 12 | 13 | ### Notes for the reviewer 14 | 15 | 16 | 17 | ### Screenshots (optional) 18 | 19 | 20 | 21 | ### Checklist 22 | 23 | - [ ] I have updated the referenced issue with new insights and changes 24 | - [ ] If this is a code change, I have added unit tests 25 | - [ ] I've used the [conventional commit specification](https://www.conventionalcommits.org/en/v1.0.0/) for my PR title 26 | - [ ] I updated the docstrings 27 | - [ ] If this is a code change, I added meaningful logs and prepared Datadog visualizations and alerts 28 | -------------------------------------------------------------------------------- /.github/utils/add-category-id.py: -------------------------------------------------------------------------------- 1 | import os 2 | import re 3 | from typing import List 4 | 5 | 6 | def read_file(file_path: str) -> List[str]: 7 | """Reads the content of a markdown file and returns it as a list of lines.""" 8 | with open(file_path, "r", encoding="utf-8") as file: 9 | content = file.readlines() 10 | return content 11 | 12 | 13 | def modify_header(content: List[str], category_id: str) -> List[str]: 14 | """Modifies the YAML front matter in the markdown content to include the category.""" 15 | in_header = False 16 | new_content = [] 17 | category_added = False 18 | end_header_pattern = r"^---$" 19 | start_header_found = False 20 | 21 | for line in content: 22 | if re.match(end_header_pattern, line) and start_header_found: 23 | in_header = False 24 | if not category_added: 25 | new_content.append(f"category: {category_id}\n") 26 | new_content.append(line) 27 | elif in_header: 28 | if line.startswith("category:"): 29 | new_content.append(f"category: {category_id}\n") 30 | category_added = True 31 | else: 32 | new_content.append(line) 33 | else: 34 | if line.strip() == "---": 35 | in_header = True 36 | start_header_found = True 37 | new_content.append(line) 38 | return new_content 39 | 40 | 41 | def update_markdown_files(directory: str, category_id: str) -> None: 42 | """Updates all markdown files in a given directory by modifying their headers.""" 43 | for filename in os.listdir(directory): 44 | if filename.endswith(".md"): 45 | file_path = os.path.join(directory, filename) 46 | content = read_file(file_path) 47 | modified_content = modify_header(content, category_id) 48 | with open(file_path, "w", encoding="utf-8") as file: 49 | file.writelines(modified_content) 50 | 51 | 52 | # Example usage 53 | if __name__ == "__main__": 54 | directory = os.getenv("MARKDOWN_FILES_DIRECTORY", "default_directory") 55 | category_id = os.getenv("CATEGORY_ID", "default_category_id") 56 | update_markdown_files(directory, category_id) 57 | -------------------------------------------------------------------------------- /.github/utils/pydoc-markdown.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -e # Fails on any error in the following loop 4 | export PYTHONPATH=$PWD/docs/_pydoc # Make the renderers available to pydoc 5 | cd docs/_pydoc 6 | rm -rf temp && mkdir temp 7 | cd temp 8 | for file in ../config/* ; do 9 | echo "Converting $file..." 
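  # Each YAML file in config/ tells pydoc-markdown which modules to load and how to render them
  # (see renderers.py). The generated Markdown lands in this temp/ directory, which the api-docs
  # workflow later post-processes (add-category-id.py) and publishes to ReadMe via `rdme`.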
10 | pydoc-markdown "$file" 11 | done 12 | -------------------------------------------------------------------------------- /.github/workflows/api-docs.yaml: -------------------------------------------------------------------------------- 1 | name: API Docs 2 | 3 | on: 4 | release: 5 | types: 6 | - published 7 | 8 | env: 9 | CATEGORY_ID: ${{ secrets.CATEGORY_ID }} 10 | 11 | permissions: 12 | contents: write 13 | 14 | jobs: 15 | deploy: 16 | runs-on: ubuntu-latest 17 | steps: 18 | - name: Checkout repository 19 | uses: actions/checkout@v4 20 | 21 | - name: Setup python 22 | uses: actions/setup-python@v5 23 | with: 24 | python-version: 3.x 25 | 26 | - name: Setup cache 27 | uses: actions/cache@v4 28 | with: 29 | key: ${{ github.ref }} 30 | path: .cache 31 | 32 | - name: Install dependencies for doc generation 33 | run: | 34 | python -m pip install --upgrade pip 35 | pip install -r docs/_pydoc/requirements.txt 36 | pip install --upgrade setuptools # Fix to prevent: ModuleNotFoundError: No module named 'pkg_resources' 37 | 38 | 39 | - name: Generate API docs 40 | run: ./.github/utils/pydoc-markdown.sh 41 | 42 | - name: Configure git to push docs 43 | run: | 44 | git config --global user.name docs-bot 45 | git config --global user.email docs@bot.com 46 | git config pull.rebase false 47 | git pull --allow-unrelated-histories origin gh-pages 48 | 49 | - name: Install dependencies for doc deployment 50 | run: pip install mkdocs-material mkdocstrings[python] mkdocs-mermaid2-plugin mike 51 | 52 | - name: Publish docs to pages 53 | run: | 54 | mike deploy --push --update-aliases ${{github.ref_name}} && \ 55 | mike set-default --push ${{github.ref_name}} 56 | 57 | - name: Add Category ID to all API docs 58 | run: python ./.github/utils/add-category-id.py 59 | env: 60 | MARKDOWN_FILES_DIRECTORY: docs/_pydoc/temp/ 61 | CATEGORY_ID: ${{env.CATEGORY_ID}} 62 | 63 | - name: Run `docs` command 🚀 64 | uses: readmeio/rdme@v10 65 | with: 66 | rdme: docs docs/_pydoc/temp --key=${{ secrets.README_API_KEY }} --version=1.0 67 | -------------------------------------------------------------------------------- /.github/workflows/compliance.yml: -------------------------------------------------------------------------------- 1 | name: Compliance Checks 2 | 3 | on: 4 | push: 5 | paths: 6 | - 'pyproject.toml' 7 | schedule: 8 | - cron: '0 0 * * *' # every day at midnight 9 | 10 | env: 11 | SLACK_WEBHOOK_URL: ${{ secrets.SLACK_WEBHOOK_URL }} 12 | SLACK_ALERT_CHANNEL: "#dc-alerts" 13 | 14 | jobs: 15 | check-license-compliance: 16 | name: Check license compliance 17 | runs-on: ubuntu-latest 18 | steps: 19 | - uses: actions/checkout@v4 20 | 21 | - uses: actions/setup-python@0a5c61591373683505ea898e09a3ea4f39ef2b9c 22 | with: 23 | python-version: "3.10" 24 | 25 | - name: Install prod dependencies 26 | run: | 27 | pip install hatch==v1.14.0 28 | hatch run tools:requirements 29 | pip install -r requirements.txt 30 | 31 | - name: Create file with full dependency list 32 | run: | 33 | pip freeze > requirements-full.txt 34 | 35 | - name: Send license report to Fossa 36 | # This will collect all necessary information (mostly used dependencies) and send it to the Fossa API 37 | uses: fossas/fossa-action@3ebcea1862c6ffbd5cf1b4d0bd6b3fe7bd6f2cac # Use a specific version if locking is preferred 38 | with: 39 | api-key: ${{ secrets.FOSSA_LICENSE_SCAN_TOKEN }} 40 | 41 | - name: Check license compliance 42 | # This will poll the Fossa API until they have processed the information which we've sent in the previous step 43 | # and fail if 
Fossa found an issue with the licences of our dependencies. 44 | uses: fossas/fossa-action@3ebcea1862c6ffbd5cf1b4d0bd6b3fe7bd6f2cac # Use a specific version if locking is preferred 45 | with: 46 | api-key: ${{ secrets.FOSSA_LICENSE_SCAN_TOKEN }} 47 | run-tests: true 48 | 49 | - name: Send Slack notification if license check failed 50 | uses: act10ns/slack@44541246747a30eb3102d87f7a4cc5471b0ffb7d 51 | if: failure() && github.ref == 'refs/heads/main' 52 | with: 53 | status: ${{ job.status }} 54 | channel: ${{ env.SLACK_ALERT_CHANNEL }} 55 | -------------------------------------------------------------------------------- /.github/workflows/continuous-deployment-dev.yml: -------------------------------------------------------------------------------- 1 | name: CD - Dev Integration Tests 2 | 3 | on: 4 | workflow_dispatch: 5 | inputs: 6 | api_url: 7 | required: true 8 | default: https://api.dev.cloud.dpst.dev/api/v1 9 | type: string 10 | description: "The API URL for the test run" 11 | 12 | env: 13 | HATCH_VERSION: "v1.14.0" # keep in sync with deploy.yml 14 | 15 | jobs: 16 | tests: 17 | name: Tests 18 | runs-on: ubuntu-latest 19 | steps: 20 | - uses: actions/checkout@v4 21 | - name: Run integration tests 22 | uses: ./.github/actions/integration_tests 23 | with: 24 | API_KEY: "${{secrets.API_KEY}}" 25 | API_URL: "${{inputs.api_url}}" 26 | -------------------------------------------------------------------------------- /.github/workflows/continuous-deployment-prod.yml: -------------------------------------------------------------------------------- 1 | name: CD - Prod Integration Tests 2 | 3 | on: 4 | workflow_dispatch: 5 | inputs: 6 | api_url: 7 | required: false 8 | default: "https://api.cloud.deepset.ai/api/v1" 9 | type: string 10 | description: "The API URL for the test run" 11 | 12 | env: 13 | HATCH_VERSION: "v1.14.0" # keep in sync with deploy.yml 14 | 15 | jobs: 16 | tests: 17 | name: Tests 18 | environment: PROD 19 | runs-on: ubuntu-latest 20 | steps: 21 | - uses: actions/checkout@v4 22 | - name: Run integration tests 23 | uses: ./.github/actions/integration_tests 24 | with: 25 | API_KEY: "${{secrets.API_KEY}}" 26 | API_URL: ${{ inputs.api_url }} 27 | -------------------------------------------------------------------------------- /.github/workflows/continuous-integration.yml: -------------------------------------------------------------------------------- 1 | name: CI 2 | 3 | on: 4 | pull_request: 5 | 6 | env: 7 | HATCH_VERSION: "v1.14.0" # keep in sync with deploy.yml 8 | 9 | permissions: 10 | pull-requests: write 11 | contents: write 12 | 13 | jobs: 14 | format-black: 15 | name: Format black 16 | runs-on: ubuntu-latest 17 | steps: 18 | - uses: actions/checkout@v4 19 | - uses: actions/setup-python@v5 20 | with: 21 | python-version: "3.10" 22 | - name: Install Hatch 23 | run: pip install hatch==${{ env.HATCH_VERSION }} 24 | - name: Run black 25 | run: hatch run code-quality:format 26 | 27 | 28 | mypy: 29 | name: MyPy 30 | runs-on: ubuntu-latest 31 | steps: 32 | - uses: actions/checkout@v4 33 | - uses: actions/setup-python@v5 34 | with: 35 | python-version: "3.10" 36 | - name: Install Hatch 37 | run: pip install hatch==${{ env.HATCH_VERSION }} 38 | - name: Run mypy 39 | run: hatch run code-quality:types 40 | 41 | lint: 42 | name: Lint Code 43 | runs-on: ubuntu-latest 44 | steps: 45 | - uses: actions/checkout@v4 46 | - uses: actions/setup-python@v5 47 | with: 48 | python-version: "3.10" 49 | - name: Install Hatch 50 | run: pip install hatch==${{ env.HATCH_VERSION }} 51 | - name: Run pylint 
52 | run: hatch run code-quality:lint 53 | 54 | isort: 55 | name: Sort imports 56 | runs-on: ubuntu-latest 57 | steps: 58 | - uses: actions/checkout@v4 59 | - uses: actions/setup-python@v5 60 | with: 61 | python-version: "3.10" 62 | - name: Install Hatch 63 | run: pip install hatch==${{ env.HATCH_VERSION }} 64 | - name: Run mypy 65 | run: hatch run code-quality:sort 66 | 67 | pydocstyle: 68 | name: Check docstrings 69 | runs-on: ubuntu-latest 70 | steps: 71 | - uses: actions/checkout@v4 72 | - uses: actions/setup-python@v5 73 | with: 74 | python-version: "3.10" 75 | - name: Install Hatch 76 | run: pip install hatch==${{ env.HATCH_VERSION }} 77 | - name: Run pydocstyle 78 | run: hatch run code-quality:docstrings 79 | 80 | scan-for-secrets: 81 | name: Scan for secrets 82 | runs-on: ubuntu-latest 83 | steps: 84 | - uses: actions/checkout@v4 85 | with: 86 | fetch-depth: 0 87 | - name: Install gitleaks 88 | run: wget -O - https://github.com/gitleaks/gitleaks/releases/download/v8.16.1/gitleaks_8.16.1_linux_x64.tar.gz | tar -xz 89 | - run: ./gitleaks detect --log-opts "${{ github.event.pull_request.base.sha }}..${{ github.event.pull_request.head.sha }}" 90 | if: github.event_name == 'pull_request' 91 | - run: ./gitleaks detect --log-opts "${{ github.event.before }}..${{ github.event.after }}" 92 | if: github.event_name == 'push' 93 | 94 | tests: 95 | name: Tests 96 | runs-on: ubuntu-latest 97 | env: 98 | API_KEY: "not-a-real-api-key" 99 | steps: 100 | - uses: actions/checkout@v4 101 | with: 102 | # for coverage comment action 103 | fetch-depth: 1000 104 | - uses: actions/setup-python@v5 105 | with: 106 | python-version: "3.10" 107 | - name: Install Hatch 108 | run: pip install hatch==${{ env.HATCH_VERSION }} 109 | - name: Run unit tests 110 | run: hatch run test:unit-with-cov 111 | - name: Coverage comment 112 | id: coverage_comment 113 | uses: py-cov-action/python-coverage-comment-action@0abd69a9baf90729d9b2d5b585fc790ec4e6f3dd 114 | with: 115 | GITHUB_TOKEN: ${{ github.token }} 116 | -------------------------------------------------------------------------------- /.github/workflows/deploy-prod.yml: -------------------------------------------------------------------------------- 1 | name: Deploy to Prod PyPi 2 | 3 | on: 4 | release: 5 | types: 6 | - published 7 | workflow_dispatch: 8 | 9 | env: 10 | HATCH_VERSION: "v1.14.0" # keep in sync with deploy.yml 11 | 12 | permissions: 13 | id-token: write 14 | 15 | jobs: 16 | deploy-prod: 17 | runs-on: ubuntu-latest 18 | environment: release 19 | env: 20 | pypi: ${{ vars.PYPI_URL }} 21 | steps: 22 | - uses: actions/checkout@v4 23 | - uses: actions/setup-python@v5 24 | with: 25 | python-version: "3.10" 26 | - name: Install Hatch 27 | run: pip install hatch==${{ env.HATCH_VERSION }} 28 | - name: Bump version 29 | # Bump version to Github action tag 30 | run: hatch version ${{github.ref_name}} 31 | - name: Build 32 | run: hatch build 33 | - name: publish 34 | uses: pypa/gh-action-pypi-publish@release/v1 35 | with: 36 | repository-url: ${{env.pypi}} 37 | -------------------------------------------------------------------------------- /.github/workflows/deploy-test.yml: -------------------------------------------------------------------------------- 1 | name: Deploy to Test PyPi 2 | 3 | on: 4 | pull_request: 5 | types: 6 | - labeled 7 | workflow_dispatch: 8 | 9 | env: 10 | HATCH_VERSION: "v1.14.0" # keep in sync with deploy.yml 11 | 12 | jobs: 13 | deploy-test: 14 | if: ${{ github.event.label.name == 'test-deploy' }} || github.event.label.name 
!='integration'` 15 | uses: ./.github/workflows/deploy.yml 16 | with: 17 | deployment_env: test 18 | api_url: "https://api.dev.cloud.dpst.dev/api/v1" 19 | secrets: inherit 20 | -------------------------------------------------------------------------------- /.github/workflows/deploy.yml: -------------------------------------------------------------------------------- 1 | name: Deploy 2 | 3 | on: 4 | workflow_call: 5 | inputs: 6 | deployment_env: 7 | required: true 8 | type: string 9 | api_url: 10 | required: true 11 | default: https://api.dev.cloud.dpst.dev/api/v1 12 | type: string 13 | version: 14 | required: true 15 | type: string 16 | 17 | permissions: 18 | id-token: write 19 | 20 | env: 21 | HATCH_VERSION: "v1.14.0" # keep in sync with continuous-integration.yml 22 | 23 | jobs: 24 | build-and-deploy: 25 | runs-on: ubuntu-latest 26 | environment: ${{inputs.deployment_env}} 27 | env: 28 | pypi: ${{ vars.PYPI_URL }} 29 | 30 | steps: 31 | - uses: actions/checkout@v4 32 | - uses: actions/setup-python@v5 33 | with: 34 | python-version: "3.10" 35 | - name: Install Hatch 36 | run: pip install hatch==${{ env.HATCH_VERSION }} 37 | - name: Bump version 38 | # Bump version to Github action tag 39 | run: hatch version ${{ inputs.version }} 40 | - name: Build 41 | run: hatch build 42 | - name: publish 43 | uses: pypa/gh-action-pypi-publish@release/v1 44 | with: 45 | repository-url: ${{env.pypi}} 46 | -------------------------------------------------------------------------------- /.github/workflows/high-prio-bug.yml: -------------------------------------------------------------------------------- 1 | name: Slack alert for high priority bugs 2 | 3 | on: 4 | issues: 5 | types: 6 | - labeled 7 | 8 | env: 9 | SLACK_WEBHOOK_URL: ${{ secrets.SLACK_WEBHOOK_BUG_ALERT_URL }} 10 | SLACK_ALERT_CHANNEL: "#dc-alerts" 11 | 12 | jobs: 13 | add-comment: 14 | if: github.event.label.name == 'high' 15 | runs-on: ubuntu-latest 16 | steps: 17 | - name: Send Slack alert for high priority bug 18 | uses: act10ns/slack@44541246747a30eb3102d87f7a4cc5471b0ffb7d 19 | with: 20 | status: High Priority Bug 21 | message: High Priority Bug ${{ github.event.issue.html_url }} 22 | channel: ${{ env.SLACK_ALERT_CHANNEL }} 23 | -------------------------------------------------------------------------------- /.github/workflows/merge-queue.yml: -------------------------------------------------------------------------------- 1 | name: Merge Queue 2 | 3 | on: 4 | merge_group: 5 | pull_request: 6 | types: [labeled, synchronize] 7 | workflow_call: 8 | inputs: 9 | api_url: 10 | required: true 11 | type: string 12 | deployment_env: 13 | required: true 14 | type: string 15 | secrets: 16 | API_KEY: 17 | required: true 18 | 19 | env: 20 | HATCH_VERSION: "v1.14.0" # keep in sync with deploy.yml 21 | 22 | jobs: 23 | # the API_KEYs are stored as a secret in the repository 24 | # we are using the "automated-tests" organization with predefined users and workspaces 25 | integration_tests: 26 | name: Tests 27 | environment: ${{ github.event.inputs.deployment_env }} 28 | runs-on: ubuntu-latest 29 | if: (github.event.action =='labeled' && github.event.label.name =='integration') || (github.event.action =='synchronize' && contains(github.event.pull_request.labels.*.name, 'integration')) || github.event.action =='workflow_call' || github.event_name == 'merge_group' 30 | steps: 31 | - uses: actions/checkout@v4 32 | - name: Run integration tests 33 | uses: ./.github/actions/integration_tests 34 | with: 35 | API_KEY: "${{ inputs.deployment_env == 'release' && 
secrets.API_KEY_PROD || secrets.API_KEY}}" 36 | API_URL: "${{ inputs.api_url || 'https://api.dev.cloud.dpst.dev/api/v1'}}" 37 | 38 | build: 39 | name: Build package 40 | needs: [integration_tests] 41 | runs-on: ubuntu-latest 42 | steps: 43 | - uses: actions/checkout@v4 44 | - uses: actions/setup-python@v5 45 | with: 46 | python-version: "3.10" 47 | - name: Install Hatch 48 | run: pip install hatch==${{ env.HATCH_VERSION }} 49 | - name: Build 50 | run: hatch build 51 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # achive tests 10 | archive 11 | pytest_html_report.html 12 | output.json 13 | 14 | # Distribution / packaging 15 | .Python 16 | build/ 17 | develop-eggs/ 18 | dist/ 19 | downloads/ 20 | eggs/ 21 | .eggs/ 22 | lib/ 23 | lib64/ 24 | parts/ 25 | sdist/ 26 | var/ 27 | wheels/ 28 | share/python-wheels/ 29 | *.egg-info/ 30 | .installed.cfg 31 | *.egg 32 | MANIFEST 33 | 34 | # PyInstaller 35 | # Usually these files are written by a python script from a template 36 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 37 | *.manifest 38 | *.spec 39 | 40 | # Installer logs 41 | pip-log.txt 42 | pip-delete-this-directory.txt 43 | 44 | # Unit test / coverage reports 45 | htmlcov/ 46 | .tox/ 47 | .nox/ 48 | .coverage 49 | .coverage.* 50 | .cache 51 | nosetests.xml 52 | coverage.xml 53 | *.cover 54 | *.py,cover 55 | .hypothesis/ 56 | .pytest_cache/ 57 | cover/ 58 | 59 | # Translations 60 | *.mo 61 | *.pot 62 | 63 | # Django stuff: 64 | *.log 65 | local_settings.py 66 | db.sqlite3 67 | db.sqlite3-journal 68 | 69 | # Flask stuff: 70 | instance/ 71 | .webassets-cache 72 | 73 | # Scrapy stuff: 74 | .scrapy 75 | 76 | # Sphinx documentation 77 | docs/_build/ 78 | 79 | # PyBuilder 80 | .pybuilder/ 81 | target/ 82 | 83 | # Jupyter Notebook 84 | .ipynb_checkpoints 85 | 86 | # IPython 87 | profile_default/ 88 | ipython_config.py 89 | 90 | # pyenv 91 | # For a library or package, you might want to ignore these files since the code is 92 | # intended to run in multiple environments; otherwise, check them in: 93 | # .python-version 94 | 95 | # pipenv 96 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 97 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 98 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 99 | # install all needed dependencies. 100 | #Pipfile.lock 101 | 102 | # poetry 103 | # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. 104 | # This is especially recommended for binary packages to ensure reproducibility, and is more 105 | # commonly ignored for libraries. 106 | # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control 107 | #poetry.lock 108 | 109 | # pdm 110 | # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. 111 | #pdm.lock 112 | # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it 113 | # in version control. 114 | # https://pdm.fming.dev/#use-with-ide 115 | .pdm.toml 116 | 117 | # PEP 582; used by e.g. 
github.com/David-OConnor/pyflow and github.com/pdm-project/pdm 118 | __pypackages__/ 119 | 120 | # Celery stuff 121 | celerybeat-schedule 122 | celerybeat.pid 123 | 124 | # SageMath parsed files 125 | *.sage.py 126 | 127 | # Environments 128 | .env 129 | *.env 130 | .venv 131 | env/ 132 | venv/ 133 | ENV/ 134 | env.bak/ 135 | venv.bak/ 136 | 137 | # Spyder project settings 138 | .spyderproject 139 | .spyproject 140 | 141 | # Rope project settings 142 | .ropeproject 143 | 144 | # mkdocs documentation 145 | /site 146 | 147 | # mypy 148 | .mypy_cache/ 149 | .dmypy.json 150 | dmypy.json 151 | 152 | # Pyre type checker 153 | .pyre/ 154 | 155 | # pytype static type analyzer 156 | .pytype/ 157 | 158 | # Cython debug symbols 159 | cython_debug/ 160 | 161 | # PyCharm 162 | # JetBrains specific template is maintained in a separate JetBrains.gitignore that can 163 | # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore 164 | # and can be added to the global gitignore or merged into this file. For a more nuclear 165 | # option (not recommended) you can uncomment the following to ignore the entire idea folder. 166 | #.idea/ 167 | 168 | .vscode 169 | temp 170 | .idea 171 | .python-version 172 | .DS_Store 173 | -------------------------------------------------------------------------------- /.pre-commit-config.yaml: -------------------------------------------------------------------------------- 1 | default_language_version: 2 | python: python3.10 3 | fail_fast: true 4 | 5 | # We can't use local hooks since some developers use dev containers and commit outside an environment which has the 6 | # required dependencies installed. 7 | 8 | repos: 9 | - repo: https://github.com/pre-commit/pre-commit-hooks 10 | rev: v4.4.0 11 | hooks: 12 | - id: check-json # checks JSON files for parseable syntax. 13 | - id: check-yaml # checks yaml files for parseable syntax. 14 | - id: end-of-file-fixer # ensures that a file is either empty, or ends with one newline. 
15 | - id: trailing-whitespace # trims trailing whitespace 16 | 17 | - repo: https://github.com/PyCQA/autoflake 18 | rev: v2.1.1 19 | hooks: 20 | - id: autoflake 21 | args: 22 | - "--in-place" 23 | - "--expand-star-imports" 24 | - "--remove-duplicate-keys" 25 | - "--remove-unused-variables" 26 | - "-v" 27 | 28 | - repo: https://github.com/psf/black 29 | # Please keep these aligned with the versions defined in the pyproject.toml [tool.hatch.envs.code-quality] 30 | rev: 23.3.0 31 | hooks: 32 | - id: black 33 | 34 | - repo: https://github.com/pre-commit/mirrors-mypy 35 | # Please keep these aligned with the versions defined in the pyproject.toml [tool.hatch.envs.code-quality] 36 | rev: "v1.1.1" 37 | hooks: 38 | - id: mypy 39 | args: 40 | - "--ignore-missing-imports" 41 | additional_dependencies: 42 | - types-aiofiles==23.1.0.2 43 | - "types-tabulate~=0.9.0" 44 | - "types-requests~=2.28.11" 45 | # - "types-Markdown~=3.4.2" 46 | # - "types-PyYAML~=6.0.12" 47 | # - "types-python-dateutil~=2.8.19" 48 | # - "types-redis~=4.5.1" 49 | 50 | - repo: https://github.com/pycqa/isort 51 | # Please keep these aligned with the versions defined in the pyproject.toml [tool.hatch.envs.code-quality] 52 | rev: 5.12.0 53 | hooks: 54 | - id: isort 55 | args: ["--profile", "black"] 56 | 57 | - repo: https://github.com/zricethezav/gitleaks 58 | rev: v8.16.1 59 | hooks: 60 | - id: gitleaks 61 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # Contributing 2 | 3 | ## Setup 4 | 5 | ### Build from source 6 | 7 | Install hatch 8 | ``` 9 | pip install hatch=="v1.14.0" 10 | ``` 11 | 12 | ### Install pre-commit hooks 13 | ``` 14 | hatch run code-quality:hooks 15 | ``` 16 | 17 | ## CI 18 | Code quality checks, unit tests, and integration tests (against dev) are performed on the creation of a PR, and subsequent pushes for that PR. 19 | Code quality checks, unit tests, and integration tests (against dev) are performed on a push to main. 20 | Integration tests are triggered whenever the e2e tests are triggered (environment will be dependent on e2e tests) 21 | Code quality checks, unit tests, and integration tests (against prod) are performed on the publishing of a release tag. 22 | 23 | ## Deploy to test PyPi 24 | 25 | When you create a PR in the deepset-cloud-sdk repository, add the 'test-deploy' label to trigger deployment to the test PyPi repository. 26 | 27 | ## Publishing to PyPi 28 | 29 | To publish a new version of the SDK you will need to create and publish a new release tag. 30 | 31 | 32 | ## Software design 33 | 34 | Have a look at this [README](/deepset_cloud_sdk/README.md) to get an overview of the software design. 35 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | 2 | Apache License 3 | Version 2.0, January 2004 4 | http://www.apache.org/licenses/ 5 | 6 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 7 | 8 | 1. Definitions. 9 | 10 | "License" shall mean the terms and conditions for use, reproduction, 11 | and distribution as defined by Sections 1 through 9 of this document. 12 | 13 | "Licensor" shall mean the copyright owner or entity authorized by 14 | the copyright owner that is granting the License. 
15 | 16 | "Legal Entity" shall mean the union of the acting entity and all 17 | other entities that control, are controlled by, or are under common 18 | control with that entity. For the purposes of this definition, 19 | "control" means (i) the power, direct or indirect, to cause the 20 | direction or management of such entity, whether by contract or 21 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 22 | outstanding shares, or (iii) beneficial ownership of such entity. 23 | 24 | "You" (or "Your") shall mean an individual or Legal Entity 25 | exercising permissions granted by this License. 26 | 27 | "Source" form shall mean the preferred form for making modifications, 28 | including but not limited to software source code, documentation 29 | source, and configuration files. 30 | 31 | "Object" form shall mean any form resulting from mechanical 32 | transformation or translation of a Source form, including but 33 | not limited to compiled object code, generated documentation, 34 | and conversions to other media types. 35 | 36 | "Work" shall mean the work of authorship, whether in Source or 37 | Object form, made available under the License, as indicated by a 38 | copyright notice that is included in or attached to the work 39 | (an example is provided in the Appendix below). 40 | 41 | "Derivative Works" shall mean any work, whether in Source or Object 42 | form, that is based on (or derived from) the Work and for which the 43 | editorial revisions, annotations, elaborations, or other modifications 44 | represent, as a whole, an original work of authorship. For the purposes 45 | of this License, Derivative Works shall not include works that remain 46 | separable from, or merely link (or bind by name) to the interfaces of, 47 | the Work and Derivative Works thereof. 48 | 49 | "Contribution" shall mean any work of authorship, including 50 | the original version of the Work and any modifications or additions 51 | to that Work or Derivative Works thereof, that is intentionally 52 | submitted to Licensor for inclusion in the Work by the copyright owner 53 | or by an individual or Legal Entity authorized to submit on behalf of 54 | the copyright owner. For the purposes of this definition, "submitted" 55 | means any form of electronic, verbal, or written communication sent 56 | to the Licensor or its representatives, including but not limited to 57 | communication on electronic mailing lists, source code control systems, 58 | and issue tracking systems that are managed by, or on behalf of, the 59 | Licensor for the purpose of discussing and improving the Work, but 60 | excluding communication that is conspicuously marked or otherwise 61 | designated in writing by the copyright owner as "Not a Contribution." 62 | 63 | "Contributor" shall mean Licensor and any individual or Legal Entity 64 | on behalf of whom a Contribution has been received by Licensor and 65 | subsequently incorporated within the Work. 66 | 67 | 2. Grant of Copyright License. Subject to the terms and conditions of 68 | this License, each Contributor hereby grants to You a perpetual, 69 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 70 | copyright license to reproduce, prepare Derivative Works of, 71 | publicly display, publicly perform, sublicense, and distribute the 72 | Work and such Derivative Works in Source or Object form. 73 | 74 | 3. Grant of Patent License. 
Subject to the terms and conditions of 75 | this License, each Contributor hereby grants to You a perpetual, 76 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 77 | (except as stated in this section) patent license to make, have made, 78 | use, offer to sell, sell, import, and otherwise transfer the Work, 79 | where such license applies only to those patent claims licensable 80 | by such Contributor that are necessarily infringed by their 81 | Contribution(s) alone or by combination of their Contribution(s) 82 | with the Work to which such Contribution(s) was submitted. If You 83 | institute patent litigation against any entity (including a 84 | cross-claim or counterclaim in a lawsuit) alleging that the Work 85 | or a Contribution incorporated within the Work constitutes direct 86 | or contributory patent infringement, then any patent licenses 87 | granted to You under this License for that Work shall terminate 88 | as of the date such litigation is filed. 89 | 90 | 4. Redistribution. You may reproduce and distribute copies of the 91 | Work or Derivative Works thereof in any medium, with or without 92 | modifications, and in Source or Object form, provided that You 93 | meet the following conditions: 94 | 95 | (a) You must give any other recipients of the Work or 96 | Derivative Works a copy of this License; and 97 | 98 | (b) You must cause any modified files to carry prominent notices 99 | stating that You changed the files; and 100 | 101 | (c) You must retain, in the Source form of any Derivative Works 102 | that You distribute, all copyright, patent, trademark, and 103 | attribution notices from the Source form of the Work, 104 | excluding those notices that do not pertain to any part of 105 | the Derivative Works; and 106 | 107 | (d) If the Work includes a "NOTICE" text file as part of its 108 | distribution, then any Derivative Works that You distribute must 109 | include a readable copy of the attribution notices contained 110 | within such NOTICE file, excluding those notices that do not 111 | pertain to any part of the Derivative Works, in at least one 112 | of the following places: within a NOTICE text file distributed 113 | as part of the Derivative Works; within the Source form or 114 | documentation, if provided along with the Derivative Works; or, 115 | within a display generated by the Derivative Works, if and 116 | wherever such third-party notices normally appear. The contents 117 | of the NOTICE file are for informational purposes only and 118 | do not modify the License. You may add Your own attribution 119 | notices within Derivative Works that You distribute, alongside 120 | or as an addendum to the NOTICE text from the Work, provided 121 | that such additional attribution notices cannot be construed 122 | as modifying the License. 123 | 124 | You may add Your own copyright statement to Your modifications and 125 | may provide additional or different license terms and conditions 126 | for use, reproduction, or distribution of Your modifications, or 127 | for any such Derivative Works as a whole, provided Your use, 128 | reproduction, and distribution of the Work otherwise complies with 129 | the conditions stated in this License. 130 | 131 | 5. Submission of Contributions. Unless You explicitly state otherwise, 132 | any Contribution intentionally submitted for inclusion in the Work 133 | by You to the Licensor shall be under the terms and conditions of 134 | this License, without any additional terms or conditions. 
135 | Notwithstanding the above, nothing herein shall supersede or modify 136 | the terms of any separate license agreement you may have executed 137 | with Licensor regarding such Contributions. 138 | 139 | 6. Trademarks. This License does not grant permission to use the trade 140 | names, trademarks, service marks, or product names of the Licensor, 141 | except as required for reasonable and customary use in describing the 142 | origin of the Work and reproducing the content of the NOTICE file. 143 | 144 | 7. Disclaimer of Warranty. Unless required by applicable law or 145 | agreed to in writing, Licensor provides the Work (and each 146 | Contributor provides its Contributions) on an "AS IS" BASIS, 147 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 148 | implied, including, without limitation, any warranties or conditions 149 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 150 | PARTICULAR PURPOSE. You are solely responsible for determining the 151 | appropriateness of using or redistributing the Work and assume any 152 | risks associated with Your exercise of permissions under this License. 153 | 154 | 8. Limitation of Liability. In no event and under no legal theory, 155 | whether in tort (including negligence), contract, or otherwise, 156 | unless required by applicable law (such as deliberate and grossly 157 | negligent acts) or agreed to in writing, shall any Contributor be 158 | liable to You for damages, including any direct, indirect, special, 159 | incidental, or consequential damages of any character arising as a 160 | result of this License or out of the use or inability to use the 161 | Work (including but not limited to damages for loss of goodwill, 162 | work stoppage, computer failure or malfunction, or any and all 163 | other commercial damages or losses), even if such Contributor 164 | has been advised of the possibility of such damages. 165 | 166 | 9. Accepting Warranty or Additional Liability. While redistributing 167 | the Work or Derivative Works thereof, You may choose to offer, 168 | and charge a fee for, acceptance of support, warranty, indemnity, 169 | or other liability obligations and/or rights consistent with this 170 | License. However, in accepting such obligations, You may act only 171 | on Your own behalf and on Your sole responsibility, not on behalf 172 | of any other Contributor, and only if You agree to indemnify, 173 | defend, and hold each Contributor harmless for any liability 174 | incurred by, or claims asserted against, such Contributor by reason 175 | of your accepting any such warranty or additional liability. 176 | 177 | END OF TERMS AND CONDITIONS 178 | 179 | APPENDIX: How to apply the Apache License to your work. 180 | 181 | To apply the Apache License to your work, attach the following 182 | boilerplate notice, with the fields enclosed by brackets "[]" 183 | replaced with your own identifying information. (Don't include 184 | the brackets!) The text should be enclosed in the appropriate 185 | comment syntax for the file format. We also recommend that a 186 | file or class name and description of purpose be included on the 187 | same "printed page" as the copyright notice for easier 188 | identification within third-party archives. 189 | 190 | Copyright 2021 deepset GmbH 191 | 192 | Licensed under the Apache License, Version 2.0 (the "License"); 193 | you may not use this file except in compliance with the License. 
194 | You may obtain a copy of the License at 195 | 196 | http://www.apache.org/licenses/LICENSE-2.0 197 | 198 | Unless required by applicable law or agreed to in writing, software 199 | distributed under the License is distributed on an "AS IS" BASIS, 200 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 201 | See the License for the specific language governing permissions and 202 | limitations under the License. 203 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 |

2 | ![deepset Cloud SDK](assets/logo.png) 3 |

4 | 5 | [![Coverage badge](https://github.com/deepset-ai/deepset-cloud-sdk/raw/python-coverage-comment-action-data/badge.svg)](https://github.com/deepset-ai/deepset-cloud-sdk/tree/python-coverage-comment-action-data) 6 | [![Tests](https://github.com/deepset-ai/deepset-cloud-sdk/actions/workflows/continuous-integration.yml/badge.svg)](https://github.com/deepset-ai/deepset-cloud-sdk/actions/workflows/continuous-integration.yml) 7 | [![Deploy PyPi](https://github.com/deepset-ai/deepset-cloud-sdk/actions/workflows/deploy-prod.yml/badge.svg)](https://github.com/deepset-ai/deepset-cloud-sdk/actions/workflows/deploy-prod.yml) 8 | [![Compliance Checks](https://github.com/deepset-ai/deepset-cloud-sdk/actions/workflows/compliance.yml/badge.svg)](https://github.com/deepset-ai/deepset-cloud-sdk/actions/workflows/compliance.yml) 9 | 10 | The deepset Cloud SDK is an open source software development kit that provides convenient access to and integration with deepset Cloud, a powerful cloud offering for various natural language processing (NLP) tasks. 11 | This README provides an overview of the SDK and its features, and information on contributing to the project and exploring related resources. 12 | 13 | - [Official SDK Docs](https://docs.cloud.deepset.ai/docs/working-with-the-sdk) 14 | - Tutorials: 15 | - [Uploading with CLI](https://docs.cloud.deepset.ai/docs/tutorial-uploading-files-with-cli) 16 | - [Uploading with Python Methods](https://docs.cloud.deepset.ai/docs/tutorial-uploading-files-with-python-methods) 17 | 18 | # Supported Features 19 | 20 | In its current shape, the SDK offers a suite of tools for seamless data upload to deepset Cloud. 21 | The following examples demonstrate how to use the deepset Cloud SDK to interact with deepset Cloud using Python. 22 | You can use the deepset Cloud SDK in the command line as well. For more information, see the [CLI documentation](docs/examples/cli/README.md). 23 | 24 | - [SDK Examples - Upload datasets](/docs/examples/sdk/upload.py) 25 | - [CLI Examples - Upload datasets](/docs/examples/cli/README.md) 26 | 27 | ## Installation 28 | The deepset Cloud SDK is available on [PyPI](https://pypi.org/project/deepset-cloud-sdk/) and you can install it using pip: 29 | ```bash 30 | pip install deepset-cloud-sdk 31 | ``` 32 | 33 | After installing the deepset Cloud SDK, you can use it to interact with deepset Cloud. It comes with a command line interface (CLI), that you can use by calling: 34 | ```bash 35 | deepset-cloud --help 36 | ``` 37 | 38 |
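A typical first session stores your credentials once with `deepset-cloud login` and then uploads a folder from the terminal. The upload invocation below is illustrative; see the [CLI examples](docs/examples/cli/README.md) for the exact commands and flags:

```bash
# One-time setup: stores your API key and default workspace in the global config file (~/.deepset-cloud/.env)
deepset-cloud login

# Upload a local folder to your workspace
# (illustrative flags; check `deepset-cloud upload --help` for the supported options)
deepset-cloud upload ./data --workspace-name my-workspace
```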

39 | ![deepset Cloud CLI](assets/cli.gif) 40 |

41 | 42 | ### Development Installation 43 | To install the deepset Cloud SDK for development, clone the repository and install the package in editable mode: 44 | ```bash 45 | pip install hatch==1.7.0 46 | hatch build 47 | ``` 48 | 49 | Instead of calling the CLI from the build package, you can call it directly from the source code: 50 | ```bash 51 | python3 -m deepset_cloud_sdk.cli --help 52 | ``` 53 | 54 | ## Contributing 55 | We welcome contributions from the open source community to enhance the deepset Cloud SDK. If you would like to contribute, have a look at [CONTRIBUTING.md](CONTRIBUTING.md) for guidelines and instructions on how to get started. 56 | We appreciate your contributions, whether they're bug fixes, new features, or documentation improvements. 57 | 58 | 59 | --- 60 | 61 | ## Interested in Haystack? 62 | deepset Cloud is powered by Haystack, an open source framework for building end-to-end NLP pipelines. 63 | 64 | - [Project website](https://haystack.deepset.ai/) 65 | - [GitHub repository](https://github.com/deepset-ai/haystack) 66 | 67 | --- 68 | 69 | # Licenses 70 | 71 | The SDK is licensed under Apache 2.0, you can see the license [here](https://github.com/deepset-ai/deepset-cloud-sdk/blob/main/LICENSE) 72 | 73 | We use several libraries in this SDK that are licensed under the [MPL 2.0 license](https://www.mozilla.org/en-US/MPL/2.0/) 74 | 75 | - [tqdm](https://github.com/tqdm/tqdm) for progress bars 76 | - [pathspec](https://github.com/cpburnz/python-pathspec) for pattern matching file paths 77 | - [certifi](https://github.com/certifi/python-certifi) for validating trustworthiness of SSL certificates 78 | -------------------------------------------------------------------------------- /assets/cli.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/deepset-ai/deepset-cloud-sdk/3dca35534085225f60b2b18d50b8310c1f006099/assets/cli.gif -------------------------------------------------------------------------------- /assets/logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/deepset-ai/deepset-cloud-sdk/3dca35534085225f60b2b18d50b8310c1f006099/assets/logo.png -------------------------------------------------------------------------------- /deepset_cloud_sdk/README.md: -------------------------------------------------------------------------------- 1 | # Software development kit for the deepset Cloud API 2 | 3 | This package is split into multiple layers: 4 | - API layer 5 | - Client layer 6 | - Service layer 7 | - Workflow layer 8 | 9 | 10 | ### API layer 11 | This layer is the lowest level of abstraction and contains the API definition, including all HTTP methods. It takes care of the authentication. 12 | You can find this layer in the `deepset_cloud_sdk/_api/deepset_cloud_api.py` file. We should implement reties on this lowest layer. 13 | 14 | ### Client layer 15 | This layer adds a thin wrapper around the API layer and provides a more convenient interface to the API. It includes explicit methods 16 | for endpoints by specifying the HTTP methods and endpoints for example for uploading files. 17 | 18 | ### Service layer 19 | This layer takes care of combining client methods to provide more complex functionality. Within this layer, we can add functionalities like 20 | creating sessions, uploading files, and closing sessions. 21 | 22 | ### Workflow layer 23 | Public methods for users. 
These workflows are split into async and sync implementation. 24 | 25 | 26 | ## Software architecture principles 27 | 28 | ### Factories 29 | We are using factories implemented like this: 30 | ```python 31 | @classmethod 32 | async def factory(cls, config: CommonConfig) -> YourClass: 33 | """Create a new instance of the API client. 34 | 35 | :param config: CommonConfig object. 36 | """ 37 | yield cls(config) 38 | ``` 39 | 40 | ### Tests 41 | We are using the classical pyramid of tests: unit tests (for each layer), integration tests. The goal is to gradually test each layer and 42 | then test the whole stack once within the integration tests. 43 | -------------------------------------------------------------------------------- /deepset_cloud_sdk/__about__.py: -------------------------------------------------------------------------------- 1 | """This file defines the package version.""" 2 | # Will be automatically overridden during the release process 3 | # It's okay if this is outdated in the repo. We will use the tag from the release as the version. 4 | __version__ = "1.0.2" 5 | -------------------------------------------------------------------------------- /deepset_cloud_sdk/__init__.py: -------------------------------------------------------------------------------- 1 | """This is the entrypoint for the package.""" 2 | 3 | 4 | import logging 5 | 6 | import structlog 7 | 8 | from deepset_cloud_sdk.workflows.pipeline_client import PipelineClient 9 | from deepset_cloud_sdk.workflows.pipeline_client.models import ( 10 | IndexConfig, 11 | IndexInputs, 12 | IndexOutputs, 13 | PipelineConfig, 14 | PipelineInputs, 15 | PipelineOutputs, 16 | ) 17 | 18 | structlog.configure( 19 | wrapper_class=structlog.make_filtering_bound_logger(logging.INFO), 20 | ) 21 | 22 | log = structlog.get_logger() 23 | 24 | __all__ = [ 25 | "PipelineClient", 26 | "PipelineConfig", 27 | "PipelineInputs", 28 | "PipelineOutputs", 29 | "IndexConfig", 30 | "IndexInputs", 31 | "IndexOutputs", 32 | ] 33 | -------------------------------------------------------------------------------- /deepset_cloud_sdk/_api/config.py: -------------------------------------------------------------------------------- 1 | """Config for loading env variables and setting default values.""" 2 | 3 | import os 4 | from dataclasses import dataclass 5 | from pathlib import Path 6 | 7 | import structlog 8 | from dotenv import load_dotenv 9 | 10 | logger = structlog.get_logger(__name__) 11 | 12 | ENV_FILE_PATH = Path.home() / ".deepset-cloud" / ".env" 13 | 14 | 15 | def load_environment(show_warnings: bool = True) -> bool: 16 | """Load environment variables using a cascading fallback model. 17 | 18 | 1. Load local .env file in current directory if it exists 19 | 2. Load from global ~/.deepset-cloud/.env to supplement local .env file 20 | 3. Environment variables can override both local and global .env files 21 | 22 | :param show_warnings: Whether to show warnings about missing files/variables 23 | :return: True if required environment variables were loaded successfully, False otherwise. 
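
    Minimal usage sketch (assumes API_KEY, API_URL, and DEFAULT_WORKSPACE_NAME are provided
    by one of the .env files or the process environment):

        >>> from deepset_cloud_sdk._api.config import load_environment
        >>> load_environment(show_warnings=True)
        True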
24 | """ 25 | current_path_env = Path.cwd() / ".env" 26 | local_loaded = current_path_env.is_file() and load_dotenv(current_path_env) 27 | global_loaded = ENV_FILE_PATH.is_file() and load_dotenv(ENV_FILE_PATH, override=False) 28 | 29 | if local_loaded: 30 | logger.info(f"Environment variables successfully loaded from local .env file at {current_path_env}.") 31 | if global_loaded: 32 | if local_loaded: 33 | logger.info(f"Loaded global .env file at {ENV_FILE_PATH} to supplement local .env file.") 34 | else: 35 | logger.info(f"Environment variables successfully loaded from global .env file at {ENV_FILE_PATH}.") 36 | 37 | if not (local_loaded or global_loaded) and show_warnings: 38 | logger.warning( 39 | "No .env files found. Run `deepset-cloud login` to create a global configuration file. " 40 | "You can also create a custom local .env file in your project directory." 41 | ) 42 | return False 43 | 44 | # Check for required environment variables 45 | required_vars = ["API_KEY", "API_URL", "DEFAULT_WORKSPACE_NAME"] 46 | missing_vars = [var for var in required_vars if not os.getenv(var)] 47 | 48 | if missing_vars and show_warnings: 49 | logger.warning( 50 | f"Missing required environment variables: {', '.join(missing_vars)}. " 51 | "Run `deepset-cloud login` to set up your configuration or set these variables " 52 | "manually in your .env file." 53 | ) 54 | return False 55 | 56 | return True 57 | 58 | 59 | # Load environment variables silently at import time to support CLI commands that depend on .env files. 60 | # Warnings are only shown later in CommonConfig when users don't provide explicit parameters 61 | # and the config values fall back to global defaults. 62 | load_environment(show_warnings=False) 63 | 64 | # connection to deepset AI Platform 65 | API_URL: str = os.getenv("API_URL", "https://api.cloud.deepset.ai/api/v1") 66 | 67 | API_KEY: str = os.getenv("API_KEY", "") 68 | 69 | # configuration to use a selected workspace 70 | DEFAULT_WORKSPACE_NAME: str = os.getenv("DEFAULT_WORKSPACE_NAME", "") 71 | 72 | ASYNC_CLIENT_TIMEOUT: int = int(os.getenv("ASYNC_CLIENT_TIMEOUT", "300")) 73 | 74 | 75 | @dataclass 76 | class CommonConfig: 77 | """Common config for connecting to the deepset AI Platform. 78 | 79 | Configuration is loaded in the following order of precedence: 80 | 1. Explicit parameters passed to this class 81 | 2. Environment variables 82 | 3. Local .env file in project root 83 | 4. Global .env file in ~/.deepset-cloud/ (supplements local .env) 84 | 5. Built-in defaults 85 | """ 86 | 87 | api_key: str = "" 88 | api_url: str = "" 89 | safe_mode: bool = False 90 | 91 | def __post_init__(self) -> None: 92 | """Validate config.""" 93 | # Only try loading from environment if user didn't provide explicit parameters) 94 | if not self.api_key or not self.api_url: 95 | load_environment(show_warnings=True) 96 | if not self.api_key: 97 | self.api_key = os.getenv("API_KEY", "") 98 | if not self.api_url: 99 | self.api_url = os.getenv("API_URL", "https://api.cloud.deepset.ai/api/v1") 100 | 101 | if not self.api_key: 102 | raise ValueError( 103 | "API key is required. Either set the API_KEY environment variable or pass api_key parameter. Go to [API Keys](https://cloud.deepset.ai/settings/api-keys) in deepset AI Platform to get an API key." 
104 | ) 105 | 106 | if self.api_url.endswith("/"): 107 | self.api_url = self.api_url[:-1] 108 | -------------------------------------------------------------------------------- /deepset_cloud_sdk/_api/deepset_cloud_api.py: -------------------------------------------------------------------------------- 1 | """DeepsetCloudAPI class.""" 2 | from __future__ import annotations 3 | 4 | from contextlib import asynccontextmanager 5 | from typing import Any, AsyncGenerator, Callable, Dict, Optional 6 | 7 | import httpx 8 | import structlog 9 | from httpx import Response 10 | from tenacity import retry, retry_if_exception_type, stop_after_attempt, wait_fixed 11 | 12 | from deepset_cloud_sdk._api.config import CommonConfig 13 | 14 | logger = structlog.get_logger(__name__) 15 | 16 | 17 | DEFAULT_MAX_ATTEMPTS = 3 18 | SAFE_MODE_MAX_ATTEMPTS = 10 19 | 20 | 21 | class WorkspaceNotDefinedError(Exception): 22 | """The workspace_name is not defined. Set an environment variable or pass the `workspace_name` argument.""" 23 | 24 | 25 | class DeepsetCloudAPI: 26 | """deepset Cloud API client. 27 | 28 | This class takes care of all API calls to deepset Cloud and handles authentication and errors. 29 | """ 30 | 31 | def __init__(self, config: CommonConfig, client: httpx.AsyncClient) -> None: 32 | """Create a deepset Cloud API client. 33 | 34 | Add a config for authentication and a HTTPX client for 35 | sending requests. 36 | 37 | :param config: Config for authentication. 38 | :param client: HTTPX client for sending requests. 39 | """ 40 | self.headers = { 41 | "Accept": "application/json", 42 | "Authorization": f"Bearer {config.api_key}", 43 | "X-Client-Source": "deepset-cloud-sdk", 44 | } 45 | self.base_url = lambda workspace_name: self._get_base_url(config.api_url)(workspace_name) 46 | self.client = client 47 | self.max_attempts = SAFE_MODE_MAX_ATTEMPTS if config.safe_mode else DEFAULT_MAX_ATTEMPTS 48 | 49 | @staticmethod 50 | def _get_base_url(api_url: str) -> Callable: 51 | def func(workspace_name: str) -> str: 52 | """Get the base URL for the API. 53 | 54 | :param workspace_name: Name of the workspace to use. 55 | :return: Base URL. 56 | """ 57 | if not workspace_name or workspace_name == "": 58 | raise WorkspaceNotDefinedError( 59 | f"Workspace name is not defined. Got '{workspace_name}'. Enter the name of the workspace in `workspace_name`." 60 | ) 61 | 62 | return f"{api_url}/workspaces/{workspace_name}" 63 | 64 | return func 65 | 66 | @classmethod 67 | @asynccontextmanager 68 | async def factory(cls, config: CommonConfig) -> AsyncGenerator[DeepsetCloudAPI, None]: 69 | """Create a new instance of the API client. 70 | 71 | :param config: CommonConfig object. 72 | """ 73 | if config.safe_mode: 74 | safe_mode_limits = httpx.Limits(max_keepalive_connections=1, max_connections=1) 75 | safe_mode_timeout = httpx.Timeout(None) 76 | async with httpx.AsyncClient(limits=safe_mode_limits, timeout=safe_mode_timeout) as client: 77 | yield cls(config, client) 78 | else: 79 | async with httpx.AsyncClient() as client: 80 | yield cls(config, client) 81 | 82 | async def get( 83 | self, workspace_name: str, endpoint: str, params: Optional[Dict[str, Any]] = None, timeout_s: int = 20 84 | ) -> Response: 85 | """Make a GET request to the deepset Cloud API. 86 | 87 | :param workspace_name: Name of the workspace to use. 88 | :param endpoint: Endpoint to call. 89 | :param params: Query parameters to pass. 90 | :param timeout_s: Timeout in seconds. 91 | :return: Response object. 
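Example (illustrative; assumes an existing `CommonConfig` named `config` and a workspace called "my-workspace"):

    async with DeepsetCloudAPI.factory(config) as api:
        response = await api.get("my-workspace", "files", params={"limit": 10})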
92 | """ 93 | 94 | @retry( 95 | retry=retry_if_exception_type(httpx.RequestError), 96 | stop=stop_after_attempt(self.max_attempts), 97 | wait=wait_fixed(1), 98 | reraise=True, 99 | ) 100 | async def retry_wrapper() -> Response: 101 | return await self._get(workspace_name, endpoint, params, timeout_s) 102 | 103 | return await retry_wrapper() 104 | 105 | async def _get( 106 | self, workspace_name: str, endpoint: str, params: Optional[Dict[str, Any]] = None, timeout_s: int = 20 107 | ) -> Response: 108 | response = await self.client.get( 109 | f"{self.base_url(workspace_name)}/{endpoint}", 110 | params=params or {}, 111 | headers=self.headers, 112 | timeout=timeout_s, 113 | ) 114 | logger.debug( 115 | "Called deepset Cloud API.", 116 | method="GET", 117 | workspace=workspace_name, 118 | endpoint=endpoint, 119 | params=params, 120 | status=response.status_code, 121 | ) 122 | return response 123 | 124 | async def post( 125 | self, 126 | workspace_name: str, 127 | endpoint: str, 128 | params: Optional[Dict[str, Any]] = None, 129 | json: Optional[Dict[str, Any]] = None, 130 | files: Optional[Dict[str, Any]] = None, 131 | data: Optional[Dict[str, Any]] = None, 132 | timeout_s: int = 20, 133 | ) -> Response: 134 | """Make a POST request to the deepset Cloud API. 135 | 136 | :param workspace_name: Name of the workspace to use. 137 | :param endpoint: Endpoint to call. 138 | :param params: Query parameters to pass. 139 | :param json: JSON data to pass. 140 | :param data: Data to pass. 141 | :param files: Files to pass. 142 | :param timeout_s: Timeout in seconds. 143 | :return: Response object. 144 | """ 145 | response = await self.client.post( 146 | f"{self.base_url(workspace_name)}/{endpoint}", 147 | params=params or {}, 148 | json=json or {}, 149 | data=data or {}, 150 | files=files, 151 | headers=self.headers, 152 | timeout=timeout_s, 153 | ) 154 | logger.debug( 155 | "Called deepset Cloud API", 156 | method="POST", 157 | workspace=workspace_name, 158 | endpoint=endpoint, 159 | data=data or {}, 160 | files=files, 161 | status=response.status_code, 162 | ) 163 | return response 164 | 165 | async def delete( 166 | self, workspace_name: str, endpoint: str, params: Optional[Dict[str, Any]] = None, timeout_s: int = 20 167 | ) -> Response: 168 | """ 169 | Make a DELETE request to the deepset Cloud API. 170 | 171 | :param workspace_name: Name of the workspace to use. 172 | :param endpoint: Endpoint to call. 173 | :param params: Query parameters to pass. 174 | :param timeout_s: Timeout in seconds. 175 | :return: Response object. 176 | """ 177 | response = await self.client.delete( 178 | f"{self.base_url(workspace_name)}/{endpoint}", 179 | params=params or {}, 180 | headers=self.headers, 181 | timeout=timeout_s, 182 | ) 183 | logger.debug( 184 | "Called deepset Cloud API", 185 | method="DELETE", 186 | workspace=workspace_name, 187 | endpoint=endpoint, 188 | params=params, 189 | status=response.status_code, 190 | ) 191 | return response 192 | 193 | async def put( 194 | self, 195 | workspace_name: str, 196 | endpoint: str, 197 | params: Optional[Dict[str, Any]] = None, 198 | data: Optional[Dict[str, Any]] = None, 199 | timeout_s: int = 20, 200 | ) -> Response: 201 | """Make a PUT request to the deepset Cloud API. 202 | 203 | :param workspace_name: Name of the workspace to use. 204 | :param endpoint: Endpoint to call. 205 | :param params: Query parameters to pass. 206 | :param data: Data to pass. 207 | :param timeout_s: Timeout in seconds. 208 | :return: Response object. 
209 | """ 210 | 211 | @retry( 212 | retry=retry_if_exception_type(httpx.ConnectError), 213 | stop=stop_after_attempt(self.max_attempts), 214 | wait=wait_fixed(1), 215 | reraise=True, 216 | ) 217 | async def retry_wrapper() -> Response: 218 | return await self._put(workspace_name, endpoint, params, data, timeout_s) 219 | 220 | return await retry_wrapper() 221 | 222 | async def _put( 223 | self, 224 | workspace_name: str, 225 | endpoint: str, 226 | params: Optional[Dict[str, Any]] = None, 227 | data: Optional[Dict[str, Any]] = None, 228 | timeout_s: int = 20, 229 | ) -> Response: 230 | response = await self.client.put( 231 | f"{self.base_url(workspace_name)}/{endpoint}", 232 | params=params or {}, 233 | json=data or {}, 234 | headers=self.headers, 235 | timeout=timeout_s, 236 | ) 237 | logger.debug( 238 | "Called deepset Cloud API", 239 | method="PUT", 240 | workspace=workspace_name, 241 | endpoint=endpoint, 242 | data=data or {}, 243 | status=response.status_code, 244 | ) 245 | return response 246 | 247 | 248 | def get_deepset_cloud_api(config: CommonConfig, client: httpx.AsyncClient) -> DeepsetCloudAPI: # noqa 249 | """deepset Cloud API factory. Return an instance of DeepsetCloudAPI. 250 | 251 | :param config: CommonConfig object. 252 | :param client: httpx.AsyncClient object. 253 | :return: DeepsetCloudAPI object. 254 | """ 255 | return DeepsetCloudAPI(config=config, client=client) 256 | -------------------------------------------------------------------------------- /deepset_cloud_sdk/_api/files.py: -------------------------------------------------------------------------------- 1 | """ 2 | File API for deepset Cloud. 3 | 4 | This module takes care of all file-related API calls to deepset Cloud, including uploading, downloading, listing, and 5 | deleting files. 6 | """ 7 | 8 | import datetime 9 | import inspect 10 | import json 11 | from dataclasses import dataclass 12 | from pathlib import Path 13 | from typing import Any, Dict, List, Optional, Union 14 | from uuid import UUID 15 | 16 | import structlog 17 | from httpx import codes 18 | 19 | from deepset_cloud_sdk._api.deepset_cloud_api import DeepsetCloudAPI 20 | from deepset_cloud_sdk._api.upload_sessions import WriteMode 21 | from deepset_cloud_sdk._utils.datetime import from_isoformat 22 | 23 | logger = structlog.get_logger(__name__) 24 | 25 | 26 | class NotMatchingFileTypeException(Exception): 27 | """Exception raised when a file is not matching the file type.""" 28 | 29 | 30 | class FileNotFoundInDeepsetCloudException(Exception): 31 | """Exception raised when a file is not found.""" 32 | 33 | 34 | class FailedToUploadFileException(Exception): 35 | """Exception raised when a file failed to be uploaded.""" 36 | 37 | 38 | @dataclass 39 | class File: 40 | """File primitive from deepset Cloud. This dataclass is used for all file-related operations that don't include the actual file content.""" 41 | 42 | file_id: UUID 43 | url: str 44 | name: str 45 | size: int 46 | created_at: datetime.datetime 47 | meta: Dict[str, Any] 48 | 49 | @classmethod 50 | def from_dict(cls, env: Dict[str, Any]) -> Any: 51 | """Parse a dictionary into a File object. 52 | 53 | Ignores keys that don't exist. 54 | 55 | :param env: Dictionary to parse. 
56 | """ 57 | to_parse = {k: v for k, v in env.items() if k in inspect.signature(cls).parameters} 58 | to_parse["created_at"] = from_isoformat(to_parse["created_at"]) 59 | to_parse["file_id"] = UUID(to_parse["file_id"]) 60 | return cls(**to_parse) 61 | 62 | 63 | @dataclass 64 | class FileList: 65 | """List of files from deepset Cloud. This dataclass is used for all file-related operations that return a list of files.""" 66 | 67 | total: int 68 | data: List[File] 69 | has_more: bool 70 | 71 | 72 | class FilesAPI: 73 | """File API for deepset Cloud. 74 | 75 | This module takes care of all file-related API calls to deepset Cloud, including 76 | uploading, downloading, listing, and deleting files. 77 | 78 | :param deepset_cloud_api: Instance of the DeepsetCloudAPI. 79 | """ 80 | 81 | def __init__(self, deepset_cloud_api: DeepsetCloudAPI) -> None: 82 | """ 83 | Create FileAPI object. 84 | 85 | :param deepset_cloud_api: Instance of the DeepsetCloudAPI. 86 | """ 87 | self._deepset_cloud_api = deepset_cloud_api 88 | 89 | async def list_paginated( 90 | self, 91 | workspace_name: str, 92 | limit: int = 100, 93 | name: Optional[str] = None, 94 | odata_filter: Optional[str] = None, 95 | after_value: Optional[Any] = None, 96 | after_file_id: Optional[UUID] = None, 97 | ) -> FileList: 98 | """ 99 | List files in a workspace using cursor-based pagination. 100 | 101 | :param workspace_name: Name of the workspace to use. 102 | :param limit: Number of files to return per page. 103 | :param name: Name of the file to odata_filter by. 104 | :param odata_filter: Odata odata_filter to apply. 105 | :param after_value: Value to start after. 106 | :param after_file_id: File ID to start after. 107 | """ 108 | params: Dict[str, Union[str, int]] = {"limit": limit} 109 | if after_value and after_file_id: 110 | params["after_value"] = ( 111 | after_value.isoformat() if isinstance(after_value, datetime.datetime) else str(after_value) 112 | ) 113 | params["after_file_id"] = str(after_file_id) 114 | 115 | # substring match file name 116 | if name: 117 | params["name"] = name 118 | 119 | # odata odata_filter for file meta 120 | if odata_filter: 121 | params["filter"] = odata_filter 122 | 123 | response = await self._deepset_cloud_api.get(workspace_name, "files", params=params) 124 | assert response.status_code == codes.OK, f"Failed to list files: {response.text}" 125 | response_body = response.json() 126 | total = response_body["total"] 127 | data = response_body["data"] 128 | has_more = response_body["has_more"] 129 | return FileList(total=total, data=[File.from_dict(d) for d in data], has_more=has_more) 130 | 131 | @staticmethod 132 | def _available_file_name(file_path: Path, suffix: str = "_1") -> str: 133 | logger.warning("File already exists. Renaming file to avoid overwriting.", file_path=str(file_path)) 134 | base = file_path.stem 135 | ext = file_path.suffix 136 | new_filename = file_path.with_name(f"{base}{suffix}{ext}") 137 | while new_filename.exists(): 138 | suffix = f"_{int(suffix[1:]) + 1}" 139 | new_filename = file_path.with_name(f"{base}{suffix}{ext}") 140 | return str(new_filename) 141 | 142 | async def _save_to_disk(self, file_dir: Path, file_name: str, content: bytes) -> str: 143 | """Save the given content to disk. 144 | 145 | If there is a collision, the file name is changed to avoid overwriting. 146 | This new name is returned by the function. 147 | 148 | :param file_dir: Path to the file. 149 | :param file_name: Name of the file. 150 | :param content: Content of the file. 
151 | :return: The new file name. 152 | """ 153 | # Check if the directory exists, and create it if necessary 154 | file_dir.mkdir(parents=True, exist_ok=True) 155 | 156 | new_filename: str = file_name 157 | file_path = file_dir / file_name 158 | if file_path.exists(): 159 | new_filename = self._available_file_name(file_path) 160 | 161 | with (file_dir / new_filename).open("wb") as file: 162 | file.write(content) 163 | return new_filename 164 | 165 | async def direct_upload_path( 166 | self, 167 | workspace_name: str, 168 | file_path: Union[Path, str], 169 | file_name: Optional[str] = None, 170 | meta: Optional[Dict[str, Any]] = None, 171 | write_mode: WriteMode = WriteMode.KEEP, 172 | ) -> UUID: 173 | """Directly upload a file to deepset Cloud. 174 | 175 | :param workspace_name: Name of the workspace to use. 176 | :param file_path: Path to the file to upload. 177 | :param file_name: Name of the file to upload. 178 | :param meta: Meta information to attach to the file. 179 | :param write_mode: Specifies what to do when a file with the same name already exists in the workspace. 180 | Possible options are: 181 | KEEP - uploads the file with the same name and keeps both files in the workspace. 182 | OVERWRITE - overwrites the file that is in the workspace. 183 | FAIL - fails to upload the file with the same name. 184 | :return: ID of the uploaded file. 185 | """ 186 | if isinstance(file_path, str): 187 | file_path = Path(file_path) 188 | 189 | if file_name is None: 190 | file_name = file_path.name 191 | 192 | with file_path.open("rb") as file: 193 | response = await self._deepset_cloud_api.post( 194 | workspace_name, 195 | "files", 196 | files={"file": (file_name, file), "meta": (None, json.dumps(meta))}, 197 | params={"write_mode": write_mode.value}, 198 | ) 199 | if response.status_code != codes.CREATED or response.json().get("file_id") is None: 200 | raise FailedToUploadFileException( 201 | f"Failed to upload file with status code {response.status_code}. response was: {response.text}" 202 | ) 203 | file_id: UUID = UUID(response.json()["file_id"]) 204 | return file_id 205 | 206 | async def direct_upload_in_memory( 207 | self, 208 | workspace_name: str, 209 | content: Union[bytes, str], 210 | file_name: str, 211 | meta: Optional[Dict[str, Any]] = None, 212 | write_mode: WriteMode = WriteMode.KEEP, 213 | ) -> UUID: 214 | """Directly upload files to deepset Cloud. 215 | 216 | :param workspace_name: Name of the workspace to use. 217 | :param content: File text to upload. 218 | :param file_name: Name of the file to upload. 219 | :param meta: Meta information to attach to the file. 220 | :param write_mode: Specifies what to do when a file with the same name already exists in the workspace. 221 | Possible options are: 222 | KEEP - uploads the file with the same name and keeps both files in the workspace. 223 | OVERWRITE - overwrites the file that is in the workspace. 224 | FAIL - fails to upload the file with the same name. 225 | :return: ID of the uploaded file. 226 | """ 227 | response = await self._deepset_cloud_api.post( 228 | workspace_name, 229 | "files", 230 | files={"file": (file_name, content)}, 231 | data={"meta": json.dumps(meta)}, 232 | params={"write_mode": write_mode.value}, 233 | ) 234 | 235 | if response.status_code != codes.CREATED or response.json().get("file_id") is None: 236 | raise FailedToUploadFileException( 237 | f"Failed to upload file with status code {response.status_code}. 
response was: {response.text}" 238 | ) 239 | file_id: UUID = UUID(response.json()["file_id"]) 240 | return file_id 241 | 242 | async def download( 243 | self, 244 | workspace_name: str, 245 | file_id: UUID, 246 | file_name: str, 247 | include_meta: bool = True, 248 | file_dir: Optional[Union[Path, str]] = None, 249 | ) -> None: 250 | """Download a single file from a workspace. 251 | 252 | :param workspace_name: Name of the workspace to use. 253 | :param file_id: ID of the file to download. 254 | :param file_name: Name assigned to the downloaded file. 255 | :param include_meta: Whether to include the file meta in the folder. 256 | :param file_dir: Location to save the file locally. If not provided the current directory is used. 257 | """ 258 | if file_dir is None: 259 | file_dir = Path.cwd() 260 | 261 | if isinstance(file_dir, str): 262 | # format dir to Path and take relative path into account 263 | file_dir = Path(file_dir).resolve() 264 | 265 | response = await self._deepset_cloud_api.get(workspace_name, f"files/{file_id}") 266 | if response.status_code == codes.NOT_FOUND: 267 | raise FileNotFoundInDeepsetCloudException(f"Failed to download raw file: {response.text}") 268 | if response.status_code != codes.OK: 269 | raise Exception(f"Failed to download raw file: {response.text}") 270 | new_local_file_name: str = await self._save_to_disk( 271 | file_dir=file_dir, file_name=file_name, content=response.content 272 | ) 273 | 274 | if include_meta: 275 | response = await self._deepset_cloud_api.get(workspace_name, f"files/{file_id}/meta") 276 | if response.status_code == codes.NOT_FOUND: 277 | raise FileNotFoundInDeepsetCloudException(f"Failed to download raw file: {response.text}") 278 | if response.status_code != codes.OK: 279 | raise Exception(f"Failed to download raw file: {response.text}") 280 | await self._save_to_disk( 281 | file_dir=file_dir, 282 | file_name=f"{new_local_file_name}.meta.json", 283 | content=response.content, 284 | ) 285 | -------------------------------------------------------------------------------- /deepset_cloud_sdk/_s3/__init__.py: -------------------------------------------------------------------------------- 1 | """Module that handles s3 interactions.""" 2 | -------------------------------------------------------------------------------- /deepset_cloud_sdk/_utils/__init__.py: -------------------------------------------------------------------------------- 1 | """A set of utils for the SDK.""" 2 | -------------------------------------------------------------------------------- /deepset_cloud_sdk/_utils/datetime.py: -------------------------------------------------------------------------------- 1 | """Utility functions for working with datetime objects.""" 2 | from datetime import datetime 3 | 4 | 5 | def from_isoformat(date_str: str) -> datetime: 6 | """Parse a date string in ISO 8601 format and returns a datetime object. 
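For example, "2024-01-01T12:00:00Z" and "2024-01-01T12:00:00+00:00" both parse to the same timezone-aware datetime.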
7 | 8 | Our new Pydantic 2.0 API returns with the `Z` suffix, but the old one returns with `+00:00` 9 | Python versions < 3.12 don't support the `Z` suffix, so we need to replace it with `+00:00` 10 | """ 11 | date_str = date_str.replace("Z", "+00:00") 12 | return datetime.fromisoformat(date_str) 13 | -------------------------------------------------------------------------------- /deepset_cloud_sdk/models.py: -------------------------------------------------------------------------------- 1 | """General data classes for deepset Cloud SDK.""" 2 | import json 3 | from abc import abstractmethod 4 | from dataclasses import dataclass 5 | from typing import Any, Dict, Optional, Union 6 | from uuid import UUID 7 | 8 | 9 | @dataclass 10 | class UserInfo: 11 | """User info data class.""" 12 | 13 | user_id: UUID 14 | given_name: str 15 | family_name: str 16 | 17 | 18 | class DeepsetCloudFileBase: # pylint: disable=too-few-public-methods 19 | """Base class for deepset Cloud files.""" 20 | 21 | def __init__(self, name: str, meta: Optional[Dict[str, Any]] = None): 22 | """ 23 | Initialize DeepsetCloudFileBase. 24 | 25 | :param name: The file name 26 | :param meta: The file's metadata 27 | """ 28 | self.name = name 29 | self.meta = meta 30 | 31 | @abstractmethod 32 | def content(self) -> Union[str, bytes]: 33 | """Return content.""" 34 | raise NotImplementedError 35 | 36 | def meta_as_string(self) -> str: 37 | """Return metadata as a string.""" 38 | if self.meta: 39 | return json.dumps(self.meta) 40 | 41 | return json.dumps({}) 42 | 43 | 44 | class DeepsetCloudFile(DeepsetCloudFileBase): # pylint: disable=too-few-public-methods 45 | """Data class for text files in deepset Cloud.""" 46 | 47 | def __init__(self, text: str, name: str, meta: Optional[Dict[str, Any]] = None): 48 | """ 49 | Initialize DeepsetCloudFileBase. 50 | 51 | :param name: The file name 52 | :param text: The text content of the file 53 | :param meta: The file's metadata 54 | """ 55 | super().__init__(name, meta) 56 | self.text = text 57 | 58 | def content(self) -> str: 59 | """ 60 | Return the content of the file. 61 | 62 | :return: The text of the file. 63 | """ 64 | return self.text 65 | 66 | 67 | # Didn't want to cause breaking changes in the DeepsetCloudFile class, though it 68 | # is technically the same as the below, the naming of the text field will be confusing 69 | # for users that are uploading anything other than text. 70 | 71 | 72 | class DeepsetCloudFileBytes(DeepsetCloudFileBase): # pylint: disable=too-few-public-methods 73 | """Data class for uploading files of any valid type in deepset Cloud.""" 74 | 75 | def __init__(self, file_bytes: bytes, name: str, meta: Optional[Dict[str, Any]] = None): 76 | """ 77 | Initialize DeepsetCloudFileBase. 78 | 79 | :param name: The file name 80 | :param file_bytes: The content of the file represented in bytes 81 | :param meta: The file's metadata 82 | """ 83 | super().__init__(name, meta) 84 | self.file_bytes = file_bytes 85 | 86 | def content(self) -> bytes: 87 | """ 88 | Return the content of the file in bytes. 89 | 90 | :return: The content of the file in bytes. 
91 | """ 92 | return self.file_bytes 93 | -------------------------------------------------------------------------------- /deepset_cloud_sdk/workflows/__init__.py: -------------------------------------------------------------------------------- 1 | """Workflows for deepset AI platform SDK.""" 2 | 3 | from deepset_cloud_sdk.workflows.pipeline_client.models import ( 4 | IndexConfig, 5 | IndexInputs, 6 | IndexOutputs, 7 | PipelineConfig, 8 | PipelineInputs, 9 | PipelineOutputs, 10 | ) 11 | from deepset_cloud_sdk.workflows.pipeline_client.pipeline_client import PipelineClient 12 | 13 | __all__ = [ 14 | "PipelineInputs", 15 | "IndexInputs", 16 | "IndexOutputs", 17 | "PipelineOutputs", 18 | "IndexConfig", 19 | "PipelineConfig", 20 | "PipelineClient", 21 | ] 22 | -------------------------------------------------------------------------------- /deepset_cloud_sdk/workflows/async_client/__init__.py: -------------------------------------------------------------------------------- 1 | """Async implementation of workflows client.""" 2 | -------------------------------------------------------------------------------- /deepset_cloud_sdk/workflows/pipeline_client/__init__.py: -------------------------------------------------------------------------------- 1 | """Package to enable importing pipelines and indexes to deepset AI platform.""" 2 | 3 | from deepset_cloud_sdk.workflows.pipeline_client.models import ( 4 | IndexConfig, 5 | IndexInputs, 6 | IndexOutputs, 7 | PipelineConfig, 8 | PipelineInputs, 9 | PipelineOutputs, 10 | ) 11 | from deepset_cloud_sdk.workflows.pipeline_client.pipeline_client import PipelineClient 12 | 13 | __all__ = [ 14 | "PipelineClient", 15 | "PipelineInputs", 16 | "IndexInputs", 17 | "IndexOutputs", 18 | "PipelineOutputs", 19 | "IndexConfig", 20 | "PipelineConfig", 21 | ] 22 | -------------------------------------------------------------------------------- /deepset_cloud_sdk/workflows/pipeline_client/models.py: -------------------------------------------------------------------------------- 1 | """Models for the pipeline service.""" 2 | from typing import List 3 | 4 | from pydantic import BaseModel, Field, model_validator 5 | 6 | 7 | class InputOutputBaseModel(BaseModel): 8 | """Base model for input and output configurations. 9 | 10 | This class provides common functionality for input and output models, such as YAML conversion. 11 | """ 12 | 13 | def to_yaml_dict(self) -> dict: 14 | """Convert the model to a YAML-compatible dictionary. 15 | 16 | Clears empty values from the dictionary. 17 | 18 | :return: Dictionary ready for YAML serialization. 19 | """ 20 | fields = self.model_dump(exclude_none=True) 21 | # Remove empty values 22 | return {k: v for k, v in fields.items() if v} 23 | 24 | 25 | class PipelineInputs(InputOutputBaseModel): 26 | """Pipeline input configuration. 27 | 28 | Defines the components that should receive the Query input and any filters that apply to it. 29 | 30 | :param query: List of components that will receive the `query` input. 31 | Specify each component in the format: '.', for example: 'retriever.query' 32 | :param filters: Optional list of components that will receive the filters input. 33 | Specify each component using the format: '.', for example: 'retriever.filters'. 34 | """ 35 | 36 | model_config = {"extra": "allow"} # Allow additional fields in inputs 37 | 38 | query: List[str] = Field( 39 | ..., 40 | description=( 41 | "List of components and parameters that will receive the `query` input when they are executed. 
" 42 | "Use the format: '.', for example: 'retriever.query'." 43 | ), 44 | min_items=1, 45 | ) 46 | filters: List[str] = Field( 47 | default_factory=list, 48 | description=( 49 | "List of components and parameters that will receive the `filters` input when they are executed. " 50 | "Use the format: '.', for example: 'retriever.filters'." 51 | ), 52 | ) 53 | 54 | 55 | class PipelineOutputs(InputOutputBaseModel): 56 | """Pipeline output configuration. 57 | 58 | Specify the components that will output `documents`, `answers`, or both. 59 | You must include at least one. The outputs of these components become the final output of the pipeline. 60 | 61 | :param documents: Name of the component and parameter that will provide `documents` as output. 62 | Use the format '.', for example: 'retriever.documents'. 63 | :param answers: Name of the component and parameter that will provide `answers` as output. 64 | Use the format '.', for example: 'reader.answers'. 65 | """ 66 | 67 | model_config = {"extra": "allow"} # Allow additional fields in outputs 68 | 69 | documents: str | None = Field( 70 | default=None, 71 | description="Name of the component that will provide `documents` as output. " 72 | "Format: '.', for example: 'meta_ranker.documents'", 73 | ) 74 | answers: str | None = Field( 75 | default=None, 76 | description="Name of the component that will provide `answers` as output. " 77 | "Format: '.', for example: 'answers_builder.answers'", 78 | ) 79 | 80 | @model_validator(mode="after") 81 | def validate_documents_xor_answers(self) -> "PipelineOutputs": 82 | """Validate that either `documents`, `answers`, or both are defined.""" 83 | if not self.documents and not self.answers: 84 | raise ValueError("Define at least one pipeline output, either 'documents, 'answers' or both.") 85 | return self 86 | 87 | 88 | class IndexOutputs(InputOutputBaseModel): 89 | """Output configuration for the index. 90 | 91 | Index outputs are optional. 92 | """ 93 | 94 | model_config = {"extra": "allow"} # Allow additional fields in outputs 95 | 96 | 97 | class PipelineConfig(BaseModel): 98 | """Configuration required to import the pipeline into deepset AI Platform. 99 | 100 | :param name: Name of the pipeline to be imported 101 | :param inputs: Pipeline input configuration. Use `PipelineInputs` model to define the inputs. 102 | :param outputs: Pipeline output configuration. Use `PipelineOutputs` model to define the outputs. 103 | """ 104 | 105 | model_config = {"extra": "forbid"} 106 | 107 | name: str = Field(..., description="The name of the pipeline to be imported", min_length=1) 108 | inputs: PipelineInputs = Field( 109 | default_factory=PipelineInputs, 110 | description=("Pipeline input configuration. Use `PipelineInputs` model to define the inputs."), 111 | ) 112 | outputs: PipelineOutputs = Field( 113 | default_factory=PipelineOutputs, 114 | description=("Pipeline output configuration. Use `PipelineOutputs` model to define the outputs."), 115 | ) 116 | 117 | 118 | class IndexInputs(InputOutputBaseModel): 119 | """Configuration required to import an index into deepset AI Platform. 120 | 121 | Defines the index components that should receive the `Files` input. 122 | 123 | :param files: List of components and parameters that should receive files as input. 124 | Specify the components using the format: '.', for example: 'file_type_router.sources'. 
125 | """ 126 | 127 | model_config = {"extra": "allow"} # Allow additional fields in inputs 128 | 129 | files: List[str] = Field( 130 | default_factory=list, 131 | description=( 132 | "List of components and parameters that will receive files as input when they're executed. " 133 | "Format: '.', for example: 'file_type_router.sources'." 134 | ), 135 | ) 136 | 137 | 138 | class IndexConfig(BaseModel): 139 | """Index configuration for importing an index to deepset AI platform. 140 | 141 | :param name: Name of the index to be imported. 142 | :param inputs: Index input configuration. Use `IndexInputs` model to define the inputs. 143 | :param outputs: Index output configuration. Optional. Use `IndexOutputs` model to define the outputs. 144 | """ 145 | 146 | model_config = {"extra": "forbid"} 147 | 148 | name: str = Field(..., description="Name of the index to be imported.", min_length=1) 149 | inputs: IndexInputs = Field( 150 | default_factory=IndexInputs, 151 | description=("Input configuration for the index. Use `IndexInputs` model to define the inputs."), 152 | ) 153 | outputs: IndexOutputs | None = Field( 154 | default_factory=IndexOutputs, 155 | description=("Optional output configuration for the index. Use `IndexOutputs` model to define the outputs."), 156 | ) 157 | -------------------------------------------------------------------------------- /deepset_cloud_sdk/workflows/pipeline_client/pipeline_client.py: -------------------------------------------------------------------------------- 1 | """Pipeline client for importing pipelines and indexes to deepset AI Platform.""" 2 | import asyncio 3 | 4 | import structlog 5 | 6 | from deepset_cloud_sdk._api.config import ( 7 | API_KEY, 8 | API_URL, 9 | DEFAULT_WORKSPACE_NAME, 10 | CommonConfig, 11 | ) 12 | from deepset_cloud_sdk._api.deepset_cloud_api import DeepsetCloudAPI 13 | from deepset_cloud_sdk.workflows.pipeline_client.models import ( 14 | IndexConfig, 15 | PipelineConfig, 16 | ) 17 | from deepset_cloud_sdk.workflows.pipeline_client.pipeline_service import ( 18 | PipelineProtocol, 19 | PipelineService, 20 | ) 21 | 22 | logger = structlog.get_logger(__name__) 23 | 24 | 25 | class PipelineClient: 26 | """Client for importing Haystack pipelines and indexes to deepset AI platform. 27 | 28 | This class provides functionality to import Haystack pipelines and indexes into the deepset AI platform. 
29 | 30 | Example for importing a Haystack pipeline or index to deepset AI platform: 31 | ```python 32 | from deepset_cloud_sdk import PipelineClient, PipelineConfig, PipelineInputs, PipelineOutputs, IndexConfig, IndexInputs 33 | from haystack import Pipeline 34 | 35 | # Initialize the client with configuration from environment variables (after running `deepset-cloud login`) 36 | client = PipelineClient() 37 | 38 | # or initialize the client with explicit configuration 39 | client = PipelineClient( 40 | api_key="your-api-key", 41 | workspace_name="your-workspace", 42 | api_url="https://api.deepset.ai" 43 | ) 44 | 45 | # Configure your pipeline 46 | pipeline = Pipeline() 47 | 48 | # Configure import 49 | # if importing a pipeline, use PipelineConfig 50 | config = PipelineConfig( 51 | name="my-pipeline", 52 | inputs=PipelineInputs( 53 | query=["prompt_builder.query"], 54 | filters=["bm25_retriever.filters", "embedding_retriever.filters"], 55 | ), 56 | outputs=PipelineOutputs( 57 | answers="answers_builder.answers", 58 | documents="ranker.documents", 59 | ), 60 | ) 61 | 62 | # if importing an index, use IndexConfig 63 | config = IndexConfig( 64 | name="my-index", 65 | inputs=IndexInputs(files=["file_type_router.sources"]), 66 | ) 67 | 68 | # sync execution 69 | client.import_into_deepset(pipeline, config) 70 | 71 | # async execution 72 | await client.import_into_deepset_async(pipeline, config) 73 | ``` 74 | """ 75 | 76 | def __init__( 77 | self, 78 | api_key: str | None = None, 79 | workspace_name: str | None = None, 80 | api_url: str | None = None, 81 | ) -> None: 82 | """Initialize the Pipeline Client. 83 | 84 | The client can be configured in two ways: 85 | 86 | 1. Using environment variables (recommended): 87 | - Run `deepset-cloud login` to set up the following environment variables: 88 | - `API_KEY`: Your deepset AI platform API key 89 | - `API_URL`: The URL of the deepset AI platform API 90 | - `DEFAULT_WORKSPACE_NAME`: The workspace name to use. 91 | 92 | 2. Using explicit parameters: 93 | - Provide the values directly to this constructor 94 | - Any missing parameters will fall back to environment variables 95 | 96 | :param api_key: Your deepset AI platform API key. Falls back to `API_KEY` environment variable. 97 | :param workspace_name: The workspace to use. Falls back to `DEFAULT_WORKSPACE_NAME` environment variable. 98 | :param api_url: The URL of the deepset AI platform API. Falls back to `API_URL` environment variable. 99 | :raises ValueError: If no api key or workspace name is provided and `API_KEY` or `DEFAULT_WORKSPACE_NAME` is not set in the environment. 100 | """ 101 | self._api_config = CommonConfig( 102 | api_key=api_key or API_KEY, 103 | api_url=api_url or API_URL, 104 | ) 105 | self._workspace_name = workspace_name or DEFAULT_WORKSPACE_NAME 106 | if not self._workspace_name: 107 | raise ValueError( 108 | "Workspace not configured. Provide a workspace name or set the `DEFAULT_WORKSPACE_NAME` environment variable." 109 | ) 110 | 111 | async def import_into_deepset_async(self, pipeline: PipelineProtocol, config: IndexConfig | PipelineConfig) -> None: 112 | """Import a Haystack `Pipeline` or `AsyncPipeline` into deepset AI Platform asynchronously. 113 | 114 | The pipeline must be imported as either an index or a pipeline: 115 | - An index: Processes files and stores them in a document store, making them available for 116 | pipelines to search. 117 | - A pipeline: For other use cases, for example, searching through documents stored by index pipelines. 
118 | 119 | :param pipeline: The Haystack `Pipeline` or `AsyncPipeline` to import. 120 | :param config: Configuration for importing, use either `IndexConfig` or `PipelineConfig`. 121 | If importing an index, the config argument is expected to be of type `IndexConfig`, 122 | if importing a pipeline, the config argument is expected to be of type `PipelineConfig`. 123 | """ 124 | async with DeepsetCloudAPI.factory(self._api_config) as api: 125 | service = PipelineService(api, self._workspace_name) 126 | await service.import_async(pipeline, config) 127 | 128 | def import_into_deepset(self, pipeline: PipelineProtocol, config: IndexConfig | PipelineConfig) -> None: 129 | """Import a Haystack `Pipeline` or `AsyncPipeline` into deepset AI Platform synchronously. 130 | 131 | The pipeline must be imported as either an index or a pipeline: 132 | - An index: Processes files and stores them in a document store, making them available for 133 | pipelines to search. 134 | - A pipeline: For other use cases, for example, searching through documents stored by index pipelines. 135 | 136 | :param pipeline: The Haystack `Pipeline` or `AsyncPipeline` to import. 137 | :param config: Configuration for importing into deepset, use either `IndexConfig` or `PipelineConfig`. 138 | If importing an index, the config argument is expected to be of type `IndexConfig`, 139 | if importing a pipeline, the config argument is expected to be of type `PipelineConfig`. 140 | """ 141 | try: 142 | loop = asyncio.get_event_loop() 143 | # do not close if event loop already exists, e.g. in Jupyter notebooks 144 | should_close = False 145 | except RuntimeError: 146 | loop = asyncio.new_event_loop() 147 | asyncio.set_event_loop(loop) 148 | should_close = True 149 | 150 | try: 151 | return loop.run_until_complete(self.import_into_deepset_async(pipeline, config)) 152 | finally: 153 | if should_close: 154 | loop.close() 155 | -------------------------------------------------------------------------------- /deepset_cloud_sdk/workflows/pipeline_client/pipeline_service.py: -------------------------------------------------------------------------------- 1 | """Pipeline importing service for deepset Cloud SDK.""" 2 | # pylint: disable=unnecessary-ellipsis,import-outside-toplevel 3 | from __future__ import annotations 4 | 5 | from http import HTTPStatus 6 | from io import StringIO 7 | from typing import Any, Optional, Protocol, runtime_checkable 8 | 9 | import structlog 10 | from ruamel.yaml import YAML 11 | 12 | from deepset_cloud_sdk._api.config import DEFAULT_WORKSPACE_NAME, CommonConfig 13 | from deepset_cloud_sdk._api.deepset_cloud_api import DeepsetCloudAPI 14 | from deepset_cloud_sdk.workflows.pipeline_client.models import ( 15 | IndexConfig, 16 | PipelineConfig, 17 | ) 18 | 19 | logger = structlog.get_logger(__name__) 20 | 21 | 22 | @runtime_checkable 23 | class PipelineProtocol(Protocol): 24 | """Protocol defining the required methods for a Haystack Pipeline or AsyncPipeline.""" 25 | 26 | def dumps(self) -> str: 27 | """Convert the pipeline to a YAML string. 28 | 29 | :return: YAML string representation of the pipeline. 30 | """ 31 | ... 32 | 33 | def add_component(self, name: str, instance: Any) -> None: 34 | """Add a component to the pipeline. 35 | 36 | :param name: Name of the component. 37 | :param instance: Component instance to add. 38 | """ 39 | ... 
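# Note: ``PipelineProtocol`` is ``@runtime_checkable`` and only requires ``dumps()`` and
# ``add_component()``, so a structural ``isinstance`` check passes for any object that
# implements both methods, for example (illustrative, assumes haystack-ai is installed):
#
#     from haystack import Pipeline
#     assert isinstance(Pipeline(), PipelineProtocol)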
40 | 41 | 42 | class PipelineService: 43 | """Handles the importing of Haystack pipelines and indexes into deepset AI platform.""" 44 | 45 | def __init__(self, api: DeepsetCloudAPI, workspace_name: Optional[str] = None) -> None: 46 | """Initialize the pipeline service. 47 | 48 | :param api: An initialized DeepsetCloudAPI instance. 49 | :param workspace_name: Optional workspace name to use instead of environment variable. 50 | """ 51 | self._api = api 52 | self._workspace_name = workspace_name or DEFAULT_WORKSPACE_NAME 53 | self._yaml = YAML() 54 | self._yaml.preserve_quotes = True 55 | self._yaml.indent(mapping=2, sequence=2) 56 | 57 | @classmethod 58 | async def factory(cls, config: CommonConfig, workspace_name: Optional[str] = None) -> PipelineService: 59 | """Create a new instance of the pipeline service. 60 | 61 | :param config: CommonConfig object. 62 | :param workspace_name: Optional workspace name to use instead of environment variable. 63 | """ 64 | async with DeepsetCloudAPI.factory(config) as api: 65 | return cls(api, workspace_name) 66 | 67 | async def import_async(self, pipeline: PipelineProtocol, config: IndexConfig | PipelineConfig) -> None: 68 | """Import a pipeline or an index into deepset AI platform. 69 | 70 | :param pipeline: The pipeline or index to import. Must be a Haystack Pipeline or AsyncPipeline. 71 | :param config: Configuration for importing, either `IndexConfig` or `PipelineConfig`. 72 | If importing an index, the config argument is expected to be of type `IndexConfig`, 73 | if importing a pipeline, the config argument is expected to be of type `PipelineConfig`. 74 | 75 | :raises TypeError: If the pipeline object isn't a Haystack Pipeline or AsyncPipeline. 76 | :raises ValueError: If no workspace is configured. 77 | :raises ImportError: If haystack-ai is not installed. 78 | """ 79 | logger.debug(f"Starting async importing for {config.name}") 80 | 81 | # import locally to avoid Haystack dependency to be installed in the SDK 82 | try: 83 | from haystack import AsyncPipeline as HaystackAsyncPipeline 84 | from haystack import Pipeline as HaystackPipeline 85 | except ImportError as err: 86 | raise ImportError( 87 | "Can't import Pipeline or AsyncPipeline because haystack-ai is not installed. Run 'pip install haystack-ai'." 88 | ) from err 89 | 90 | if not isinstance(pipeline, (HaystackPipeline, HaystackAsyncPipeline)): 91 | raise TypeError( 92 | "Haystack Pipeline or AsyncPipeline object expected. " 93 | "Make sure you have installed haystack-ai and use Pipeline or AsyncPipeline " 94 | "to define your pipeline or index." 95 | ) 96 | 97 | if not self._workspace_name: 98 | raise ValueError( 99 | "The workspace to import into is not configured. " 100 | "Run 'deepset-cloud login' and follow the instructions or configure the workspace name on the SDK instance." 101 | ) 102 | 103 | if isinstance(config, IndexConfig): 104 | logger.debug(f"Importing index into workspace {self._workspace_name}") 105 | await self._import_index(pipeline, config) 106 | else: 107 | logger.debug(f"Importing pipeline into workspace {self._workspace_name}") 108 | await self._import_pipeline(pipeline, config) 109 | 110 | async def _import_index(self, pipeline: PipelineProtocol, config: IndexConfig) -> None: 111 | """Import an index into deepset AI Platform. 112 | 113 | :param pipeline: The Haystack pipeline to import. 114 | :param config: Configuration for importing an index. 
115 | """ 116 | pipeline_yaml = self._from_haystack_pipeline(pipeline, config) 117 | response = await self._api.post( 118 | workspace_name=self._workspace_name, 119 | endpoint="indexes", 120 | json={"name": config.name, "config_yaml": pipeline_yaml}, 121 | ) 122 | response.raise_for_status() 123 | if response.status_code == HTTPStatus.NO_CONTENT: 124 | logger.debug(f"Index {config.name} successfully created.") 125 | 126 | async def _import_pipeline(self, pipeline: PipelineProtocol, config: PipelineConfig) -> None: 127 | """Import a pipeline into deepset AI Platform. 128 | 129 | :param pipeline: The Haystack pipeline to import. 130 | :param config: Configuration for importing a pipeline. 131 | """ 132 | logger.debug(f"Importing pipeline {config.name}") 133 | pipeline_yaml = self._from_haystack_pipeline(pipeline, config) 134 | response = await self._api.post( 135 | workspace_name=self._workspace_name, 136 | endpoint="pipelines", 137 | json={"name": config.name, "query_yaml": pipeline_yaml}, 138 | ) 139 | response.raise_for_status() 140 | if response.status_code == HTTPStatus.NO_CONTENT: 141 | logger.debug(f"Pipeline {config.name} successfully created.") 142 | 143 | def _from_haystack_pipeline(self, pipeline: PipelineProtocol, config: IndexConfig | PipelineConfig) -> str: 144 | """Create a YAML configuration from the pipeline. 145 | 146 | :param pipeline: The Haystack pipeline to create the configuration for. 147 | :param config: Configuration for importing. 148 | :return: YAML configuration as a string. 149 | """ 150 | # Parse the pipeline YAML 151 | pipeline_dict = self._yaml.load(pipeline.dumps()) 152 | self._add_inputs_and_outputs(pipeline_dict, config) 153 | self._add_async_flag_if_needed(pipeline, pipeline_dict) 154 | 155 | # Convert back to string 156 | yaml_str = StringIO() 157 | self._yaml.dump(pipeline_dict, yaml_str) 158 | return yaml_str.getvalue() 159 | 160 | def _add_inputs_and_outputs(self, pipeline_dict: dict, config: IndexConfig | PipelineConfig) -> None: 161 | """Add inputs and outputs to the pipeline dictionary from config. 162 | 163 | :param pipeline_dict: The pipeline dictionary to add inputs and outputs to. 164 | :param config: Configuration for importing. 165 | """ 166 | if config.inputs and (converted_inputs := config.inputs.to_yaml_dict()): 167 | pipeline_dict["inputs"] = converted_inputs 168 | if config.outputs and (converted_outputs := config.outputs.to_yaml_dict()): 169 | pipeline_dict["outputs"] = converted_outputs 170 | 171 | def _add_async_flag_if_needed(self, pipeline: PipelineProtocol, pipeline_dict: dict) -> None: 172 | """Add async_enabled flag to pipeline dict if pipeline is AsyncPipeline. 173 | 174 | This enables running pipelines asynchronously in deepset. 175 | 176 | :param pipeline: The Haystack pipeline to check. 177 | :param pipeline_dict: The pipeline dictionary to modify. 
178 | """ 179 | try: 180 | from haystack import AsyncPipeline as HaystackAsyncPipeline 181 | 182 | if isinstance(pipeline, HaystackAsyncPipeline): 183 | pipeline_dict["async_enabled"] = True 184 | except ImportError: 185 | # If haystack-ai is not available, we can't check the type 186 | # This should not happen since we already checked in import_async 187 | pass 188 | -------------------------------------------------------------------------------- /deepset_cloud_sdk/workflows/sync_client/__init__.py: -------------------------------------------------------------------------------- 1 | """Sync implementation of workflows client.""" 2 | -------------------------------------------------------------------------------- /deepset_cloud_sdk/workflows/sync_client/utils.py: -------------------------------------------------------------------------------- 1 | """Utils for making async code sync.""" 2 | from asyncio import AbstractEventLoop 3 | from typing import AsyncIterator, Generator, Optional, Tuple, TypeVar 4 | 5 | T = TypeVar("T") 6 | 7 | 8 | def iter_over_async(ait: AsyncIterator[T], loop: AbstractEventLoop) -> Generator[T, None, None]: 9 | """Convert an async generator to a sync generator. 10 | 11 | :param ait: Async generator to convert. 12 | :param loop: Event loop to run the async generator on. 13 | :return: Sync generator. 14 | """ 15 | # Taken from 16 | # https://stackoverflow.com/questions/63587660/yielding-asyncio-generator-data-back-from-event-loop-possible/63595496#63595496 17 | ait = ait.__aiter__() # pylint: disable=unnecessary-dunder-call 18 | 19 | async def get_next() -> Tuple[bool, Optional[T]]: 20 | try: 21 | obj = await ait.__anext__() # pylint: disable=unnecessary-dunder-call 22 | return False, obj 23 | except StopAsyncIteration: 24 | return True, None 25 | 26 | while True: 27 | done, obj = loop.run_until_complete(get_next()) 28 | if done: 29 | break 30 | # object will always be not `None` 31 | yield obj # type: ignore 32 | -------------------------------------------------------------------------------- /docs/_images/favicon.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | -------------------------------------------------------------------------------- /docs/_pydoc/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/deepset-ai/deepset-cloud-sdk/3dca35534085225f60b2b18d50b8310c1f006099/docs/_pydoc/__init__.py -------------------------------------------------------------------------------- /docs/_pydoc/config/async_client.yml: -------------------------------------------------------------------------------- 1 | loaders: 2 | - type: python 3 | search_path: [../../../deepset_cloud_sdk/workflows/async_client] 4 | modules: ["files"] 5 | ignore_when_discovered: ["__init__"] 6 | processors: 7 | - type: filter 8 | expression: 9 | documented_only: true 10 | do_not_filter_modules: false 11 | skip_empty_modules: true 12 | - type: smart 13 | - type: crossref 14 | renderer: 15 | type: renderers.ReadmeRenderer 16 | excerpt: An asynchronous client for the deepset Cloud API. 
17 | category_slug: sdk-10 18 | title: Asynchronous Client 19 | slug: async_client 20 | order: 0 21 | markdown: 22 | descriptive_class_title: false 23 | descriptive_module_title: true 24 | add_method_class_prefix: true 25 | add_member_class_prefix: false 26 | filename: async_client.md 27 | -------------------------------------------------------------------------------- /docs/_pydoc/config/cli.yml: -------------------------------------------------------------------------------- 1 | loaders: 2 | - type: python 3 | search_path: [../../../deepset_cloud_sdk] 4 | modules: ["cli"] 5 | ignore_when_discovered: ["__init__"] 6 | processors: 7 | - type: filter 8 | expression: 9 | documented_only: true 10 | do_not_filter_modules: false 11 | skip_empty_modules: true 12 | - type: smart 13 | - type: crossref 14 | renderer: 15 | type: renderers.ReadmeRenderer 16 | excerpt: A cli tool for the deepset Cloud API. 17 | category_slug: sdk-10 18 | title: deepset Cloud CLI 19 | slug: cli 20 | order: 0 21 | markdown: 22 | descriptive_class_title: false 23 | descriptive_module_title: true 24 | add_method_class_prefix: true 25 | add_member_class_prefix: false 26 | filename: cli.md 27 | -------------------------------------------------------------------------------- /docs/_pydoc/config/sync_client.yml: -------------------------------------------------------------------------------- 1 | loaders: 2 | - type: python 3 | search_path: [../../../deepset_cloud_sdk/workflows/sync_client] 4 | modules: ["files"] 5 | ignore_when_discovered: ["__init__"] 6 | processors: 7 | - type: filter 8 | expression: 9 | documented_only: true 10 | do_not_filter_modules: false 11 | skip_empty_modules: true 12 | - type: smart 13 | - type: crossref 14 | renderer: 15 | type: renderers.ReadmeRenderer 16 | excerpt: A synchronous client for the deepset Cloud API. 17 | category_slug: sdk-10 18 | title: Synchronous Client 19 | slug: sync_client 20 | order: 0 21 | markdown: 22 | descriptive_class_title: false 23 | descriptive_module_title: true 24 | add_method_class_prefix: true 25 | add_member_class_prefix: false 26 | filename: sync_client.md 27 | -------------------------------------------------------------------------------- /docs/_pydoc/renderers.py: -------------------------------------------------------------------------------- 1 | import base64 2 | import dataclasses 3 | import io 4 | import os 5 | import sys 6 | import typing as t 7 | 8 | import docspec 9 | from pydoc_markdown.contrib.renderers.markdown import MarkdownRenderer 10 | from pydoc_markdown.interfaces import Context, Renderer 11 | 12 | README_FRONTMATTER = """--- 13 | title: {title} 14 | excerpt: {excerpt} 15 | slug: {slug} 16 | order: {order} 17 | hidden: false 18 | --- 19 | 20 | """ 21 | 22 | 23 | @dataclasses.dataclass 24 | class ReadmeRenderer(Renderer): 25 | """ 26 | This custom Renderer is heavily based on the `MarkdownRenderer`, 27 | it just prepends a front matter so that the output can be published 28 | directly to readme.io. 
29 | """ 30 | 31 | # These settings will be used in the front matter output 32 | title: str 33 | category_slug: str 34 | excerpt: str 35 | slug: str 36 | order: int 37 | # Docs categories fetched from Readme.io 38 | categories: t.Dict[str, str] = dataclasses.field(init=False) 39 | # This exposes a special `markdown` settings value that can be used to pass 40 | # parameters to the underlying `MarkdownRenderer` 41 | markdown: MarkdownRenderer = dataclasses.field(default_factory=MarkdownRenderer) 42 | 43 | def init(self, context: Context) -> None: 44 | self.markdown.init(context) 45 | version = self._doc_version() 46 | 47 | def _doc_version(self) -> str: 48 | """ 49 | Returns the docs version. 50 | """ 51 | # full_version = about.__version__ 52 | # major, minor = full_version.split(".")[:2] 53 | # return f"v{major}.{minor}" 54 | 55 | # The readme.io version is hardcoded for now to manually maintain the guides 56 | # within the same documentation page as the deepset Cloud Docs. 57 | return "v1.0" 58 | 59 | def render(self, modules: t.List[docspec.Module]) -> None: 60 | if self.markdown.filename is None: 61 | sys.stdout.write(self._frontmatter()) 62 | self.markdown.render_single_page(sys.stdout, modules) 63 | else: 64 | with io.open(self.markdown.filename, "w", encoding=self.markdown.encoding) as fp: 65 | fp.write(self._frontmatter()) 66 | self.markdown.render_single_page(t.cast(t.TextIO, fp), modules) 67 | 68 | def _frontmatter(self) -> str: 69 | return README_FRONTMATTER.format( 70 | title=self.title, 71 | excerpt=self.excerpt, 72 | slug=self.slug, 73 | order=self.order, 74 | ) 75 | -------------------------------------------------------------------------------- /docs/_pydoc/requirements.txt: -------------------------------------------------------------------------------- 1 | pydoc-markdown==4.8.2 2 | PyYAML==6.0.1 3 | # pin docspec while waiting for https://github.com/NiklasRosenstein/docspec/issues/91 to be fixed 4 | docspec-python==2.2.1 5 | requests==2.31.0 6 | -------------------------------------------------------------------------------- /docs/_stylesheets/extra.css: -------------------------------------------------------------------------------- 1 | :root { 2 | --md-primary-fg-color: #2b2f55; 3 | --md-accent-fg-color: #1890ff; 4 | /* accent is used for over*/ 5 | } 6 | 7 | /* Hide the title deepset Cloud SDK, but still show the name in the tab */ 8 | .md-nav__title { 9 | display: none; 10 | } -------------------------------------------------------------------------------- /docs/examples/cli/README.md: -------------------------------------------------------------------------------- 1 | # deepset Cloud CLI 2 | The deepset Cloud CLI is a command-line interface tool that you can use to interact with the deepset Cloud SDK and perform various operations, such as uploading files and folders to your deepset Cloud workspace. 3 | 4 | ## Installation 5 | To install the deepset Cloud CLI, use `pip`: 6 | 7 | ```shell 8 | pip install deepset-cloud-sdk 9 | ``` 10 | ## Configuration 11 | Before using the deepset Cloud CLI, log in and provide your credentials. You can do this by running the command: 12 | 13 | On MacOS and Linux: 14 | 15 | ```shell 16 | deepset-cloud login 17 | ``` 18 | On Windows: 19 | 20 | ```shell 21 | python -m deepset_cloud_sdk.cli login 22 | ``` 23 | 24 | This command prompts you to enter your API key and default workspace name. Once you provide these details, the CLI stores your credentials in the `~/.deepset-cloud/.env` file. 
This file is used as the default configuration for subsequent CLI commands. 25 | 26 | Alternatively, to use a different environment file for your configuration, you can create an `.env` file in the local directory. Additionally, you have the flexibility to provide the credentials directly as command-line arguments or set them programmatically in your code. 27 | 28 | ## Usage 29 | You can use the deepset Cloud CLI by running the following command: 30 | 31 | On MacOS and Linux: 32 | 33 | ```shell 34 | deepset-cloud 35 | ``` 36 | 37 | On Windows: 38 | 39 | ```shell 40 | python -m deepset_cloud_sdk.cli 41 | ``` 42 | 43 | Replace with one of the supported commands. To list all available commands, use the `--help` flag. 44 | 45 | ## Example Commands 46 | 47 | ### Upload Files and Folders 48 | 49 | You don't have to follow any special folder structure. If there are multiple files with the same name in your folder, they're all uploaded by default. You can change this behavior with the `--write-mode` flag. See the examples below. 50 | 51 | This command uploads the file example.txt to your deepset Cloud workspace. 52 | On MacOS and Linux: 53 | 54 | ```shell 55 | deepset-cloud upload ./examples/data/example.txt 56 | ``` 57 | 58 | On Windows: 59 | 60 | ```shell 61 | python -m deepset_cloud_sdk.cli upload ./examples/data/example.txt 62 | ``` 63 | 64 | This command uploads all `.txt` and `.pdf` files from the folder located in the _examples_ directory to your deepset Cloud workspace. By default only `.txt` and `.pdf` files are uploaded. To upload different file types see below. 65 | 66 | The paths in the examples are relative to the current working directory. 67 | 68 | On MacOS and Linux: 69 | 70 | ```shell 71 | deepset-cloud upload ./examples/data 72 | ``` 73 | On Windows: 74 | ```shell 75 | python -m deepset_cloud_sdk.cli upload ./examples/data 76 | ``` 77 | To overwrite existing files in your project, use the `--write-mode` flag. For example: 78 | 79 | On MacOS and Linux: 80 | ```shell 81 | deepset-cloud upload ./examples/data --write-mode OVERWRITE 82 | ``` 83 | On Windows: 84 | ```shell 85 | python -m deepset_cloud_sdk.cli upload ./examples/data --write-mode OVERWRITE 86 | ``` 87 | This syncs your local files with the files in your deepset Cloud workspace without having to manually delete the files in your workspace. 88 | 89 | ## Upload different file types 90 | 91 | To upload other file types than text, specify the desired file types using the flag `--use-type`. 92 | The command below uploads all file types from the ./example/data directory that are supported by deepset Cloud. 93 | 94 | ```shell 95 | deepset-cloud upload ./examples/data --use-type .csv --use-type .docx --use-type .html --use-type .json --use-type .md --use-type .txt --use-type .pdf --use-type .pptx --use-type .xlsx --use-type .xml 96 | 97 | ``` 98 | On Windows: 99 | ```shell 100 | python -m deepset_cloud_sdk.cli upload ./examples/data --use-type .csv --use-type .docx --use-type .html --use-type .json --use-type .md --use-type .txt --use-type .pdf --use-type .pptx --use-type .xlsx --use-type .xml 101 | ``` 102 | 103 | 104 | ### Downloading Files from deepset Cloud 105 | This command downloads all files from a workspace to a local directory. 
For example: 106 | 107 | On MacOS and Linux: 108 | 109 | ```shell 110 | deepset-cloud download --workspace-name 111 | ``` 112 | On Windows: 113 | ```shell 114 | python -m deepset_cloud_sdk.cli download --workspace-name 115 | ``` 116 | 117 | To filter for specific files, use the same filters as for listing files. 118 | 119 | 120 | ### List Files 121 | You can run the `list-files` operation to search files in your deepset Cloud workspace. For example: 122 | 123 | On MacOS and Linux: 124 | ```shell 125 | deepset-cloud list-files 126 | ``` 127 | On Windows: 128 | ```shell 129 | python -m deepset_cloud_sdk.cli list-files 130 | ``` 131 | with optional arguments: 132 | 133 | ```shell 134 | --name "" # search by file name 135 | --content "content" # search by file content 136 | --odata-filter "key eq 'value'" # search by odata filter 137 | ``` 138 | 139 | ### Support 140 | If you encounter issues or have questions, reach out to our team on [Discord](https://discord.com/invite/qZxjM4bAHU). 141 | 142 | We hope you find the deepset Cloud CLI useful in your projects. Happy coding! 143 | -------------------------------------------------------------------------------- /docs/examples/data/example.pdf: -------------------------------------------------------------------------------- 1 | %PDF-1.3 2 | %���� 3 | 4 | 1 0 obj 5 | << 6 | /Type /Catalog 7 | /Outlines 2 0 R 8 | /Pages 3 0 R 9 | >> 10 | endobj 11 | 12 | 2 0 obj 13 | << 14 | /Type /Outlines 15 | /Count 0 16 | >> 17 | endobj 18 | 19 | 3 0 obj 20 | << 21 | /Type /Pages 22 | /Count 2 23 | /Kids [ 4 0 R 6 0 R ] 24 | >> 25 | endobj 26 | 27 | 4 0 obj 28 | << 29 | /Type /Page 30 | /Parent 3 0 R 31 | /Resources << 32 | /Font << 33 | /F1 9 0 R 34 | >> 35 | /ProcSet 8 0 R 36 | >> 37 | /MediaBox [0 0 612.0000 792.0000] 38 | /Contents 5 0 R 39 | >> 40 | endobj 41 | 42 | 5 0 obj 43 | << /Length 1074 >> 44 | stream 45 | 2 J 46 | BT 47 | 0 0 0 rg 48 | /F1 0027 Tf 49 | 57.3750 722.2800 Td 50 | ( A Simple PDF File ) Tj 51 | ET 52 | BT 53 | /F1 0010 Tf 54 | 69.2500 688.6080 Td 55 | ( This is a small demonstration .pdf file - ) Tj 56 | ET 57 | BT 58 | /F1 0010 Tf 59 | 69.2500 664.7040 Td 60 | ( just for use in the Virtual Mechanics tutorials. More text. And more ) Tj 61 | ET 62 | BT 63 | /F1 0010 Tf 64 | 69.2500 652.7520 Td 65 | ( text. And more text. And more text. And more text. ) Tj 66 | ET 67 | BT 68 | /F1 0010 Tf 69 | 69.2500 628.8480 Td 70 | ( And more text. And more text. And more text. And more text. And more ) Tj 71 | ET 72 | BT 73 | /F1 0010 Tf 74 | 69.2500 616.8960 Td 75 | ( text. And more text. Boring, zzzzz. And more text. And more text. And ) Tj 76 | ET 77 | BT 78 | /F1 0010 Tf 79 | 69.2500 604.9440 Td 80 | ( more text. And more text. And more text. And more text. And more text. ) Tj 81 | ET 82 | BT 83 | /F1 0010 Tf 84 | 69.2500 592.9920 Td 85 | ( And more text. And more text. ) Tj 86 | ET 87 | BT 88 | /F1 0010 Tf 89 | 69.2500 569.0880 Td 90 | ( And more text. And more text. And more text. And more text. And more ) Tj 91 | ET 92 | BT 93 | /F1 0010 Tf 94 | 69.2500 557.1360 Td 95 | ( text. And more text. And more text. Even more. Continued on page 2 ...) 
Tj 96 | ET 97 | endstream 98 | endobj 99 | 100 | 6 0 obj 101 | << 102 | /Type /Page 103 | /Parent 3 0 R 104 | /Resources << 105 | /Font << 106 | /F1 9 0 R 107 | >> 108 | /ProcSet 8 0 R 109 | >> 110 | /MediaBox [0 0 612.0000 792.0000] 111 | /Contents 7 0 R 112 | >> 113 | endobj 114 | 115 | 7 0 obj 116 | << /Length 676 >> 117 | stream 118 | 2 J 119 | BT 120 | 0 0 0 rg 121 | /F1 0027 Tf 122 | 57.3750 722.2800 Td 123 | ( Simple PDF File 2 ) Tj 124 | ET 125 | BT 126 | /F1 0010 Tf 127 | 69.2500 688.6080 Td 128 | ( ...continued from page 1. Yet more text. And more text. And more text. ) Tj 129 | ET 130 | BT 131 | /F1 0010 Tf 132 | 69.2500 676.6560 Td 133 | ( And more text. And more text. And more text. And more text. And more ) Tj 134 | ET 135 | BT 136 | /F1 0010 Tf 137 | 69.2500 664.7040 Td 138 | ( text. Oh, how boring typing this stuff. But not as boring as watching ) Tj 139 | ET 140 | BT 141 | /F1 0010 Tf 142 | 69.2500 652.7520 Td 143 | ( paint dry. And more text. And more text. And more text. And more text. ) Tj 144 | ET 145 | BT 146 | /F1 0010 Tf 147 | 69.2500 640.8000 Td 148 | ( Boring. More, a little more text. The end, and just as well. ) Tj 149 | ET 150 | endstream 151 | endobj 152 | 153 | 8 0 obj 154 | [/PDF /Text] 155 | endobj 156 | 157 | 9 0 obj 158 | << 159 | /Type /Font 160 | /Subtype /Type1 161 | /Name /F1 162 | /BaseFont /Helvetica 163 | /Encoding /WinAnsiEncoding 164 | >> 165 | endobj 166 | 167 | 10 0 obj 168 | << 169 | /Creator (Rave \(http://www.nevrona.com/rave\)) 170 | /Producer (Nevrona Designs) 171 | /CreationDate (D:20060301072826) 172 | >> 173 | endobj 174 | 175 | xref 176 | 0 11 177 | 0000000000 65535 f 178 | 0000000019 00000 n 179 | 0000000093 00000 n 180 | 0000000147 00000 n 181 | 0000000222 00000 n 182 | 0000000390 00000 n 183 | 0000001522 00000 n 184 | 0000001690 00000 n 185 | 0000002423 00000 n 186 | 0000002456 00000 n 187 | 0000002574 00000 n 188 | 189 | trailer 190 | << 191 | /Size 11 192 | /Root 1 0 R 193 | /Info 10 0 R 194 | >> 195 | 196 | startxref 197 | 2714 198 | %%EOF 199 | -------------------------------------------------------------------------------- /docs/examples/data/example.txt: -------------------------------------------------------------------------------- 1 | This is text 2 | -------------------------------------------------------------------------------- /docs/examples/data/example.txt.meta.json: -------------------------------------------------------------------------------- 1 | { 2 | "key": "value" 3 | } 4 | -------------------------------------------------------------------------------- /docs/examples/sdk/README.md: -------------------------------------------------------------------------------- 1 | # Examples 2 | 3 | ## Upload files to deepset Cloud 4 | 5 | You can upload files in three different ways: 6 | 1. Upload multiple files by providing explicit file paths. 7 | 2. Upload all files from a folder. 8 | 3. Upload raw text. 9 | 10 | For uploading files from your local machine to deepset Cloud, you can use `upload`. 11 | 12 | ## Authentication 13 | 14 | You will need to either explicitly pass an api_key to the `upload` function or set the environment variable 15 | `DEEPSET_CLOUD_API_KEY` to your api key. 16 | By running `deepset-cloud login` you can also store your api key globally on your machine. 17 | This will allow you to omit the api_key parameter in the following examples. 18 | 19 | ## Example 1: Upload all files from a folder 20 | Uploads all files from a folder to the default workspace. 
21 | 22 | ```python 23 | upload( 24 | # workspace_name="my_workspace", # optional, by default the environment variable "DEFAULT_WORKSPACE_NAME" is used 25 | paths=[Path("./examples/data")], 26 | blocking=True, # optional, by default True 27 | timeout_s=300, # optional, by default 300 28 | show_progress=True, # optional, by default True 29 | recursive=False, # optional, by default False 30 | ) 31 | ``` 32 | 33 | ## Example 2: Upload raw texts 34 | 35 | Uploads a list of raw texts to the default workspace. 36 | This can be useful if you want to process your text first and later upload the content of the files. 37 | 38 | ```python 39 | upload_texts( 40 | # workspace_name="my_workspace", # optional, by default the environment variable "DEFAULT_WORKSPACE_NAME" is used 41 | files=[ 42 | DeepsetCloudFile( 43 | name="example.txt", 44 | text="this is text", 45 | meta={"key": "value"}, # optional 46 | ) 47 | ], 48 | blocking=True, # optional, by default True 49 | timeout_s=300, # optional, by default 300 50 | ) 51 | ``` 52 | ## Colab Notebook 53 | 54 | We created this Colab notebook with different upload scenarios that you can test out: [Upload files with SDK in Collab](https://colab.research.google.com/drive/1y2KMB606h-57BafCkhuiaXFWo4gDKtG3?authuser=1#scrollTo=QpIbW_nNA_fT). -------------------------------------------------------------------------------- /docs/examples/sdk/upload.py: -------------------------------------------------------------------------------- 1 | ## Authentication 2 | ## -------------- 3 | ## Either explicitly pass an api_key to the `upload` function or set the environment variable 4 | ## `DEEPSET_CLOUD_API_KEY` to your API key. 5 | ## By running `deepset-cloud login` you can also store your API key globally on your machine. 6 | ## This omits the `api_key`` parameter in the following examples. 7 | 8 | ## Example 1: Upload all files from a folder 9 | ## ----------------------------------------- 10 | ## Uploads all files from a folder to the default workspace. 11 | 12 | from pathlib import Path 13 | 14 | from deepset_cloud_sdk.workflows.sync_client.files import upload 15 | 16 | upload( 17 | # workspace_name="my_workspace", # optional, by default the environment variable "DEFAULT_WORKSPACE_NAME" is used 18 | paths=[Path("./examples/data")], 19 | blocking=True, # optional, by default True 20 | timeout_s=300, # optional, by default 300 21 | show_progress=True, # optional, by default True 22 | recursive=False, # optional, by default False 23 | ) 24 | 25 | 26 | ## Example 2: Upload raw texts 27 | ## --------------------------- 28 | ## Uploads a list of raw texts to the default workspace. 29 | ## This is useful if you want to process your text first and upload the content of the files later. 30 | 31 | from deepset_cloud_sdk.workflows.sync_client.files import upload_texts 32 | 33 | upload_texts( 34 | # workspace_name="my_workspace", # optional, by default the environment variable "DEFAULT_WORKSPACE_NAME" is used 35 | files=[ 36 | DeepsetCloudFile( 37 | name="example.txt", 38 | text="this is text", 39 | meta={"key": "value"}, # optional 40 | ) 41 | ], 42 | blocking=True, # optional, by default True 43 | timeout_s=300, # optional, by default 300 44 | ) 45 | -------------------------------------------------------------------------------- /docs/index.md: -------------------------------------------------------------------------------- 1 |

2 | deepset Cloud SDK 3 |

4 | 5 | [![Coverage badge](https://github.com/deepset-ai/deepset-cloud-sdk/raw/python-coverage-comment-action-data/badge.svg)](https://github.com/deepset-ai/deepset-cloud-sdk/tree/python-coverage-comment-action-data) 6 | [![Tests](https://github.com/deepset-ai/deepset-cloud-sdk/actions/workflows/continuous-integration.yml/badge.svg)](https://github.com/deepset-ai/deepset-cloud-sdk/actions/workflows/continuous-integration.yml) 7 | [![Deploy PyPi](https://github.com/deepset-ai/deepset-cloud-sdk/actions/workflows/deploy-prod.yml/badge.svg)](https://github.com/deepset-ai/deepset-cloud-sdk/actions/workflows/deploy-prod.yml) 8 | [![Compliance Checks](https://github.com/deepset-ai/deepset-cloud-sdk/actions/workflows/compliance.yml/badge.svg)](https://github.com/deepset-ai/deepset-cloud-sdk/actions/workflows/compliance.yml) 9 | 10 | The deepset Cloud SDK is an open source software development kit that provides convenient access and integration with deepset Cloud, a powerful cloud offering for various natural language processing (NLP) tasks. To learn more about deepset Cloud, please have a look at the [official Documentation](https://docs.cloud.deepset.ai/). 11 | 12 | # Supported Features 13 | The following examples demonstrate how to use the deepset Cloud SDK to interact with deepset Cloud using Python. 14 | You can use the deepset Cloud SDK in the command line as well. For more information, see the [CLI documentation](/deepset-cloud-sdk/examples/cli). 15 | - [SDK Examples - Upload datasets](/deepset-cloud-sdk/examples/sdk) 16 | - [CLI Examples - Upload datasets](/deepset-cloud-sdk/examples/cli/) 17 | 18 | ## Installation 19 | The deepset Cloud SDK is available on PyPI and you can install it using pip: 20 | ```bash 21 | pip install deepset-cloud-sdk 22 | ``` 23 | 24 | After installing the deepset Cloud SDK, you can use it to interact with deepset Cloud. It comes with a command line interface (CLI), that you can use by calling: 25 | ```bash 26 | deepset-cloud --help 27 | ``` 28 | 29 |

30 | deepset Cloud CLI 31 |

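A quick way to try the CLI end to end is to log in once and then upload and list files against your default workspace. This is only a sketch of a typical first session; the `./data` folder is a placeholder for your own files, and by default only `.txt` and `.pdf` files are picked up:

```bash
deepset-cloud login          # prompts for your API key and a default workspace name
deepset-cloud upload ./data  # uploads .txt and .pdf files from the ./data folder
deepset-cloud list-files     # lists the files in your workspace to verify the upload
```

On Windows, run the same commands through `python -m deepset_cloud_sdk.cli` instead of `deepset-cloud`.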
32 | 33 | ### Development Installation 34 | To install the deepset Cloud SDK for development, clone the repository and install the package in editable mode: 35 | ```bash 36 | pip install hatch==1.7.0 37 | hatch build 38 | ``` 39 | 40 | Instead of calling the cli from the build package, you can call it directly from the source code: 41 | ```bash 42 | python3 -m deepset_cloud_sdk.cli --help 43 | ``` 44 | 45 | --- 46 | ## Interested in deepset Cloud? 47 | If you are interested in exploring deepset Cloud, visit cloud.deepset.ai. 48 | deepset Cloud provides a range of NLP capabilities and services to help you build and deploy powerful 49 | natural language processing applications. 50 | 51 | ## Interested in Haystack? 52 | deepset Cloud is powered by Haystack, an open source framework for building end-to-end NLP pipelines. 53 | - [Project website](https://haystack.deepset.ai/) 54 | - [GitHub repository](https://github.com/deepset-ai/haystack) 55 | -------------------------------------------------------------------------------- /docs/upload_files.md: -------------------------------------------------------------------------------- 1 | # Overview 2 | 3 | Uploading with SDK is the fastest way if you have many files. It uses sessions under the hood. That means, you create a session and then upload files to this session. Each session has an ID and you can check its status. The upload starts when you close a session. If you leave a session open, it expires after 24 hours. 4 | 5 | After your files are uploaded, it can take a while for them to be listed in deepset Cloud. This means that if you deployed a pipeline, you may need to wait a while for it to run on the newly uploaded files. 6 | 7 | You can use the CLI or the SDK Python methods to upload your files. 8 | 9 | ## Folder Structure 10 | 11 | You don't need to follow any specific folder structure. If your folder contains files with the same name, all these files are uploaded, by default. You can set the `--write-mode` to overwrite the files, keep them all, or fail the upload. For more information, see [CLI examples](/examples/cli/README.md) and [SDK examples](/examples/sdk/README.md). 12 | 13 | # Upload Files 14 | 15 | ## Upload text files: 16 | 17 | By default it is allowed to upload .txt and .pdf files. See below to upload different file types. 18 | 19 | 1. Log in to the sdk: `deepset-cloud login` (MacOS and Linux) or `python -m deepset_cloud_sdk.cli login` (Windows). 20 | 2. When prompted, paste your deepset Cloud API key. 21 | 3. Type the name of the deepset Cloud workspace you want to set as default for all operations. 22 | 4. Choose if you want to use the CLI or a Python script to upload: 23 | - To upload files from a folder using CLI, run: `deepset-cloud upload ` (MacOS and Linux) or `python -m deepset_cloud_sdk.cli upload ` (On Windows) 24 | - To upload files from a folder using a Python script, create the script and run it. 
Here's an example you can use: 25 | 26 | ```python 27 | from pathlib import Path 28 | from deepset_cloud_sdk.workflows.sync_client.files import upload 29 | 30 | ## Uploads all txt and pdf files from a given path 31 | upload( 32 | paths=[Path("")], 33 | blocking=True, # waits until the files are displayed in deepset Cloud, 34 | # this may take a couple of minutes 35 | timeout_s=300, # the timeout for the `blocking` parameter in number of seconds 36 | show_progress=True, # shows the progress bar 37 | recursive=True, # uploads text files from all subfolders as well 38 | ) 39 | ``` 40 | 41 | ## Upload other file types 42 | 43 | Deepset Cloud currently supports uploading : .csv, .docx, .html, .json, .md, .txt, .pdf, .pptx, .xlsx and .xml. 44 | 45 | 46 | ```python 47 | from pathlib import Path 48 | from deepset_cloud_sdk.workflows.sync_client.files import upload 49 | 50 | ## Uploads supported files from a given path 51 | upload( 52 | paths=[Path("")], 53 | blocking=True, 54 | timeout_s=300, 55 | show_progress=True, 56 | recursive=True, 57 | desired_file_types=[ # list of desired file types to upload 58 | ".csv", ".docx", ".html", ".json", ".md", ".txt", ".pdf", ".pptx", ".xlsx", ".xml" 59 | ] 60 | ) 61 | ``` 62 | 63 | For more examples, see [CLI examples](/examples/cli/README.md) and [SDK examples](/examples/sdk/README.md). 64 | 65 | # Metadata 66 | 67 | To add metadata to your files, create one metadata file for each file you upload. The metadata file must be a JSON with the same name as the file whose metadata it contains and the extension `meta.json`. 68 | 69 | For example, if you're uploading a file called `example.txt`, the metadata file should be called `example.txt.meta.json`. If you're uploading a file called `example.pdf`, the metadata file should be `example.pdf.meta.json`. 70 | 71 | The format your metadata in your metadata files should follow is: `{"meta_key1": "value1", "meta_key2": "value2"}`. See the [example metadata file](/examples/data/example.txt.meta.json). 
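If you have many files, you may want to generate the metadata files programmatically instead of writing them by hand. The snippet below is only a minimal sketch of the naming convention described above; the folder path and the metadata keys (`source`, `category`) are placeholders for your own values:

```python
import json
from pathlib import Path

data_dir = Path("<path_to_folder>")  # placeholder: the folder you plan to upload

# Create one <file_name>.meta.json next to every .txt file in the folder,
# e.g. example.txt -> example.txt.meta.json
for file_path in data_dir.glob("*.txt"):
    meta = {"source": "local-upload", "category": "docs"}  # placeholder metadata
    meta_path = file_path.with_name(file_path.name + ".meta.json")
    meta_path.write_text(json.dumps(meta), encoding="utf-8")
```

Once the metadata files sit next to the files they describe, upload the folder as shown in the examples above and the metadata is picked up along with the files.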
72 | -------------------------------------------------------------------------------- /mkdocs.yml: -------------------------------------------------------------------------------- 1 | site_name: deepset Cloud SDK 2 | site_description: deepset Cloud SDK Documentation 3 | site_url: https://deepset-ai.github.io/deepset-cloud-sdk/ 4 | 5 | # Repository 6 | repo_name: deepset-ai/deepset-cloud-sdk 7 | repo_url: https://github.com/deepset-ai/deepset-cloud-sdk 8 | edit_uri: "" 9 | 10 | theme: 11 | name: material 12 | favicon: _images/favicon.svg 13 | logo: _images/white-logo.svg 14 | features: 15 | - content.code.copy 16 | palette: 17 | primary: custom 18 | 19 | plugins: 20 | - search 21 | - mermaid2 22 | - mkdocstrings 23 | 24 | markdown_extensions: 25 | - pymdownx.highlight: 26 | anchor_linenums: true 27 | line_spans: __span 28 | pygments_lang_class: true 29 | - pymdownx.inlinehilite 30 | - pymdownx.snippets 31 | - pymdownx.superfences: 32 | preserve_tabs: true 33 | custom_fences: 34 | - name: mermaid 35 | class: mermaid 36 | format: !!python/name:pymdownx.superfences.fence_code_format 37 | 38 | extra: 39 | version: 40 | provider: mike 41 | 42 | extra_javascript: 43 | - optionalConfig.js 44 | - https://unpkg.com/mermaid@9.4.0/dist/mermaid.min.js 45 | - extra-loader.js 46 | 47 | extra_css: 48 | - _stylesheets/extra.css 49 | 50 | # mkdocs uses the `docs` folder as root folder 51 | nav: 52 | - Get Started: index.md 53 | - Upload Files: upload_files.md 54 | - Examples: 55 | - CLI: examples/cli/README.md 56 | - SDK: examples/sdk/README.md 57 | - API Docs: # autogenerated within the /docs/_pydoc folder 58 | - Synchronous SDK: _pydoc/temp/sync_client.md 59 | - Asynchronous SDK: _pydoc/temp/async_client.md 60 | - CLI: _pydoc/temp/cli.md 61 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [build-system] 2 | requires = ["hatchling"] 3 | build-backend = "hatchling.build" 4 | 5 | [project] 6 | name = "deepset-cloud-sdk" 7 | dynamic = ["version"] 8 | description = 'deepset Cloud SDK' 9 | readme = "README.md" 10 | requires-python = ">= 3.8" 11 | license = "Apache-2.0" 12 | keywords = [] 13 | authors = [{ name = "deepset", email = "rohan.janjua@deepset.ai" }] 14 | classifiers = [ 15 | "Development Status :: 4 - Beta", 16 | "Programming Language :: Python", 17 | "Programming Language :: Python :: 3.8", 18 | "Programming Language :: Python :: 3.9", 19 | "Programming Language :: Python :: 3.10", 20 | "Programming Language :: Python :: 3.11", 21 | "Programming Language :: Python :: Implementation :: CPython", 22 | "Programming Language :: Python :: Implementation :: PyPy", 23 | ] 24 | dependencies = [ 25 | "structlog>=24.0.0", 26 | "httpx>=0.27.2", 27 | "python-dotenv>=1.0.1", 28 | "typer>=0.16.0", 29 | "click==8.2.0", # fixed because of bug in 8.2.1, see https://github.com/pallets/click/issues/2939 30 | "tenacity>=8.3.0", 31 | "aiohttp>=3.10.10", 32 | "aiofiles>=24.1.0", 33 | "tabulate>=0.9.0", 34 | "tqdm>=4.66.4", 35 | "yaspin>=3.0.0", 36 | "pyrate-limiter>=3.7.0", 37 | "pydantic>=2.11.4", 38 | "ruamel.yaml>=0.18.10", 39 | ] 40 | 41 | [project.urls] 42 | Documentation = "https://github.com/deepset-ai/deepset-cloud-sdk#readme" 43 | Issues = "https://github.com/deepset-ai/deepset-cloud-sdk/issues" 44 | Source = "https://github.com/deepset-ai/deepset-cloud-sdk" 45 | 46 | 47 | [project.scripts] 48 | deepset-cloud = "deepset_cloud_sdk.cli:run_packaged" 49 | 50 | [tool.hatch.version] 
51 | path = "deepset_cloud_sdk/__about__.py" 52 | 53 | [tool.hatch.envs.default.scripts] 54 | tests-with-cov = "pytest --cov-report=term-missing --cov-config=pyproject.toml --cov=deepset_cloud_sdk tests/unit" 55 | tests-unit = "pytest --cov-report=term-missing --cov-config=pyproject.toml --cov=deepset_cloud_sdk tests/unit" 56 | tests-integration = "pytest tests/integration" 57 | 58 | [[tool.hatch.envs.all.matrix]] 59 | python = ["3.10"] 60 | 61 | [tool.hatch.envs.default] 62 | dependencies = [] 63 | 64 | [tool.hatch.envs.test.scripts] 65 | unit-with-cov = "pytest --cov-report=term-missing --cov-config=pyproject.toml --cov=deepset_cloud_sdk tests/unit" 66 | integration = "pytest --cov-report=term-missing --cov-config=pyproject.toml --cov=deepset_cloud_sdk tests/integration" 67 | 68 | [tool.hatch.envs.test] 69 | template = 'default' 70 | dependencies = [ 71 | "pytest-cov==4.0.0", 72 | "pytest==7.3.1", 73 | "pytest-asyncio==0.21.0", 74 | "haystack-ai>=2.13.2", # only for testing 75 | "respx==0.22.0", 76 | ] 77 | 78 | 79 | [tool.hatch.envs.code-quality] 80 | python = "3.10" 81 | template = 'default' 82 | detached = false 83 | # Please keep these aligned with the versions defined in .pre-commit-config.yaml 84 | dependencies = [ 85 | "pylint==2.17.4", 86 | "pydocstyle==6.3.0", 87 | "black==23.3.0", 88 | "isort==5.12.0", 89 | "mypy==1.1.1", 90 | "pre-commit==2.20.0", 91 | "types-aiofiles==23.1.0.2", 92 | "types-tabulate==0.9.0.2", 93 | "autoflake==2.1.1", 94 | ] 95 | 96 | [tool.hatch.envs.code-quality.scripts] 97 | types = "mypy deepset_cloud_sdk tests" 98 | format = "black deepset_cloud_sdk tests --check" 99 | format-fix = "black deepset_cloud_sdk tests" 100 | lint = "pylint deepset_cloud_sdk" 101 | sort = "isort --check --profile black ." 102 | sort-fix = "isort --profile black ." 
103 | hooks = "pre-commit install" 104 | docstrings = "pydocstyle deepset_cloud_sdk" 105 | flake = "autoflake --remove-all-unused-imports --remove-duplicate-keys --remove-unused-variables -v -r ./deepset_cloud_sdk" 106 | all = "hatch run types && hatch run format-fix && hatch run lint && hatch run sort && hatch run docstrings && hatch run flake" 107 | 108 | [tool.hatch.envs.tools] 109 | detached = false 110 | # Please keep these aligned with the versions defined in .pre-commit-config.yaml 111 | dependencies = ["pip-tools==6.13.0"] 112 | 113 | [tool.hatch.envs.tools.scripts] 114 | requirements = "pip-compile -o requirements.txt pyproject.toml" 115 | 116 | [tool.coverage.run] 117 | branch = true 118 | relative_files = true 119 | omit = ["deepset_cloud_sdk/__about__.py"] 120 | 121 | [tool.coverage.report] 122 | exclude_lines = ["no cov", "if __name__ == .__main__.:", "if TYPE_CHECKING:"] 123 | 124 | [tool.black] 125 | line-length = 120 126 | 127 | [tool.mypy] 128 | python_version = "3.10" 129 | warn_return_any = true 130 | warn_unused_configs = true 131 | ignore_missing_imports = true 132 | disallow_incomplete_defs = true 133 | disallow_untyped_defs = true 134 | 135 | [tool.pylint.'MESSAGES CONTROL'] 136 | max-line-length = 150 137 | disable = [ 138 | "fixme", 139 | "c-extension-no-member", 140 | "wrong-spelling-in-comment", 141 | "wrong-spelling-in-docstring", 142 | "missing-module-docstring", 143 | ] 144 | [tool.pylint.'DESIGN'] 145 | max-args = 9 146 | 147 | [tool.pylint.'SIMILARITIES'] 148 | min-similarity-lines = 10 149 | 150 | [tool.pylint.'BASIC'] 151 | good-names = ["i", "k", "v", "_", "f1"] 152 | 153 | [tool.hatch.build.targets.sdist] 154 | exclude = ["/.github", "/tests"] 155 | 156 | [tool.hatch.build.targets.wheel] 157 | packages = ["deepset_cloud_sdk"] 158 | -------------------------------------------------------------------------------- /test-upload/example.txt: -------------------------------------------------------------------------------- 1 | this is my text 2 | -------------------------------------------------------------------------------- /test-upload/example.txt.meta.json: -------------------------------------------------------------------------------- 1 | { 2 | "key": "value", 3 | "key2": "value2" 4 | } 5 | -------------------------------------------------------------------------------- /test-upload/example2.txt: -------------------------------------------------------------------------------- 1 | this is my text 2 2 | -------------------------------------------------------------------------------- /test-upload/example2.txt.meta.json: -------------------------------------------------------------------------------- 1 | { 2 | "key": "value", 3 | "key2": "value2" 4 | } 5 | -------------------------------------------------------------------------------- /tests/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/deepset-ai/deepset-cloud-sdk/3dca35534085225f60b2b18d50b8310c1f006099/tests/__init__.py -------------------------------------------------------------------------------- /tests/conftest.py: -------------------------------------------------------------------------------- 1 | import datetime 2 | import json 3 | import os 4 | from http import HTTPStatus 5 | from typing import Generator, List 6 | from unittest.mock import AsyncMock, Mock 7 | from uuid import uuid4 8 | 9 | import httpx 10 | import pytest 11 | import structlog 12 | from dotenv import load_dotenv 13 | 14 | # from faker import Faker 15 | 
from tenacity import retry, stop_after_delay, wait_fixed 16 | 17 | from deepset_cloud_sdk._api.config import CommonConfig 18 | from deepset_cloud_sdk._api.deepset_cloud_api import DeepsetCloudAPI 19 | from deepset_cloud_sdk._api.files import FilesAPI 20 | from deepset_cloud_sdk._api.upload_sessions import ( 21 | AWSPrefixedRequestConfig, 22 | UploadSession, 23 | UploadSessionsAPI, 24 | ) 25 | from deepset_cloud_sdk._s3.upload import S3 26 | 27 | load_dotenv() 28 | 29 | logger = structlog.get_logger(__name__) 30 | 31 | 32 | def _get_file_names(integration_config: CommonConfig, workspace_name: str) -> List[str]: 33 | list_response = httpx.get( 34 | f"{integration_config.api_url}/workspaces/{workspace_name}/files", 35 | headers={"Authorization": f"Bearer {integration_config.api_key}"}, 36 | params={"limit": 100}, 37 | ) 38 | assert list_response.status_code == HTTPStatus.OK 39 | file_names: List[str] = list_response.json()["data"] 40 | logger.info("Found files", file_names=file_names) 41 | return file_names 42 | 43 | 44 | @pytest.fixture(scope="session") 45 | def integration_config() -> CommonConfig: 46 | config = CommonConfig( 47 | api_key=os.getenv("API_KEY", ""), 48 | api_url=os.getenv("API_URL", ""), 49 | ) 50 | assert config.api_key != "", "API_KEY environment variable must be set" 51 | assert config.api_url != "", "API_URL environment variable must be set" 52 | return config 53 | 54 | 55 | @pytest.fixture(scope="session") 56 | def integration_config_safe_mode() -> CommonConfig: 57 | config = CommonConfig( 58 | api_key=os.getenv("API_KEY", ""), 59 | api_url=os.getenv("API_URL", ""), 60 | safe_mode=True, 61 | ) 62 | assert config.api_key != "", "API_KEY environment variable must be set" 63 | assert config.api_url != "", "API_URL environment variable must be set" 64 | return config 65 | 66 | 67 | @pytest.fixture 68 | def unit_config() -> CommonConfig: 69 | return CommonConfig(api_key="test_api_key", api_url="https://fake.dc.api/api/v1") 70 | 71 | 72 | @pytest.fixture 73 | def mocked_client() -> Mock: 74 | return Mock(spec=httpx.AsyncClient) 75 | 76 | 77 | @pytest.fixture 78 | def mocked_deepset_cloud_api() -> Mock: 79 | return Mock(spec=DeepsetCloudAPI) 80 | 81 | 82 | @pytest.fixture 83 | def mocked_upload_sessions_api() -> Mock: 84 | return Mock(spec=UploadSessionsAPI) 85 | 86 | 87 | @pytest.fixture 88 | def mocked_files_api() -> Mock: 89 | return Mock(spec=FilesAPI) 90 | 91 | 92 | @pytest.fixture 93 | def mocked_s3() -> Mock: 94 | # TODO: add aws client mock that sends files to aws 95 | return AsyncMock(spec=S3) 96 | 97 | 98 | @pytest.fixture 99 | def deepset_cloud_api(unit_config: CommonConfig, mocked_client: Mock) -> DeepsetCloudAPI: 100 | return DeepsetCloudAPI(config=unit_config, client=mocked_client) 101 | 102 | 103 | @pytest.fixture 104 | def upload_session_response() -> UploadSession: 105 | return UploadSession( 106 | session_id=uuid4(), 107 | documentation_url="Documentation URL", 108 | expires_at=datetime.datetime.now(), 109 | aws_prefixed_request_config=AWSPrefixedRequestConfig(url="uploadURL", fields={"key": "value"}), 110 | ) 111 | 112 | 113 | @retry( 114 | stop=stop_after_delay(120), 115 | wait=wait_fixed(1), 116 | reraise=True, 117 | ) 118 | def _wait_for_file_to_be_available( 119 | integration_config: CommonConfig, workspace_name: str, expected_file_count: int = 15 120 | ) -> None: 121 | assert len(_get_file_names(integration_config, workspace_name)) >= expected_file_count 122 | 123 | 124 | @pytest.fixture(scope="session") 125 | def workspace_name(integration_config: 
CommonConfig) -> Generator[str, None, None]: 126 | """Create a workspace for the tests and delete it afterwards.""" 127 | workspace_name = f"sdktest_{uuid4()}" 128 | 129 | logger.info("Creating workspace", workspace_name=workspace_name) 130 | 131 | # try creating workspace 132 | response = httpx.post( 133 | f"{integration_config.api_url}/workspaces", 134 | json={"name": workspace_name}, 135 | headers={"Authorization": f"Bearer {integration_config.api_key}"}, 136 | ) 137 | assert response.status_code in (HTTPStatus.CREATED, HTTPStatus.CONFLICT) 138 | 139 | try: 140 | if len(_get_file_names(integration_config=integration_config, workspace_name=workspace_name)) == 0: 141 | for i in range(15): 142 | response = httpx.post( 143 | f"{integration_config.api_url}/workspaces/{workspace_name}/files", 144 | data={"text": "This is text"}, 145 | files={ 146 | "meta": (None, json.dumps({"find": "me"}).encode("utf-8")), 147 | }, 148 | params={"file_name": f"example{i}.txt"}, 149 | headers={"Authorization": f"Bearer {integration_config.api_key}"}, 150 | ) 151 | assert response.status_code == HTTPStatus.CREATED 152 | 153 | _wait_for_file_to_be_available(integration_config, workspace_name, expected_file_count=15) 154 | 155 | yield workspace_name 156 | 157 | finally: 158 | response = httpx.delete( 159 | f"{integration_config.api_url}/workspaces/{workspace_name}", 160 | headers={"Authorization": f"Bearer {integration_config.api_key}"}, 161 | ) 162 | 163 | assert response.status_code in (HTTPStatus.OK, HTTPStatus.NO_CONTENT) 164 | -------------------------------------------------------------------------------- /tests/data/.fake-env: -------------------------------------------------------------------------------- 1 | API_KEY="fake-api-key" 2 | -------------------------------------------------------------------------------- /tests/data/direct_upload/example.txt: -------------------------------------------------------------------------------- 1 | asdf 2 | -------------------------------------------------------------------------------- /tests/data/direct_upload/example.txt.meta.json: -------------------------------------------------------------------------------- 1 | { 2 | "key": "value" 3 | } 4 | -------------------------------------------------------------------------------- /tests/data/example.txt: -------------------------------------------------------------------------------- 1 | This is text -------------------------------------------------------------------------------- /tests/data/upload_folder/example.csv: -------------------------------------------------------------------------------- 1 | example 1,example 2 2 | -------------------------------------------------------------------------------- /tests/data/upload_folder/example.csv.meta.json: -------------------------------------------------------------------------------- 1 | { 2 | "key": "value" 3 | } 4 | -------------------------------------------------------------------------------- /tests/data/upload_folder/example.docx: -------------------------------------------------------------------------------- 1 | This is text 2 | -------------------------------------------------------------------------------- /tests/data/upload_folder/example.html: -------------------------------------------------------------------------------- 1 |

example 1

2 | -------------------------------------------------------------------------------- /tests/data/upload_folder/example.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/deepset-ai/deepset-cloud-sdk/3dca35534085225f60b2b18d50b8310c1f006099/tests/data/upload_folder/example.jpg -------------------------------------------------------------------------------- /tests/data/upload_folder/example.json: -------------------------------------------------------------------------------- 1 | { 2 | "example": "This is an example of a JSON object." 3 | } 4 | -------------------------------------------------------------------------------- /tests/data/upload_folder/example.md: -------------------------------------------------------------------------------- 1 | # Example 2 | -------------------------------------------------------------------------------- /tests/data/upload_folder/example.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/deepset-ai/deepset-cloud-sdk/3dca35534085225f60b2b18d50b8310c1f006099/tests/data/upload_folder/example.pdf -------------------------------------------------------------------------------- /tests/data/upload_folder/example.pptx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/deepset-ai/deepset-cloud-sdk/3dca35534085225f60b2b18d50b8310c1f006099/tests/data/upload_folder/example.pptx -------------------------------------------------------------------------------- /tests/data/upload_folder/example.txt: -------------------------------------------------------------------------------- 1 | This is text 2 | -------------------------------------------------------------------------------- /tests/data/upload_folder/example.txt.meta.json: -------------------------------------------------------------------------------- 1 | { 2 | "key": "value" 3 | } 4 | -------------------------------------------------------------------------------- /tests/data/upload_folder/example.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/deepset-ai/deepset-cloud-sdk/3dca35534085225f60b2b18d50b8310c1f006099/tests/data/upload_folder/example.xlsx -------------------------------------------------------------------------------- /tests/data/upload_folder/example.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | Example 1 4 | 5 | -------------------------------------------------------------------------------- /tests/data/upload_folder_nested/example.txt: -------------------------------------------------------------------------------- 1 | This is text 2 | -------------------------------------------------------------------------------- /tests/data/upload_folder_nested/meta/example.txt.meta.json: -------------------------------------------------------------------------------- 1 | {"a": "b"} 2 | -------------------------------------------------------------------------------- /tests/data/upload_folder_nested/nested_folder/second.txt: -------------------------------------------------------------------------------- 1 | This is text 2 2 | -------------------------------------------------------------------------------- /tests/data/upload_folder_with_duplicates/file1.txt: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/deepset-ai/deepset-cloud-sdk/3dca35534085225f60b2b18d50b8310c1f006099/tests/data/upload_folder_with_duplicates/file1.txt -------------------------------------------------------------------------------- /tests/data/upload_folder_with_duplicates/file2.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/deepset-ai/deepset-cloud-sdk/3dca35534085225f60b2b18d50b8310c1f006099/tests/data/upload_folder_with_duplicates/file2.txt -------------------------------------------------------------------------------- /tests/data/upload_folder_with_duplicates/old_files/file1.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/deepset-ai/deepset-cloud-sdk/3dca35534085225f60b2b18d50b8310c1f006099/tests/data/upload_folder_with_duplicates/old_files/file1.txt -------------------------------------------------------------------------------- /tests/data/upload_folder_with_duplicates/old_files/file2.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/deepset-ai/deepset-cloud-sdk/3dca35534085225f60b2b18d50b8310c1f006099/tests/data/upload_folder_with_duplicates/old_files/file2.txt -------------------------------------------------------------------------------- /tests/integration/api/test_integration_files.py: -------------------------------------------------------------------------------- 1 | from datetime import timedelta 2 | 3 | import pytest 4 | import tenacity 5 | 6 | from deepset_cloud_sdk._api.config import CommonConfig 7 | from deepset_cloud_sdk._api.deepset_cloud_api import DeepsetCloudAPI 8 | from deepset_cloud_sdk._api.files import FilesAPI 9 | 10 | 11 | @pytest.mark.asyncio 12 | class TestListFiles: 13 | async def test_list_paginated( 14 | self, 15 | integration_config: CommonConfig, 16 | workspace_name: str, 17 | ) -> None: 18 | async with DeepsetCloudAPI.factory(integration_config) as deepset_cloud_api: 19 | files_api = FilesAPI(deepset_cloud_api) 20 | 21 | # We need to retry fetching this, because the file itself is available 22 | # immediately, but the search index might not be updated yet. 23 | # We are searching by context here which is otherwise not available. 
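# tenacity.Retrying yields attempt contexts: any exception raised inside the
# `with attempt:` block below triggers another try after a fixed 0.5 s wait,
# until the 300-second stop condition is reached; with reraise=True the last
# error is then re-raised so the test fails with the original assertion message.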
24 | for attempt in tenacity.Retrying( 25 | stop=tenacity.stop_after_delay(300), 26 | wait=tenacity.wait_fixed(wait=timedelta(seconds=0.5)), 27 | reraise=True, 28 | ): 29 | with attempt: 30 | result = await files_api.list_paginated( 31 | workspace_name=workspace_name, 32 | limit=10, 33 | name="example0.txt", 34 | odata_filter="find eq 'me'", 35 | ) 36 | assert result.total == 1 37 | assert result.has_more is False 38 | assert len(result.data) == 1 39 | found_file = result.data[0] 40 | assert found_file.name == "example0.txt" 41 | assert found_file.size > 0 42 | assert found_file.meta == {"find": "me"} 43 | -------------------------------------------------------------------------------- /tests/integration/api/test_integration_upload_sessions.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | from deepset_cloud_sdk._api.config import CommonConfig 4 | from deepset_cloud_sdk._api.deepset_cloud_api import DeepsetCloudAPI 5 | from deepset_cloud_sdk._api.upload_sessions import ( 6 | UploadSession, 7 | UploadSessionDetailList, 8 | UploadSessionIngestionStatus, 9 | UploadSessionsAPI, 10 | ) 11 | 12 | 13 | @pytest.mark.asyncio 14 | @pytest.mark.parametrize("integration_config", ["integration_config", "integration_config_safe_mode"], indirect=True) 15 | class TestCreateUploadSessions: 16 | async def test_create_and_close_upload_session(self, integration_config: CommonConfig, workspace_name: str) -> None: 17 | async with DeepsetCloudAPI.factory(integration_config) as deepset_cloud_api: 18 | upload_session_client = UploadSessionsAPI(deepset_cloud_api) 19 | 20 | result: UploadSession = await upload_session_client.create(workspace_name=workspace_name) 21 | assert result.session_id is not None 22 | assert result.documentation_url is not None 23 | assert result.expires_at is not None 24 | 25 | assert "-user-files-upload.s3.amazonaws.com/" in result.aws_prefixed_request_config.url 26 | 27 | assert result.aws_prefixed_request_config.fields["key"] is not None 28 | 29 | await upload_session_client.close(workspace_name=workspace_name, session_id=result.session_id) 30 | 31 | session_status = await upload_session_client.status( 32 | workspace_name=workspace_name, session_id=result.session_id 33 | ) 34 | assert session_status.session_id is not None 35 | assert session_status.documentation_url is not None 36 | assert session_status.expires_at is not None 37 | assert session_status.ingestion_status == UploadSessionIngestionStatus(failed_files=0, finished_files=0) 38 | 39 | async def test_list_upload_session(self, integration_config: CommonConfig, workspace_name: str) -> None: 40 | async with DeepsetCloudAPI.factory(integration_config) as deepset_cloud_api: 41 | upload_session_client = UploadSessionsAPI(deepset_cloud_api) 42 | 43 | await upload_session_client.create(workspace_name=workspace_name) 44 | 45 | result: UploadSessionDetailList = await upload_session_client.list( 46 | workspace_name=workspace_name, limit=1, page_number=1 47 | ) 48 | 49 | assert result.total > 0 50 | assert result.data is not None 51 | assert len(result.data) == 1 52 | -------------------------------------------------------------------------------- /tests/integration/workflows/test_integration_pipeline_client.py: -------------------------------------------------------------------------------- 1 | """Integration tests for importing Haystack pipelines into deepset AI Platform.""" 2 | import json 3 | 4 | import pytest 5 | import respx 6 | from haystack import AsyncPipeline, Pipeline 7 
| from haystack.components.builders.answer_builder import AnswerBuilder 8 | from haystack.components.builders.prompt_builder import PromptBuilder 9 | from haystack.components.converters.txt import TextFileToDocument 10 | from haystack.components.embedders.sentence_transformers_document_embedder import ( 11 | SentenceTransformersDocumentEmbedder, 12 | ) 13 | from haystack.components.generators.openai import OpenAIGenerator 14 | from haystack.components.routers.file_type_router import FileTypeRouter 15 | from haystack.utils import Secret 16 | from httpx import Response 17 | 18 | from deepset_cloud_sdk.workflows.pipeline_client import PipelineClient 19 | from deepset_cloud_sdk.workflows.pipeline_client.models import ( 20 | IndexConfig, 21 | IndexInputs, 22 | PipelineConfig, 23 | PipelineInputs, 24 | PipelineOutputs, 25 | ) 26 | 27 | 28 | @pytest.mark.parametrize("pipeline_class", [Pipeline, AsyncPipeline]) 29 | class TestImportIndexIntoDeepset: 30 | @pytest.fixture 31 | def sample_index(self, pipeline_class: Pipeline | AsyncPipeline) -> Pipeline: 32 | """Create a simple index for testing.""" 33 | file_type_router = FileTypeRouter(mime_types=["text/plain"]) 34 | text_converter = TextFileToDocument(encoding="utf-8") 35 | document_embedder = SentenceTransformersDocumentEmbedder(normalize_embeddings=True, model="intfloat/e5-base-v2") 36 | 37 | # Create and configure pipeline 38 | index = pipeline_class() 39 | 40 | # Add components 41 | index.add_component("file_type_router", file_type_router) 42 | index.add_component("text_converter", text_converter) 43 | index.add_component("document_embedder", document_embedder) 44 | 45 | # Connect components 46 | index.connect("file_type_router.text/plain", "text_converter.sources") 47 | index.connect("text_converter.documents", "document_embedder.documents") 48 | 49 | return index 50 | 51 | @pytest.mark.integration 52 | @respx.mock 53 | def test_import_index_into_deepset(self, sample_index: Pipeline) -> None: 54 | """Test synchronously importing an index into deepset.""" 55 | route = respx.post("https://test-api-url.com/workspaces/test-workspace/indexes").mock( 56 | return_value=Response(status_code=201, json={"id": "test-index-id"}) 57 | ) 58 | 59 | # Initialize client with explicit configuration 60 | client = PipelineClient( 61 | api_key="test-api-key", api_url="https://test-api-url.com", workspace_name="test-workspace" 62 | ) 63 | 64 | index_config = IndexConfig( 65 | name="test-index", 66 | inputs=IndexInputs( 67 | files=["file_type_router.sources"], 68 | ), 69 | ) 70 | 71 | client.import_into_deepset(sample_index, index_config) 72 | 73 | assert route.called 74 | request = route.calls.last.request 75 | assert request.headers["Authorization"] == "Bearer test-api-key" 76 | 77 | request_body = json.loads(request.content) 78 | assert request_body["name"] == "test-index" 79 | assert request_body["config_yaml"].startswith("components:\n document_embedder:\n") 80 | 81 | @pytest.mark.asyncio 82 | @pytest.mark.integration 83 | @respx.mock 84 | async def test_import_index_into_deepset_async(self, sample_index: Pipeline) -> None: 85 | """Test asynchronously importing an index into deepset.""" 86 | route = respx.post("https://test-api-url.com/workspaces/test-workspace/indexes").mock( 87 | return_value=Response(status_code=201, json={"id": "test-index-id"}) 88 | ) 89 | 90 | # Initialize client with explicit configuration 91 | client = PipelineClient( 92 | api_key="test-api-key", api_url="https://test-api-url.com", workspace_name="test-workspace" 93 | ) 94 | 95 | 
index_config = IndexConfig( 96 | name="test-index-async", 97 | inputs=IndexInputs( 98 | files=["file_type_router.sources"], 99 | ), 100 | ) 101 | 102 | await client.import_into_deepset_async(sample_index, index_config) 103 | 104 | assert route.called 105 | request = route.calls.last.request 106 | assert request.headers["Authorization"] == "Bearer test-api-key" 107 | 108 | request_body = json.loads(request.content) 109 | assert request_body["name"] == "test-index-async" 110 | assert request_body["config_yaml"].startswith("components:\n document_embedder:\n") 111 | 112 | 113 | @pytest.mark.parametrize("pipeline_class", [Pipeline, AsyncPipeline]) 114 | class TestImportPipelineIntoDeepset: 115 | @pytest.fixture 116 | def sample_pipeline(self, pipeline_class: Pipeline | AsyncPipeline, monkeypatch: pytest.MonkeyPatch) -> Pipeline: 117 | """Create a sample pipeline for testing.""" 118 | monkeypatch.setenv("OPENAI_API_KEY", "test-openai-api-key") 119 | 120 | # Initialize components 121 | prompt_builder = PromptBuilder( 122 | template="""You are a technical expert. 123 | You summary should be no longer than five sentences. 124 | Passage: {{ question }} 125 | Your summary: """, 126 | required_variables=["*"], 127 | ) 128 | 129 | llm = OpenAIGenerator(api_key=Secret.from_env_var("OPENAI_API_KEY", strict=False), model="gpt-4") 130 | 131 | answer_builder = AnswerBuilder() 132 | 133 | # Create and configure pipeline 134 | pipeline = pipeline_class() 135 | 136 | # Add components 137 | pipeline.add_component("prompt_builder", prompt_builder) 138 | pipeline.add_component("llm", llm) 139 | pipeline.add_component("answer_builder", answer_builder) 140 | 141 | # Connect components 142 | pipeline.connect("prompt_builder.prompt", "llm.prompt") 143 | pipeline.connect("llm.replies", "answer_builder.replies") 144 | 145 | return pipeline 146 | 147 | @pytest.mark.integration 148 | @respx.mock 149 | def test_import_pipeline_into_deepset(self, sample_pipeline: Pipeline) -> None: 150 | """Test synchronously importing a pipeline into deepset AI Platform.""" 151 | route = respx.post("https://test-api-url.com/workspaces/test-workspace/pipelines").mock( 152 | return_value=Response(status_code=201, json={"id": "test-pipeline-id"}) 153 | ) 154 | 155 | client = PipelineClient( 156 | api_key="test-api-key", api_url="https://test-api-url.com", workspace_name="test-workspace" 157 | ) 158 | 159 | pipeline_config = PipelineConfig( 160 | name="test-pipeline", 161 | inputs=PipelineInputs(query=["prompt_builder.prompt", "answer_builder.query"]), 162 | outputs=PipelineOutputs(answers="answer_builder.answers"), 163 | ) 164 | client.import_into_deepset(sample_pipeline, pipeline_config) 165 | 166 | assert route.called 167 | request = route.calls.last.request 168 | assert request.headers["Authorization"] == "Bearer test-api-key" 169 | 170 | request_body = json.loads(request.content) 171 | assert request_body["name"] == "test-pipeline" 172 | assert request_body["query_yaml"].startswith("components:\n answer_builder:\n init_parameters:\n") 173 | 174 | @pytest.mark.asyncio 175 | @pytest.mark.integration 176 | @respx.mock 177 | async def test_import_pipeline_into_deepset_async(self, sample_pipeline: Pipeline) -> None: 178 | """Test asynchronously importing a pipeline into deepset.""" 179 | route = respx.post("https://test-api-url.com/workspaces/test-workspace/pipelines").mock( 180 | return_value=Response(status_code=200, json={"name": "test-pipeline-id"}) 181 | ) 182 | 183 | client = PipelineClient( 184 | api_key="test-api-key", 
api_url="https://test-api-url.com", workspace_name="test-workspace" 185 | ) 186 | 187 | pipeline_config = PipelineConfig( 188 | name="test-pipeline", 189 | inputs=PipelineInputs(query=["prompt_builder.prompt", "answer_builder.query"]), 190 | outputs=PipelineOutputs(answers="answer_builder.answers"), 191 | ) 192 | await client.import_into_deepset_async(sample_pipeline, pipeline_config) 193 | 194 | assert route.called 195 | request = route.calls.last.request 196 | assert request.headers["Authorization"] == "Bearer test-api-key" 197 | 198 | request_body = json.loads(request.content) 199 | assert request_body["name"] == "test-pipeline" 200 | assert request_body["query_yaml"].startswith("components:\n answer_builder:\n init_parameters:\n") 201 | -------------------------------------------------------------------------------- /tests/test_data/basic.txt: -------------------------------------------------------------------------------- 1 | this is a file 2 | -------------------------------------------------------------------------------- /tests/test_data/msmarco.10/103275.txt: -------------------------------------------------------------------------------- 1 | For example the median expected hourly pay for a typical Physician - Pediatric Neonatology in the United States is $116 an hour, so 50% of the people who perform the job of Physician - Pediatric Neonatology in the United States are expected to make less than $116. Source: HR Reported data as of January 02, 2018 2 | -------------------------------------------------------------------------------- /tests/test_data/msmarco.10/103275.txt.meta.json: -------------------------------------------------------------------------------- 1 | {"pid": "103275", "source": "msmarco", "meta_1": "category_4", "meta_2": 1, "has_devset_query": false} 2 | -------------------------------------------------------------------------------- /tests/test_data/msmarco.10/103291.txt: -------------------------------------------------------------------------------- 1 | Origin of the name Cassandra: Derived from the Greek Kassandra, the mythological daughter of Priam and Hecuba who had the power of prophesy. Var: Casaundra, Kasandra, Kassandra, Kasaundra. Short: Cass, Kass, Sandra, Saundra.Pet: Cassi, Cassie, Cassy, Kassi, Kassie, Sandi, Sandie, Sandy.From A World of Baby Names by Teresa Norman.erived from the Greek Kassandra, the mythological daughter of Priam and Hecuba who had the power of prophesy. Var: Casaundra, Kasandra, Kassandra, Kasaundra. Short: Cass, Kass, Sandra, Saundra. Pet: Cassi, Cassie, Cassy, Kassi, Kassie, Sandi, Sandie, Sandy. From A World of Baby Names by Teresa Norman. 2 | -------------------------------------------------------------------------------- /tests/test_data/msmarco.10/103291.txt.meta.json: -------------------------------------------------------------------------------- 1 | {"pid": "103291", "source": "msmarco", "meta_1": "category_0", "meta_2": 1, "has_devset_query": false} 2 | -------------------------------------------------------------------------------- /tests/test_data/msmarco.10/110580.txt: -------------------------------------------------------------------------------- 1 | Toobtaintext#messagedetails#theAccount#Holder#must#completethis#consent#form,#signit#andhaveit#notarizedbefore. returning#it#to#Sprint.!We!can!provide!text!message!details!(dates,!times!and!phone!numbers),!butnot$the$content!of!your! 
2 | -------------------------------------------------------------------------------- /tests/test_data/msmarco.10/110580.txt.meta.json: -------------------------------------------------------------------------------- 1 | {"pid": "110580", "source": "msmarco", "meta_1": "category_0", "meta_2": 0, "has_devset_query": false} 2 | -------------------------------------------------------------------------------- /tests/test_data/msmarco.10/117256.txt: -------------------------------------------------------------------------------- 1 | GBC International Bank is an FDIC insured institution located in Los Angeles, CA. It was founded in 1976 and has approximately $0.49 billion in assets. Customers can open an account at one of its 9 branches. 2 | -------------------------------------------------------------------------------- /tests/test_data/msmarco.10/117256.txt.meta.json: -------------------------------------------------------------------------------- 1 | {"pid": "117256", "source": "msmarco", "meta_1": "category_3", "meta_2": 1, "has_devset_query": false} 2 | -------------------------------------------------------------------------------- /tests/test_data/msmarco.10/16675.txt: -------------------------------------------------------------------------------- 1 | Well, i usually measure 20 inches above floor height. That is a standard i use, and for switches i measure 52 inches. for all the houses and buildings i have installed i a…pplied this. Standard heights on receptacles is 12 inches (300 mm) to center and on switches 48 inches (1200 mm) to center. 2 | -------------------------------------------------------------------------------- /tests/test_data/msmarco.10/16675.txt.meta.json: -------------------------------------------------------------------------------- 1 | {"pid": "16675", "source": "msmarco", "meta_1": "category_2", "meta_2": 3, "has_devset_query": false} 2 | -------------------------------------------------------------------------------- /tests/test_data/msmarco.10/22297.txt: -------------------------------------------------------------------------------- 1 | Wonderful Tonight is written by Eric Clapton. It was included on Clapton's 1977 album Slowhand and released as a single the following year. In 1988, Clapton appeared in the Nelson Mandela 70th Birthday Tribute concert as a guest guitarist for Dire Straits. The group became his backing musicians for a surprise performance of Wonderful Tonight during their set. 2 | -------------------------------------------------------------------------------- /tests/test_data/msmarco.10/22297.txt.meta.json: -------------------------------------------------------------------------------- 1 | {"pid": "22297", "source": "msmarco", "meta_1": "category_1", "meta_2": 2, "has_devset_query": true} 2 | -------------------------------------------------------------------------------- /tests/test_data/msmarco.10/35887.txt: -------------------------------------------------------------------------------- 1 | The Flu Is Contagious Most healthy adults may be able to infect other people beginning 1 day before symptoms develop and up to 5 to 7 days after becoming sick. Children may pass the virus for longer than 7 days. 
2 | -------------------------------------------------------------------------------- /tests/test_data/msmarco.10/35887.txt.meta.json: -------------------------------------------------------------------------------- 1 | {"pid": "35887", "source": "msmarco", "meta_1": "category_4", "meta_2": 0, "has_devset_query": true} 2 | -------------------------------------------------------------------------------- /tests/test_data/msmarco.10/61768.txt: -------------------------------------------------------------------------------- 1 | Definition of saturated - holding as much water or moisture as can be absorbed; thoroughly soaked, (of an organic molecule) containing the greatest possible Definition of saturated - holding as much water or moisture as can be absorbed; thoroughly soaked, (of an organic molecule) containing the greatest possible dictionary thesaurus 2 | -------------------------------------------------------------------------------- /tests/test_data/msmarco.10/61768.txt.meta.json: -------------------------------------------------------------------------------- 1 | {"pid": "61768", "source": "msmarco", "meta_1": "category_2", "meta_2": 4, "has_devset_query": false} 2 | -------------------------------------------------------------------------------- /tests/test_data/msmarco.10/79388.txt: -------------------------------------------------------------------------------- 1 | We welcome all our Booking.com guests to the Aruba Marriott Resort & Stellaris Casino. Come experience our first class service paired with our stellar resort on Palm Beach in Aruba. 2 | -------------------------------------------------------------------------------- /tests/test_data/msmarco.10/79388.txt.meta.json: -------------------------------------------------------------------------------- 1 | {"pid": "79388", "source": "msmarco", "meta_1": "category_0", "meta_2": 0, "has_devset_query": false} 2 | -------------------------------------------------------------------------------- /tests/test_data/msmarco.10/87243.txt: -------------------------------------------------------------------------------- 1 | Dear SS: According to Foodsafety.gov, uncooked poultry is safe in the refrigerator for 1-2 days, and safe in the freezer for 9 months (for pieces) and up to 12 months for whole chickens or turkey. Cooked poultry is safe refrigerated for 3-4 days. 
2 | -------------------------------------------------------------------------------- /tests/test_data/msmarco.10/87243.txt.meta.json: -------------------------------------------------------------------------------- 1 | {"pid": "87243", "source": "msmarco", "meta_1": "category_1", "meta_2": 0, "has_devset_query": true} 2 | -------------------------------------------------------------------------------- /tests/test_data/multiple_file_types/file00.txt: -------------------------------------------------------------------------------- 1 | Some text as a Textfile of file file00.txt 2 | -------------------------------------------------------------------------------- /tests/test_data/multiple_file_types/file00.txt.meta.json: -------------------------------------------------------------------------------- 1 | { 2 | "file_name_duplicate_check": "file00.txt", 3 | "source": "multiple file types" 4 | } 5 | -------------------------------------------------------------------------------- /tests/test_data/multiple_file_types/file01.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | Example 1 4 | 5 | -------------------------------------------------------------------------------- /tests/test_data/multiple_file_types/file01.xml.meta.json: -------------------------------------------------------------------------------- 1 | { 2 | "pid": "file01", 3 | "source": "multiple file types", 4 | "meta_1": "category_3", 5 | "meta_2": 1, 6 | "has_devset_query": false 7 | } 8 | -------------------------------------------------------------------------------- /tests/test_data/multiple_file_types/file02.pptx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/deepset-ai/deepset-cloud-sdk/3dca35534085225f60b2b18d50b8310c1f006099/tests/test_data/multiple_file_types/file02.pptx -------------------------------------------------------------------------------- /tests/test_data/multiple_file_types/file02.pptx.meta.json: -------------------------------------------------------------------------------- 1 | { 2 | "pid": "file02", 3 | "source": "multiple file types", 4 | "meta_1": "category_3", 5 | "meta_2": 1, 6 | "has_devset_query": false 7 | } 8 | -------------------------------------------------------------------------------- /tests/test_data/multiple_file_types/file03.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/deepset-ai/deepset-cloud-sdk/3dca35534085225f60b2b18d50b8310c1f006099/tests/test_data/multiple_file_types/file03.xlsx -------------------------------------------------------------------------------- /tests/test_data/multiple_file_types/file03.xlsx.meta.json: -------------------------------------------------------------------------------- 1 | { 2 | "pid": "file03", 3 | "source": "multiple file types", 4 | "meta_1": "category_3", 5 | "meta_2": 1, 6 | "has_devset_query": false 7 | } 8 | -------------------------------------------------------------------------------- /tests/test_data/multiple_file_types/file04.json: -------------------------------------------------------------------------------- 1 | { 2 | "example": "This is an example of a JSON object." 
3 | } 4 | -------------------------------------------------------------------------------- /tests/test_data/multiple_file_types/file04.json.meta.json: -------------------------------------------------------------------------------- 1 | { 2 | "pid": "file04", 3 | "source": "multiple file types", 4 | "meta_1": "category_3", 5 | "meta_2": 1, 6 | "has_devset_query": false 7 | } 8 | -------------------------------------------------------------------------------- /tests/test_data/multiple_file_types/file05.docx: -------------------------------------------------------------------------------- 1 | This is text 2 | -------------------------------------------------------------------------------- /tests/test_data/multiple_file_types/file05.docx.meta.json: -------------------------------------------------------------------------------- 1 | { 2 | "pid": "file05", 3 | "source": "multiple file types", 4 | "meta_1": "category_3", 5 | "meta_2": 1, 6 | "has_devset_query": false 7 | } 8 | -------------------------------------------------------------------------------- /tests/test_data/multiple_file_types/file06.md: -------------------------------------------------------------------------------- 1 | # Example 2 | -------------------------------------------------------------------------------- /tests/test_data/multiple_file_types/file06.md.meta.json: -------------------------------------------------------------------------------- 1 | { 2 | "pid": "file06", 3 | "source": "multiple file types", 4 | "meta_1": "category_3", 5 | "meta_2": 1, 6 | "has_devset_query": false 7 | } 8 | -------------------------------------------------------------------------------- /tests/test_data/multiple_file_types/file07.csv: -------------------------------------------------------------------------------- 1 | example 1,example 2 2 | -------------------------------------------------------------------------------- /tests/test_data/multiple_file_types/file07.csv.meta.json: -------------------------------------------------------------------------------- 1 | { 2 | "pid": "file07", 3 | "source": "multiple file types", 4 | "meta_1": "category_3", 5 | "meta_2": 1, 6 | "has_devset_query": false 7 | } 8 | -------------------------------------------------------------------------------- /tests/test_data/multiple_file_types/file08.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/deepset-ai/deepset-cloud-sdk/3dca35534085225f60b2b18d50b8310c1f006099/tests/test_data/multiple_file_types/file08.pdf -------------------------------------------------------------------------------- /tests/test_data/multiple_file_types/file08.pdf.meta.json: -------------------------------------------------------------------------------- 1 | { 2 | "pid": "file08", 3 | "source": "multiple file types", 4 | "meta_1": "category_3", 5 | "meta_2": 1, 6 | "has_devset_query": false 7 | } 8 | -------------------------------------------------------------------------------- /tests/test_data/multiple_file_types/file09.html: -------------------------------------------------------------------------------- 1 |
example 1
2 | -------------------------------------------------------------------------------- /tests/test_data/multiple_file_types/file09.html.meta.json: -------------------------------------------------------------------------------- 1 | { 2 | "pid": "file09", 3 | "source": "multiple file types", 4 | "meta_1": "category_3", 5 | "meta_2": 1, 6 | "has_devset_query": false 7 | } 8 | -------------------------------------------------------------------------------- /tests/test_data/multiple_file_types_caps/File00.txt: -------------------------------------------------------------------------------- 1 | Some text as a Textfile of file File00.txt with capital letters and some small letters. 2 | -------------------------------------------------------------------------------- /tests/test_data/multiple_file_types_caps/File00.txt.meta.json: -------------------------------------------------------------------------------- 1 | { 2 | "file_name_duplicate_check": "File00.txt", 3 | "source": "multiple file types" 4 | } 5 | -------------------------------------------------------------------------------- /tests/unit/utils/test_datetime_utils.py: -------------------------------------------------------------------------------- 1 | from datetime import datetime, timezone 2 | 3 | import pytest 4 | 5 | from deepset_cloud_sdk._utils.datetime import from_isoformat 6 | 7 | 8 | class TestFromIsoformat: 9 | @pytest.mark.parametrize( 10 | "input", 11 | [ 12 | "2024-02-03T08:10:10.335884Z", 13 | "2024-02-03T08:10:10.335884+00:00", 14 | ], 15 | ) 16 | def test_fromisoformat(self, input: str) -> None: 17 | assert from_isoformat(input) == datetime(2024, 2, 3, 8, 10, 10, 335884).replace(tzinfo=timezone.utc) 18 | -------------------------------------------------------------------------------- /tests/unit/utils/test_load_configuration.py: -------------------------------------------------------------------------------- 1 | import os 2 | from pathlib import Path 3 | from typing import Generator 4 | from unittest.mock import Mock 5 | 6 | import pytest 7 | 8 | from deepset_cloud_sdk._api.config import load_environment 9 | 10 | 11 | class TestLoadEnvironment: 12 | """Test the environment loading functionality.""" 13 | 14 | @pytest.fixture(autouse=True) 15 | def clean_env(self) -> Generator[None, None, None]: 16 | """Fixture to provide a clean environment for tests.""" 17 | original_environ = os.environ.copy() 18 | os.environ.clear() 19 | yield 20 | os.environ.clear() 21 | os.environ.update(original_environ) 22 | 23 | def test_load_local_env_only(self, monkeypatch: pytest.MonkeyPatch, tmp_path: Path) -> None: 24 | """Test loading only local .env file.""" 25 | # Create a temporary local .env file 26 | local_env = tmp_path / ".env" 27 | local_env.write_text("API_KEY=local_key\nAPI_URL=local_url\nDEFAULT_WORKSPACE_NAME=local_workspace") 28 | 29 | monkeypatch.setattr("deepset_cloud_sdk._api.config.Path.cwd", Mock(return_value=tmp_path)) 30 | # Mock Path.is_file to return True for local .env and False for global 31 | monkeypatch.setattr(Path, "is_file", lambda self: self == local_env) 32 | 33 | # Mock load_dotenv to actually load the variables into the environment 34 | def mock_load_dotenv(path: Path, override: bool = True) -> bool: 35 | os.environ["API_KEY"] = "local_key" 36 | os.environ["API_URL"] = "local_url" 37 | os.environ["DEFAULT_WORKSPACE_NAME"] = "local_workspace" 38 | return True 39 | 40 | monkeypatch.setattr("deepset_cloud_sdk._api.config.load_dotenv", mock_load_dotenv) 41 | 42 | assert load_environment() 43 | 44 | def 
test_load_global_env_only(self, monkeypatch: pytest.MonkeyPatch, tmp_path: Path) -> None: 45 | """Test loading only global .env file.""" 46 | # Create a temporary global .env file 47 | global_env_dir = tmp_path / "global_config" 48 | global_env_dir.mkdir() 49 | global_env = global_env_dir / ".env" 50 | global_env.write_text("API_KEY=global_key\nAPI_URL=global_url\nDEFAULT_WORKSPACE_NAME=global_workspace") 51 | 52 | monkeypatch.setattr("deepset_cloud_sdk._api.config.Path.cwd", Mock(return_value=tmp_path)) 53 | # point mocked global path to global ENV_FILE_PATH definition 54 | monkeypatch.setattr("deepset_cloud_sdk._api.config.ENV_FILE_PATH", global_env) 55 | 56 | # Mock load_dotenv to actually load the variables into the environment 57 | def mock_load_dotenv(path: Path, override: bool = True) -> bool: 58 | os.environ["API_KEY"] = "global_key" 59 | os.environ["API_URL"] = "global_url" 60 | os.environ["DEFAULT_WORKSPACE_NAME"] = "global_workspace" 61 | return True 62 | 63 | monkeypatch.setattr("deepset_cloud_sdk._api.config.load_dotenv", mock_load_dotenv) 64 | 65 | assert load_environment() 66 | 67 | def test_load_both_env_files(self, monkeypatch: pytest.MonkeyPatch, tmp_path: Path) -> None: 68 | """Test loading both local and global .env files.""" 69 | # Create temporary local and global .env files 70 | local_env = tmp_path / ".env" 71 | local_env.write_text("API_KEY=local_key\nAPI_URL=local_url\nDEFAULT_WORKSPACE_NAME=local_workspace") 72 | global_env = tmp_path / "global.env" 73 | global_env.write_text("API_KEY=global_key\nAPI_URL=global_url\nDEFAULT_WORKSPACE_NAME=global_workspace") 74 | 75 | monkeypatch.setattr("deepset_cloud_sdk._api.config.Path.cwd", Mock(return_value=tmp_path)) 76 | monkeypatch.setattr(Path, "is_file", Mock(return_value=True)) 77 | monkeypatch.setattr("deepset_cloud_sdk._api.config.ENV_FILE_PATH", global_env) 78 | 79 | assert load_environment() 80 | assert os.environ["API_KEY"] == "local_key" 81 | assert os.environ["API_URL"] == "local_url" 82 | assert os.environ["DEFAULT_WORKSPACE_NAME"] == "local_workspace" 83 | 84 | def test_global_env_fills_missing_variables(self, monkeypatch: pytest.MonkeyPatch, tmp_path: Path) -> None: 85 | """Test that global .env variables are available when not defined in local .env.""" 86 | # Create local .env with only API_KEY 87 | local_env = tmp_path / ".env" 88 | local_env.write_text("API_KEY=local_key") 89 | 90 | # Create global .env with both API_KEY and API_URL 91 | global_env_dir = tmp_path / "global_config" 92 | global_env_dir.mkdir() 93 | global_env = global_env_dir / ".env" 94 | global_env.write_text("API_KEY=global_key\nAPI_URL=global_url\nDEFAULT_WORKSPACE_NAME=global_workspace") 95 | 96 | monkeypatch.setattr("deepset_cloud_sdk._api.config.Path.cwd", Mock(return_value=tmp_path)) 97 | monkeypatch.setattr("deepset_cloud_sdk._api.config.ENV_FILE_PATH", global_env) 98 | 99 | # Mock is_file to return True for both files 100 | monkeypatch.setattr(Path, "is_file", lambda self: self in [local_env, global_env]) 101 | 102 | assert load_environment() 103 | # Local API_KEY should take precedence 104 | assert os.environ["API_KEY"] == "local_key" 105 | # Global API_URL should be available 106 | assert os.environ["API_URL"] == "global_url" 107 | # Global DEFAULT_WORKSPACE_NAME should be available 108 | assert os.environ["DEFAULT_WORKSPACE_NAME"] == "global_workspace" 109 | 110 | def test_pre_existing_env_vars_take_precedence(self, monkeypatch: pytest.MonkeyPatch, tmp_path: Path) -> None: 111 | """Test that pre-existing environment variables 
take precedence over .env files.""" 112 | # Create local .env with API_KEY and API_URL 113 | local_env = tmp_path / ".env" 114 | local_env.write_text("API_KEY=local_key\nAPI_URL=local_url\nDEFAULT_WORKSPACE_NAME=local_workspace") 115 | 116 | # Create global .env with different values 117 | global_env_dir = tmp_path / "global_config" 118 | global_env_dir.mkdir() 119 | global_env = global_env_dir / ".env" 120 | global_env.write_text("API_KEY=global_key\nAPI_URL=global_url\nDEFAULT_WORKSPACE_NAME=global_workspace") 121 | 122 | # Set pre-existing environment variables 123 | os.environ["API_KEY"] = "pre_existing_key" 124 | os.environ["API_URL"] = "pre_existing_url" 125 | os.environ["DEFAULT_WORKSPACE_NAME"] = "pre_existing_workspace" 126 | 127 | monkeypatch.setattr("deepset_cloud_sdk._api.config.Path.cwd", Mock(return_value=tmp_path)) 128 | monkeypatch.setattr("deepset_cloud_sdk._api.config.ENV_FILE_PATH", global_env) 129 | 130 | # Mock is_file to return True for both files 131 | monkeypatch.setattr(Path, "is_file", Mock(return_value=True)) 132 | 133 | assert load_environment() 134 | # Pre-existing values should take precedence 135 | assert os.environ["API_KEY"] == "pre_existing_key" 136 | assert os.environ["API_URL"] == "pre_existing_url" 137 | assert os.environ["DEFAULT_WORKSPACE_NAME"] == "pre_existing_workspace" 138 | 139 | def test_no_env_files_with_warnings(self, monkeypatch: pytest.MonkeyPatch, tmp_path: Path) -> None: 140 | """Test when no .env files exist and show_warnings=True.""" 141 | monkeypatch.setattr("deepset_cloud_sdk._api.config.Path.cwd", Mock(return_value=tmp_path)) 142 | monkeypatch.setattr(Path, "is_file", Mock(return_value=False)) 143 | mocked_load_dotenv = Mock() 144 | monkeypatch.setattr("deepset_cloud_sdk._api.config.load_dotenv", mocked_load_dotenv) 145 | 146 | assert not load_environment() 147 | 148 | # Mock the logger to verify it's called 149 | mock_logger = Mock() 150 | monkeypatch.setattr("deepset_cloud_sdk._api.config.logger", mock_logger) 151 | 152 | result = load_environment(show_warnings=True) 153 | 154 | assert result is False 155 | mock_logger.warning.assert_called_once() 156 | warning_call = mock_logger.warning.call_args[0][0] 157 | assert "No .env files found" in warning_call 158 | 159 | def test_no_env_files_in_silent_mode(self, monkeypatch: pytest.MonkeyPatch, tmp_path: Path) -> None: 160 | """Test that no warnings are logged when show_warnings=False.""" 161 | monkeypatch.setattr("deepset_cloud_sdk._api.config.Path.cwd", Mock(return_value=tmp_path)) 162 | monkeypatch.setattr(Path, "is_file", Mock(return_value=False)) 163 | mocked_load_dotenv = Mock() 164 | monkeypatch.setattr("deepset_cloud_sdk._api.config.load_dotenv", mocked_load_dotenv) 165 | 166 | # Mock the logger to verify it's not called 167 | mock_logger = Mock() 168 | monkeypatch.setattr("deepset_cloud_sdk._api.config.logger", mock_logger) 169 | 170 | result = load_environment(show_warnings=False) 171 | 172 | assert result is True 173 | mock_logger.warning.assert_not_called() 174 | assert mocked_load_dotenv.call_count == 0 175 | 176 | def test_missing_vars_no_warning_in_silent_mode(self, monkeypatch: pytest.MonkeyPatch, tmp_path: Path) -> None: 177 | """Test that missing variables warning is NOT logged when show_warnings=False.""" 178 | # Create a temporary local .env file with only API_KEY 179 | local_env = tmp_path / ".env" 180 | local_env.write_text("API_KEY=test_key") 181 | 182 | monkeypatch.setattr("deepset_cloud_sdk._api.config.Path.cwd", Mock(return_value=tmp_path)) 183 | 
monkeypatch.setattr(Path, "is_file", lambda self: self == local_env) 184 | 185 | # Mock load_dotenv to actually load the variables into the environment 186 | def mock_load_dotenv(path: Path, override: bool = True) -> bool: 187 | os.environ["API_KEY"] = "test_key" 188 | return True 189 | 190 | monkeypatch.setattr("deepset_cloud_sdk._api.config.load_dotenv", mock_load_dotenv) 191 | mock_logger = Mock() 192 | monkeypatch.setattr("deepset_cloud_sdk._api.config.logger", mock_logger) 193 | 194 | result = load_environment(show_warnings=False) 195 | 196 | assert result is True 197 | mock_logger.warning.assert_not_called() 198 | 199 | @pytest.mark.parametrize( 200 | "missing_var", 201 | [ 202 | "API_KEY=global_key\nAPI_URL=global_url", 203 | "API_KEY=global_key\nDEFAULT_WORKSPACE_NAME=global_workspace", 204 | "API_URL=global_url\nDEFAULT_WORKSPACE_NAME=global_workspace", 205 | ], 206 | ) 207 | def test_missing_required_variables_with_warnings( 208 | self, monkeypatch: pytest.MonkeyPatch, tmp_path: Path, missing_var: str 209 | ) -> None: 210 | """Test when required environment variables are missing and show_warnings=True.""" 211 | local_env = tmp_path / ".env" 212 | local_env.write_text(missing_var) 213 | 214 | monkeypatch.setattr("deepset_cloud_sdk._api.config.Path.cwd", Mock(return_value=tmp_path)) 215 | monkeypatch.setattr(Path, "is_file", lambda self: self == local_env) 216 | 217 | # Mock load_dotenv to actually load the variables into the environment 218 | def mock_load_dotenv(path: Path, override: bool = True) -> bool: 219 | for line in missing_var.split("\n"): 220 | key, value = line.split("=") 221 | os.environ[key] = value 222 | return True 223 | 224 | monkeypatch.setattr("deepset_cloud_sdk._api.config.load_dotenv", mock_load_dotenv) 225 | 226 | mock_logger = Mock() 227 | monkeypatch.setattr("deepset_cloud_sdk._api.config.logger", mock_logger) 228 | 229 | result = load_environment(show_warnings=True) 230 | 231 | assert result is False 232 | assert mock_logger.warning.call_count == 1 233 | warning_call = mock_logger.warning.call_args[0][0] 234 | assert "Missing required environment variables" in warning_call 235 | -------------------------------------------------------------------------------- /tests/unit/workflows/async_client/test_async_workflow_files.py: -------------------------------------------------------------------------------- 1 | import datetime 2 | from pathlib import Path 3 | from typing import Any, AsyncGenerator, List 4 | from unittest.mock import AsyncMock 5 | from uuid import UUID 6 | 7 | import pytest 8 | from _pytest.monkeypatch import MonkeyPatch 9 | from sniffio import AsyncLibraryNotFoundError 10 | 11 | from deepset_cloud_sdk._api.config import DEFAULT_WORKSPACE_NAME 12 | from deepset_cloud_sdk._api.files import File 13 | from deepset_cloud_sdk._api.upload_sessions import ( 14 | UploadSessionDetail, 15 | UploadSessionIngestionStatus, 16 | UploadSessionStatus, 17 | UploadSessionStatusEnum, 18 | UploadSessionWriteModeEnum, 19 | WriteMode, 20 | ) 21 | from deepset_cloud_sdk._service.files_service import FilesService 22 | from deepset_cloud_sdk.models import DeepsetCloudFile, UserInfo 23 | from deepset_cloud_sdk.workflows.async_client.files import ( 24 | download, 25 | get_upload_session, 26 | list_files, 27 | list_upload_sessions, 28 | upload, 29 | upload_texts, 30 | ) 31 | 32 | 33 | @pytest.mark.asyncio 34 | class TestUploadFiles: 35 | async def test_upload_show_progress(self, monkeypatch: MonkeyPatch) -> None: 36 | paths = [Path("./tests/data/example.txt")] 37 | 
mocked_preprocess = AsyncMock(return_value=paths) 38 | mocked_upload_file_paths = AsyncMock(return_value=None) 39 | monkeypatch.setattr(FilesService, "_preprocess_paths", mocked_preprocess) 40 | monkeypatch.setattr(FilesService, "upload_file_paths", mocked_upload_file_paths) 41 | 42 | await upload(paths=paths, show_progress=True) 43 | 44 | assert mocked_preprocess.call_args.kwargs.get("spinner") is not None 45 | 46 | async def test_upload_dont_show_progress(self, monkeypatch: MonkeyPatch) -> None: 47 | paths = [Path("./tests/data/example.txt")] 48 | mocked_preprocess = AsyncMock(return_value=paths) 49 | mocked_upload_file_paths = AsyncMock(return_value=None) 50 | monkeypatch.setattr(FilesService, "_preprocess_paths", mocked_preprocess) 51 | monkeypatch.setattr(FilesService, "upload_file_paths", mocked_upload_file_paths) 52 | 53 | await upload(paths=paths, show_progress=False) 54 | 55 | assert mocked_preprocess.call_args.kwargs.get("spinner") is None 56 | 57 | async def test_upload(self, monkeypatch: MonkeyPatch) -> None: 58 | mocked_upload = AsyncMock(return_value=None) 59 | 60 | monkeypatch.setattr(FilesService, "upload", mocked_upload) 61 | await upload(paths=[Path("./tests/data/upload_folder")]) 62 | 63 | mocked_upload.assert_called_once_with( 64 | workspace_name=DEFAULT_WORKSPACE_NAME, 65 | paths=[Path("./tests/data/upload_folder")], 66 | write_mode=WriteMode.KEEP, 67 | blocking=True, 68 | timeout_s=None, 69 | show_progress=True, 70 | recursive=False, 71 | desired_file_types=None, 72 | enable_parallel_processing=False, 73 | ) 74 | 75 | async def test_upload_with_timeout(self, monkeypatch: MonkeyPatch) -> None: 76 | mocked_upload = AsyncMock(return_value=None) 77 | 78 | monkeypatch.setattr(FilesService, "upload", mocked_upload) 79 | await upload(paths=[Path("./tests/data/upload_folder")], timeout_s=123) 80 | 81 | mocked_upload.assert_called_once_with( 82 | workspace_name=DEFAULT_WORKSPACE_NAME, 83 | paths=[Path("./tests/data/upload_folder")], 84 | write_mode=WriteMode.KEEP, 85 | blocking=True, 86 | timeout_s=123, 87 | show_progress=True, 88 | recursive=False, 89 | desired_file_types=None, 90 | enable_parallel_processing=False, 91 | ) 92 | 93 | async def test_upload_texts(self, monkeypatch: MonkeyPatch) -> None: 94 | mocked_upload_texts = AsyncMock(return_value=None) 95 | monkeypatch.setattr(FilesService, "upload_in_memory", mocked_upload_texts) 96 | files = [ 97 | DeepsetCloudFile( 98 | name="test_file.txt", 99 | text="test content", 100 | meta={"test": "test"}, 101 | ) 102 | ] 103 | await upload_texts(files=files) 104 | 105 | mocked_upload_texts.assert_called_once_with( 106 | workspace_name=DEFAULT_WORKSPACE_NAME, 107 | files=files, 108 | write_mode=WriteMode.KEEP, 109 | blocking=True, 110 | timeout_s=None, 111 | show_progress=True, 112 | enable_parallel_processing=False, 113 | ) 114 | 115 | 116 | @pytest.mark.asyncio 117 | class TestDownloadFiles: 118 | async def test_download_files(self, monkeypatch: MonkeyPatch) -> None: 119 | mocked_download = AsyncMock(return_value=None) 120 | monkeypatch.setattr(FilesService, "download", mocked_download) 121 | await download( 122 | workspace_name="my_workspace", 123 | name="test_file.txt", 124 | odata_filter="test", 125 | batch_size=100, 126 | timeout_s=100, 127 | ) 128 | mocked_download.assert_called_once_with( 129 | workspace_name="my_workspace", 130 | file_dir=None, 131 | name="test_file.txt", 132 | odata_filter="test", 133 | include_meta=True, 134 | batch_size=100, 135 | show_progress=True, 136 | timeout_s=100, 137 | ) 138 | 139 | 140 | 
@pytest.mark.asyncio 141 | class TestListFiles: 142 | async def test_list_files(self, monkeypatch: MonkeyPatch) -> None: 143 | async def mocked_list_all( 144 | self: Any, 145 | *args: Any, 146 | **kwargs: Any, 147 | ) -> AsyncGenerator[List[File], None]: 148 | yield [ 149 | File( 150 | file_id=UUID("cd16435f-f6eb-423f-bf6f-994dc8a36a10"), 151 | url="/api/v1/workspaces/search tests/files/cd16435f-f6eb-423f-bf6f-994dc8a36a10", 152 | name="silly_things_1.txt", 153 | size=611, 154 | meta={}, 155 | created_at=datetime.datetime.fromisoformat("2022-06-21T16:40:00.634653+00:00"), 156 | ) 157 | ] 158 | 159 | monkeypatch.setattr(FilesService, "list_all", mocked_list_all) 160 | async for file_batch in list_files( 161 | workspace_name="my_workspace", 162 | name="test_file.txt", 163 | odata_filter="test", 164 | batch_size=100, 165 | timeout_s=100, 166 | ): 167 | assert file_batch == [ 168 | File( 169 | file_id=UUID("cd16435f-f6eb-423f-bf6f-994dc8a36a10"), 170 | url="/api/v1/workspaces/search tests/files/cd16435f-f6eb-423f-bf6f-994dc8a36a10", 171 | name="silly_things_1.txt", 172 | size=611, 173 | meta={}, 174 | created_at=datetime.datetime.fromisoformat("2022-06-21T16:40:00.634653+00:00"), 175 | ) 176 | ] 177 | 178 | async def test_list_files_silence_exit(self, monkeypatch: MonkeyPatch) -> None: 179 | async def mocked_list_all( 180 | self: Any, 181 | *args: Any, 182 | **kwargs: Any, 183 | ) -> AsyncGenerator[List[File], None]: 184 | raise AsyncLibraryNotFoundError() 185 | yield [] # for some reason monkeypatch requires to have the yield statement 186 | 187 | monkeypatch.setattr(FilesService, "list_all", mocked_list_all) 188 | async for file_batch in list_files( 189 | workspace_name="my_workspace", 190 | name="test_file.txt", 191 | odata_filter="test", 192 | batch_size=100, 193 | timeout_s=100, 194 | ): 195 | pass 196 | 197 | 198 | @pytest.mark.asyncio 199 | class TestListUploadSessions: 200 | async def test_list_upload_sessions(self, monkeypatch: MonkeyPatch) -> None: 201 | async def mocked_list_upload_sessions( 202 | self: Any, 203 | *args: Any, 204 | **kwargs: Any, 205 | ) -> AsyncGenerator[List[UploadSessionDetail], None]: 206 | yield [ 207 | UploadSessionDetail( 208 | session_id=UUID("cd16435f-f6eb-423f-bf6f-994dc8a36a10"), 209 | created_by=UserInfo( 210 | user_id=UUID("cd16435f-f6eb-423f-bf6f-994dc8a36a10"), 211 | given_name="Fake", 212 | family_name="User", 213 | ), 214 | expires_at=datetime.datetime.fromisoformat("2022-06-21T16:40:00.634653+00:00"), 215 | created_at=datetime.datetime.fromisoformat("2022-06-21T16:10:00.634653+00:00"), 216 | write_mode=UploadSessionWriteModeEnum.KEEP, 217 | status=UploadSessionStatusEnum.CLOSED, 218 | ) 219 | ] 220 | 221 | monkeypatch.setattr(FilesService, "list_upload_sessions", mocked_list_upload_sessions) 222 | async for upload_session_batch in list_upload_sessions( 223 | workspace_name="my_workspace", 224 | is_expired=False, 225 | batch_size=100, 226 | timeout_s=100, 227 | ): 228 | assert upload_session_batch == [ 229 | UploadSessionDetail( 230 | session_id=UUID("cd16435f-f6eb-423f-bf6f-994dc8a36a10"), 231 | created_by=UserInfo( 232 | user_id=UUID("cd16435f-f6eb-423f-bf6f-994dc8a36a10"), 233 | given_name="Fake", 234 | family_name="User", 235 | ), 236 | expires_at=datetime.datetime.fromisoformat("2022-06-21T16:40:00.634653+00:00"), 237 | created_at=datetime.datetime.fromisoformat("2022-06-21T16:10:00.634653+00:00"), 238 | write_mode=UploadSessionWriteModeEnum.KEEP, 239 | status=UploadSessionStatusEnum.CLOSED, 240 | ) 241 | ] 242 | 243 | async def 
test_list_files_silence_exit(self, monkeypatch: MonkeyPatch) -> None: 244 | async def mocked_list_upload_sessions( 245 | self: Any, 246 | *args: Any, 247 | **kwargs: Any, 248 | ) -> AsyncGenerator[List[File], None]: 249 | raise AsyncLibraryNotFoundError() 250 | yield [] # for some reason monkeypatch requires to have the yield statement 251 | 252 | monkeypatch.setattr(FilesService, "list_upload_sessions", mocked_list_upload_sessions) 253 | async for _ in list_upload_sessions( 254 | workspace_name="my_workspace", 255 | batch_size=100, 256 | timeout_s=100, 257 | ): 258 | pass 259 | 260 | 261 | @pytest.mark.asyncio 262 | class TestGetUploadSessionStatus: 263 | async def test_get_upload_session(self, monkeypatch: MonkeyPatch) -> None: 264 | mocked_upload_session = UploadSessionStatus( 265 | session_id=UUID("cd16435f-f6eb-423f-bf6f-994dc8a36a10"), 266 | expires_at=datetime.datetime.fromisoformat("2022-06-21T16:40:00.634653+00:00"), 267 | documentation_url="https://docs.deepset.ai", 268 | ingestion_status=UploadSessionIngestionStatus( 269 | failed_files=0, 270 | finished_files=1, 271 | ), 272 | ) 273 | 274 | async def mocked_get_upload_session( 275 | self: Any, 276 | *args: Any, 277 | **kwargs: Any, 278 | ) -> UploadSessionStatus: 279 | return mocked_upload_session 280 | 281 | monkeypatch.setattr(FilesService, "get_upload_session", mocked_get_upload_session) 282 | returned_upload_session = await get_upload_session(session_id=UUID("cd16435f-f6eb-423f-bf6f-994dc8a36a10")) 283 | assert returned_upload_session == mocked_upload_session 284 | -------------------------------------------------------------------------------- /tests/unit/workflows/sync_client/test_sync_workflow_files.py: -------------------------------------------------------------------------------- 1 | import datetime 2 | from pathlib import Path 3 | from typing import Any, AsyncGenerator, List 4 | from unittest.mock import AsyncMock, patch 5 | from uuid import UUID 6 | 7 | from deepset_cloud_sdk._api.config import DEFAULT_WORKSPACE_NAME 8 | from deepset_cloud_sdk._api.files import File 9 | from deepset_cloud_sdk._api.upload_sessions import ( 10 | UploadSessionDetail, 11 | UploadSessionIngestionStatus, 12 | UploadSessionStatus, 13 | UploadSessionStatusEnum, 14 | UploadSessionWriteModeEnum, 15 | WriteMode, 16 | ) 17 | from deepset_cloud_sdk.models import DeepsetCloudFile, UserInfo 18 | from deepset_cloud_sdk.workflows.sync_client.files import ( 19 | download, 20 | get_upload_session, 21 | list_files, 22 | list_upload_sessions, 23 | upload, 24 | upload_texts, 25 | ) 26 | 27 | 28 | @patch("deepset_cloud_sdk.workflows.sync_client.files.async_upload") 29 | def test_upload_folder(async_upload_mock: AsyncMock) -> None: 30 | upload(paths=[Path("./tests/data/upload_folder")], enable_parallel_processing=True) 31 | async_upload_mock.assert_called_once_with( 32 | paths=[Path("./tests/data/upload_folder")], 33 | api_key=None, 34 | api_url=None, 35 | workspace_name=DEFAULT_WORKSPACE_NAME, 36 | write_mode=WriteMode.KEEP, 37 | blocking=True, 38 | timeout_s=None, 39 | show_progress=True, 40 | recursive=False, 41 | desired_file_types=None, 42 | enable_parallel_processing=True, 43 | safe_mode=False, 44 | ) 45 | 46 | 47 | @patch("deepset_cloud_sdk.workflows.sync_client.files.async_upload") 48 | def test_upload_folder_safe_mode(async_upload_mock: AsyncMock) -> None: 49 | upload(paths=[Path("./tests/data/upload_folder")], enable_parallel_processing=True, safe_mode=True) 50 | async_upload_mock.assert_called_once_with( 51 | 
paths=[Path("./tests/data/upload_folder")], 52 | api_key=None, 53 | api_url=None, 54 | workspace_name=DEFAULT_WORKSPACE_NAME, 55 | write_mode=WriteMode.KEEP, 56 | blocking=True, 57 | timeout_s=None, 58 | show_progress=True, 59 | recursive=False, 60 | desired_file_types=None, 61 | enable_parallel_processing=True, 62 | safe_mode=True, 63 | ) 64 | 65 | 66 | @patch("deepset_cloud_sdk.workflows.sync_client.files.async_upload_texts") 67 | def test_upload_texts(async_upload_texts_mock: AsyncMock) -> None: 68 | files = [ 69 | DeepsetCloudFile( 70 | name="test_file.txt", 71 | text="test content", 72 | meta={"test": "test"}, 73 | ) 74 | ] 75 | upload_texts(files=files, enable_parallel_processing=True) 76 | async_upload_texts_mock.assert_called_once_with( 77 | files=files, 78 | api_key=None, 79 | api_url=None, 80 | workspace_name=DEFAULT_WORKSPACE_NAME, 81 | write_mode=WriteMode.KEEP, 82 | blocking=True, 83 | timeout_s=None, 84 | show_progress=True, 85 | enable_parallel_processing=True, 86 | ) 87 | 88 | 89 | @patch("deepset_cloud_sdk.workflows.sync_client.files.async_upload_texts") 90 | def test_upload_texts_with_timeout(async_upload_texts_mock: AsyncMock) -> None: 91 | files = [ 92 | DeepsetCloudFile( 93 | name="test_file.txt", 94 | text="test content", 95 | meta={"test": "test"}, 96 | ) 97 | ] 98 | upload_texts(files=files, timeout_s=123) 99 | async_upload_texts_mock.assert_called_once_with( 100 | files=files, 101 | api_key=None, 102 | api_url=None, 103 | workspace_name=DEFAULT_WORKSPACE_NAME, 104 | write_mode=WriteMode.KEEP, 105 | blocking=True, 106 | timeout_s=123, 107 | show_progress=True, 108 | enable_parallel_processing=False, 109 | ) 110 | 111 | 112 | def test_list_files() -> None: 113 | async def mocked_async_list_files(*args: Any, **kwargs: Any) -> AsyncGenerator[List[File], None]: 114 | yield [ 115 | File( 116 | file_id=UUID("cd16435f-f6eb-423f-bf6f-994dc8a36a10"), 117 | url="/api/v1/workspaces/search tests/files/cd16435f-f6eb-423f-bf6f-994dc8a36a10", 118 | name="silly_things_1.txt", 119 | size=611, 120 | meta={}, 121 | created_at=datetime.datetime.fromisoformat("2022-06-21T16:40:00.634653+00:00"), 122 | ) 123 | ] 124 | 125 | with patch("deepset_cloud_sdk.workflows.sync_client.files.async_list_files", new=mocked_async_list_files): 126 | returned_files = list( 127 | list_files( 128 | workspace_name="my_workspace", 129 | name="test_file.txt", 130 | odata_filter="test", 131 | batch_size=100, 132 | timeout_s=100, 133 | ) 134 | ) 135 | assert len(returned_files) == 1 136 | assert returned_files[0] == [ 137 | File( 138 | file_id=UUID("cd16435f-f6eb-423f-bf6f-994dc8a36a10"), 139 | url="/api/v1/workspaces/search tests/files/cd16435f-f6eb-423f-bf6f-994dc8a36a10", 140 | name="silly_things_1.txt", 141 | size=611, 142 | meta={}, 143 | created_at=datetime.datetime.fromisoformat("2022-06-21T16:40:00.634653+00:00"), 144 | ) 145 | ] 146 | 147 | 148 | def test_download_files() -> None: 149 | mocked_async_download = AsyncMock() 150 | with patch("deepset_cloud_sdk.workflows.sync_client.files.async_download", new=mocked_async_download): 151 | download( 152 | workspace_name="my_workspace", 153 | name="test_file.txt", 154 | odata_filter="test", 155 | batch_size=100, 156 | timeout_s=100, 157 | ) 158 | mocked_async_download.assert_called_once_with( 159 | api_key=None, 160 | api_url=None, 161 | workspace_name="my_workspace", 162 | name="test_file.txt", 163 | odata_filter="test", 164 | file_dir=None, 165 | include_meta=True, 166 | batch_size=100, 167 | show_progress=True, 168 | timeout_s=100, 169 | safe_mode=False, 
170 | ) 171 | 172 | 173 | def test_list_upload_sessions() -> None: 174 | async def mocked_async_upload_sessions( 175 | *args: Any, **kwargs: Any 176 | ) -> AsyncGenerator[List[UploadSessionDetail], None]: 177 | yield [ 178 | UploadSessionDetail( 179 | session_id=UUID("cd16435f-f6eb-423f-bf6f-994dc8a36a10"), 180 | created_by=UserInfo( 181 | user_id=UUID("cd16435f-f6eb-423f-bf6f-994dc8a36a10"), 182 | given_name="Fake", 183 | family_name="User", 184 | ), 185 | expires_at=datetime.datetime.fromisoformat("2022-06-21T16:40:00.634653+00:00"), 186 | created_at=datetime.datetime.fromisoformat("2022-06-21T16:10:00.634653+00:00"), 187 | write_mode=UploadSessionWriteModeEnum.KEEP, 188 | status=UploadSessionStatusEnum.CLOSED, 189 | ) 190 | ] 191 | 192 | with patch( 193 | "deepset_cloud_sdk.workflows.sync_client.files.async_list_upload_sessions", new=mocked_async_upload_sessions 194 | ): 195 | returned_files = list( 196 | list_upload_sessions( 197 | workspace_name="my_workspace", 198 | is_expired=True, 199 | batch_size=100, 200 | timeout_s=100, 201 | ) 202 | ) 203 | assert len(returned_files) == 1 204 | assert returned_files[0] == [ 205 | UploadSessionDetail( 206 | session_id=UUID("cd16435f-f6eb-423f-bf6f-994dc8a36a10"), 207 | created_by=UserInfo( 208 | user_id=UUID("cd16435f-f6eb-423f-bf6f-994dc8a36a10"), 209 | given_name="Fake", 210 | family_name="User", 211 | ), 212 | expires_at=datetime.datetime.fromisoformat("2022-06-21T16:40:00.634653+00:00"), 213 | created_at=datetime.datetime.fromisoformat("2022-06-21T16:10:00.634653+00:00"), 214 | write_mode=UploadSessionWriteModeEnum.KEEP, 215 | status=UploadSessionStatusEnum.CLOSED, 216 | ) 217 | ] 218 | 219 | 220 | def test_get_upload_session() -> None: 221 | existing_upload_session = UploadSessionStatus( 222 | session_id=UUID("cd16435f-f6eb-423f-bf6f-994dc8a36a10"), 223 | expires_at=datetime.datetime.fromisoformat("2022-06-21T16:40:00.634653+00:00"), 224 | documentation_url="https://docs.deepset.ai", 225 | ingestion_status=UploadSessionIngestionStatus( 226 | failed_files=0, 227 | finished_files=1, 228 | ), 229 | ) 230 | 231 | async def mocked_async_get_upload_session(*args: Any, **kwargs: Any) -> UploadSessionStatus: 232 | return existing_upload_session 233 | 234 | with patch( 235 | "deepset_cloud_sdk.workflows.sync_client.files.async_get_upload_session", new=mocked_async_get_upload_session 236 | ): 237 | returned_upload_session = get_upload_session( 238 | workspace_name="my_workspace", 239 | session_id=UUID("cd16435f-f6eb-423f-bf6f-994dc8a36a10"), 240 | ) 241 | assert returned_upload_session == existing_upload_session 242 | -------------------------------------------------------------------------------- /tests/unit/workflows/sync_client/test_utils.py: -------------------------------------------------------------------------------- 1 | from asyncio import AbstractEventLoop 2 | from typing import AsyncIterator 3 | 4 | from deepset_cloud_sdk.workflows.sync_client.utils import iter_over_async 5 | 6 | 7 | def test_iter_over_async(event_loop: AbstractEventLoop) -> None: 8 | async def async_generator() -> AsyncIterator[int]: 9 | yield 1 10 | yield 2 11 | yield 3 12 | 13 | sync_generator = iter_over_async(async_generator(), event_loop) 14 | assert list(sync_generator) == [1, 2, 3] 15 | --------------------------------------------------------------------------------
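Note: the final test above exercises iter_over_async from deepset_cloud_sdk.workflows.sync_client.utils, whose source is not included in this dump. As a rough illustration only — a minimal sketch of how such a helper could behave, not the SDK's actual implementation (the sentinel-based loop and the exact signature are assumptions) — it might look roughly like this in Python:

import asyncio
from typing import AsyncIterator, Iterator, TypeVar

T = TypeVar("T")

_SENTINEL = object()


def iter_over_async(async_iter: AsyncIterator[T], loop: asyncio.AbstractEventLoop) -> Iterator[T]:
    """Hypothetical sketch: drive an async iterator on `loop`, yielding its items synchronously."""

    async def _next() -> object:
        # Fetch the next item, or return the sentinel once the async iterator is exhausted.
        try:
            return await async_iter.__anext__()
        except StopAsyncIteration:
            return _SENTINEL

    while True:
        item = loop.run_until_complete(_next())
        if item is _SENTINEL:
            break
        yield item

Used as in the test above: pass an async generator and an event loop, then consume the result like any ordinary synchronous generator (for example with list()).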