├── .gitattributes ├── .github ├── labeler.yml ├── pull_request_template.md └── workflows │ ├── cd.yml │ ├── ci.yml │ ├── force-docs-build.yml │ ├── pr.yml │ └── remote-ci.yml ├── .gitignore ├── .pre-commit-config.yaml ├── CHANGELOG.md ├── CONTRIBUTING.md ├── LICENSE ├── MANIFEST.in ├── Makefile ├── README.md ├── codecov.yml ├── datasets ├── credit.txt ├── image-image_example.csv ├── text-image_example.csv └── text-text_example.csv ├── docs ├── Makefile ├── _static │ ├── JCloud-dark.svg │ ├── JCloud-light.svg │ ├── Powered-by-Jina-Large-Basic.svg │ ├── banner.png │ ├── cas-dark.svg │ ├── cas-light.svg │ ├── docarray-dark.svg │ ├── docarray-light.svg │ ├── docbot.css │ ├── favicon.ico │ ├── favicon.png │ ├── finetuner+_dark.svg │ ├── finetuner+_light.svg │ ├── finetuner-client-journey.svg │ ├── finetuner-dark.svg │ ├── finetuner-light.svg │ ├── finetuner-logo-ani.svg │ ├── hub-dark.svg │ ├── hub-light.svg │ ├── logo-dark.svg │ ├── logo-light.svg │ ├── main.css │ ├── now-dark.svg │ ├── now-light.svg │ ├── search-dark.svg │ └── search-light.svg ├── _templates │ ├── page.html │ ├── sidebar │ │ ├── brand.html │ │ └── navigation.html │ └── template_ft_in_action.md ├── advanced-topics │ ├── advanced-losses-optimizers-and-poolers.md │ ├── budget.md │ ├── finetuner-executor.md │ ├── linear-probe.md │ ├── negative-mining.md │ └── using-callbacks.md ├── api-rst.rst ├── conf.py ├── get-started │ ├── how-it-works.md │ ├── installation.md │ └── pretrained.md ├── html_extra │ └── robots.txt ├── imgs │ ├── DocumentArray_plot_image_sprites.png │ ├── DocumentArray_summary.png │ ├── Document_display.png │ ├── Document_summary.png │ ├── SphereFace-training.png │ ├── batch-sampling.png │ ├── distributions-loss.png │ ├── metric-train.png │ ├── mining.png │ └── tailor.svg ├── index.md ├── make.bat ├── makedoc.sh ├── notebooks │ ├── data_synthesis.ipynb │ ├── data_synthesis.md │ ├── image_to_image.ipynb │ ├── image_to_image.md │ ├── image_to_image_arcface.ipynb │ ├── 
image_to_image_arcface.md │ ├── mesh_to_mesh.ipynb │ ├── mesh_to_mesh.md │ ├── multilingual_text_to_image.ipynb │ ├── multilingual_text_to_image.md │ ├── text_to_image.ipynb │ ├── text_to_image.md │ ├── text_to_text.ipynb │ └── text_to_text.md ├── requirements.txt └── walkthrough │ ├── basic-concepts.md │ ├── choose-backbone.md │ ├── create-training-data.md │ ├── index.md │ ├── inference.md │ ├── login.md │ ├── run-job.md │ └── save-model.md ├── finetuner ├── __init__.py ├── callback.py ├── client │ ├── __init__.py │ ├── base.py │ ├── client.py │ └── session.py ├── console.py ├── constants.py ├── data.py ├── excepts.py ├── experiment.py ├── finetuner.py ├── hubble.py ├── model.py ├── names.py └── run.py ├── pyproject.toml ├── setup.cfg ├── setup.py └── tests ├── __init__.py ├── conftest.py ├── constants.py ├── helper.py ├── integration ├── __init__.py ├── conftest.py ├── test_data.py ├── test_experiments.py ├── test_hf_models.py └── test_runs.py └── unit ├── __init__.py ├── conftest.py ├── mocks.py ├── resources ├── cube.off ├── dummy.csv └── lena.png ├── test___init__.py ├── test_client.py ├── test_data.py ├── test_experiment.py ├── test_finetuner.py ├── test_hubble.py └── test_run.py /.gitattributes: -------------------------------------------------------------------------------- 1 | # ignore ipynb line counts 2 | *.ipynb linguist-documentation -------------------------------------------------------------------------------- /.github/labeler.yml: -------------------------------------------------------------------------------- 1 | area/docs: 2 | - docs/**/* 3 | 4 | area/testing: 5 | - tests/**/* 6 | 7 | area/setup: 8 | - setup.py 9 | - requirements* 10 | - MANIFEST.in 11 | 12 | area/housekeeping: 13 | - .github/**/* 14 | - ./.gitignore 15 | - ./*.yaml 16 | - ./*.yml 17 | 18 | area/cicd: 19 | - .github/workflows/**/* 20 | 21 | area/docker: 22 | - Dockerfiles/**/* 23 | - ./.dockerignore 24 | 25 | area/core: 26 | - finetuner/**/* 27 | 28 | area/entrypoint: 29 | - 
finetuner/__init__.py 30 | 31 | area/client: 32 | - finetuner/client/**/* -------------------------------------------------------------------------------- /.github/pull_request_template.md: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | --- 5 | 6 | - [ ] This PR references an open issue 7 | - [ ] I have added a line about this change to CHANGELOG -------------------------------------------------------------------------------- /.github/workflows/cd.yml: -------------------------------------------------------------------------------- 1 | name: CD 2 | 3 | on: 4 | push: 5 | tags: 6 | - 'v*' 7 | 8 | env: 9 | HOST: ${{ secrets.FINETUNER_HOST }} 10 | JINA_AUTH_TOKEN: ${{ secrets.JINA_AUTH_TOKEN }} 11 | 12 | jobs: 13 | 14 | update-docs: 15 | name: Update docs 16 | runs-on: ubuntu-latest 17 | steps: 18 | - uses: benc-uk/workflow-dispatch@v1 19 | with: 20 | workflow: Manual Docs Build 21 | token: ${{ secrets.JINA_DEV_BOT }} 22 | inputs: '{ "release_token": "${{ env.release_token }}", "triggered_by": "TAG"}' 23 | env: 24 | release_token: ${{ secrets.FINETUNER_RELEASE_TOKEN }} 25 | 26 | release: 27 | runs-on: ubuntu-latest 28 | steps: 29 | - uses: actions/checkout@v3 30 | - name: Set up Python 31 | uses: actions/setup-python@v2 32 | with: 33 | python-version: 3.8 34 | - name: Release to PyPI 35 | env: 36 | TWINE_USERNAME: ${{ secrets.TWINE_USERNAME }} 37 | TWINE_PASSWORD: ${{ secrets.TWINE_PASSWORD }} 38 | JINA_SLACK_WEBHOOK: ${{ secrets.JINA_SLACK_WEBHOOK }} 39 | run: | 40 | pip install twine wheel 41 | python setup.py sdist 42 | twine upload dist/* 43 | -------------------------------------------------------------------------------- /.github/workflows/ci.yml: -------------------------------------------------------------------------------- 1 | name: CI 2 | 3 | on: [push] 4 | 5 | env: 6 | HOST: ${{ secrets.FINETUNER_HOST }} 7 | JINA_AUTH_TOKEN: ${{ secrets.JINA_AUTH_TOKEN }} 8 | HUGGING_FACE_HUB_TOKEN: ${{ 
secrets.HF_HUB_ACCESS_TOKEN }} 9 | 10 | jobs: 11 | 12 | check-codestyle: 13 | name: Check codestyle 14 | runs-on: ubuntu-latest 15 | steps: 16 | - uses: actions/checkout@v2 17 | - name: Set up Python 3.8 18 | uses: actions/setup-python@v2 19 | with: 20 | python-version: 3.8 21 | - name: Install requirements 22 | run: make init 23 | - name: Lint with flake8, black and isort 24 | run: make style 25 | 26 | run-tests: 27 | name: Run tests 28 | runs-on: ubuntu-latest 29 | steps: 30 | - uses: actions/checkout@v2 31 | - name: Set up Python 3.8 32 | uses: actions/setup-python@v2 33 | with: 34 | python-version: 3.8 35 | - name: Install requirements 36 | run: make init 37 | - name: Run tests 38 | run: make test 39 | 40 | check-versions: 41 | name: Check Python Versions 42 | runs-on: ubuntu-latest 43 | strategy: 44 | matrix: 45 | version: [3.8, 3.9, '3.10'] 46 | steps: 47 | - uses: actions/checkout@v2 48 | - name: Set up Python version 49 | uses: actions/setup-python@v2 50 | with: 51 | python-version: ${{ matrix.version }} 52 | - name: Test install 53 | run: make install 54 | 55 | # just for blocking the merge until all parallel core tests are successful 56 | success-all-test: 57 | needs: run-tests 58 | if: always() 59 | runs-on: ubuntu-latest 60 | steps: 61 | - uses: technote-space/workflow-conclusion-action@v2 62 | - name: Check Failure 63 | if: env.WORKFLOW_CONCLUSION == 'failure' 64 | run: exit 1 65 | - name: Success 66 | if: ${{ success() }} 67 | run: echo "All Done" 68 | -------------------------------------------------------------------------------- /.github/workflows/force-docs-build.yml: -------------------------------------------------------------------------------- 1 | name: Manual Docs Build 2 | 3 | on: 4 | workflow_dispatch: 5 | inputs: 6 | release_token: 7 | description: 'Your release token' 8 | required: true 9 | triggered_by: 10 | description: 'CD | TAG | MANUAL' 11 | required: false 12 | default: MANUAL 13 | 14 | jobs: 15 | token-check: 16 | runs-on:
ubuntu-latest 17 | steps: 18 | - run: echo "success!" 19 | # NOTE: the comparison must be the whole expression; quoting interpolated values as a string makes the condition always truthy 20 | if: github.event.inputs.release_token == env.release_token 21 | env: 22 | release_token: ${{ secrets.FINETUNER_RELEASE_TOKEN }} 23 | 24 | release-docs: 25 | needs: token-check 26 | runs-on: ubuntu-latest 27 | steps: 28 | - uses: actions/checkout@v2 29 | with: 30 | fetch-depth: 0 31 | - uses: actions/setup-python@v2 32 | with: 33 | python-version: 3.8 34 | - name: Build doc and push to gh-pages 35 | run: | 36 | git config --local user.email "dev-bot@jina.ai" 37 | git config --local user.name "Jina Dev Bot" 38 | pip install . 39 | mkdir gen-html 40 | cd docs 41 | pip install -r requirements.txt 42 | pip install -U furo 43 | export NUM_RELEASES=5 44 | bash makedoc.sh local-only 45 | make notebook 46 | cd ./build/dirhtml/ 47 | cp -r ./ ../../../gen-html 48 | cd - # back to ./docs 49 | cd .. 50 | git checkout -f gh-pages 51 | git rm -rf ./docs 52 | mkdir -p docs 53 | cd gen-html 54 | cp -r ./ ../docs 55 | cd ../docs 56 | ls -la 57 | touch .nojekyll 58 | cp 404/index.html 404.html 59 | sed -i 's/href="\.\./href="/' 404.html # fix asset urls that needs to be updated in 404.html 60 | echo finetuner.jina.ai > CNAME 61 | cd ..
61 | git status 62 | git add docs && git commit -m "chore(docs): update docs due to ${{github.event_name}} on ${{github.repository}}" 63 | git push --force origin gh-pages -------------------------------------------------------------------------------- /.github/workflows/pr.yml: -------------------------------------------------------------------------------- 1 | name: PR 2 | 3 | on: 4 | pull_request: 5 | 6 | jobs: 7 | 8 | assign-label-to-pr: 9 | name: Assign label to PR 10 | runs-on: ubuntu-latest 11 | if: ${{ !github.event.pull_request.head.repo.fork }} 12 | steps: 13 | - uses: codelytv/pr-size-labeler@v1 14 | with: 15 | GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} 16 | xs_max_size: '10' 17 | s_max_size: '100' 18 | m_max_size: '500' 19 | l_max_size: '1000' 20 | fail_if_xl: 'false' 21 | - uses: actions/labeler@v3 22 | with: 23 | repo-token: "${{ secrets.GITHUB_TOKEN }}" 24 | - id: docs_updated 25 | if: contains( github.event.pull_request.labels.*.name, 'area/docs') 26 | run: echo '::set-output name=docs::true' 27 | outputs: 28 | docs: ${{ steps.docs_updated.outputs.docs }} 29 | 30 | deploy-to-netlify: 31 | name: Deploy docs to netlify 32 | runs-on: ubuntu-latest 33 | needs: assign-label-to-pr 34 | if: ${{ needs.assign-label-to-pr.outputs.docs == 'true' }} 35 | steps: 36 | - run: | 37 | echo "BRANCH_NAME=${{ github.head_ref }}" >> $GITHUB_ENV 38 | - uses: actions/checkout@v2 39 | with: 40 | repository: jina-ai/finetuner 41 | ref: ${{ env.BRANCH_NAME }} 42 | - uses: actions/setup-python@v2 43 | with: 44 | python-version: 3.8 45 | - uses: actions/setup-node@v2 46 | with: 47 | node-version: '14' 48 | - name: Build and Deploy 49 | run: | 50 | npm i -g netlify-cli 51 | python -m pip install --upgrade pip 52 | pip install -r requirements.txt 53 | git fetch origin 54 | export NUM_RELEASES=2 # only 2 last tags to save build time 55 | bash makedoc.sh development 56 | netlify deploy --dir=_build/dirhtml --alias="ft-${{ env.BRANCH_NAME }}" --message="Deploying docs to ${{ 
env.BRANCH_NAME }} branch" 57 | env: 58 | NETLIFY_AUTH_TOKEN: ${{ secrets.NETLIFY_AUTH_TOKEN }} 59 | NETLIFY_SITE_ID: ${{ secrets.NETLIFY_SITE_ID }} 60 | working-directory: docs 61 | - name: Find the prev comment if exists 62 | uses: peter-evans/find-comment@v1 63 | id: fc 64 | with: 65 | issue-number: ${{ github.event.pull_request.number }} 66 | comment-author: 'github-actions[bot]' 67 | body-includes: 'Docs are deployed' 68 | - name: Delete comment if exists 69 | if: ${{ steps.fc.outputs.comment-id != 0 && !github.event.pull_request.head.repo.fork }} 70 | uses: actions/github-script@v3 71 | with: 72 | github-token: ${{ secrets.GITHUB_TOKEN }} 73 | script: | 74 | github.issues.deleteComment({ 75 | owner: context.repo.owner, 76 | repo: context.repo.repo, 77 | comment_id: ${{ steps.fc.outputs.comment-id }}, 78 | }) 79 | - name: Add or update comment 80 | uses: peter-evans/create-or-update-comment@v1 81 | with: 82 | issue-number: ${{ github.event.pull_request.number }} 83 | body: | 84 | :memo: Docs are deployed on https://ft-${{ env.BRANCH_NAME }}--jina-docs.netlify.app :tada: -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Initially taken from Github's Python gitignore file 2 | 3 | # Byte-compiled / optimized / DLL files 4 | __pycache__/ 5 | *.py[cod] 6 | *$py.class 7 | 8 | # C extensions 9 | *.so 10 | 11 | # Distribution / packaging 12 | .Python 13 | build/ 14 | develop-eggs/ 15 | dist/ 16 | downloads/ 17 | eggs/ 18 | .eggs/ 19 | lib/ 20 | lib64/ 21 | parts/ 22 | sdist/ 23 | var/ 24 | wheels/ 25 | *.egg-info/ 26 | .installed.cfg 27 | *.egg 28 | MANIFEST 29 | 30 | # PyInstaller 31 | # Usually these files are written by a python script from a template 32 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
33 | *.manifest 34 | *.spec 35 | 36 | # Installer logs 37 | pip-log.txt 38 | pip-delete-this-directory.txt 39 | 40 | # Unit test / coverage reports 41 | htmlcov/ 42 | .tox/ 43 | .nox/ 44 | .coverage 45 | .coverage.* 46 | .cache 47 | nosetests.xml 48 | coverage.xml 49 | *.cover 50 | .hypothesis/ 51 | .pytest_cache/ 52 | 53 | # Translations 54 | *.mo 55 | *.pot 56 | 57 | # Django stuff: 58 | *.log 59 | local_settings.py 60 | db.sqlite3 61 | 62 | # Flask stuff: 63 | instance/ 64 | .webassets-cache 65 | 66 | # Scrapy stuff: 67 | .scrapy 68 | 69 | # Sphinx documentation 70 | docs/_build/ 71 | docs/api/ 72 | 73 | # PyBuilder 74 | target/ 75 | 76 | # Jupyter Notebook 77 | .ipynb_checkpoints 78 | 79 | # IPython 80 | profile_default/ 81 | ipython_config.py 82 | 83 | # pyenv 84 | docs/.python-version 85 | 86 | # celery beat schedule file 87 | celerybeat-schedule 88 | 89 | # SageMath parsed files 90 | *.sage.py 91 | 92 | # Environments 93 | .venv 94 | env/ 95 | venv/ 96 | ENV/ 97 | env.bak/ 98 | venv.bak/ 99 | 100 | # Spyder project settings 101 | .spyderproject 102 | .spyproject 103 | 104 | # Rope project settings 105 | .ropeproject 106 | 107 | # mkdocs documentation 108 | /site 109 | 110 | # mypy 111 | .mypy_cache/ 112 | .dmypy.json 113 | dmypy.json 114 | 115 | # Pyre type checker 116 | .pyre/ 117 | .idea/ 118 | /toy*.py 119 | .DS_Store 120 | post/ 121 | /toy*.ipynb 122 | data/ 123 | *.c 124 | .nes_cache 125 | /toy*.yml 126 | *.tmp 127 | 128 | shell/jina-wizard.sh 129 | /junit/ 130 | /tests/junit/ 131 | /docs/chapters/proto/docs.md 132 | 133 | # IntelliJ IDEA 134 | *.iml 135 | .idea 136 | 137 | # VSCode 138 | .vscode 139 | 140 | # test with config in resources 141 | tests/integration/crud/simple/simple_indexer/ 142 | 143 | # latency tracking 144 | latency 145 | MyIndexer/ 146 | MyMemMap/ 147 | original/ 148 | output/ 149 | 150 | # Logging 151 | /wandb 152 | 153 | # env 154 | .env 155 | test.py -------------------------------------------------------------------------------- 
/.pre-commit-config.yaml: -------------------------------------------------------------------------------- 1 | repos: 2 | - repo: https://github.com/ambv/black 3 | rev: "23.3.0" 4 | hooks: 5 | - id: black 6 | types: [python] 7 | - repo: https://github.com/pycqa/flake8 8 | rev: "6.0.0" 9 | hooks: 10 | - id: flake8 11 | - repo: https://github.com/pycqa/isort 12 | rev: "5.12.0" 13 | hooks: 14 | - id: isort 15 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # Contributing 2 | 3 | 4 | ## Setup 5 | 6 | ### Install dev requirements 7 | 8 | ```bash 9 | make install-dev 10 | ``` 11 | 12 | ### Install finetuner 13 | 14 | ```bash 15 | make install 16 | ``` 17 | 18 | ### Enable precommit hook 19 | 20 | To automatically ensure formatting with `black`, import sorting with `isort` and linting 21 | with `flake8`, you can install the pre-commit hooks 22 | 23 | ```bash 24 | make pre-commit 25 | ``` 26 | 27 | 28 | ## Making a PR 29 | 30 | ### Open an issue 31 | 32 | Each PR should reference an open issue, and this issue should be linked to your PR. 33 | 34 | ### Running tests locally 35 | 36 | To run tests locally, all you need to do is 37 | 38 | ```bash 39 | make test 40 | ``` 41 | 42 | ### Adding an entry to the changelog 43 | 44 | Make an entry in [CHANGELOG.md](https://github.com/jina-ai/finetuner/blob/main/CHANGELOG.md), 45 | adding it to the `Unreleased` section (and the appropriate subsection), which should contain a 46 | short description of what you have done in the PR, as well as the PR's number, e.g. 47 | 48 | ``` 49 | - Add `NTXentLoss` loss class for supervised learning ([#24](https://github.com/jina-ai/finetuner.fit/pull/24)) 50 | ``` 51 | 52 | To avoid merge conflicts when multiple people are simultaneously working on new features, make sure there 53 | is **an empty line above and below the entry**. 
54 | 55 | ## Update notebooks 56 | 57 | We have three Google Colab notebooks embedded inside the documentation: 58 | 59 | - [text-to-text with bert](https://colab.research.google.com/drive/1Ui3Gw3ZL785I7AuzlHv3I0-jTvFFxJ4_?usp=sharing) 60 | - [image-to-image with resnet](https://colab.research.google.com/drive/1QuUTy3iVR-kTPljkwplKYaJ-NTCgPEc_?usp=sharing) 61 | - [text-to-image with clip](https://colab.research.google.com/drive/1yKnmy2Qotrh3OhgwWRsMWPFwOSAecBxg?usp=sharing) 62 | 63 | To update code in colab: 64 | 65 | 1. Update code in the Google Colab. 66 | 2. Download into `docs/notebooks/` folder. 67 | 3. cd into `docs` folder, run `make notebook` and run `make dirhtml` to see output locally. 68 | 69 | Only members of the team have permission to modify the notebooks. 70 | 71 | ## Releases 72 | 73 | To make a release, follow these steps, in order. 74 | 75 | ### Update CHANGELOG.md 76 | 77 | In `CHANGELOG.md`, rename the top `Unreleased` entry with the version number (`X.Y.Z`), and enter the current date. 78 | 79 | Then, add a new empty `Unreleased` section on top of it - this is where the changes for the next version will accumulate. 80 | 81 | ### Tag the commit on `main` branch 82 | 83 | In your repository, check out the `main` branch, and tag it with the appropriate version - it should match the one in `finetuner/__init__.py`! 84 | If it does not, change it there first. 85 | 86 | To tag the head commit in `main` branch, and then push this to remote, do the following steps 87 | (you can also do this automatically by creating a release on GitHub) 88 | 89 | ```bash 90 | git checkout main 91 | git tag vX.Y.Z 92 | git push --tags 93 | ``` 94 | 95 | At this point the new version is officially released. Any automated actions connected 96 | to the release will have been run.
97 | 98 | ### Change version in `finetuner/__init__.py` 99 | 100 | Since now the `main` branch corresponds to the new development version, we need to change the version 101 | in `finetuner/__init__.py` to reflect that. So you should increment the version in that file. 102 | -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | include requirements.txt 2 | include LICENSE 3 | prune tests/ -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | # Run: 2 | # make help 3 | # 4 | # for a description of the available targets 5 | 6 | 7 | # ------------------------------------------------------------------------- Help target 8 | 9 | TARGET_MAX_CHAR_NUM=20 10 | GREEN := $(shell tput -Txterm setaf 2) 11 | YELLOW := $(shell tput -Txterm setaf 3) 12 | WHITE := $(shell tput -Txterm setaf 7) 13 | RESET := $(shell tput -Txterm sgr0) 14 | 15 | ## Show this help message 16 | help: 17 | @echo '' 18 | @echo 'Usage:' 19 | @echo ' ${YELLOW}make${RESET} ${GREEN}${RESET}' 20 | @echo '' 21 | @echo 'Targets:' 22 | @awk '/^[a-zA-Z\-\_0-9]+:/ { \ 23 | helpMessage = match(lastLine, /^## (.*)/); \ 24 | if (helpMessage) { \ 25 | helpCommand = substr($$1, 0, index($$1, ":")-1); \ 26 | helpMessage = substr(lastLine, RSTART + 3, RLENGTH); \ 27 | printf " ${YELLOW}%-$(TARGET_MAX_CHAR_NUM)s${RESET} ${GREEN}%s${RESET}\n", helpCommand, helpMessage; \ 28 | } \ 29 | } \ 30 | { lastLine = $$0 }' $(MAKEFILE_LIST) 31 | 32 | 33 | # ------------------------------------------------------------------------ Clean target 34 | 35 | ## Delete temp operational stuff like artifacts, test outputs etc 36 | clean: 37 | rm -rf .mypy_cache/ .pytest_cache/ 38 | rm -f .coverage .coverage.* 39 | rm -rf *.egg-info/ build/ docs/_build/ htmlcov/ 40 | 41 | 42 | # 
--------------------------------------------------------- Environment related targets 43 | 44 | ## Create a virtual environment 45 | env: 46 | python3.8 -m venv .venv 47 | source .venv/bin/activate 48 | pip install -U pip 49 | 50 | ## Install pre-commit hooks 51 | pre-commit: 52 | pip install pre-commit 53 | pre-commit install 54 | 55 | ## Install package requirements 56 | install: 57 | pip install --no-cache-dir -e ".[full]" 58 | rm -rf *.egg-info/ build/ 59 | 60 | ## Install dev requirements 61 | install-dev: 62 | pip install --no-cache-dir -e ".[test]" 63 | rm -rf *.egg-info/ build/ 64 | 65 | ## Install docs requirements 66 | install-docs: 67 | pip install --no-cache-dir -r docs/requirements.txt 68 | 69 | ## Bootstrap dev environment 70 | init: pre-commit install install-dev install-docs 71 | 72 | 73 | # ----------------------------------------------------------------------- Build targets 74 | 75 | ## Build wheel 76 | build: 77 | python setup.py bdist_wheel 78 | rm -rf .eggs/ build/ *egg-info 79 | 80 | ## Build source dist 81 | build-sdist: 82 | python setup.py sdist 83 | rm -rf .eggs/ build/ *egg-info 84 | 85 | 86 | # ---------------------------------------------------------------- Test related targets 87 | 88 | PYTEST_ARGS = --show-capture no --verbose --cov finetuner/ --cov-report term-missing --cov-report html 89 | 90 | ## Run tests 91 | test: 92 | pytest $(PYTEST_ARGS) $(TESTS_PATH) 93 | 94 | 95 | # ---------------------------------------------------------------- Docs related targets 96 | 97 | ## Build docs 98 | build-docs: 99 | cd docs/ && bash makedoc.sh development 100 | 101 | 102 | # ---------------------------------------------------------- Code style related targets 103 | 104 | SRC_CODE = finetuner/ tests/ 105 | 106 | ## Run the flake linter 107 | flake: 108 | flake8 $(SRC_CODE) 109 | 110 | ## Run the black formatter 111 | black: 112 | black $(SRC_CODE) 113 | 114 | ## Dry run the black formatter 115 | black-check: 116 | black --check $(SRC_CODE) 117 | 
118 | ## Run the isort import formatter 119 | isort: 120 | isort $(SRC_CODE) 121 | 122 | ## Dry run the isort import formatter 123 | isort-check: 124 | isort --check $(SRC_CODE) 125 | 126 | ## Run the mypy static type checker 127 | mypy: 128 | mypy $(SRC_CODE) 129 | 130 | ## Format source code 131 | format: black isort 132 | 133 | ## Check code style 134 | style: flake black-check isort-check # mypy 135 | -------------------------------------------------------------------------------- /codecov.yml: -------------------------------------------------------------------------------- 1 | codecov: 2 | # https://docs.codecov.io/docs/comparing-commits 3 | allow_coverage_offsets: true 4 | coverage: 5 | status: 6 | project: 7 | default: 8 | informational: true 9 | target: auto # auto compares coverage to the previous base commit 10 | comment: 11 | layout: "reach, diff, flags, files" 12 | behavior: default 13 | require_changes: false # if true: only post the comment if coverage changes 14 | branches: # branch names that can post comment 15 | - "main" 16 | -------------------------------------------------------------------------------- /datasets/credit.txt: -------------------------------------------------------------------------------- 1 | Data for `image-image_example.csv` and `text-image_example.csv` is sourced from the Cross-Market Recommendation dataset. 2 | https://xmrec.github.io/data/de/ 3 | 4 | Data for `text-text_example.csv` is sourced from the Quora Duplicate Questions dataset. 5 | https://www.sbert.net/examples/training/quora_duplicate_questions/README.html?highlight=quora#dataset 6 | -------------------------------------------------------------------------------- /docs/Makefile: -------------------------------------------------------------------------------- 1 | # Minimal makefile for Sphinx documentation 2 | # 3 | 4 | # You can set these variables from the command line, and also 5 | # from the environment for the first two. 
6 | SPHINXOPTS ?= 7 | SPHINXBUILD ?= sphinx-build 8 | SOURCEDIR = . 9 | BUILDDIR = build 10 | 11 | # Put it first so that "make" without argument is like "make help". 12 | help: 13 | @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 14 | 15 | .PHONY: help Makefile 16 | 17 | # Catch-all target: route all unknown targets to Sphinx using the new 18 | # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). 19 | %: Makefile 20 | @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 21 | 22 | notebook: 23 | jupytext --to markdown notebooks/*.ipynb 24 | -------------------------------------------------------------------------------- /docs/_static/JCloud-dark.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 | JCloud-dark 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | -------------------------------------------------------------------------------- /docs/_static/JCloud-light.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 | JCloud-light 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | -------------------------------------------------------------------------------- /docs/_static/banner.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jina-ai/finetuner/69ae77cb51c736e791126792de20015a70658b53/docs/_static/banner.png -------------------------------------------------------------------------------- /docs/_static/cas-dark.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 | cas-dark2 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | -------------------------------------------------------------------------------- /docs/_static/cas-light.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 | cas-dark2 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 
-------------------------------------------------------------------------------- /docs/_static/docarray-dark.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 | docarray-dark 2 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | -------------------------------------------------------------------------------- /docs/_static/docarray-light.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 | docarray-light 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | -------------------------------------------------------------------------------- /docs/_static/docbot.css: -------------------------------------------------------------------------------- 1 | qa-bot[theme="follow"] { 2 | --qabot-color-shadow: var(--sd-color-shadow); 3 | --qabot-color-background: var(--color-background-primary); 4 | --qabot-color-padding: var(--sd-color-card-header); 5 | 6 | --qabot-color-primary: var(--sd-color-card-text); 7 | --qabot-color-action: var(--color-brand-primary); 8 | --qabot-color-action-contrast: var(--color-code-background); 9 | --qabot-color-dimmed: var(--color-background-border); 10 | --qabot-color-muted: var(--color-foreground-muted); 11 | } 12 | qa-bot:not(:defined) { 13 | display: none; 14 | } -------------------------------------------------------------------------------- /docs/_static/favicon.ico: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jina-ai/finetuner/69ae77cb51c736e791126792de20015a70658b53/docs/_static/favicon.ico -------------------------------------------------------------------------------- /docs/_static/favicon.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jina-ai/finetuner/69ae77cb51c736e791126792de20015a70658b53/docs/_static/favicon.png -------------------------------------------------------------------------------- /docs/_static/finetuner+_dark.svg: 
-------------------------------------------------------------------------------- 1 | 2 | 3 | finetuner+_light备份 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | -------------------------------------------------------------------------------- /docs/_static/finetuner+_light.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 | finetuner+_light 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | -------------------------------------------------------------------------------- /docs/_static/finetuner-dark.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | -------------------------------------------------------------------------------- /docs/_static/finetuner-light.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | -------------------------------------------------------------------------------- /docs/_static/hub-dark.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | -------------------------------------------------------------------------------- /docs/_static/hub-light.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | -------------------------------------------------------------------------------- /docs/_static/main.css: -------------------------------------------------------------------------------- 1 | .sidebar-logo { 2 | max-width: 50%; 3 | } 4 | 5 | 6 | table.docutils { 7 | border: thin; 8 | } 9 | 10 | table.docutils td, table.docutils th { 11 | padding: 1rem 1rem; 12 | } 13 | 14 | .highlight { 15 | background: #f5f5f5; 16 | } 17 | 18 | h1, h2, h3 { 19 | margin-top: 3rem; 20 | } 21 | 22 | .highlight-console .highlight { 23 | background: #00232b !important; 24 | color: whitesmoke; 25 | } 26 | 27 | .highlight-text .highlight { 28 | background: #00232b !important; 29 | color: whitesmoke; 30 | } 31 | 32 | .highlight-json .highlight { 
33 | background: #00232b !important; 34 | color: whitesmoke; 35 | } 36 | 37 | .highlight-shell .highlight { 38 | background: #00232b !important; 39 | color: whitesmoke; 40 | } 41 | 42 | .highlight-bash .highlight { 43 | background: #00232b !important; 44 | color: whitesmoke; 45 | } 46 | 47 | .tab-set > input:checked + label { 48 | border-color: var(--tabs--label-text--active); 49 | } 50 | 51 | .tab-set > input:checked + label:hover { 52 | border-color: var(--tabs--label-text--active); 53 | } 54 | 55 | 56 | table code { 57 | background: var(--color-inline-code-background); 58 | border: 1px solid var(--color-background-border); 59 | border-radius: .2em; 60 | font-size: var(--font-size--small--2); 61 | padding: .1em .2em; 62 | } 63 | 64 | .related-information { 65 | justify-content: space-between; 66 | } 67 | 68 | .social-btn { 69 | margin: 0 .3em; 70 | } 71 | 72 | .social-btn:hover { 73 | opacity: .5; 74 | } 75 | 76 | .social-btns { 77 | display: inline-block; 78 | } 79 | 80 | .announcement { 81 | background-color: var(--color-brand-primary); 82 | color: var(--color-background-primary) !important; 83 | } 84 | 85 | .announcement a { 86 | color: inherit; 87 | text-decoration: none; 88 | } 89 | 90 | .announcement a:hover { 91 | color: inherit; 92 | text-decoration: underline; 93 | } 94 | 95 | .usage-card { 96 | display: none; 97 | } 98 | 99 | .sidebar-ecosys-logo { 100 | width: 1.2em; 101 | margin-right: .5em; 102 | vertical-align: middle 103 | } 104 | 105 | 106 | body[data-theme="dark"] .only-dark-line { 107 | display: inline-block !important; 108 | } 109 | 110 | body[data-theme="dark"] .only-light-line { 111 | display: none !important; 112 | } 113 | 114 | body[data-theme="light"] .only-light-line { 115 | display: inline-block !important; 116 | } 117 | 118 | body[data-theme="light"] .only-dark-line { 119 | display: none !important; 120 | } 121 | 122 | body[data-theme="auto"] .only-light-line { 123 | display: inline-block !important; 124 | } 125 | 126 | 
body[data-theme="auto"] .only-dark-line { 127 | display: none !important; 128 | } 129 | 130 | .version-select { 131 | font-size: .7em; 132 | border-radius: 5px; 133 | cursor: pointer; 134 | background-color: #fff; 135 | background-image: linear-gradient(to top, #f9f9f9, #fff 33%); 136 | border-color: var(--color-background-border); 137 | height: 1.8em; 138 | line-height: 1.8em; 139 | outline: none; 140 | text-align: center; 141 | max-width: 7em; 142 | color: var(--color-foreground-muted); 143 | } -------------------------------------------------------------------------------- /docs/_static/now-dark.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 | Now_Yellow 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | -------------------------------------------------------------------------------- /docs/_static/now-light.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 | Now_Light_PureColor 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | -------------------------------------------------------------------------------- /docs/_static/search-dark.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | -------------------------------------------------------------------------------- /docs/_static/search-light.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | -------------------------------------------------------------------------------- /docs/_templates/sidebar/brand.html: -------------------------------------------------------------------------------- 1 | 19 |
20 | Star 21 | {% if versions %} 22 | 40 | {% endif %} 41 |
-------------------------------------------------------------------------------- /docs/_templates/sidebar/navigation.html: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /docs/_templates/template_ft_in_action.md: -------------------------------------------------------------------------------- 1 | # Finetuner in Action Template 2 | This is a template for the documentation guides of Finetuner in action, with the general structure and layout to be used for demonstrating how Finetuner can be applied to solve different tasks. 3 | 4 | ```{admonition} See Also: Jina Contribution Guidelines 5 | :class: seealso 6 | For more info on best practices for documentation, see Jina's [contribution guidelines](https://github.com/jina-ai/jina/blob/master/CONTRIBUTING.md#-contributing-documentation) 7 | ``` 8 | 9 | ## Task overview 10 | Describe the task which this guide accomplishes, including which model will be fine-tuned and which dataset you will use. 11 | 12 | Also provide a brief description of what the task entails, what the dataset looks like and a high-level description of how the dataset is processed. 13 | 14 | 15 | ## Preparing data 16 | Outline where the data can be found, artifact names in Jina AI Cloud or if relevant, how a user might load their own custom data. 17 | Add a link to supplementary dataset info, for example as a `See Also` {admonition}. 18 | If you are outlining how to preprocess a dataset from scratch, use {dropdown} to hide long code snippets. 19 | 20 | 21 | ## Choosing the model 22 | Mention which model will be used in your fine-tuning task. Feel free to add a `See Also` {admonition} for supplementary info on the model, perhaps a relevant paper or site. 23 | 24 | You can also add a `Tip` {admonition} for how the user can view all available models, also referring to the `Choose backbone model` documentation. 
25 | 26 | 27 | ## Creating a fine-tuning run 28 | Show the user how to create a fine-tuning run, then explain why your example run has particular parameters and what they do. Also mention which parameters are optional or required. 29 | Provide a more detailed explanation of parameters that are important for your particular experiment. 30 | 31 | Example: 32 | 33 | ```python 34 | run = finetuner.fit( 35 | ... 36 | ) 37 | ``` 38 | "Let's understand what this piece of code does ..." 39 | 40 | 41 | ## Monitoring your runs 42 | 43 | Also show the user how they can monitor their run, and reconnect to it if they were disconnected. 44 | 45 | Example: 46 | 47 | "Now that we've created a run, let's see its status. You can monitor the run by checking the status - `run.status()` or the logs - `run.logs()`. " 48 | ```python 49 | print(run.status()) 50 | ``` 51 | 52 | ```bash 53 | {'status': 'CREATED', 'details': 'Run submitted and awaits execution'} 54 | ``` 55 | 56 | "Since some runs might take up to several hours/days, you can reconnect to your run very easily to monitor its status and logs." 57 | ```python 58 | import finetuner 59 | finetuner.login() 60 | run = finetuner.get_run('my_run') 61 | ``` 62 | 63 | ## Saving your model 64 | Show the user how to save their model when fine-tuning has completed. 65 | 66 | Example: 67 | 68 | "If your run has finished successfully, you can save fine-tuned models in the following way:" 69 | ```python 70 | run.save_artifact('my_model') 71 | ``` 72 | 73 | ## Evaluating your model 74 | Explain to the user how they can track the performance of the model(s) they have fine-tuned in their runs. If this is not implemented yet, show the user an example log and how they might deduce model performance from this log. 
-------------------------------------------------------------------------------- /docs/advanced-topics/budget.md: -------------------------------------------------------------------------------- 1 | (budget)= 2 | # {octicon}`database` How much data? 3 | 4 | ```{admonition} Read full blog 5 | :class: hint 6 | Please checkout [Fine-tuning with Low Budget and High Expectations](https://jina.ai/news/fine-tuning-with-low-budget-and-high-expectations/) 7 | to read the full tech blog. 8 | ``` 9 | 10 | Fine-tuning takes a pre-trained model, 11 | trained on a related task, and then further trains it for a new task. 12 | Alternately, it can mean taking a model pre-trained for an open domain task, and further training it for a domain-specific one. 13 | Compared to training from scratch, fine-tuning is a much more cost-efficient solution whenever it is feasible. But: 14 | 15 | + Exactly how much **data** do you need to get a good result? 16 | + Exactly how much **time** do you need to get good results? 17 | 18 | ## Experiments 19 | 20 | We designed two experiments to quantitatively study how labeled data and training time affect fine-tuning performance. 21 | For each experiment, we constructed three search tasks by fine-tuning three models. 22 | We chose seven datasets, two of which are non-domain-specific public datasets, to ensure the generality of our experiment. 23 | 24 | We measured the performance of the fine-tuned models by evaluating their ability to perform search tasks, as measured by Mean Reciprocal Rank (mRR), Recall, and Mean Average Precision (mAP). 25 | These metrics are calculated using the top 20 results of each search in the validation subset held out from each dataset. 26 | 27 | ### How much labeled data is needed? 28 | 29 | We gradually increase the amount of labeled data fed to Finetuner from 100 items to 100,000 and see how this affects performance on the metrics described in the previous section. 
30 | 31 | In the figures below, the X-axis represents the amount of labeled data, and the Y-axis represents the relative improvement over the pre-trained model. The higher, the better. 32 | 33 | ... | ... 34 | :-------------------------:|:-------------------------: 35 | ![text-text-quora](https://jina-ai-gmbh.ghost.io/content/images/2022/12/Text-to-text-search-on-QuoraQA--3-.svg) | ![text-text-clinc](https://jina-ai-gmbh.ghost.io/content/images/2022/12/Text-to-text-search-on-Clinc150--3-.svg) 36 | ![image-image-tll](https://jina-ai-gmbh.ghost.io/content/images/2022/12/Image-to-image-search-on-Totally-looks-like.svg) | ![image-image-celeba](https://jina-ai-gmbh.ghost.io/content/images/2022/12/Image-to-image-search-on-Celeba--4-.svg) 37 | ![image-image-flickr30k](https://jina-ai-gmbh.ghost.io/content/images/2022/12/Text-to-image-search-on-Flickr30K--5-.svg) | ![image-image-coco](https://jina-ai-gmbh.ghost.io/content/images/2022/12/Text-to-image-search-on-CoCoCaptions--4-.svg) 38 | 39 | These results are promising but not particularly surprising. 40 | Performance improves with more labeled data on nearly all tasks and all datasets, more for some tasks and datasets than for others. 41 | However, the only conclusion we can draw from these figures is that the Finetuner works as advertised. So far so good. 42 | 43 | We further calculate the return on investment (ROI), 44 | by dividing the relative improvement (a proxy for net profit) by the amount of labeled data (a proxy for investment cost). 45 | **This is useful because it indicates the point at which adding more data is producing diminishing returns.** 46 | 47 | In the figures below, the X-axis represents the amount of labeled data, and the Y-axis represents the ROI per labeled data item. The higher, the better. 48 | In particular, `ROI=0` means adding new labeled data at that point no longer contributes to any improvement. 49 | 50 | ... | ... 
51 | :-------------------------:|:-------------------------: 52 | ![text-text-quora](https://jina-ai-gmbh.ghost.io/content/images/2022/12/Text-to-text-search-on-QuoraQA--7-.svg) | ![text-text-clinc](https://jina-ai-gmbh.ghost.io/content/images/2022/12/Text-to-text-search-on-Clinc150--7-.svg) 53 | ![image-image-tll](https://jina-ai-gmbh.ghost.io/content/images/2022/12/Image-to-image-search-on-Totally-looks-like--1-.svg) | ![image-image-celeba](https://jina-ai-gmbh.ghost.io/content/images/2022/12/Image-to-image-search-on-Celeba--5-.svg) 54 | ![image-image-flickr30k](https://jina-ai-gmbh.ghost.io/content/images/2022/12/Text-to-image-search-on-Flickr30K--6-.svg) | ![image-image-coco](https://jina-ai-gmbh.ghost.io/content/images/2022/12/Text-to-image-search-on-CoCoCaptions--5-.svg) 55 | 56 | Surprisingly, we can see that the ROI per unit of new labeled data starts to drop almost immediately. We expected that it would eventually decrease, but this is an unexpected result. 57 | 58 | ### How much time is needed? 59 | 60 | To measure the value of added training time, we fixed the amount of new labeled data to 1000 items, and then we gradually increased the number of training epochs from 1 to 10. 61 | At each increase, we measure improvement over the pre-trained model and calculate the ROI. 62 | For these experiments, the ROI is calculated by dividing the relative improvement by the elapsed time in seconds. 63 | This means that when `ROI=0`, adding training time no longer improves performance. 64 | 65 | ... | ... 
66 | :-------------------------:|:-------------------------: 67 | ![text-text-quora](https://jina-ai-gmbh.ghost.io/content/images/2022/12/Text-to-text-search-on-QuoraQA--4-.svg) | ![text-text-clinc](https://jina-ai-gmbh.ghost.io/content/images/2022/12/Text-to-text-search-on-Clinc150--4-.svg) 68 | ![image-image-tll](https://jina-ai-gmbh.ghost.io/content/images/2022/12/Image-to-image-search-on-Totally-look-like--2-.svg) | ![image-image-celeba](https://jina-ai-gmbh.ghost.io/content/images/2022/12/Image-to-image-search-on-Celeba--2-.svg) 69 | ![image-image-flickr30k](https://jina-ai-gmbh.ghost.io/content/images/2022/12/Text-to-image-search-on-Flickr30K--3-.svg) | ![image-image-coco](https://jina-ai-gmbh.ghost.io/content/images/2022/12/Text-to-image-search-on-CocoCaptions--2-.svg) 70 | 71 | We knew in advance that adding more time does not guarantee any improvement at all. 72 | It can, in fact, reduce performance due to the overfitting problem. 73 | Some models (e.g. CLIP) are more prone to overfitting than others. 74 | In principle, if we keep training with the same 1000 data points over and over, we are guaranteed to overfit on the data and the overall performance will drop. 75 | 76 | Let's look at the ROI curves. 77 | 78 | ... | ... 
79 | :-------------------------:|:-------------------------: 80 | ![text-text-quora](https://jina-ai-gmbh.ghost.io/content/images/2022/12/Text-to-text-search-on-QuoraQA--5-.svg) | ![text-text-clinc](https://jina-ai-gmbh.ghost.io/content/images/2022/12/Text-to-text-search-on-Clinc150--9-.svg) 81 | ![image-image-tll](https://jina-ai-gmbh.ghost.io/content/images/2022/12/Image-to-image-search-on-Totally-look-like--3-.svg) | ![image-image-celeba](https://jina-ai-gmbh.ghost.io/content/images/2022/12/Image-to-image-search-on-Celeba--3-.svg) 82 | ![image-image-flickr30k](https://jina-ai-gmbh.ghost.io/content/images/2022/12/Text-to-image-search-on-Flickr30K--4-.svg) | ![image-image-coco](https://jina-ai-gmbh.ghost.io/content/images/2022/12/Text-to-image-search-on-CocoCaptions--3-.svg) 83 | 84 | The ROI drops immediately after the first epoch of fine-tuning. 85 | Unlike in the last experiment, where ROI approached zero but stayed positive when increasing the number of epochs, here, the ROI on added time can go negative due to the overfitting problem! 86 | 87 | ## Summary 88 | 89 | What does this mean for users looking to maximize gains and minimize costs? 90 | 91 | + Many state-of-the-art deep neural networks are capable of few-shot learning. They are quick learners and can make large improvements with only a few hundred items of labeled data and only a few minutes of training time. You might have thought that deep neural network training requires millions of data items and a week of runtime, but we have shown in these examples how that stereotype does not hold up to reality. 92 | + Because they can learn so much, so fast, from so little data, ROI drops quickly as you put more time and data into fine-tuning. In the experiments above, ROI shrinks by 70% from its highest value after 500 labeled data items or 600 added seconds of GPU training time. 
Further investment beyond a few hundred items of training data and very minimal training time may not pay off as well as you would like. -------------------------------------------------------------------------------- /docs/advanced-topics/finetuner-executor.md: -------------------------------------------------------------------------------- 1 | (finetuner-executor)= 2 | # {octicon}`gear` Use FinetunerExecutor inside a Jina Flow 3 | 4 | Finetuner, being part of the Jina AI Cloud, provides a convenient way to use tuned models via [Jina Executors](https://docs.jina.ai/fundamentals/executor/). 5 | 6 | We've created the [`FinetunerExecutor`](https://cloud.jina.ai/executor/13dzxycc) which can be added in a [Jina Flow](https://docs.jina.ai/fundamentals/flow/) and load any tuned model. 7 | More specifically, the executor exposes an `/encode` endpoint that embeds [Documents](https://finetuner.jina.ai/walkthrough/create-training-data/#preparing-a-documentarray) using the fine-tuned model. 8 | 9 | Loading a tuned model is simple! You just need to provide a few parameters under the `uses_with` argument when adding the `FinetunerExecutor` to the [Flow](https://docs.jina.ai/fundamentals/flow/). 10 | You have three options: 11 | 12 | ````{tab} Artifact id and token 13 | ```python 14 | import finetuner 15 | from jina import Flow 16 | 17 | finetuner.login() 18 | 19 | token = finetuner.get_token() 20 | run = finetuner.get_run( 21 | experiment_name='YOUR-EXPERIMENT', 22 | run_name='YOUR-RUN' 23 | ) 24 | 25 | f = Flow().add( 26 | uses='jinahub+docker://FinetunerExecutor/latest', # use latest-gpu for gpu executor. 27 | uses_with={'artifact': run.artifact_id, 'token': token}, 28 | ) 29 | ``` 30 | ```` 31 | ````{tab} Locally saved artifact 32 | ```python 33 | from jina import Flow 34 | 35 | f = Flow().add( 36 | uses='jinahub+docker://FinetunerExecutor/latest', # use latest-gpu for gpu executor. 
37 | uses_with={'artifact': '/mnt/YOUR-MODEL.zip'}, 38 | volumes=['/your/local/path/:/mnt'] # mount your model path to docker. 39 | ) 40 | ``` 41 | ```` 42 | ````{tab} YAML 43 | ```yaml 44 | jtype: Flow 45 | with: 46 | port: 51000 47 | protocol: grpc 48 | executors: 49 | uses: jinahub+docker://FinetunerExecutor/latest 50 | with: 51 | artifact: 'COPY-YOUR-ARTIFACT-ID-HERE' 52 | token: 'COPY-YOUR-TOKEN-HERE' # or better set as env 53 | ``` 54 | ```` 55 | 56 | As you can see, it's super easy! 57 | If you did not call {func}`~finetuner.run.Run.save_artifact`, 58 | you need to provide the `artifact_id` and `token`. 59 | `FinetunerExecutor` will automatically pull your model from the Jina AI Cloud to the container. 60 | 61 | On the other hand, 62 | if you have saved artifact locally, 63 | please mount the zipped artifact to the docker container. 64 | `FinetunerExecutor` will unzip the artifact and load models. 65 | 66 | You can start your flow with: 67 | 68 | ```python 69 | with f: 70 | # in this example, we fine-tuned a BERT model and embed a Document.. 71 | returned_docs = f.post( 72 | on='/encode', 73 | inputs=DocumentArray( 74 | [ 75 | Document( 76 | text='some text to encode' 77 | ) 78 | ] 79 | ) 80 | ) 81 | 82 | for doc in returned_docs: 83 | print(f'Text of the returned document: {doc.text}') 84 | print(f'Shape of the embedding: {doc.embedding.shape}') 85 | ``` 86 | 87 | ```console 88 | Text of the returned document: some text to encode 89 | Shape of the embedding: (768,) 90 | ``` 91 | 92 | In order to see what other options you can specify when initializing the executor, please go to the [`FinetunerExecutor`](https://cloud.jina.ai/executor/13dzxycc) page and click on `Arguments` on the top-right side. 93 | 94 | ```{admonition} FinetunerExecutor parameters 95 | :class: tip 96 | The only required argument is `artifact`. We provide default values for others. 
97 | ``` 98 | 99 | ## Special case: Artifacts with CLIP models 100 | If your fine-tuning job was executed on a CLIP model, your artifact contains two 101 | models: `clip-vision` and `clip-text`. 102 | The vision model allows you to embed images and the text model can encode text passages 103 | into the same vector space. 104 | To use those models, you have to provide the name of the model via an additional 105 | `select_model` parameter to the {func}`~finetuner.get_model` function. 106 | 107 | If you want to host the CLIP models, you also have to provide the name of the model via the 108 | `select_model` parameter inside the `uses_with` attribute: 109 | 110 | ```python 111 | import finetuner 112 | from jina import Flow 113 | 114 | finetuner.login() 115 | 116 | token = finetuner.get_token() 117 | run = finetuner.get_run( 118 | experiment_name='YOUR-EXPERIMENT', 119 | run_name='YOUR-RUN' 120 | ) 121 | 122 | f = Flow().add( 123 | uses='jinahub+docker://FinetunerExecutor/latest', # use latest-gpu for gpu executor. 124 | uses_with={ 125 | 'artifact': run.artifact_id, 'token': token, 'select_model': 'clip-vision' 126 | }, 127 | ) 128 | 129 | ``` 130 | 131 | -------------------------------------------------------------------------------- /docs/advanced-topics/linear-probe.md: -------------------------------------------------------------------------------- 1 | (projection-head)= 2 | # {octicon}`pin` Projection Head 3 | 4 | ## Why freezing? 5 | 6 | Depending on your task and the amount of training data, 7 | it is not always necessary to tune the entire model. 8 | In some cases, 9 | freezing some of the weights of the pre-trained model and just fine-tuning specific layers produces comparable or better results. 10 | Furthermore, freezing weights can reduce the training time dramatically. 11 | 12 | Finetuner allows you to fine-tune a Linear Projection Head easily. 13 | 14 | ```{warning} 15 | Currently, we only allow you to freeze layers for image-to-image search tasks. 
16 | These models are built on top of Convolutional Neural Networks (CNNs). 17 | 18 | For transformer architectures, 19 | we can only fine-tune the entire neural network. 20 | If you need to freeze weights for transformers, consider submitting a feature request in our [Github Issues page](https://github.com/jina-ai/finetuner/issues) 21 | ``` 22 | 23 | ```{admonition} Dimensionality reduction 24 | :class: hint 25 | Use a smaller `output_dim` to get compact embeddings. 26 | ``` 27 | 28 | ## How? 29 | 30 | Finetuner has a built-in module called Tailor. 31 | Given a general model written in PyTorch, 32 | Tailor performs the micro-operations on the model architecture required for fine-tuning and outputs an embedding model. 33 | 34 | Given a general model with weights, Tailor performs some or all of the following steps: 35 | 36 | + Iterating over all layers to find dense layers. 37 | + Chopping off all layers after a certain dense layer. 38 | + Freezing weights on specific layers. 39 | + Adding new layers on top of the model. 40 | 41 | ![tailor](../imgs/tailor.svg) 42 | 43 | For example, just using the arguments `freeze=True` and `output_dim=X` with the `fit` function, as shown below: 44 | 45 | ```diff 46 | run = finetuner.fit( 47 | model='resnet50', 48 | ..., 49 | + freeze=True, 50 | + output_dim=1024, # default output_dim of ResNet50 is 2048. 51 | ..., 52 | ) 53 | ``` 54 | 55 | Finetuner will: 56 | 57 | 1. Remove the classification head of a `ResNet` model, and convert it into an embedding model. 58 | 2. Freeze all layers of the embedding model. 59 | 3. Attach a trainable 3-layer Linear Projection Head on top of the embedding model with an `output_dim=1024`. 60 | 61 | ```{warning} 62 | Keep in mind that whenever you use `freeze=True`, always set `output_dim`. 63 | Otherwise, nothing can be tuned since all layers are frozen. 
64 | ``` 65 | 66 | ## Summary 67 | 68 | If you want to achieve efficient fine-tuning without retraining the entire model, 69 | tuning a Linear Projection Head could be a good solution. -------------------------------------------------------------------------------- /docs/advanced-topics/negative-mining.md: -------------------------------------------------------------------------------- 1 | (negative-mining)= 2 | # {octicon}`telescope` Negative Mining 3 | 4 | Negative Mining is an advanced machine learning technique, which optimizes the way data is sampled from your training dataset. 5 | Usually, it aims at making the metric learning tasks for the model harder during the training. 6 | In this way, it can lead to better fine-tuning results. 7 | 8 | ## Context: Deep Metric Learning 9 | 10 | First, let's take a look at how we construct the training data for metric learning tasks. 11 | 12 | Metric Learning algorithms attempt to teach neural network models to tell 13 | which objects are semantically/visually similar and which ones are not. 14 | 15 | For uni-modal fine-tuning tasks such as text-to-text, image-to-image, or mesh-to-mesh, 16 | Finetuner constructs training data in the following way: 17 | 18 | ![batch-sample](../imgs/batch-sampling.png) 19 | 20 | Assume we have a list of Documents belonging to four classes: `1`, `2`, `3`, and `4`, 21 | Finetuner will evenly sample *X* items per class to make a batch *B* which is encoded by the model into a set of embeddings. 22 | 23 | Afterward, the loss is calculated based on the relations between the embeddings. 24 | Many of Finetuner's loss functions contrast the embeddings of three items, or a __Triplet__. 
25 | Finetuner creates all possible triplets *(anchor, pos, neg)* from this batch which satisfy the following conditions: 26 | For each triplet, the first is the __anchor__, the second is an embedding that ought to be closer to the embedding of the anchor (has the same label), and the third is one that should be further from the anchor (has a different label). 27 | The objective is to pull the embeddings of items that belong to the same class closer together in the embedding space, 28 | while pushing the embeddings of items which belong to different classes farther away from each other. 29 | 30 | ![training](../imgs/metric-train.png) 31 | 32 | 33 | ## The Triplet Margin Miner 34 | 35 | For some triplets, the pre-trained model already performs well, i.e. 36 | 37 | the distance between the `anchor` embedding and `pos` is much smaller than 38 | the distance between `anchor` and `neg`. 39 | These triplets do not contribute to improving the model, since they are already in the desired relation to each other in the embedding space. 40 | A more effective way is to use only a subset of all triplets for model training. We call this subset the **hard** or **semi-hard negative samples**. 41 | 42 | ![mining](../imgs/mining.png) 43 | 44 | Let's say `1₀` is an `anchor`, `1₁` is the `pos` while `2₄` is the `neg`, and `D(x,y)` is the distance between the embeddings of `x` and `y`. 45 | 46 | If: 47 | 48 | + `D(anchor, neg) < D(anchor, pos) `, then `neg` can be considered as a "hard negative" (`2₄ - H`). 49 | + `D(anchor, pos) < D(anchor, neg) < D(anchor, pos) + margin`, where `neg` is a little further from the `pos`, but within the margin, then `neg` can be considered as a "semi-hard negative" (`2₄ - S`). 50 | + `D(anchor, neg) > D(anchor, pos) + margin`, then `neg` can be considered as "easy negative" (`2₄ - E`). 51 | 52 | Training is more effective when using only **hard** and **semi-hard** negatives, given a reasonable margin value to distinguish them from **easy** triplets. 
53 | 54 | ## Doing Negative Mining in Finetuner 55 | 56 | Finetuner is compatible with the miners provided by the [PyTorch Metric Learning](https://kevinmusgrave.github.io/pytorch-metric-learning) framework. 57 | To select a specific miner, pass its name to the `fit` function, e.g., `AngularMiner`, `TripletMarginMiner`, ... 58 | 59 | Please note that the miner has to be compatible with the loss function you selected. 60 | For instance, if you choose to train a model with the `TripletMarginLoss`, you can use the `TripletMarginMiner`. 61 | While without this miner, all possible triples with an anchor, a positive, and a negative candidate are used to calculate the loss, the miner reduces this set of triples. 62 | By default, the miner only selects triples with hard negatives where the distance between the positive and the negative example is inside a margin of `0.2`. 63 | To pass additional parameters to configure the miner, use the `miner_options` parameter of the fit function. 64 | For example, add the following to use only hard-negative triplets and set the margin to `0.3`: 65 | 66 | ```diff 67 | run = finetuner.fit( 68 | ..., 69 | loss='TripletMarginLoss', 70 | + miner='TripletMarginMiner', 71 | + miner_options={'margin': 0.3, 'type_of_triplets': 'hard'} 72 | ) 73 | ``` 74 | 75 | Possible choices for `type_of_triplets` are: 76 | 77 | + `easy`: Use all easy triplets - all triplets that do not violate the margin. 78 | + `semihard`: Use semi-hard triplets, but not hard triplets, i.e. those where difference in distance is within the specified margin. 79 | + `hard`: Use only hard triplets - the negative is closer to the anchor than the positive. 80 | + `all`: Use `hard` and `semihard` triples - all but the `easy` triples. 81 | 82 | Finetuner takes `TripletMarginLoss` as its default loss function with no negative mining. 
83 | For a detailed description of the miners and their parameters, see the [PyTorch Metric Learning documentation](https://kevinmusgrave.github.io/pytorch-metric-learning/miners/). 84 | 85 | ## Summary 86 | 87 | Metric Learning and triplets are extremely useful for fine-tuning models for similarity search. 88 | Easy triplets have little impact on improving the model. 89 | Consider using semi-hard/hard triplets for model tuning. -------------------------------------------------------------------------------- /docs/api-rst.rst: -------------------------------------------------------------------------------- 1 | ====================== 2 | :fab:`python` Python API 3 | ====================== 4 | 5 | This section includes the API documentation from the `Finetuner` codebase, as extracted from the `docstrings `_ in the code. 6 | 7 | :mod:`finetuner.__init__` - Finetuner 8 | -------------------- 9 | 10 | .. currentmodule:: finetuner.__init__ 11 | 12 | .. autosummary:: 13 | :nosignatures: 14 | :template: class.rst 15 | 16 | finetuner.login 17 | finetuner.describe_models 18 | finetuner.fit 19 | finetuner.list_callbacks 20 | finetuner.get_run 21 | finetuner.get_experiment 22 | finetuner.get_token 23 | finetuner.build_model 24 | finetuner.get_model 25 | finetuner.encode 26 | finetuner.list_runs 27 | finetuner.delete_run 28 | finetuner.delete_runs 29 | finetuner.create_experiment 30 | finetuner.list_experiments 31 | finetuner.delete_experiment 32 | finetuner.delete_experiments 33 | 34 | :mod:`finetuner.run.Run` - Run 35 | -------------------- 36 | 37 | .. currentmodule:: finetuner.run.Run 38 | 39 | .. 
autosummary:: 40 | :nosignatures: 41 | :template: class.rst 42 | 43 | finetuner.run.Run.name 44 | finetuner.run.Run.config 45 | finetuner.run.Run.status 46 | finetuner.run.Run.logs 47 | finetuner.run.Run.stream_logs 48 | finetuner.run.Run.save_artifact 49 | finetuner.run.Run.artifact_id 50 | 51 | 52 | :mod:`finetuner.experiment.Experiment` - Experiment 53 | -------------------- 54 | 55 | .. currentmodule:: finetuner.experiment.Experiment 56 | 57 | .. autosummary:: 58 | :nosignatures: 59 | :template: class.rst 60 | 61 | finetuner.experiment.Experiment.name 62 | finetuner.experiment.Experiment.create_run 63 | finetuner.experiment.Experiment.get_run 64 | finetuner.experiment.Experiment.list_runs 65 | finetuner.experiment.Experiment.delete_run 66 | finetuner.experiment.Experiment.delete_runs 67 | 68 | -------------------------------------------------------------------------------- /docs/get-started/how-it-works.md: -------------------------------------------------------------------------------- 1 | # {octicon}`question` How Does it Work? 2 | 3 | Finetuner is a framework for using the contrastive learning approach to improve similarity matching with models that encode data into embeddings. 4 | This involves three steps: 5 | 6 | ## Step 1: Build an embedding model 7 | 8 | Finetuner takes an existing, pre-trained model, typically called the __backbone__, and analyzes its architecture. 9 | If this model does not already produce embeddings or the architecture is not suitable for training, Finetuner is able to remove the default *head* (the last layers of the network), add new projection layers, apply *pooling*, and freeze layers that do not need to be trained. 10 | 11 | For instance, Finetuner will turn an image classification model, e.g., for separating cats from dogs, into an *embedding model* 12 | by removing its last layer - the classification head (cat-dog classifier). 
13 | 14 | This embedding model does not make predictions or output a probability, 15 | but instead outputs a feature vector (an __embedding__) that represents its input. 16 | 17 | ## Step 2: Tuple/Triplet construction 18 | 19 | ````{tab} Uni-modal (with label) 20 | Finetuner works on labeled data. 21 | It expects either a CSV file or a {class}`~docarray.array.document.DocumentArray` consisting of {class}`~docarray.document.Document`s where each one contains `finetuner_label` corresponding to the class of a specific training example. After receiving a CSV file, its contents are parsed and a {class}`~docarray.array.document.DocumentArray` is constructed. 22 | 23 | During the fine-tuning, Finetuner creates Triplets `(anchor, positive, negative)` on-the-fly. 24 | For each anchor, 25 | which can be any training example, 26 | Finetuner looks for a `Document` with the same `finetuner_label` (positive), 27 | and a `Document` with a different `finetuner_label` (negative). 28 | The objective is to pull `Document`s which belong to the same class together, 29 | while pushing the `Document`s which belong to a different class away from each other. 30 | ```` 31 | ````{tab} Cross-modal (without label) 32 | Finetuner works on unlabeled text-image pairs. 33 | You can fine-tune a CLIP-like model for text to images search directly without any labels. 34 | It expects either a CSV file or a {class}`~docarray.array.document.DocumentArray` consisting a list of {class}`~docarray.array.document.Document` that contain two chunks: an image chunk and a text chunk. 35 | 36 | During fine-tuning, Finetuner leverages text-image pairs and jointly optimizes two models (`CLIPTextEncoder` and `CLIPImageEncoder`) with respect to two classification losses: (1) given a text, find the best matching 37 | image and (2) given an image, find the best matching text. Then it aggregates the two losses into the `CLIPLoss`. 
38 | At the end, the output embedding of your data from the `CLIPTextEncoder` is comparable to `CLIPImageEncoder`. 39 | ```` 40 | 41 | ## Step 3: Tuning in the cloud 42 | 43 | From an operational perspective, 44 | we have hidden all the complexity of machine learning algorithms and resource configuration (such as GPUs). 45 | All you need to do is decide on your backbone model and prepare your training data. 46 | 47 | Once you have logged in to the Jina Ecosystem with {meth}`~finetuner.login()`, 48 | Finetuner will push your training data into the *Jina AI Cloud* (only visible to you). 49 | At the same time, we will spin-up an isolated computational resource 50 | with proper memory, CPU, and a GPU dedicated to your fine-tuning job. 51 | 52 | Once fine-tuning is done, Finetuner will push your fine-tuned model to the *Jina AI Cloud* 53 | and make it available for you to download. 54 | That's it! 55 | 56 | On the other hand, 57 | if you have a certain level of machine learning knowledge, 58 | Finetuner gives you enough flexibility to adjust the training parameters. 59 | This will be explained in a later section. 60 | -------------------------------------------------------------------------------- /docs/get-started/installation.md: -------------------------------------------------------------------------------- 1 | (install-finetuner)= 2 | # {octicon}`desktop-download` Installation 3 | 4 | ![PyPI](https://img.shields.io/pypi/v/finetuner?color=%23ffffff&label=%20) is the latest version. 
5 | 6 | Make sure you have `Python 3.8+` installed on Linux/Mac/Windows: 7 | 8 | ```bash 9 | pip install -U finetuner 10 | ``` 11 | 12 | If you want to submit a fine-tuning job on the cloud, please use: 13 | 14 | ```bash 15 | pip install "finetuner[full]" 16 | ``` 17 | 18 | To check your installation run: 19 | ```bash 20 | pip show finetuner 21 | ``` -------------------------------------------------------------------------------- /docs/get-started/pretrained.md: -------------------------------------------------------------------------------- 1 | (pretrained-models)= 2 | # {octicon}`rocket` Jina Embeddings 3 | 4 | Starting with Finetuner 0.8.0, 5 | we have introduced a suite of pre-trained text embedding models licensed under Apache 2.0. 6 | These models have a variety of use cases, including information retrieval, semantic textual similarity, text reranking, and more. 7 | The suite consists of the following models: 8 | 9 | - `jina-embedding-t-en-v1` [**[Huggingface](https://huggingface.co/jinaai/jina-embedding-t-en-v1)**]: The fastest embedding model in the world with 14 million parameters. 10 | - `jina-embedding-s-en-v1` [**[Huggingface](https://huggingface.co/jinaai/jina-embedding-s-en-v1)**]: This is a compact model with just 35 million parameters, that performs lightning-fast inference while delivering impressive performance. 11 | - `jina-embedding-b-en-v1` [**[Huggingface](https://huggingface.co/jinaai/jina-embedding-b-en-v1)**]: This model has a size of 110 million parameters, performs fast inference and delivers better performance than our smaller model. 12 | - `jina-embedding-l-en-v1` [**[Huggingface](https://huggingface.co/jinaai/jina-embedding-l-en-v1)**]: This is a relatively large model with a size of 330 million parameters, that performs single-gpu inference and delivers better performance than the other models. 
13 | 14 | ## Usage 15 | 16 | ```python 17 | import finetuner 18 | 19 | model = finetuner.build_model('jinaai/jina-embedding-s-en-v1') 20 | embeddings = finetuner.encode( 21 | model=model, 22 | data=['how is the weather today', 'What is the current weather like today?'] 23 | ) 24 | print(finetuner.cos_sim(embeddings[0], embeddings[1])) 25 | ``` 26 | 27 | ## Training Data 28 | 29 | Jina Embeddings is a suite of language models that have been trained using Jina AI's Linnaeus-Clean dataset. 30 | This dataset consists of 380 million query-document pairs of sentences. 31 | These pairs were obtained from various domains and were carefully selected through a thorough cleaning process. 32 | The Linnaeus-Full dataset, from which the Linnaeus-Clean dataset is derived, originally contained 1.6 billion sentence pairs. 33 | 34 | ## Characteristics 35 | 36 | Each Jina embedding model can encode up to 512 tokens, 37 | with any further tokens being truncated. 38 | The models have different output dimensionalities, as shown in the table below: 39 | 40 | | Name | param |context| Dimension | 41 | |------------------------|-------|------|-----------| 42 | | jina-embedding-t-en-v1 | 14m |512| 312 | 43 | | jina-embedding-s-en-v1 | 35m |512| 512 | 44 | | jina-embedding-b-en-v1 | 110m |512| 768 | 45 | | jina-embedding-l-en-v1 | 330m |512| 1024 | 46 | 47 | ## Performance 48 | 49 | Please refer to the [Huggingface](https://huggingface.co/jinaai/jina-embedding-s-en-v1) page. 
50 | 51 | ## Citations 52 | 53 | If you find Jina Embeddings useful in your research, please cite the following paper: 54 | 55 | ```text 56 | @misc{günther2023jina, 57 | title={Jina Embeddings: A Novel Set of High-Performance Sentence Embedding Models}, 58 | author={Michael Günther and Louis Milliken and Jonathan Geuter and Georgios Mastrapas and Bo Wang and Han Xiao}, 59 | year={2023}, 60 | eprint={2307.11224}, 61 | archivePrefix={arXiv}, 62 | primaryClass={cs.CL} 63 | } 64 | 65 | ``` 66 | -------------------------------------------------------------------------------- /docs/html_extra/robots.txt: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | sitemap: https://finetuner.jina.ai/sitemap.xml -------------------------------------------------------------------------------- /docs/imgs/DocumentArray_plot_image_sprites.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jina-ai/finetuner/69ae77cb51c736e791126792de20015a70658b53/docs/imgs/DocumentArray_plot_image_sprites.png -------------------------------------------------------------------------------- /docs/imgs/DocumentArray_summary.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jina-ai/finetuner/69ae77cb51c736e791126792de20015a70658b53/docs/imgs/DocumentArray_summary.png -------------------------------------------------------------------------------- /docs/imgs/Document_display.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jina-ai/finetuner/69ae77cb51c736e791126792de20015a70658b53/docs/imgs/Document_display.png -------------------------------------------------------------------------------- /docs/imgs/Document_summary.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/jina-ai/finetuner/69ae77cb51c736e791126792de20015a70658b53/docs/imgs/Document_summary.png -------------------------------------------------------------------------------- /docs/imgs/SphereFace-training.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jina-ai/finetuner/69ae77cb51c736e791126792de20015a70658b53/docs/imgs/SphereFace-training.png -------------------------------------------------------------------------------- /docs/imgs/batch-sampling.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jina-ai/finetuner/69ae77cb51c736e791126792de20015a70658b53/docs/imgs/batch-sampling.png -------------------------------------------------------------------------------- /docs/imgs/distributions-loss.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jina-ai/finetuner/69ae77cb51c736e791126792de20015a70658b53/docs/imgs/distributions-loss.png -------------------------------------------------------------------------------- /docs/imgs/metric-train.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jina-ai/finetuner/69ae77cb51c736e791126792de20015a70658b53/docs/imgs/metric-train.png -------------------------------------------------------------------------------- /docs/imgs/mining.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jina-ai/finetuner/69ae77cb51c736e791126792de20015a70658b53/docs/imgs/mining.png -------------------------------------------------------------------------------- /docs/index.md: -------------------------------------------------------------------------------- 1 | # Welcome to Finetuner! 
2 | 3 | ```{include} ../README.md 4 | :start-after: 5 | :end-before: 6 | ``` 7 | 8 | ```{include} ../README.md 9 | :start-after: 10 | :end-before: 11 | ``` 12 | 13 | ```{include} ../README.md 14 | :start-after: 15 | :end-before: 16 | ``` 17 | 18 | ```{include} ../README.md 19 | :start-after: 20 | :end-before: 21 | ``` 22 | 23 | ```{toctree} 24 | :caption: Get Started 25 | :hidden: 26 | 27 | get-started/how-it-works 28 | get-started/installation 29 | get-started/pretrained 30 | walkthrough/index 31 | ``` 32 | 33 | ```{toctree} 34 | :caption: Advanced Topics 35 | :hidden: 36 | 37 | advanced-topics/budget 38 | advanced-topics/negative-mining 39 | advanced-topics/using-callbacks 40 | advanced-topics/linear-probe 41 | advanced-topics/advanced-losses-optimizers-and-poolers 42 | advanced-topics/finetuner-executor 43 | ``` 44 | 45 | 46 | 47 | ```{toctree} 48 | :caption: Finetuning Tasks 49 | :hidden: 50 | 51 | notebooks/text_to_text 52 | notebooks/image_to_image 53 | notebooks/image_to_image_arcface 54 | notebooks/text_to_image 55 | notebooks/multilingual_text_to_image 56 | notebooks/mesh_to_mesh 57 | notebooks/data_synthesis 58 | ``` 59 | 60 | ```{toctree} 61 | :caption: Developer Reference 62 | :hidden: 63 | :maxdepth: 1 64 | 65 | api-rst 66 | ``` 67 | 68 | --- 69 | {ref}`genindex` | {ref}`modindex` 70 | -------------------------------------------------------------------------------- /docs/make.bat: -------------------------------------------------------------------------------- 1 | @ECHO OFF 2 | 3 | pushd %~dp0 4 | 5 | REM Command file for Sphinx documentation 6 | 7 | if "%SPHINXBUILD%" == "" ( 8 | set SPHINXBUILD=sphinx-build 9 | ) 10 | set SOURCEDIR=. 11 | set BUILDDIR=build 12 | 13 | if "%1" == "" goto help 14 | 15 | %SPHINXBUILD% >NUL 2>NUL 16 | if errorlevel 9009 ( 17 | echo. 18 | echo.The 'sphinx-build' command was not found. 
Make sure you have Sphinx 19 | echo.installed, then set the SPHINXBUILD environment variable to point 20 | echo.to the full path of the 'sphinx-build' executable. Alternatively you 21 | echo.may add the Sphinx directory to PATH. 22 | echo. 23 | echo.If you don't have Sphinx installed, grab it from 24 | echo.https://www.sphinx-doc.org/ 25 | exit /b 1 26 | ) 27 | 28 | %SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% 29 | goto end 30 | 31 | :help 32 | %SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% 33 | 34 | :end 35 | popd 36 | -------------------------------------------------------------------------------- /docs/makedoc.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | set -ex 4 | 5 | if [[ $1 == "local-only" ]]; then 6 | rm -rf api && make clean 7 | make dirhtml 8 | else 9 | export NUM_RELEASES=${NUM_RELEASES:-5} 10 | export DEFAULT_BRANCH='main' 11 | export BUILD_DIR=_build/dirhtml 12 | 13 | declare -a ARR_SMV_TAG_WHITELIST=() 14 | declare -a ARR_SMV_BRANCH_WHITELIST=() 15 | 16 | rm -rf api && rm -rf ${BUILD_DIR} 17 | 18 | # Might error out with "API Limit exceeds" on local (would need api token), but on CI shouldn't face issues. 
19 | declare -a LAST_N_TAGS=( $(curl -s -H "Accept: application/vnd.github.v3+json" \ 20 | "https://api.github.com/repos/jina-ai/finetuner/releases?per_page=${NUM_RELEASES}" \ 21 | | jq -r '.[].tag_name') ) 22 | 23 | export LATEST_FINETUNER_VERSION="${LAST_N_TAGS[0]}" 24 | 25 | if [[ $1 == "development" ]]; then 26 | current_branch=$(git branch --show-current) 27 | if [[ ${current_branch} != ${DEFAULT_BRANCH} ]]; then 28 | ARR_SMV_BRANCH_WHITELIST+=" ${current_branch}" 29 | fi 30 | fi 31 | 32 | ARR_SMV_BRANCH_WHITELIST+=" ${DEFAULT_BRANCH}" 33 | ARR_SMV_TAG_WHITELIST+=" ${LAST_N_TAGS[@]}" 34 | export SMV_BRANCH_WHITELIST="${ARR_SMV_BRANCH_WHITELIST}" 35 | export SMV_TAG_WHITELIST="${ARR_SMV_TAG_WHITELIST}" 36 | 37 | echo -e "Latest Finetuner Version: ${LATEST_FINETUNER_VERSION}" 38 | echo -e "Branches to whitelist: ${SMV_BRANCH_WHITELIST}" 39 | echo -e "Tags to whitelist: ${SMV_TAG_WHITELIST}" 40 | 41 | sphinx-multiversion . ${BUILD_DIR} -b dirhtml 42 | mv -v _build/dirhtml/${LATEST_FINETUNER_VERSION}/* _build/dirhtml 43 | fi -------------------------------------------------------------------------------- /docs/requirements.txt: -------------------------------------------------------------------------------- 1 | furo 2 | gitpython==3.1.13 3 | git+https://github.com/Holzhaus/sphinx-multiversion.git 4 | jupytext==1.14.1 5 | sphinx 6 | myst-parser==0.15.1 7 | nbsphinx==0.8.9 8 | sphinx-argparse==0.3.1 9 | sphinx-design 10 | sphinx-inline-tabs 11 | sphinx-autodoc-typehints==1.12.0 12 | sphinxext-opengraph 13 | sphinx-notfound-page==0.7.1 14 | sphinx-sitemap==2.2.0 15 | sphinx_copybutton==0.4.0 16 | sphinx_markdown_tables==0.0.16 17 | sphinxcontrib-apidoc==0.3.0 18 | -------------------------------------------------------------------------------- /docs/walkthrough/basic-concepts.md: -------------------------------------------------------------------------------- 1 | (experiment-and-runs)= 2 | # Basic Concepts 3 | 4 | Finetuner organizes your training based on two 
concepts: 5 | {class}`~finetuner.experiment.Experiment` and {class}`~finetuner.run.Run`. 6 | 7 | An Experiment defines the machine learning task you're fine-tuning for. 8 | A Run refers to a single execution of the Experiment with a specific configuration. 9 | An Experiment contains a list of Runs, each with different configurations. 10 | For example: 11 | 12 | + Experiment: Fine-tune a transformer on the QuoraQA dataset. 13 | - Run1: Use bert-based model. 14 | - Run2: Use sentence-transformer model. 15 | + Experiment: Fine-tune ResNet on WILD dataset. 16 | - Run1: Use ResNet18 with learning rate 0.01 and SGD optimizer. 17 | - Run2: Use ResNet50 with learning rate 0.01 and SGD optimizer. 18 | - Run3: Use ResNet50 with learning rate 0.0001 and Adam optimizer. 19 | 20 | All information and data produced while using Finetuner is linked to those two concepts. 21 | Each Experiment and each Run has a name. 22 | The name of the Experiment should be unique and the name of the Run is also required 23 | to be unique for each Experiment. 24 | Thus, if you want to retrieve the logs of a run or download the fine-tuned model later 25 | on, you can do this with the respective experiment and run names, as explained in section 26 | {doc}`/walkthrough/save-model`. 27 | 28 | When you start the fine-tuning job, you can declare the `experiment_name` and `run_name` like this: 29 | 30 | ```python 31 | import finetuner 32 | 33 | finetuner.fit( 34 | ..., 35 | experiment_name='quora-qa-finetune', 36 | run_name='quora-qa-finetune-bert', 37 | ) 38 | ``` 39 | 40 | Please note that these two arguments are optional. 41 | If not supplied, 42 | Finetuner will use the current working directory as a default `experiment_name`, 43 | and generate a random `run_name` for you, e.g., "infallible-colden". 
-------------------------------------------------------------------------------- /docs/walkthrough/choose-backbone.md: -------------------------------------------------------------------------------- 1 | (choose-backbone)= 2 | # Backbone Model 3 | 4 | Finetuner provides several widely used backbone models, 5 | including `resnet`, `efficientnet`, `clip` and `bert`. 6 | Thereby, for most of them, Finetuner provides multiple variants, e.g., the common `resnet50 ` and the more complex `resnet152` model. 7 | 8 | Finetuner will convert these backbone models to embedding models by removing 9 | the *head* or applying *pooling*, 10 | performing fine-tuning and producing the final embedding model. 11 | The embedding model can be fine-tuned for text-to-text, image-to-image or text-to-image 12 | search tasks. 13 | 14 | You can call: 15 | ````{tab} text-to-text 16 | ```python 17 | import finetuner 18 | 19 | finetuner.describe_models(task='text-to-text') 20 | ``` 21 | ```` 22 | ````{tab} image-to-image 23 | ```python 24 | import finetuner 25 | 26 | finetuner.describe_models(task='image-to-image') 27 | ``` 28 | ```` 29 | ````{tab} text-to-image 30 | ```python 31 | import finetuner 32 | 33 | finetuner.describe_models(task='text-to-image') 34 | ``` 35 | ```` 36 | ````{tab} mesh-to-mesh 37 | ```python 38 | import finetuner 39 | 40 | finetuner.describe_models(task='mesh-to-mesh') 41 | ``` 42 | ```` 43 | 44 | to get a list of supported models: 45 | 46 | ````{tab} text-to-text 47 | ```bash 48 | Finetuner backbones: text-to-text 49 | ┏━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━┳━━━━━━━━━━━━┳━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┓ 50 | ┃ name ┃ task ┃ output_dim ┃ architecture ┃ description ┃ 51 | ┡━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━╇━━━━━━━━━━━━╇━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┩ 52 | │ jina-embedding-t-en-v1 │ text-to-text │ 312 │ transformer │ Text embedding model trained using 
Linnaeus-Clean dataset by Jina AI │ 53 | │ jina-embedding-s-en-v1 │ text-to-text │ 512 │ transformer │ Text embedding model trained using Linnaeus-Clean dataset by Jina AI │ 54 | │ jina-embedding-b-en-v1 │ text-to-text │ 768 │ transformer │ Text embedding model trained using Linnaeus-Clean dataset by Jina AI │ 55 | │ jina-embedding-l-en-v1 │ text-to-text │ 1024 │ transformer │ Text embedding model trained using Linnaeus-Clean dataset by Jina AI │ 56 | │ bert-base-en │ text-to-text │ 768 │ transformer │ BERT model pre-trained on BookCorpus and English Wikipedia │ 57 | │ bert-base-multi │ text-to-text │ 768 │ transformer │ BERT model pre-trained on multilingual Wikipedia │ 58 | │ distiluse-base-multi │ text-to-text │ 512 │ transformer │ Knowledge distilled version of the multilingual Sentence Encoder │ 59 | │ sbert-base-en │ text-to-text │ 768 │ transformer │ Pretrained BERT, fine-tuned on MS Marco │ 60 | └────────────────────────┴──────────────┴────────────┴──────────────┴─────────────────────────────────────────────────────────────────────────┘ 61 | ``` 62 | ```` 63 | ````{tab} image-to-image 64 | ```bash 65 | Finetuner backbones: image-to-image 66 | ┏━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━┳━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┓ 67 | ┃ name ┃ task ┃ output_dim ┃ architecture ┃ description ┃ 68 | ┡━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━╇━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┩ 69 | │ efficientnet-base │ image-to-image │ 1792 │ cnn │ EfficientNet B4 pre-trained on ImageNet │ 70 | │ efficientnet-large │ image-to-image │ 2560 │ cnn │ EfficientNet B7 pre-trained on ImageNet │ 71 | │ resnet-large │ image-to-image │ 2048 │ cnn │ ResNet152 pre-trained on ImageNet │ 72 | │ resnet-base │ image-to-image │ 2048 │ cnn │ ResNet50 pre-trained on ImageNet │ 73 | └────────────────────┴────────────────┴────────────┴──────────────┴─────────────────────────────────────────┘ 74 | ``` 75 | ```` 76 | ````{tab} text-to-image 77 | 
```bash 78 | Finetuner backbones: text-to-image 79 | ┏━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━┳━━━━━━━━━━━━┳━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┓ 80 | ┃ name ┃ task ┃ output_dim ┃ architecture ┃ description ┃ 81 | ┡━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━╇━━━━━━━━━━━━╇━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┩ 82 | │ clip-base-en │ text-to-image │ 512 │ transformer │ CLIP base model │ 83 | │ clip-large-en │ text-to-image │ 1024 │ transformer │ CLIP large model with patch size 14 │ 84 | │ clip-base-multi │ text-to-image │ 512 │ transformer │ Open MCLIP │ 85 | │ │ │ │ │ "xlm-roberta-base-ViT-B-32::laion5b_s13b_b90k" model │ 86 | └─────────────────┴───────────────┴────────────┴──────────────┴───────────────────────────────────────────────────────┘ 87 | ``` 88 | ```` 89 | ````{tab} mesh-to-mesh 90 | ```bash 91 | Finetuner backbones: mesh-to-mesh 92 | ┏━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━┳━━━━━━━━━━━━┳━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┓ 93 | ┃ name ┃ task ┃ output_dim ┃ architecture ┃ description ┃ 94 | ┡━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━╇━━━━━━━━━━━━╇━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┩ 95 | │ pointnet-base │ mesh-to-mesh │ 512 │ pointnet │ PointNet++ embedding model for 3D mesh point clouds │ 96 | └───────────────┴──────────────┴────────────┴──────────────┴─────────────────────────────────────────────────────┘ 97 | ``` 98 | ```` 99 | 100 | + ResNets are suitable for image-to-image search tasks with high performance requirements, where `resnet152` is bigger and requires higher computational resources than `resnet50`. 101 | + EfficientNets are suitable for image-to-image search tasks with low training and inference times. The model is more light-weighted than ResNet. Here, `efficientnet_b4` is the bigger and more complex model. 102 | + CLIP is the one for text-to-image search, where the images do not need to have any text descriptors. 
103 | + BERT is generally suitable for text-to-text search tasks. 104 | + Msmarco-distilbert-base-v3 is designed for matching web search queries to short text passages and is a suitable backbone for similar text-to-text search tasks. 105 | + PointNet++ is an embedding model, which we derived from the popular [PointNet++ model](https://proceedings.neurips.cc/paper/2017/file/d8bf84be3800d12f74d8b05e9b89836f-Paper.pdf). 106 | The original model is designed for classifying 3D meshes. Our derived model can be used to encode meshes into vectors for search. 107 | 108 | It should be noted that: 109 | 110 | + ResNet/EfficientNet models are loaded from the [torchvision](https://pytorch.org/vision/stable/index.html) library. 111 | + Transformer-based models are loaded from the huggingface [transformers](https://github.com/huggingface/transformers) library. 112 | + `msmarco-distilbert-base-v3` has been fine-tuned once by [sentence-transformers](https://www.sbert.net/) on the [MS MARCO](https://microsoft.github.io/msmarco/) dataset on top of BERT. -------------------------------------------------------------------------------- /docs/walkthrough/index.md: -------------------------------------------------------------------------------- 1 | # {octicon}`list-ordered` Walkthrough 2 | 3 | Why do I need Finetuner? 4 | 5 | Because search quality matters. 6 | 7 | When you bring a pre-trained model to encode your data to embeddings, you are likely to get irrelevant search results. 8 | Pre-trained deep learning models are usually trained on large-scale datasets, that have a different *data distribution* over your own datasets or domains. 9 | This is referred to as a *distribution shift*. 10 | 11 | Finetuner provides a solution to this problem by leveraging a pre-trained model from a large dataset and fine-tuning the parameters of 12 | this model on your dataset. 13 | 14 | Once fine-tuning is done, you get a model adapted to your domain. 
This new model delivers better search performance on your task of interest. 15 | 16 | Fine-tuning a pre-trained model involves a certain complexity and requires Machine Learning plus domain knowledge (on NLP, Computer Vision, etc.). 17 | Thus, it is a non-trivial task for business owners and engineers who lack practical deep-learning knowledge. Finetuner attempts 18 | to address this by providing a simple interface, which can be as easy as: 19 | 20 | ```python 21 | import finetuner 22 | from finetuner import DocumentArray 23 | 24 | # Login to Jina AI Cloud 25 | finetuner.login() 26 | 27 | # Prepare training data 28 | train_data = DocumentArray(...) 29 | 30 | # Fine-tune in the cloud 31 | run = finetuner.fit( 32 | model='resnet50', train_data=train_data, epochs=5, batch_size=128, 33 | ) 34 | 35 | print(run.name) 36 | for log_entry in run.stream_logs(): 37 | print(log_entry) 38 | 39 | # When ready 40 | run.save_artifact(directory='experiment') 41 | ``` 42 | 43 | You should see this in your terminal: 44 | 45 | ```bash 46 | 🔐 Successfully logged in to Jina AI as [USER NAME]! 47 | Run name: vigilant-tereshkova 48 | Run logs: 49 | 50 | Training [2/2] ━━━━━━━━━━━━━━━━━━━━━━━━━━━ 50/50 0:00:00 0:01:08 • loss: 0.050 51 | [09:13:23] INFO [__main__] Done ✨ __main__.py:214 52 | INFO [__main__] Saving fine-tuned models ... __main__.py:217 53 | INFO [__main__] Saving model 'tuned_model' in __main__.py:228 54 | /usr/src/app/tuned-models/model ... 55 | INFO [__main__] Pushing saved model to Hubble ... __main__.py:232 56 | [09:13:54] INFO [__main__] Pushed model artifact ID: __main__.py:238 57 | '62972acb5de25a53fdbfcecc' 58 | INFO [__main__] Finished 🚀 __main__.py:240 59 | ``` 60 | 61 | Submitted fine-tuning jobs run efficiently on the Jina AI Cloud on either CPU or GPU enabled hardware. 62 | 63 | Finetuner fully owns the complexity of setting up and maintaining the model training infrastructure plus the complexity of delivering SOTA training methods to production use cases. 
64 | 65 | Please check out the following steps for more information: 66 | 67 | 68 | ```{toctree} 69 | basic-concepts 70 | login 71 | create-training-data 72 | choose-backbone 73 | run-job 74 | save-model 75 | inference 76 | ``` -------------------------------------------------------------------------------- /docs/walkthrough/inference.md: -------------------------------------------------------------------------------- 1 | # Inference 2 | 3 | Once fine-tuning is finished, it's time to actually use the model. 4 | You can use the fine-tuned models directly to encode DocumentArray objects or to set up an encoding service. 5 | When encoding, data can also be provided as a regular list. 6 | 7 | ```{admonition} Use FinetunerExecutor inside a Jina Flow 8 | :class: hint 9 | Finetuner offers the {class}`~finetuner.encode` interface to embed your data locally 10 | If you would like to use fine-tuned model inside a Jina Flow as an Executor, checkout 11 | {doc}`/advanced-topics/finetuner-executor`. 12 | ``` 13 | 14 | (integrate-with-list)= 15 | ## Encoding a List 16 | Data that is stored in a regular list can be embedded in the same way you would embed a DocumentArray. 17 | Since the modality of your input data can be inferred from the model being used, there is no need to provide any additional information besides the content you want to encode. 
18 | When providing data as a list, the `finetuner.encode` method will return a `np.ndarray` of embeddings, instead of a `docarray.DocumentArray`: 19 | 20 | ````{tab} Artifact id and token 21 | ```python 22 | import finetuner 23 | 24 | finetuner.login() 25 | 26 | token = finetuner.get_token() 27 | run = finetuner.get_run( 28 | experiment_name='YOUR-EXPERIMENT', 29 | run_name='YOUR-RUN' 30 | ) 31 | 32 | model = finetuner.get_model( 33 | run.artifact_id, 34 | token=token, 35 | ) 36 | 37 | texts = ['some text to encode'] 38 | 39 | embeddings = finetuner.encode(model=model, data=texts) 40 | 41 | for text, embedding in zip(texts, embeddings): 42 | print(f'Text of the returned document: {text}') 43 | print(f'Shape of the embedding: {embedding.shape}') 44 | ``` 45 | ```` 46 | ````{tab} Locally saved artifact 47 | ```python 48 | import finetuner 49 | 50 | model = finetuner.get_model('/path/to/YOUR-MODEL.zip') 51 | 52 | texts = ['some text to encode'] 53 | 54 | embeddings = finetuner.encode(model=model, data=texts) 55 | 56 | for text, embedding in zip(texts, embeddings): 57 | print(f'Text of the returned document: {text}') 58 | print(f'Shape of the embedding: {embedding.shape}') 59 | ``` 60 | ```` 61 | ````{tab} (Special case) CLIP inference 62 | ```python 63 | import finetuner 64 | 65 | finetuner.login() 66 | 67 | token = finetuner.get_token() 68 | run = finetuner.get_run( 69 | experiment_name='YOUR-EXPERIMENT', 70 | run_name='YOUR-RUN' 71 | ) 72 | 73 | model = finetuner.get_model( 74 | run.artifact_id, 75 | token=token, 76 | select_model='clip-text' # use `clip-vision` to encode image. 
77 | ) 78 | 79 | texts = ['some text to encode'] 80 | embeddings = finetuner.encode(model=model, data=texts) 81 | 82 | for text, embedding in zip(texts, embeddings): 83 | print(f'Text of the returned document: {text}') 84 | print(f'Shape of the embedding: {embedding.shape}') 85 | ``` 86 | ```` 87 | 88 | 89 | ```{admonition} Inference with ONNX 90 | :class: tip 91 | In case you set `to_onnx=True` when calling `finetuner.fit` function, 92 | please use `model = finetuner.get_model('/path/to/YOUR-MODEL.zip', is_onnx=True)`. 93 | ``` 94 | 95 | ```{admonition} Encoding other Modalities 96 | :class: tip 97 | Of course you can not only encode texts. 98 | For encoding a list of images, you can provide URIs, e.g., 99 | `embeddings = finetuner.encode(model=model, data=['path/to/apple.png'])`. 100 | ``` 101 | 102 | (integrate-with-docarray)= 103 | ## Encoding a DocumentArray 104 | 105 | To embed a DocumentArray with a fine-tuned model, you can get the model of your Run via the {func}`~finetuner.get_model` function and embed it via the {func}`finetuner.encode` function: 106 | 107 | ````{tab} Artifact id and token 108 | ```python 109 | from finetuner import DocumentArray, Document 110 | import finetuner 111 | 112 | finetuner.login() 113 | 114 | token = finetuner.get_token() 115 | run = finetuner.get_run( 116 | experiment_name='YOUR-EXPERIMENT', 117 | run_name='YOUR-RUN' 118 | ) 119 | 120 | model = finetuner.get_model( 121 | run.artifact_id, 122 | token=token, 123 | ) 124 | 125 | da = DocumentArray([Document(text='some text to encode')]) 126 | finetuner.encode(model=model, data=da) 127 | 128 | for doc in da: 129 | print(f'Text of the returned document: {doc.text}') 130 | print(f'Shape of the embedding: {doc.embedding.shape}') 131 | ``` 132 | ```` 133 | ````{tab} Locally saved artifact 134 | ```python 135 | from finetuner import DocumentArray, Document 136 | import finetuner 137 | 138 | model = finetuner.get_model('/path/to/YOUR-MODEL.zip') 139 | 140 | da = 
DocumentArray([Document(text='some text to encode')]) 141 | finetuner.encode(model=model, data=da) 142 | 143 | for doc in da: 144 | print(f'Text of the returned document: {doc.text}') 145 | print(f'Shape of the embedding: {doc.embedding.shape}') 146 | ``` 147 | ```` 148 | ````{tab} (Special case) CLIP inference 149 | ```python 150 | from finetuner import DocumentArray, Document 151 | import finetuner 152 | 153 | finetuner.login() 154 | 155 | token = finetuner.get_token() 156 | run = finetuner.get_run( 157 | experiment_name='YOUR-EXPERIMENT', 158 | run_name='YOUR-RUN' 159 | ) 160 | 161 | model = finetuner.get_model( 162 | run.artifact_id, 163 | token=token, 164 | select_model='clip-text' # use `clip-vision` to encode image. 165 | ) 166 | 167 | da = DocumentArray([Document(text='some text to encode')]) 168 | finetuner.encode(model=model, data=da) 169 | 170 | for doc in da: 171 | print(f'Text of the returned document: {doc.text}') 172 | print(f'Shape of the embedding: {doc.embedding.shape}') 173 | ``` 174 | ```` 175 | 176 | ```console 177 | Text of the returned document: some text to encode 178 | Shape of the embedding: (768,) 179 | ``` 180 | -------------------------------------------------------------------------------- /docs/walkthrough/login.md: -------------------------------------------------------------------------------- 1 | (login-to-jina-ecosystem)= 2 | # Login 3 | 4 | Since Finetuner leverages cloud resources for fine-tuning, 5 | you are required to {meth}`~finetuner.login()` and obtain a token from Jina before starting a fine-tuning job. 6 | It is as simple as: 7 | 8 | ```python 9 | import finetuner 10 | 11 | finetuner.login() 12 | ``` 13 | 14 | A browser window should pop up with different login options. 15 | After {meth}`~finetuner.login()` you will see the following message in your terminal: 16 | 17 | ```bash 18 | 🔐 Successfully logged in to Jina AI as [USER NAME]! 
19 | ``` 20 | 21 | Now, an authentication token is generated which can be read with the {func}`~finetuner.get_token` function. 22 | If you have been logged in before, the existing token will not be overwritten, however, if you want this to happen, you can set the `force` attribute in the login function to true. 23 | 24 | ``` 25 | finetuner.login(force=True) 26 | ``` 27 | 28 | ```{admonition} Why do I need to login? 29 | :class: hint 30 | Login is required since Finetuner needs to push your {class}`~docarray.array.document.DocumentArray` or CSV file into the Jina AI Cloud as training or evaluation data. 31 | Once you have successfully logged in, your training data will be linked to your personal user profile and will only be visible to you. 32 | 33 | Once fine-tuning is completed, the fine-tuned model will be visible only to you in the Jina AI Cloud. 34 | ``` -------------------------------------------------------------------------------- /docs/walkthrough/save-model.md: -------------------------------------------------------------------------------- 1 | (retrieve-tuned-model)= 2 | # Save Artifact 3 | 4 | Perfect! 5 | Now, you have started the fine-tuning job in the Jina AI Cloud. 6 | When the fine-tuning job is finished, the resulting model is automatically stored under your Jina account in the Jina AI Cloud. 7 | Next, we can get its artifact id and download the model. 8 | 9 | ```{admonition} Managing fine-tuned models 10 | :class: hint 11 | To use a fine-tuned model in a Jina service running on [JCloud](https://github.com/jina-ai/jcloud), you do not need to download it. 12 | Each model has a artifact id, which is sufficient to setup an encoding serivce as explained in the section {doc}`/walkthrough/integrate-with-jina`. 13 | Alternatively, you can also download the model using the artifact id, as explained below, e.g., to use it in a locally runnig Jina service. 14 | ``` 15 | 16 | Please note that fine-tuning takes time. 
It highly depends on the size of your training data, evaluation data, and other hyperparameters. 17 | Because of this, you might have to close the session and reconnect to it several times. 18 | 19 | In the example below, we show how to connect to an existing run and download a tuned model: 20 | 21 | ```python 22 | import finetuner 23 | 24 | finetuner.login() 25 | 26 | # connect to the run we created previously. 27 | run = finetuner.get_run( 28 | run_name='finetune-flickr-dataset-efficientnet-1', 29 | experiment_name='finetune-flickr-dataset', 30 | ) 31 | print(f'Run status: {run.status()}') 32 | print(f'Run artifact id: {run.artifact_id}') 33 | ``` 34 | 35 | You can monitor your run status in two ways: 36 | 37 | 1. Log streaming: Pull logs from Jina AI Cloud lively, suitable for small fine-tuning tasks. 38 | 2. Query logs: Pull up-to-date logs from Jina AI Cloud, suitable for long-running tasks. 39 | 40 | ````{tab} Stream logs 41 | ```python 42 | for entry in run.stream_logs(): 43 | print(entry) 44 | ``` 45 | ```` 46 | ````{tab} Query logs 47 | ```python 48 | print(run.status()) 49 | print(run.logs()) 50 | ``` 51 | ```` 52 | 53 | Once run status is `FINISHED`, you can save the artifact with: 54 | 55 | ```python 56 | run.save_artifact('tuned_model') 57 | ``` 58 | 59 | ```{admonition} Share artifact with others 60 | :class: hint 61 | Finetuner allows you to set your artifact as a public artifact. 62 | At training time, you need to set `public=True` when calling the `fit` function. 63 | If `public=True`, anyone who knows the artifact id can download your artifact with the above function. 64 | ``` 65 | 66 | If the fine-tuning is finished, you will see the following message in the terminal: 67 | 68 | ```bash 69 | 🔐 Successfully logged in to Jina AI as [USER NAME]! 
70 | Run status: FINISHED 71 | Run Artifact id: 62972acb5de25a53fdbfcecc 72 | Run logs: 73 | 74 | Training [2/2] ━━━━━━━━━━━━━━━━━━━━━━━━━━━ 50/50 0:00:00 0:01:08 • loss: 0.050 75 | [09:13:23] INFO [__main__] Done ✨ __main__.py:214 76 | INFO [__main__] Saving fine-tuned models ... __main__.py:217 77 | INFO [__main__] Saving model 'model' in __main__.py:228 78 | /usr/src/app/tuned-models/model ... 79 | INFO [__main__] Pushing saved model to Hubble ... __main__.py:232 80 | [09:13:54] INFO [__main__] Pushed model artifact ID: __main__.py:238 81 | '62972acb5de25a53fdbfcecc' 82 | INFO [__main__] Finished 🚀 __main__.py:240``` 83 | ``` 84 | -------------------------------------------------------------------------------- /finetuner/callback.py: -------------------------------------------------------------------------------- 1 | from _finetuner.runner.stubs.callback import * # noqa F401 2 | -------------------------------------------------------------------------------- /finetuner/client/__init__.py: -------------------------------------------------------------------------------- 1 | from finetuner.client.client import FinetunerV1Client # noqa: F401 2 | -------------------------------------------------------------------------------- /finetuner/client/base.py: -------------------------------------------------------------------------------- 1 | import os 2 | from typing import List, Optional, Union 3 | 4 | import requests 5 | 6 | import hubble 7 | from finetuner.client.session import _HeaderPreservingSession 8 | from finetuner.constants import ( 9 | AUTHORIZATION, 10 | CHARSET, 11 | DATA, 12 | HOST, 13 | HUBBLE_USER_ID, 14 | TEXT, 15 | TOKEN_PREFIX, 16 | UTF_8, 17 | ) 18 | from finetuner.excepts import FinetunerServerError 19 | 20 | 21 | class _BaseClient: 22 | """ 23 | Base Finetuner API client. 
24 | """ 25 | 26 | def __init__(self): 27 | self._base_url = os.environ.get(HOST) 28 | self._session = self._get_client_session() 29 | self.hubble_client = hubble.Client(max_retries=None, jsonify=True) 30 | self.hubble_user_id = self._get_hubble_user_id() 31 | 32 | def _get_hubble_user_id(self): 33 | user_info = self.hubble_client.get_user_info() 34 | if user_info['code'] >= 400: 35 | # will implement error-handling later 36 | pass 37 | hubble_user_id = user_info[DATA][HUBBLE_USER_ID] 38 | return hubble_user_id 39 | 40 | @staticmethod 41 | def _get_client_session() -> _HeaderPreservingSession: 42 | session = _HeaderPreservingSession(trusted_domains=[]) 43 | api_token = TOKEN_PREFIX + str(hubble.Auth.get_auth_token()) 44 | session.headers.update({CHARSET: UTF_8, AUTHORIZATION: api_token}) 45 | return session 46 | 47 | @staticmethod 48 | def _construct_url(*args) -> str: 49 | return '/'.join(args) 50 | 51 | def _handle_request( 52 | self, 53 | url: str, 54 | method: str, 55 | params: Optional[dict] = None, 56 | json_data: Optional[dict] = None, 57 | stream: bool = False, 58 | timeout: Optional[int] = None, 59 | ) -> Union[dict, List[dict], str, requests.Response]: 60 | """The base request handler. 61 | 62 | :param url: The url of the request. 63 | :param method: The request type (GET, POST or DELETE). 64 | :param params: Optional parameters for the request. 65 | :param json_data: Optional data payloads to be sent along with the request. 66 | :param stream: If the request is a streaming request set to True. 67 | :return: Response to the request. 
68 | """ 69 | response = self._session.request( 70 | url=url, 71 | method=method, 72 | json=json_data, 73 | params=params, 74 | allow_redirects=True, 75 | stream=stream, 76 | timeout=timeout, 77 | ) 78 | if not response.ok: 79 | raise FinetunerServerError( 80 | message=response.reason, 81 | code=response.status_code, 82 | details=response.json()['detail'], 83 | ) 84 | if stream: 85 | return response 86 | else: 87 | if TEXT in response.headers['content-type']: 88 | return response.text 89 | return response.json() 90 | -------------------------------------------------------------------------------- /finetuner/client/session.py: -------------------------------------------------------------------------------- 1 | from typing import List 2 | 3 | from requests import Session 4 | from requests.utils import urlparse 5 | 6 | 7 | class _HeaderPreservingSession(Session): 8 | def __init__(self, trusted_domains: List[str]): 9 | super(_HeaderPreservingSession, self).__init__() 10 | self._trusted_domains = trusted_domains 11 | 12 | def rebuild_auth(self, prepared_request, response): 13 | """ 14 | Keep headers upon redirect as long as we are on any of the 15 | self._trusted_domains 16 | """ 17 | headers = prepared_request.headers 18 | url = prepared_request.url 19 | if 'Authorization' in headers: 20 | _original_parsed = urlparse(response.request.url) 21 | _redirect_parsed = urlparse(url) 22 | _original_domain = '.'.join(_original_parsed.hostname.split('.')[-2:]) 23 | _redirect_domain = '.'.join(_redirect_parsed.hostname.split('.')[-2:]) 24 | if ( 25 | _original_domain != _redirect_domain 26 | and _original_domain not in self._trusted_domains 27 | and _redirect_domain not in self._trusted_domains 28 | ): 29 | del headers['Authorization'] 30 | -------------------------------------------------------------------------------- /finetuner/console.py: -------------------------------------------------------------------------------- 1 | from typing import Any, Dict, List, Optional 2 | 3 | 
def print_model_table(model, task: Optional[str] = None):
    """Prints a table of model descriptions.

    :param model: Module with model definitions; must expose ``get_header()``
        and ``get_row()`` (see ``finetuner.model``).
    :param task: Optional fine-tuning task used to filter the rows,
        e.g. ``text-to-text`` — TODO confirm the full set of task names.
    """
    title = 'Finetuner backbones'
    if task:
        title += f': {task}'
    table = Table(title=title)
    header = model.get_header()
    # Track names already rendered so each backbone appears only once even if
    # several stub classes share a display name.
    model_display_names = set()

    for column in header:
        table.add_column(column, justify='right', style='cyan', no_wrap=False)

    for _, _model_class in list_model_classes().items():
        if _model_class.display_name not in model_display_names:
            row = model.get_row(_model_class)
            # row[1] is the 'task' column (second entry of model.get_header()).
            if task and row[1] != task:
                continue
            table.add_row(*row)
            model_display_names.add(_model_class.display_name)

    console.print(table)
# String constants shared across the Finetuner client: HTTP verbs, API field
# names, environment-variable keys and run-status values.
DELETE = 'DELETE'
POST = 'POST'
GET = 'GET'
NAME = 'name'
HUBBLE_USER_ID = '_id'
ID = 'id'

# Environment variables / service endpoints.
HOST = 'JINA_FINETUNER_REGISTRY'
HUBBLE_REGISTRY = 'JINA_HUBBLE_REGISTRY'
DEFAULT_FINETUNER_HOST = 'https://api.compute.finetuner.fit'
DEFAULT_HUBBLE_REGISTRY = 'https://api.hubble.jina.ai'

CONFIG = 'config'
FINETUNER_VERSION = 'finetuner_version'
DEVICE = 'device'
CPUS = 'cpus'
GPUS = 'gpus'
NUM_WORKERS = 'num_workers'
RUNS = 'runs'
STATUS = 'status'
LOGS = 'logs'
LOGSTREAM = 'logstream'
METRICS = 'metrics'
EXAMPLES = 'examples'
EXPERIMENTS = 'experiments'
API_VERSION = 'api/v1'
AUTHORIZATION = 'Authorization'
CHARSET = 'Accept-Charset'
UTF_8 = 'utf-8'
TEXT = 'text'
TOKEN_PREFIX = 'token '
DATA = 'data'
TRAIN_DATA = 'train_data'
EVAL_DATA = 'eval_data'
VAL_SPLIT = 'val_split'
EVALUATE = 'evaluate'
ARTIFACTS_DIR = 'artifacts/'
MODEL = 'model'
MODEL_OPTIONS = 'model_options'
MODEL_ARTIFACT = 'model_artifact'
ARTIFACT = 'artifact'
ARTIFACT_ID = 'artifact_id'
DEFAULT_TAG_KEY = 'finetuner_label'
DEFAULT_TAG_SCORE_KEY = 'finetuner_score'
# Run status
CREATED = 'CREATED'
STARTED = 'STARTED'
FINISHED = 'FINISHED'
FAILED = 'FAILED'
DEFAULT_EXPERIMENT_NAME = 'default_experiment'
CREATED_AT = 'created_at'
DESCRIPTION = 'description'
FREEZE = 'freeze'
OUTPUT_DIM = 'output_dim'
MULTI_MODAL = 'multi_modal'
IMAGE_MODALITY = 'image_modality'
TEXT_MODALITY = 'text_modality'
HYPER_PARAMETERS = 'hyper_parameters'
LOSS = 'loss'
LOSS_OPTIONS = 'loss_options'
OPTIMIZER = 'optimizer'
LOSS_OPTIMIZER = 'loss_optimizer'
LOSS_OPTIMIZER_OPTIONS = 'loss_optimizer_options'
SAMPLER = 'sampler'
MINER = 'miner'
MINER_OPTIONS = 'miner_options'
BATCH_SIZE = 'batch_size'
LEARNING_RATE = 'learning_rate'
EPOCHS = 'epochs'
EXPERIMENT_NAME = 'experiment_name'
RUN_NAME = 'run_name'
OPTIMIZER_OPTIONS = 'optimizer_options'
SCHEDULER = 'scheduler'
SCHEDULER_OPTIONS = 'scheduler_options'
CALLBACKS = 'callbacks'
OPTIONS = 'options'
QUERY_DATA = 'query_data'
INDEX_DATA = 'index_data'
DA_PREFIX = 'finetuner-dastorage'
ONNX = 'to_onnx'
PUBLIC = 'public'
NUM_ITEMS_PER_CLASS = 'num_items_per_class'
# NOTE: duplicate re-definitions of VAL_SPLIT and TRAIN_DATA (with identical
# values) were removed; both are defined once above.
TASK = 'task'
TRAINING_TASK = 'training'
SYNTHESIS_TASK = 'generation'
# Synthesis job
RAW_DATA_CONFIG = 'data'
RELATION_MINING = 'relation_mining'
CROSS_ENCODER = 'cross_encoder'
QUERIES = 'queries'
CORPUS = 'corpus'
MODELS = 'models'
NUM_RELATIONS = 'num_relations'
MAX_NUM_DOCS = 'max_num_docs'
HF_URL_PREFIX = 'https://huggingface.co/jinaai/'
HF_ORG_PREFIX = 'jinaai/'
def push_docarray(
    data: Union[None, str, DocumentArray],
    name: str,
    ids2names: Optional[Dict[int, str]] = None,
) -> Optional[str]:
    """Upload a DocumentArray to Jina AI Cloud and return its name."""
    if not isinstance(data, DocumentArray):
        # `data` is already a cloud name (str) or None — pass it through.
        return data
    ref = id(data)
    if ids2names is not None and ref in ids2names:
        # This exact object was pushed earlier in the same batch; reuse the
        # name it was uploaded under instead of pushing again.
        return ids2names[ref]
    print(f'Pushing a DocumentArray to Jina AI Cloud under the name {name} ...')
    data.push(name=name, show_progress=True, public=False)
    if ids2names is not None:
        ids2names[ref] = name
    return name
def push_synthesis_data(
    experiment_name: str,
    run_name: str,
    query_data: Union[str, DocumentArray],
    corpus_data: Union[str, DocumentArray],
) -> Tuple[Optional[str], Optional[str]]:
    """Upload data to Jina AI Cloud and returns their names.

    Uploads all data needed for data synthesis - query data and corpus data.

    Data is given either as a `DocumentArray` or
    a name of the `DocumentArray` that is already pushed to Jina AI Cloud.

    Checks not to upload same dataset twice.

    :param experiment_name: Name of the experiment.
    :param run_name: Name of the run.
    :param query_data: Query data.
    :param corpus_data: Corpus data.
    :return: Names of the uploaded query and corpus data.
    """
    # Shared cache so the same DocumentArray object is uploaded at most once.
    seen: Dict[int, str] = dict()
    prefix = f'{DA_PREFIX}-{experiment_name}-{run_name}'
    query_name = push_docarray(query_data, f'{prefix}-query', seen)
    corpus_name = push_docarray(corpus_data, f'{prefix}-corpus', seen)
    return query_name, corpus_name
@dataclass
class SynthesisModels:
    """Class specifying the models to be used in a data synthesis job.

    :param relation_miner: The name of the model or list of models to use for
        relation mining.
    :param cross_encoder: The name of the model to use as the cross encoder.
    """

    relation_miner: Union[str, List[str]]
    cross_encoder: str
51 | 'gifted', 52 | 'goofy', 53 | 'gracious', 54 | 'great', 55 | 'happy', 56 | 'hardcore', 57 | 'heuristic', 58 | 'hopeful', 59 | 'hungry', 60 | 'infallible', 61 | 'inspiring', 62 | 'interesting', 63 | 'intelligent', 64 | 'jolly', 65 | 'jovial', 66 | 'keen', 67 | 'kind', 68 | 'laughing', 69 | 'loving', 70 | 'lucid', 71 | 'magical', 72 | 'mystifying', 73 | 'modest', 74 | 'musing', 75 | 'naughty', 76 | 'nervous', 77 | 'nice', 78 | 'nifty', 79 | 'nostalgic', 80 | 'objective', 81 | 'optimistic', 82 | 'peaceful', 83 | 'pedantic', 84 | 'pensive', 85 | 'practical', 86 | 'priceless', 87 | 'quirky', 88 | 'quizzical', 89 | 'recursing', 90 | 'relaxed', 91 | 'reverent', 92 | 'romantic', 93 | 'sad', 94 | 'serene', 95 | 'sharp', 96 | 'silly', 97 | 'sleepy', 98 | 'stoic', 99 | 'strange', 100 | 'stupefied', 101 | 'suspicious', 102 | 'sweet', 103 | 'tender', 104 | 'thirsty', 105 | 'trusting', 106 | 'unruffled', 107 | 'upbeat', 108 | 'vibrant', 109 | 'vigilant', 110 | 'vigorous', 111 | 'wizardly', 112 | 'wonderful', 113 | 'xenodochial', 114 | 'youthful', 115 | 'zealous', 116 | 'zen', 117 | ] 118 | 119 | 120 | surnames = [ 121 | 'albattani', 122 | 'allen', 123 | 'almeida', 124 | 'antonelli', 125 | 'agnesi', 126 | 'archimedes', 127 | 'ardinghelli', 128 | 'aryabhata', 129 | 'austin', 130 | 'babbage', 131 | 'banach', 132 | 'banzai', 133 | 'bardeen', 134 | 'bartik', 135 | 'bassi', 136 | 'beaver', 137 | 'bell', 138 | 'benz', 139 | 'bhabha', 140 | 'bhaskara', 141 | 'black', 142 | 'blackburn', 143 | 'blackwell', 144 | 'bohr', 145 | 'booth', 146 | 'borg', 147 | 'bose', 148 | 'bouman', 149 | 'boyd', 150 | 'brahmagupta', 151 | 'brattain', 152 | 'brown', 153 | 'buck', 154 | 'burnell', 155 | 'cannon', 156 | 'carson', 157 | 'cartwright', 158 | 'carver', 159 | 'cerf', 160 | 'chandrasekhar', 161 | 'chaplygin', 162 | 'chatelet', 163 | 'chatterjee', 164 | 'chebyshev', 165 | 'cohen', 166 | 'chaum', 167 | 'clarke', 168 | 'colden', 169 | 'cori', 170 | 'cray', 171 | 'curran', 172 | 'curie', 173 | 
'darwin', 174 | 'davinci', 175 | 'dewdney', 176 | 'dhawan', 177 | 'diffie', 178 | 'dijkstra', 179 | 'dirac', 180 | 'driscoll', 181 | 'dubinsky', 182 | 'easley', 183 | 'edison', 184 | 'einstein', 185 | 'elbakyan', 186 | 'elgamal', 187 | 'elion', 188 | 'ellis', 189 | 'engelbart', 190 | 'euclid', 191 | 'euler', 192 | 'faraday', 193 | 'feistel', 194 | 'fermat', 195 | 'fermi', 196 | 'feynman', 197 | 'franklin', 198 | 'gagarin', 199 | 'galileo', 200 | 'galois', 201 | 'ganguly', 202 | 'gates', 203 | 'gauss', 204 | 'germain', 205 | 'goldberg', 206 | 'goldstine', 207 | 'goldwasser', 208 | 'golick', 209 | 'goodall', 210 | 'gould', 211 | 'greider', 212 | 'grothendieck', 213 | 'haibt', 214 | 'hamilton', 215 | 'haslett', 216 | 'hawking', 217 | 'hellman', 218 | 'heisenberg', 219 | 'hermann', 220 | 'herschel', 221 | 'hertz', 222 | 'heyrovsky', 223 | 'hodgkin', 224 | 'hofstadter', 225 | 'hoover', 226 | 'hopper', 227 | 'hugle', 228 | 'hypatia', 229 | 'ishizaka', 230 | 'jackson', 231 | 'jang', 232 | 'jemison', 233 | 'jennings', 234 | 'jepsen', 235 | 'johnson', 236 | 'joliot', 237 | 'jones', 238 | 'kalam', 239 | 'kapitsa', 240 | 'kare', 241 | 'keldysh', 242 | 'keller', 243 | 'kepler', 244 | 'khayyam', 245 | 'khorana', 246 | 'kilby', 247 | 'kirch', 248 | 'knuth', 249 | 'kowalevski', 250 | 'lalande', 251 | 'lamarr', 252 | 'lamport', 253 | 'leakey', 254 | 'leavitt', 255 | 'lederberg', 256 | 'lehmann', 257 | 'lewin', 258 | 'lichterman', 259 | 'liskov', 260 | 'lovelace', 261 | 'lumiere', 262 | 'mahavira', 263 | 'margulis', 264 | 'matsumoto', 265 | 'maxwell', 266 | 'mayer', 267 | 'mccarthy', 268 | 'mcclintock', 269 | 'mclaren', 270 | 'mclean', 271 | 'mcnulty', 272 | 'mendel', 273 | 'mendeleev', 274 | 'meitner', 275 | 'meninsky', 276 | 'merkle', 277 | 'mestorf', 278 | 'mirzakhani', 279 | 'moore', 280 | 'morse', 281 | 'murdock', 282 | 'moser', 283 | 'napier', 284 | 'nash', 285 | 'neumann', 286 | 'newton', 287 | 'nightingale', 288 | 'nobel', 289 | 'noether', 290 | 'northcutt', 291 | 'noyce', 
def get_random_name() -> str:
    """Return a random moby-style run name such as ``'keen-turing'``.

    Combines a random adjective with a random scientist/inventor surname,
    joined by a hyphen.
    """
    # random.choice is the idiomatic equivalent of indexing with randrange.
    return f'{random.choice(adjectives)}-{random.choice(surnames)}'
    def __init__(
        self,
        client: FinetunerV1Client,
        name: str,
        experiment_name: str,
        config: dict,
        created_at: str,
        description: str = '',
        task: str = TRAINING_TASK,
        train_data: Optional[str] = None,
    ):
        """Initialize the run wrapper and eagerly fetch its server-side record."""
        self._client = client
        self._name = name
        self._experiment_name = experiment_name
        self._config = config
        self._created_at = created_at
        self._description = description
        # Fetch the run's representation from the API up front; consumed later
        # by `save_artifact` and `artifact_id`.
        self._run = self._get_run()
        self.task = task
        # Cached name of the produced DocumentArray (synthesis runs only);
        # lazily resolved by the `train_data` property when not given here.
        self._train_data = train_data
69 | """ 70 | if self.task != SYNTHESIS_TASK: 71 | raise ValueError(f'{self.task} run does not produce data.') 72 | else: 73 | self._check_run_status_finished() 74 | if self._train_data: 75 | return self._train_data 76 | else: 77 | run = self._get_run() 78 | try: 79 | train_data = run[TRAIN_DATA] 80 | except KeyError: 81 | raise ValueError(f'run {self.name} has no train_data.') 82 | self._train_data = train_data 83 | return train_data 84 | 85 | def _get_run(self) -> dict: 86 | """Get Run object as dict.""" 87 | return self._client.get_run( 88 | experiment_name=self._experiment_name, run_name=self._name 89 | ) 90 | 91 | def status(self) -> dict: 92 | """Get :class:`Run` status. 93 | 94 | :returns: A dict representing the :class:`Run` status. 95 | """ 96 | return self._client.get_run_status( 97 | experiment_name=self._experiment_name, run_name=self._name 98 | ) 99 | 100 | def logs(self) -> str: 101 | """Check the :class:`Run` logs. 102 | 103 | :returns: A string dump of the run logs. 104 | """ 105 | self._check_run_status_started() 106 | return self._client.get_run_logs( 107 | experiment_name=self._experiment_name, run_name=self._name 108 | ) 109 | 110 | def stream_logs(self, interval: int = 5) -> Iterator[str]: 111 | """Stream the :class:`Run` logs lively. 112 | 113 | :param interval: The time interval to sync the status of finetuner `Run`. 114 | :yield: An iterators keep stream the logs from server. 115 | """ 116 | status = self.status()[STATUS] 117 | msg_template = ( 118 | 'Preparing to run, logs will be ready to pull when ' 119 | '`status` is `STARTED`. 
Current status is `%s`' 120 | ) 121 | with console.status(msg_template % status, spinner="dots") as rich_status: 122 | while status == CREATED: 123 | time.sleep(interval) 124 | status = self.status()[STATUS] 125 | rich_status.update(msg_template % status) 126 | 127 | return self._client.stream_run_logs( 128 | experiment_name=self._experiment_name, run_name=self._name 129 | ) 130 | 131 | def metrics(self) -> Dict[str, Dict[str, float]]: 132 | """Get the evaluation metrics of the :class:`Run`. 133 | 134 | :return: dictionary with evaluation metrics before and after fine-tuning. 135 | """ 136 | self._check_run_status_finished() 137 | return self._client.get_run_metrics( 138 | experiment_name=self._experiment_name, run_name=self._name 139 | ) 140 | 141 | def display_metrics(self): 142 | """ 143 | Prints a table of retrieval metrics before and after fine-tuning 144 | """ 145 | metrics = self.metrics() 146 | for stage in metrics: 147 | print_metrics(stage, metrics[stage]) 148 | 149 | def example_results(self) -> Dict[str, Any]: 150 | """Get the results of example queries from the evaluation data of the 151 | :class:`Run`. 152 | 153 | :return: dictionary with results before and after fine-tuning. 154 | """ 155 | self._check_run_status_finished() 156 | return self._client.get_run_examples( 157 | experiment_name=self._experiment_name, run_name=self._name 158 | ) 159 | 160 | def display_examples(self, k: int = 5): 161 | """ 162 | Prints a table of results of example queries before and after fine-tuning. 163 | 164 | :param k: maximal number of results per query to display 165 | """ 166 | example_results = self.example_results() 167 | for stage in example_results: 168 | print_examples(stage, example_results[stage], k=k) 169 | 170 | def _check_run_status_finished(self): 171 | status = self.status()[STATUS] 172 | if status in [CREATED, STARTED]: 173 | raise RunInProgressError( 174 | 'The run needs to be finished in order to save the artifact.' 
175 | ) 176 | if status == FAILED: 177 | raise RunFailedError( 178 | 'The run failed, please check the `logs` for detailed information.' 179 | ) 180 | 181 | def _check_run_status_started(self): 182 | status = self.status()[STATUS] 183 | if status == CREATED: 184 | raise RunPreparingError( 185 | 'Preparing to run, logs will be ready to pull when ' 186 | '`status` is `STARTED`.' 187 | ) 188 | 189 | def save_artifact(self, directory: str = ARTIFACTS_DIR) -> str: 190 | """Save artifact if the :class:`Run` is finished. 191 | 192 | :param directory: Directory where the artifact will be stored. 193 | :returns: A string object that indicates the download path. 194 | """ 195 | self._check_run_status_finished() 196 | return download_artifact( 197 | client=self._client, 198 | artifact_id=self._run[ARTIFACT_ID], 199 | run_name=self._name, 200 | directory=directory, 201 | ) 202 | 203 | @property 204 | def artifact_id(self): 205 | """Get artifact id of the :class:`Run`. 206 | 207 | An artifact in finetuner contains fine-tuned model and its metadata. 208 | Such as preprocessing function, collate function. This id could be useful 209 | if you want to directly pull the artifact from the cloud storage, such as 210 | using `FinetunerExecutor`. 211 | 212 | :return: Artifact id as string object. 
213 | """ 214 | self._check_run_status_finished() 215 | return self._run[ARTIFACT_ID] 216 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [tool.black] 2 | line-length = 88 3 | skip-string-normalization = true 4 | extend-exclude = 'docs/conf.py' 5 | 6 | [tool.isort] 7 | profile = 'black' 8 | extend_skip = ['docs/conf.py'] 9 | skip_gitignore = true 10 | src_paths = ['finetuner'] 11 | 12 | [tool.mypy] 13 | python_version = 3.9 14 | ignore_missing_imports = true 15 | -------------------------------------------------------------------------------- /setup.cfg: -------------------------------------------------------------------------------- 1 | [metadata] 2 | version = 0.8.1 3 | 4 | [flake8] 5 | # E501 is too long lines - ignore as black takes care of that 6 | # E203 is whitespace before ':' - which occurs in numpy slicing, e.g. in 7 | # dists[2 * i : 2 * i + 2, :] 8 | # W503 is line break before binary operator - happens when black splits up lines 9 | ignore = E203, W503, F405, F403 10 | exclude = .git,__pycache__,docs/conf.py,old,build,dist,scripts,target,setup.py 11 | max-line-length = 88 12 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import find_packages, setup 2 | 3 | # package name 4 | _name = 'finetuner' 5 | 6 | # package long description 7 | try: 8 | with open('README.md', encoding='utf8') as fp: 9 | _long_description = fp.read() 10 | except FileNotFoundError: 11 | _long_description = '' 12 | 13 | 14 | if __name__ == '__main__': 15 | setup( 16 | name=_name, 17 | packages=find_packages(exclude=['*.tests', '*.tests.*', 'tests.*', 'tests']), 18 | include_package_data=True, 19 | description='Task-oriented finetuning for better embeddings on neural search.', 20 | author='Jina AI', 21 | 
author_email='hello@jina.ai', 22 | url='https://github.com/jina-ai/finetuner/', 23 | license='Apache 2.0', 24 | download_url='https://github.com/jina-ai/finetuner/tags', 25 | long_description=_long_description, 26 | long_description_content_type='text/markdown', 27 | zip_safe=False, 28 | setup_requires=['setuptools>=18.0', 'wheel'], 29 | install_requires=[ 30 | 'docarray[common]<0.30.0', 31 | 'finetuner-stubs==0.13.10', 32 | 'finetuner-commons==0.13.10', 33 | ], 34 | extras_require={ 35 | 'full': [ 36 | 'jina-hubble-sdk==0.33.1', 37 | 'trimesh==3.16.4', 38 | ], 39 | 'test': [ 40 | 'black==23.3.0', 41 | 'flake8==6.0.0', 42 | 'isort==5.12.0', 43 | 'pytest==7.0.0', 44 | 'pytest-cov==3.0.0', 45 | 'pytest-mock==3.7.0', 46 | ], 47 | }, 48 | python_requires='>=3.8.0', 49 | classifiers=[ 50 | 'Development Status :: 5 - Production/Stable', 51 | 'Intended Audience :: Developers', 52 | 'Intended Audience :: Education', 53 | 'Intended Audience :: Science/Research', 54 | 'Programming Language :: Python :: 3.8', 55 | 'Programming Language :: Python :: 3.9', 56 | 'Programming Language :: Python :: 3.10', 57 | 'License :: OSI Approved :: Apache Software License', 58 | 'Environment :: Console', 59 | 'Operating System :: OS Independent', 60 | 'Topic :: Scientific/Engineering :: Artificial Intelligence', 61 | ], 62 | project_urls={ 63 | 'Documentation': 'https://finetuner.jina.ai', 64 | 'Source': 'https://github.com/jina-ai/finetuner/', 65 | 'Tracker': 'https://github.com/jina-ai/finetuner/issues', 66 | }, 67 | keywords=( 68 | 'jina neural-search neural-network deep-learning pretraining ' 69 | 'fine-tuning pretrained-models triplet-loss metric-learning ' 70 | 'siamese-network few-shot-learning' 71 | ), 72 | ) 73 | -------------------------------------------------------------------------------- /tests/__init__.py: -------------------------------------------------------------------------------- 
def create_random_name(prefix='experiment', length=6):
    """Return ``<prefix>-<suffix>`` with a random uppercase/digit suffix.

    :param prefix: Leading label for the generated name.
    :param length: Number of random characters in the suffix.
    :return: The generated name, e.g. ``experiment-A1B2C3``.
    """
    alphabet = string.ascii_uppercase + string.digits
    suffix = ''.join(random.choices(alphabet, k=length))
    return f'{prefix}-{suffix}'
@pytest.fixture()
def get_image_data():
    """Yield (train, eval) DocumentArrays of random 28x28x3 tensors with class labels."""

    def _random_image_da(num_classes, images_per_class):
        # One labelled Document per (class, sample) pair.
        docs = [
            Document(
                tensor=np.random.rand(28, 28, 3),
                tags={FINETUNER_LABEL: str(class_id)},
            )
            for class_id in range(num_classes)
            for _ in range(images_per_class)
        ]
        return DocumentArray(docs)

    train_da = _random_image_da(num_classes=10, images_per_class=10)
    eval_da = _random_image_da(num_classes=10, images_per_class=2)
    return train_da, eval_da


@pytest.fixture()
def get_feature_data():
    """Yield (train, eval) DocumentArrays of random float32 feature vectors."""

    def _random_feature_da(num_classes, samples_per_class, dim):
        docs = [
            Document(
                tensor=np.random.rand(dim).astype(np.float32),
                tags={FINETUNER_LABEL: str(class_id)},
            )
            for class_id in range(num_classes)
            for _ in range(samples_per_class)
        ]
        return DocumentArray(docs)

    train_da = _random_feature_da(num_classes=10, samples_per_class=32, dim=128)
    eval_da = _random_feature_da(num_classes=10, samples_per_class=32, dim=128)
    return train_da, eval_da


@pytest.fixture()
def finetuner_mocker(mocker):
    """Patch hubble login/auth, log in, and yield the global finetuner session."""

    def hubble_login_mocker(force: bool = False, post_success=None, **kwargs):
        print('Successfully logged in to Hubble!')
        if post_success:
            post_success()

    def get_auth_token():
        # Integration tests need a real token from the environment.
        if not os.environ.get('JINA_AUTH_TOKEN'):
            raise ValueError('Please set `JINA_AUTH_TOKEN` as an environment variable.')
        return os.environ.get('JINA_AUTH_TOKEN')

    mocker.patch.object(hubble, 'login', hubble_login_mocker)
    mocker.patch.object(hubble.Auth, 'get_auth_token', get_auth_token)

    finetuner.login()

    yield finetuner.ft


@pytest.fixture()
def synthesis_query_data():
    """Artifact name of the query DocumentArray used by synthesis runs."""
    return 'finetuner/xmarket_queries_da_s'
# --- tests/integration/test_data.py ------------------------------------------
import pytest

from finetuner import build_model
from finetuner.data import build_encoding_dataset


@pytest.mark.parametrize(
    'data, model_name, modality',
    [
        (['text1', 'text2'], 'bert-base-en', 'text'),
        (['image1', 'image2', 'image3'], 'resnet-base', 'vision'),
        (['text1', 'text2'], 'clip-base-en', 'text'),
        (['image1', 'image2', 'image3'], 'clip-base-en', 'vision'),
    ],
)
def test_build_encoding_dataset_str(data, model_name, modality):
    """Raw strings are mapped onto `.text` (text) or `.uri` (vision) fields."""
    # FIX: only CLIP backbones expose sub-models to select; non-CLIP models
    # (bert-base-en, resnet-base) must be built with select_model=None, as in
    # the unit tests for `build_model`.
    select_model = f'clip-{modality}' if model_name.startswith('clip') else None
    model = build_model(name=model_name, select_model=select_model)
    da = build_encoding_dataset(model=model, data=data)
    for doc, expected in zip(da, data):
        if modality == 'text':
            assert doc.text == expected
        else:
            assert doc.uri == expected


# --- tests/integration/test_experiments.py ------------------------------------
from tests.helper import create_random_name


def test_experiments(finetuner_mocker):
    """Create, list and delete experiments end to end."""
    first_exp_name, second_exp_name = [create_random_name() for _ in range(2)]

    # create an experiment and retrieve it
    finetuner_mocker.create_experiment(name=first_exp_name)
    exp1 = finetuner_mocker.get_experiment(name=first_exp_name)
    assert exp1.name == first_exp_name
    assert exp1.status == 'ACTIVE'

    # create another experiment and list all experiments
    finetuner_mocker.create_experiment(second_exp_name)
    experiments = finetuner_mocker.list_experiments()
    experiment_names = [experiment.name for experiment in experiments]
    assert first_exp_name in experiment_names and second_exp_name in experiment_names

    for experiment in experiments:
        assert experiment.status == 'ACTIVE'

    # delete the first experiment
    finetuner_mocker.delete_experiment(first_exp_name)
    experiments = finetuner_mocker.list_experiments()
    assert second_exp_name in [experiment.name for experiment in experiments]

    # delete all experiments
    finetuner_mocker.delete_experiment(second_exp_name)
    experiments = finetuner_mocker.list_experiments()
    assert second_exp_name not in [experiment.name for experiment in experiments]
    # clear experiments
    finetuner_mocker.delete_experiments()


# --- tests/integration/test_hf_models.py --------------------------------------
@pytest.mark.parametrize(
    'model',
    [
        'jinaai/jina-embedding-s-en-v1',
    ],
)
def test_build_model(model):
    """Hugging Face hub models can be built by repo id."""
    model = build_model(name=model)
    assert model


# --- tests/integration/test_runs.py --------------------------------------------
import os

import numpy as np
from docarray import DocumentArray

import finetuner
from finetuner.constants import FAILED, FINISHED, STATUS
from finetuner.model import synthesis_model_en


def _wait_for_terminal_status(run, max_checks=6 * 20, interval=10):
    """Poll `run` until it reaches FAILED/FINISHED or ~20 minutes elapse.

    FIX: this loop (including the function-local `import time`) was duplicated
    verbatim in two tests; it now lives in one place.

    :param run: The finetuner run to poll.
    :param max_checks: Maximum number of status polls.
    :param interval: Seconds to sleep between polls.
    :return: The last observed status string.
    """
    import time

    status = run.status()[STATUS]
    for _ in range(max_checks):
        if status in [FAILED, FINISHED]:
            break
        time.sleep(interval)
        status = run.status()[STATUS]
    return status


def test_runs(finetuner_mocker, get_feature_data):
    """Create, list and delete training runs inside a fresh experiment."""
    experiment_name = create_random_name()

    # get preprocessed data
    train_data, eval_data = get_feature_data

    # create an experiment and retrieve it
    finetuner_mocker.create_experiment(experiment_name)
    experiment = finetuner_mocker.get_experiment(name=experiment_name)
    assert experiment.name == experiment_name
    assert experiment.status == 'ACTIVE'

    # Create Runs
    first_run, second_run = [create_random_name(prefix='run') for _ in range(2)]

    # create a first run
    finetuner_mocker.create_training_run(
        model='mlp',
        model_options={'input_size': 128, 'hidden_sizes': [32]},
        train_data=train_data,
        eval_data=eval_data,
        experiment_name=experiment_name,
        run_name=first_run,
        loss='TripletMarginLoss',
        optimizer='Adam',
        learning_rate=1e-3,
        batch_size=12,
        epochs=2,
        device='cpu',
    )

    # get the first run
    run = finetuner_mocker.get_run(experiment_name=experiment_name, run_name=first_run)
    assert run.name == first_run

    # create another run
    finetuner_mocker.create_training_run(
        model='mlp',
        model_options={'input_size': 128, 'hidden_sizes': [32]},
        train_data=train_data,
        eval_data=eval_data,
        experiment_name=experiment_name,
        run_name=second_run,
        loss='TripletMarginLoss',
        optimizer='Adam',
        learning_rate=1e-3,
        batch_size=12,
        epochs=1,
        device='cpu',
    )

    # list all runs
    runs = finetuner_mocker.list_runs(experiment_name=experiment_name)
    assert len(runs) == 2
    run_names = [run.name for run in runs]
    assert first_run in run_names and second_run in run_names

    # delete the first run
    finetuner_mocker.delete_run(experiment_name=experiment_name, run_name=first_run)
    runs = finetuner_mocker.list_runs(experiment_name=experiment_name)
    assert len(runs) == 1

    # delete all existing runs
    finetuner_mocker.delete_runs(experiment_name=experiment_name)
    runs = finetuner_mocker.list_runs(experiment_name=experiment_name)
    assert not runs

    # delete experiment
    finetuner_mocker.delete_experiment(experiment_name)
    experiments = finetuner_mocker.list_experiments()
    assert experiment_name not in [experiment.name for experiment in experiments]


@pytest.mark.parametrize('use_onnx', [True, False])
def test_create_training_run_and_save_model(
    finetuner_mocker, get_feature_data, tmp_path, use_onnx
):
    """Train a tiny MLP, wait for completion, download and use the artifact."""
    train_da, test_da = get_feature_data
    experiment_name = create_random_name()
    finetuner_mocker.create_experiment(name=experiment_name)
    run = finetuner_mocker.create_training_run(
        model='mlp',
        model_options={'input_size': 128, 'hidden_sizes': [32]},
        train_data=train_da,
        loss='TripletMarginLoss',
        optimizer='Adam',
        learning_rate=0.001,
        batch_size=12,
        epochs=2,
        experiment_name=experiment_name,
        to_onnx=use_onnx,
        device='cpu',
    )

    # wait for up to 20 minutes for the run to finish
    status = _wait_for_terminal_status(run)
    assert status == FINISHED

    artifact_id = run.artifact_id
    assert isinstance(artifact_id, str)
    # the artifact id is a 24 character hex string defined in mongo db.
    assert len(artifact_id) == 24

    artifact = run.save_artifact(directory=tmp_path / 'finetuned_model')
    assert os.path.exists(tmp_path / 'finetuned_model')

    # encode and check the embeddings
    model = finetuner.get_model(artifact=artifact, is_onnx=use_onnx)
    finetuner.encode(model=model, data=test_da)
    assert test_da.embeddings is not None
    assert isinstance(test_da.embeddings, np.ndarray)

    # delete created experiments (and runs)
    finetuner_mocker.delete_experiment(experiment_name)
    experiments = finetuner_mocker.list_experiments()
    assert experiment_name not in [experiment.name for experiment in experiments]


def test_create_synthesis_run_and_save_data(
    finetuner_mocker, synthesis_query_data, synthesis_corpus_data
):
    """Run data synthesis, wait for completion, and pull the produced data."""
    experiment_name = create_random_name()
    finetuner_mocker.create_experiment(name=experiment_name)
    run = finetuner_mocker.create_synthesis_run(
        query_data=synthesis_query_data,
        corpus_data=synthesis_corpus_data,
        models=synthesis_model_en,
        num_relations=3,
        experiment_name=experiment_name,
    )

    # wait for up to 20 minutes for the run to finish
    status = _wait_for_terminal_status(run)
    assert status == FINISHED

    train_data = run.train_data
    assert isinstance(train_data, str)
    train_data = DocumentArray.pull(train_data)

    for doc in train_data['@c']:
        assert doc.content is not None

    # delete created experiments (and runs)
    finetuner_mocker.delete_experiment(experiment_name)
    experiments = finetuner_mocker.list_experiments()
    assert experiment_name not in [experiment.name for experiment in experiments]
current_dir = os.path.dirname(os.path.abspath(__file__))


@pytest.fixture
def client_mocker(mocker):
    """A client whose request layer echoes back what would have been sent."""
    return create_request_mocker(mocker)


@pytest.fixture
def finetuner_mocker(mocker):
    """A Finetuner instance wired to a fully mocked API client."""
    mocked_client = create_client_mocker(mocker)
    ft = Finetuner()
    ft._client = mocked_client
    ft._default_experiment = ft._get_default_experiment()
    return ft
32 | mocker.patch.object(hubble, 'login', hubble_login_mocker) 33 | mocker.patch.object(hubble.Auth, 'get_auth_token', get_auth_token) 34 | mocker.patch.object(docarray.DocumentArray, 'push', _return_args) 35 | hubble.login() 36 | client = FinetunerV1Client() 37 | mocker.patch.object(client, 'hubble_user_id', HUBBLE_USER_TEST_ID) 38 | return client 39 | 40 | 41 | def _return_args(*_, **kwargs): 42 | return kwargs 43 | 44 | 45 | def create_request_mocker(mocker): 46 | base_mocker = _create_base_mocker(mocker) 47 | mocker.patch.object(base_mocker, '_handle_request', _return_args) 48 | return base_mocker 49 | 50 | 51 | def create_client_mocker(mocker): 52 | def return_experiment(**kwargs): 53 | name = kwargs.get(NAME) or 'experiment name' 54 | return { 55 | STATUS: 'ACTIVE', 56 | NAME: name, 57 | DESCRIPTION: 'description', 58 | CREATED_AT: 'some time', 59 | } 60 | 61 | def return_experiments(**_): 62 | names = ['first experiment', 'second experiment'] 63 | return { 64 | 'items': [return_experiment(name=name) for name in names], 65 | 'total': 0, 66 | 'page': 1, 67 | 'size': len(names), 68 | } 69 | 70 | def return_status(**_): 71 | return { 72 | 'status': random.choice([CREATED, STARTED, FINISHED, FAILED]), 73 | 'details': '', 74 | } 75 | 76 | def return_run(**kwargs): 77 | name = kwargs.get(RUN_NAME) or 'run name' 78 | config = kwargs.get('run_config') or {} 79 | return { 80 | NAME: name, 81 | CONFIG: config, 82 | DESCRIPTION: 'description', 83 | CREATED_AT: 'some time', 84 | } 85 | 86 | def return_runs(**_): 87 | names = ['first run', 'second run'] 88 | return { 89 | 'items': [return_run(run_name=name) for name in names], 90 | 'total': 0, 91 | 'page': 1, 92 | 'size': len(names), 93 | } 94 | 95 | base_mocker = _create_base_mocker(mocker) 96 | 97 | mocker.patch.object(base_mocker, 'create_experiment', return_experiment) 98 | mocker.patch.object(base_mocker, 'get_experiment', return_experiment) 99 | mocker.patch.object(base_mocker, 'delete_experiment', 
return_experiment) 100 | mocker.patch.object(base_mocker, 'list_experiments', return_experiments) 101 | mocker.patch.object(base_mocker, 'delete_experiments', return_experiments) 102 | mocker.patch.object(base_mocker, 'get_run_status', return_status) 103 | mocker.patch.object(base_mocker, 'get_run', return_run) 104 | mocker.patch.object(base_mocker, 'create_run', return_run) 105 | mocker.patch.object(base_mocker, 'delete_run', return_run) 106 | mocker.patch.object(base_mocker, 'list_runs', return_runs) 107 | mocker.patch.object(base_mocker, 'delete_runs', return_runs) 108 | 109 | return base_mocker 110 | -------------------------------------------------------------------------------- /tests/unit/resources/cube.off: -------------------------------------------------------------------------------- 1 | OFF 2 | 8 6 0 3 | 1.0 0.0 1.0 4 | 0.0 1.0 1.0 5 | -1.0 0.0 1.0 6 | 0.0 -1.0 1.0 7 | 1.0 0.0 0.0 8 | 0.0 1.0 0.0 9 | -1.0 0.0 0.0 10 | 0.0 -1.0 0.0 11 | 4 0 1 2 3 12 | 4 7 4 0 3 13 | 4 4 5 1 0 14 | 4 5 6 2 1 15 | 4 3 2 6 7 16 | 4 6 5 4 7 17 | 18 | -------------------------------------------------------------------------------- /tests/unit/resources/dummy.csv: -------------------------------------------------------------------------------- 1 | This is an English sentence,Das ist ein englischer Satz 2 | This is another English sentence,Dies ist ein weiterer englischer Satz -------------------------------------------------------------------------------- /tests/unit/resources/lena.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jina-ai/finetuner/69ae77cb51c736e791126792de20015a70658b53/tests/unit/resources/lena.png -------------------------------------------------------------------------------- /tests/unit/test___init__.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import pytest 3 | from _finetuner.excepts import SelectModelRequired 4 | 
@pytest.mark.parametrize(
    'descriptor, select_model, is_onnx, expect_error',
    [
        ('bert-base-en', None, False, None),
        ('bert-base-en', None, True, None),
        ('clip-base-en', 'clip-text', False, None),
        ('clip-base-en', 'clip-vision', False, None),
        ('clip-base-en', None, False, SelectModelRequired),
        ('MADE UP MODEL', None, False, ValueError),
    ],
)
def test_build_model(descriptor, select_model, is_onnx, expect_error):
    """`build_model` yields the right engine type, or raises for bad input."""
    if expect_error:
        with pytest.raises(expect_error):
            finetuner.build_model(
                name=descriptor,
                select_model=select_model,
                is_onnx=is_onnx,
            )
        return

    model = finetuner.build_model(
        name=descriptor, select_model=select_model, is_onnx=is_onnx
    )
    expected_engine = (
        ONNXRuntimeInferenceEngine if is_onnx else TorchInferenceEngine
    )
    assert isinstance(model, expected_engine)


@pytest.mark.parametrize('is_onnx', [True, False])
def test_build_model_embedding(is_onnx):
    """Encoding a DocumentArray with a built model fills numpy embeddings."""
    model = finetuner.build_model(name='bert-base-cased', is_onnx=is_onnx)

    docs = DocumentArray(Document(text='TEST TEXT'))
    finetuner.encode(model=model, data=docs)
    assert docs.embeddings is not None
    assert isinstance(docs.embeddings, np.ndarray)


def test_embedding_with_list():
    """Encoding a plain list of strings matches encoding a DocumentArray."""
    model = finetuner.build_model(name='bert-base-cased')

    docs = DocumentArray(Document(text='TEST TEXT'))
    texts = ['TEST TEXT']
    da_embeddings = finetuner.encode(model=model, data=docs)
    lst_embeddings = finetuner.encode(model=model, data=texts)

    for expected, actual in zip(da_embeddings.embeddings, lst_embeddings):
        assert np.array_equal(expected, actual)
from finetuner.constants import (
    API_VERSION,
    CONFIG,
    DELETE,
    EXAMPLES,
    EXPERIMENTS,
    GET,
    LOGS,
    METRICS,
    NAME,
    POST,
    RUNS,
    STATUS,
    SYNTHESIS_TASK,
    TRAINING_TASK,
)
from finetuner.experiment import Experiment
from finetuner.model import synthesis_model_en


def _experiments_url(client, *segments):
    """URL the client is expected to hit under the experiments endpoint."""
    return client._construct_url(
        client._base_url, API_VERSION, EXPERIMENTS, *segments
    )


def test_create_experiment(client_mocker, name='name'):
    response = client_mocker.create_experiment(name)
    # Collection endpoints carry a trailing slash.
    assert response['url'] == _experiments_url(client_mocker) + '/'
    assert response['method'] == POST
    assert response['json_data'][NAME] == name


def test_get_experiment(client_mocker, name='name'):
    sent_request = client_mocker.get_experiment(name)
    assert sent_request['url'] == _experiments_url(client_mocker, name)
    assert sent_request['method'] == GET


def test_list_experiments(client_mocker):
    sent_request = client_mocker.list_experiments()
    assert sent_request['url'] == _experiments_url(client_mocker) + '/'
    assert sent_request['method'] == GET


def test_delete_experiment(client_mocker, name='name'):
    sent_request = client_mocker.delete_experiment(name)
    assert sent_request['url'] == _experiments_url(client_mocker, name)
    assert sent_request['method'] == DELETE


def test_delete_experiments(client_mocker):
    sent_request = client_mocker.delete_experiments()
    assert sent_request['url'] == _experiments_url(client_mocker) + '/'
    assert sent_request['method'] == DELETE


def test_create_training_run(client_mocker, experiment_name='exp', run_name='run'):
    config = Experiment._create_finetuning_config(
        model='resnet50',
        train_data='data name',
        experiment_name=experiment_name,
        run_name=run_name,
    )
    sent_request = client_mocker.create_run(
        experiment_name=experiment_name,
        run_name=run_name,
        run_config=config,
        task=TRAINING_TASK,
        device='cpu',
        cpus=1,
        gpus=1,
    )
    assert sent_request['url'] == _experiments_url(
        client_mocker, experiment_name, RUNS
    )
    assert sent_request['method'] == POST
    assert sent_request['json_data'][NAME] == run_name
    assert sent_request['json_data'][CONFIG] == config


def test_create_synthesis_run(client_mocker, experiment_name='exp', run_name='run'):
    config = Experiment._create_synthesis_config(
        query_data='query_data_name',
        corpus_data='corpus_data_name',
        models=synthesis_model_en,
        num_relations=3,
        experiment_name=experiment_name,
        run_name=run_name,
    )
    sent_request = client_mocker.create_run(
        experiment_name=experiment_name,
        run_name=run_name,
        run_config=config,
        task=SYNTHESIS_TASK,
        device='cpu',
        cpus=1,
        gpus=1,
    )
    assert sent_request['url'] == _experiments_url(
        client_mocker, experiment_name, RUNS
    )
    assert sent_request['method'] == POST
    assert sent_request['json_data'][NAME] == run_name
    assert sent_request['json_data'][CONFIG] == config


def test_get_run(client_mocker, experiment_name='exp', run_name='run1'):
    sent_request = client_mocker.get_run(
        experiment_name=experiment_name, run_name=run_name
    )
    assert sent_request['url'] == _experiments_url(
        client_mocker, experiment_name, RUNS, run_name
    )
    assert sent_request['method'] == GET


def test_delete_run(client_mocker, experiment_name='exp', run_name='run1'):
    sent_request = client_mocker.delete_run(
        experiment_name=experiment_name, run_name=run_name
    )
    assert sent_request['url'] == _experiments_url(
        client_mocker, experiment_name, RUNS, run_name
    )
    assert sent_request['method'] == DELETE


def test_delete_runs(client_mocker, experiment_name='exp'):
    sent_request = client_mocker.delete_runs(experiment_name=experiment_name)
    assert sent_request['url'] == _experiments_url(
        client_mocker, experiment_name, RUNS
    )
    assert sent_request['method'] == DELETE


def test_get_run_status(client_mocker, experiment_name='exp', run_name='run1'):
    sent_request = client_mocker.get_run_status(
        experiment_name=experiment_name, run_name=run_name
    )
    assert sent_request['url'] == _experiments_url(
        client_mocker, experiment_name, RUNS, run_name, STATUS
    )
    assert sent_request['method'] == GET


def test_get_run_logs(client_mocker, experiment_name='exp', run_name='run1'):
    sent_request = client_mocker.get_run_logs(
        experiment_name=experiment_name, run_name=run_name
    )
    assert sent_request['url'] == _experiments_url(
        client_mocker, experiment_name, RUNS, run_name, LOGS
    )
    assert sent_request['method'] == GET


def test_get_run_metrics(client_mocker, experiment_name='exp', run_name='run1'):
    sent_request = client_mocker.get_run_metrics(
        experiment_name=experiment_name, run_name=run_name
    )
    assert sent_request['url'] == _experiments_url(
        client_mocker, experiment_name, RUNS, run_name, METRICS
    )
    assert sent_request['method'] == GET


def test_get_run_examples(client_mocker, experiment_name='exp', run_name='run1'):
    sent_request = client_mocker.get_run_examples(
        experiment_name=experiment_name, run_name=run_name
    )
    assert sent_request['url'] == _experiments_url(
        client_mocker, experiment_name, RUNS, run_name, EXAMPLES
    )
    assert sent_request['method'] == GET
Experiment 55 | from finetuner.model import synthesis_model_en 56 | 57 | 58 | @pytest.fixture 59 | def experiment(finetuner_mocker): 60 | experiment = Experiment( 61 | client=finetuner_mocker._client, 62 | name='experiment name', 63 | status='ACTIVE', 64 | created_at='some time', 65 | description='test description', 66 | ) 67 | return experiment 68 | 69 | 70 | def test_get_experiment_name(experiment): 71 | assert experiment.name == 'experiment name' 72 | 73 | 74 | def test_get_run(experiment): 75 | run = experiment.get_run(name='run name') 76 | assert run.name == 'run name' 77 | assert run.status()[STATUS] in [CREATED, STARTED, FINISHED, FAILED] 78 | 79 | 80 | def test_list_runs(experiment): 81 | runs = experiment.list_runs() 82 | # depends on `return_runs` in `unit/conftest.py` 83 | assert len(runs) == 2 84 | for run, expected_name in zip(runs, ['first run', 'second run']): 85 | assert run.name == expected_name 86 | assert run.status()[STATUS] in [CREATED, STARTED, FINISHED, FAILED] 87 | 88 | 89 | def test_create_training_run(experiment): 90 | data = docarray.DocumentArray().empty(1) 91 | run_name = 'run1' 92 | data_name = f'{DA_PREFIX}-{experiment.name}-{run_name}-train' 93 | run = experiment.create_training_run( 94 | model='resnet50', 95 | model_options={}, 96 | train_data=data, 97 | run_name=run_name, 98 | ) 99 | expected_config = Experiment._create_finetuning_config( 100 | model='resnet50', 101 | model_options={}, 102 | train_data=data_name, 103 | experiment_name=experiment.name, 104 | run_name=run_name, 105 | ) 106 | assert run.name == run_name 107 | assert run.status()[STATUS] in [CREATED, STARTED, FINISHED, FAILED] 108 | assert run.config == expected_config 109 | 110 | 111 | def test_create_synthesis_run(experiment): 112 | query_data = docarray.DocumentArray().empty(1) 113 | corpus_data = docarray.DocumentArray().empty(2) 114 | run_name = 'run1' 115 | query_data_name = f'{DA_PREFIX}-{experiment.name}-{run_name}-query' 116 | corpus_data_name = 
f'{DA_PREFIX}-{experiment.name}-{run_name}-corpus' 117 | run = experiment.create_synthesis_run( 118 | query_data=query_data, 119 | corpus_data=corpus_data, 120 | models=synthesis_model_en, 121 | num_relations=3, 122 | run_name=run_name, 123 | ) 124 | expected_config = Experiment._create_synthesis_config( 125 | query_data=query_data_name, 126 | corpus_data=corpus_data_name, 127 | models=synthesis_model_en, 128 | num_relations=3, 129 | experiment_name=experiment.name, 130 | run_name=run_name, 131 | ) 132 | assert run.name == run_name 133 | assert run.status()[STATUS] in [CREATED, STARTED, FINISHED, FAILED] 134 | assert run.config == expected_config 135 | 136 | 137 | def test_create_training_run_config(): 138 | expected_config = { 139 | MODEL: { 140 | NAME: 'resnet50', 141 | ARTIFACT: None, 142 | FREEZE: False, 143 | OUTPUT_DIM: None, 144 | OPTIONS: None, 145 | ONNX: False, 146 | }, 147 | DATA: { 148 | TRAIN_DATA: 'train_data', 149 | EVAL_DATA: 'eval_data', 150 | EVALUATE: False, 151 | NUM_WORKERS: 8, 152 | NUM_ITEMS_PER_CLASS: 4, 153 | VAL_SPLIT: 0.0, 154 | SAMPLER: 'auto', 155 | }, 156 | HYPER_PARAMETERS: { 157 | LOSS: 'TripletMarginLoss', 158 | LOSS_OPTIONS: None, 159 | OPTIMIZER: 'Adam', 160 | OPTIMIZER_OPTIONS: {'weight_decay': 0.01}, 161 | MINER: 'TripletMarginMiner', 162 | MINER_OPTIONS: {'margin': 0.3}, 163 | BATCH_SIZE: 8, 164 | LEARNING_RATE: 0.001, 165 | EPOCHS: 20, 166 | SCHEDULER: 'linear', 167 | SCHEDULER_OPTIONS: { 168 | 'num_training_steps': 'auto', 169 | 'num_warmup_steps': 2, 170 | 'scheduler_step': 'batch', 171 | }, 172 | LOSS_OPTIMIZER: None, 173 | LOSS_OPTIMIZER_OPTIONS: None, 174 | }, 175 | CALLBACKS: [ 176 | { 177 | NAME: 'TrainingCheckpoint', 178 | OPTIONS: { 179 | 'last_k_epochs': 2, 180 | }, 181 | } 182 | ], 183 | EXPERIMENT_NAME: 'exp name', 184 | PUBLIC: False, 185 | RUN_NAME: 'run name', 186 | } 187 | config = Experiment._create_finetuning_config( 188 | model='resnet50', 189 | train_data='train_data', 190 | experiment_name='exp name', 191 
| run_name='run name', 192 | eval_data='eval_data', 193 | description=None, 194 | loss='TripletMarginLoss', 195 | miner='TripletMarginMiner', 196 | miner_options={'margin': 0.3}, 197 | optimizer='Adam', 198 | optimizer_options={'weight_decay': 0.01}, 199 | learning_rate=0.001, 200 | epochs=20, 201 | batch_size=8, 202 | callbacks=[TrainingCheckpoint(last_k_epochs=2)], 203 | scheduler='linear', 204 | scheduler_options={ 205 | 'num_warmup_steps': 2, 206 | 'scheduler_step': 'batch', 207 | }, 208 | freeze=False, 209 | output_dim=None, 210 | multi_modal=False, 211 | device='cuda', 212 | ) 213 | assert config == expected_config 214 | 215 | 216 | def test_create_synthesis_run_config(): 217 | expected_config = { 218 | RAW_DATA_CONFIG: { 219 | QUERIES: 'query_data', 220 | CORPUS: 'corpus_data', 221 | }, 222 | RELATION_MINING: { 223 | MODELS: [synthesis_model_en.relation_miner], 224 | NUM_RELATIONS: 3, 225 | }, 226 | CROSS_ENCODER: synthesis_model_en.cross_encoder, 227 | MAX_NUM_DOCS: None, 228 | EXPERIMENT_NAME: 'exp name', 229 | PUBLIC: False, 230 | RUN_NAME: 'run name', 231 | } 232 | 233 | config = Experiment._create_synthesis_config( 234 | train_data='train_data', 235 | experiment_name='exp name', 236 | models=synthesis_model_en, 237 | run_name='run name', 238 | query_data='query_data', 239 | corpus_data='corpus_data', 240 | num_relations=3, 241 | ) 242 | 243 | assert config == expected_config 244 | -------------------------------------------------------------------------------- /tests/unit/test_finetuner.py: -------------------------------------------------------------------------------- 1 | import docarray 2 | import pytest 3 | 4 | from finetuner.constants import CREATED, FAILED, FINISHED, STARTED, STATUS 5 | from finetuner.model import synthesis_model_en 6 | 7 | 8 | @pytest.mark.parametrize( 9 | 'experiment_name', 10 | ['exp name', None], 11 | ) 12 | def test_create_experiment(finetuner_mocker, experiment_name): 13 | if experiment_name: 14 | experiment = 
finetuner_mocker.create_experiment(name=experiment_name) 15 | else: 16 | experiment = finetuner_mocker.create_experiment() 17 | expected_name = experiment_name or 'default' 18 | assert experiment.name == expected_name 19 | assert experiment._status == 'ACTIVE' 20 | 21 | 22 | def test_get_experiment(finetuner_mocker, experiment_name='exp_name'): 23 | experiment = finetuner_mocker.get_experiment(name=experiment_name) 24 | assert experiment.name == experiment_name 25 | 26 | 27 | def test_list_experiments(finetuner_mocker): 28 | experiments = finetuner_mocker.list_experiments() 29 | # depends on `return_experiments` in `unit/conftest.py` 30 | assert len(experiments) == 2 31 | assert experiments[0].name == 'first experiment' 32 | assert experiments[1].name == 'second experiment' 33 | 34 | 35 | @pytest.mark.parametrize( 36 | 'experiment_name', 37 | ['exp name', None], 38 | ) 39 | def test_create_training_run(finetuner_mocker, experiment_name): 40 | data = docarray.DocumentArray().empty(1) 41 | run_name = 'run1' 42 | exp_name = experiment_name or 'default' 43 | run = finetuner_mocker.create_training_run( 44 | model='resnet50', 45 | train_data=data, 46 | run_name=run_name, 47 | experiment_name=experiment_name, 48 | ) 49 | assert run.name == run_name 50 | assert run.status()[STATUS] in [CREATED, STARTED, FINISHED, FAILED] 51 | assert run._experiment_name == exp_name 52 | 53 | 54 | @pytest.mark.parametrize( 55 | 'experiment_name', 56 | ['exp name', None], 57 | ) 58 | def test_create_synthesis_run(finetuner_mocker, experiment_name): 59 | data = docarray.DocumentArray().empty(1) 60 | run_name = 'run1' 61 | exp_name = experiment_name or 'default' 62 | run = finetuner_mocker.create_synthesis_run( 63 | query_data=data, 64 | corpus_data=data, 65 | models=synthesis_model_en, 66 | num_relations=3, 67 | run_name=run_name, 68 | experiment_name=experiment_name, 69 | ) 70 | assert run.name == run_name 71 | assert run.status()[STATUS] in [CREATED, STARTED, FINISHED, FAILED] 72 | assert 
run._experiment_name == exp_name 73 | 74 | 75 | @pytest.mark.parametrize( 76 | 'experiment_name', 77 | ['exp name', None], 78 | ) 79 | def test_get_run(finetuner_mocker, experiment_name): 80 | run = finetuner_mocker.get_run(run_name='run_name', experiment_name=experiment_name) 81 | exp_name = experiment_name or 'default' 82 | assert run.name == 'run_name' 83 | assert run._experiment_name == exp_name 84 | -------------------------------------------------------------------------------- /tests/unit/test_hubble.py: -------------------------------------------------------------------------------- 1 | import docarray 2 | 3 | from finetuner.constants import DA_PREFIX 4 | from finetuner.hubble import push_training_data 5 | 6 | 7 | def test_push_training_data(client_mocker, experiment_name='exp', run_name='run'): 8 | train_data = docarray.DocumentArray.empty(10) 9 | eval_data = query_data = docarray.DocumentArray.empty(5) 10 | index_data = None 11 | 12 | train_name, eval_name, query_name, index_name = push_training_data( 13 | experiment_name=experiment_name, 14 | run_name=run_name, 15 | train_data=train_data, 16 | eval_data=eval_data, 17 | query_data=query_data, 18 | index_data=index_data, 19 | ) 20 | assert train_name == f'{DA_PREFIX}-{experiment_name}-{run_name}-train' 21 | assert eval_name == query_name == f'{DA_PREFIX}-{experiment_name}-{run_name}-eval' 22 | assert not index_name 23 | -------------------------------------------------------------------------------- /tests/unit/test_run.py: -------------------------------------------------------------------------------- 1 | from finetuner.constants import CREATED, FAILED, FINISHED, STARTED, STATUS 2 | from finetuner.run import Run 3 | 4 | 5 | def test_run_obj(finetuner_mocker): 6 | test_config = {'type': 'test'} 7 | run = Run( 8 | client=finetuner_mocker._client, 9 | name='run name', 10 | experiment_name='exp name', 11 | config=test_config, 12 | created_at='some time', 13 | description='description', 14 | ) 15 | 16 | 
assert run.name == 'run name' 17 | assert run.status()[STATUS] in [CREATED, STARTED, FINISHED, FAILED] 18 | assert run.config == test_config 19 | --------------------------------------------------------------------------------