├── .github ├── CODEOWNERS ├── ISSUE_TEMPLATE │ ├── bug-report.yml │ └── feature-request.yml ├── PULL_REQUEST_TEMPLATE.md ├── copilot-instructions.md ├── dependabot.yaml ├── semantic.yml └── workflows │ ├── codeql.yml │ ├── dependabot.yaml │ ├── dependency-review.yml │ ├── deploy-docs.yaml │ ├── lint.yaml │ ├── mirror-localai.yml │ ├── patch-models.yaml │ ├── pre-release.yaml │ ├── release-base.yaml │ ├── release-pr.yaml │ ├── release.yaml │ ├── scorecards.yml │ ├── test-docker-args.yaml │ ├── test-docker-gpu.yaml │ ├── test-docker.yaml │ ├── test-finetune.yaml │ ├── test-helm.yaml │ ├── test-kubernetes.yaml │ ├── test-podman-applesilicon.yaml │ ├── unit-test.yaml │ └── update-models.yaml ├── .gitignore ├── .golangci.yaml ├── .pre-commit-config.yaml ├── AGENTS.md ├── CONTRIBUTING.md ├── Dockerfile ├── Dockerfile.base ├── Dockerfile.base-applesilicon ├── LICENSE ├── Makefile ├── README.md ├── SECURITY.md ├── charts └── aikit │ ├── Chart.yaml │ ├── templates │ ├── NOTES.txt │ ├── _helpers.tpl │ ├── deployment.yaml │ ├── hpa.yaml │ ├── namespace-post-install.yaml │ └── service.yaml │ └── values.yaml ├── cmd └── frontend │ └── main.go ├── demo ├── demo.sh └── third_party │ └── demo-magic │ ├── README.md │ ├── demo-magic.sh │ └── license.txt ├── go.mod ├── go.sum ├── models ├── aikitfile.yaml ├── codestral-22b.yaml ├── flux-1-dev.yaml ├── gemma-2-2b-instruct.yaml ├── gpt-oss-120b.yaml ├── gpt-oss-20b.yaml ├── llama-3.1-8b-instruct.yaml ├── llama-3.2-1b-instruct.yaml ├── llama-3.2-3b-instruct.yaml ├── llama-3.3-70b-instruct.yaml ├── mixtral-8x7b-instruct.yaml ├── phi-4-14b-instruct.yaml └── qwq-32b.yaml ├── pkg ├── aikit │ └── config │ │ ├── finetune_specs.go │ │ ├── inference_specs.go │ │ ├── specs.go │ │ └── specs_test.go ├── aikit2llb │ ├── finetune │ │ └── convert.go │ └── inference │ │ ├── backend.go │ │ ├── backend_test.go │ │ ├── convert.go │ │ ├── diffusers.go │ │ ├── diffusers_test.go │ │ ├── download.go │ │ ├── exllama.go │ │ ├── exllama_test.go │ │ └── image.go ├── build │ ├── args.go │ ├── build.go │ └── build_test.go ├── finetune │ └── target_unsloth.py ├── utils │ ├── const.go │ ├── util.go │ └── util_test.go └── version │ └── version.go ├── scripts └── parse-models.sh ├── test ├── aikitfile-args.yaml ├── aikitfile-cpu-exllama2.yaml ├── aikitfile-dev.yaml ├── aikitfile-diffusers.yaml ├── aikitfile-exllama2-exl2.yaml ├── aikitfile-exllama2-gptq.yaml ├── aikitfile-flux-schnell.yaml ├── aikitfile-hf.yaml ├── aikitfile-llama-cpp.yaml ├── aikitfile-llama-cuda.yaml ├── aikitfile-llama.yaml ├── aikitfile-llava.yaml ├── aikitfile-oci.yaml ├── aikitfile-unsloth-custom.yaml └── aikitfile-unsloth.yaml └── website ├── .gitignore ├── README.md ├── babel.config.js ├── docs ├── architecture.md ├── create-images.md ├── demo.md ├── diffusion.md ├── exllama2.md ├── fine-tune.md ├── gpu.md ├── intro.md ├── kubernetes.md ├── llama-cpp.md ├── premade-models.md ├── quick-start.md ├── release.md ├── security.md ├── specs-finetune.md ├── specs-inference.md └── vision.md ├── docusaurus.config.js ├── osv-scanner.toml ├── package.json ├── sidebars.js ├── src └── css │ └── custom.css ├── static ├── .nojekyll └── img │ ├── architecture.png │ ├── favicon.ico │ ├── logo.png │ └── logo.svg └── yarn.lock /.github/CODEOWNERS: -------------------------------------------------------------------------------- 1 | * @sozercan 2 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/bug-report.yml: 
-------------------------------------------------------------------------------- 1 | name: Bug Report 2 | description: Report a bug in AIKit 3 | title: "[BUG] " 4 | labels: 5 | - "bug" 6 | body: 7 | - type: markdown 8 | attributes: 9 | value: | 10 | Please search to see if an issue already exists for your bug before continuing. 11 | > If you need to report a security issue please see https://github.com/kaito-project/aikit/security/policy instead. 12 | - type: textarea 13 | attributes: 14 | label: Expected Behavior 15 | description: Briefly describe what you expect to happen. 16 | - type: textarea 17 | attributes: 18 | label: Actual Behavior 19 | description: Briefly describe what is actually happening. 20 | - type: textarea 21 | attributes: 22 | label: Steps To Reproduce 23 | description: Detailed steps to reproduce the behavior. 24 | placeholder: | 25 | 1. In ... 26 | 2. With this config... 27 | 3. Run '...' 28 | 4. See error... 29 | - type: checkboxes 30 | id: idea 31 | attributes: 32 | label: "Are you willing to submit PRs to contribute to this bug fix?" 33 | options: 34 | - label: Yes, I am willing to implement it. 35 | - type: markdown 36 | attributes: 37 | value: | 38 | Thanks for taking the time to fill out a bug report! 39 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/feature-request.yml: -------------------------------------------------------------------------------- 1 | name: Request 2 | description: Request a new feature or propose an enhancement to AIKit 3 | title: "[REQ] <title>" 4 | labels: 5 | - "enhancement" 6 | body: 7 | - type: markdown 8 | attributes: 9 | value: | 10 | Please search to see if an issue already exists for your request before continuing. 11 | - type: dropdown 12 | attributes: 13 | label: What kind of request is this? 14 | multiple: false 15 | options: 16 | - New feature 17 | - Improvement of existing experience 18 | - Other 19 | - type: textarea 20 | attributes: 21 | label: What is your request or suggestion? 22 | placeholder: | 23 | e.g. I would like AIKit to add this <feature> so that I can use it in my <scenario>. 24 | e.g. When using AIKit the <current behavior> has this <limitation> and it would be better if it has this <improvement>. 25 | - type: checkboxes 26 | id: idea 27 | attributes: 28 | label: "Are you willing to submit PRs to contribute to this feature request?" 29 | options: 30 | - label: Yes, I am willing to implement it. 31 | - type: markdown 32 | attributes: 33 | value: | 34 | Thanks for taking the time to fill out a request! 
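The issue forms above are rendered by GitHub's web UI. For quick filing from a terminal, a minimal sketch using the GitHub CLI (the placeholder title/body text here is illustrative, not part of the templates):

    # file a bug report against the repository; assumes `gh` is installed and authenticated
    gh issue create --repo kaito-project/aikit \
      --title "[BUG] <title>" \
      --label bug \
      --body "Expected Behavior: ... / Actual Behavior: ... / Steps To Reproduce: ..."
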
-------------------------------------------------------------------------------- /.github/PULL_REQUEST_TEMPLATE.md: -------------------------------------------------------------------------------- 1 | **What this PR does / why we need it**: 2 | 3 | **Which issue(s) this PR fixes** *(optional, using `fixes #<issue number>(, fixes #<issue_number>, ...)` format, will close the issue(s) when the PR gets merged)*: 4 | Fixes # 5 | 6 | **Special notes for your reviewer**: 7 | -------------------------------------------------------------------------------- /.github/copilot-instructions.md: -------------------------------------------------------------------------------- 1 | ../AGENTS.md -------------------------------------------------------------------------------- /.github/dependabot.yaml: -------------------------------------------------------------------------------- 1 | version: 2 2 | updates: 3 | - package-ecosystem: "gomod" 4 | directory: "/" 5 | schedule: 6 | interval: "weekly" 7 | 8 | - package-ecosystem: "github-actions" 9 | directory: "/" 10 | schedule: 11 | interval: "weekly" 12 | groups: 13 | actions-all: 14 | patterns: 15 | - "*" 16 | 17 | - package-ecosystem: docker 18 | directory: / 19 | schedule: 20 | interval: "weekly" 21 | 22 | - package-ecosystem: docker 23 | directory: /charts/aikit 24 | schedule: 25 | interval: "weekly" 26 | -------------------------------------------------------------------------------- /.github/semantic.yml: -------------------------------------------------------------------------------- 1 | titleOnly: true 2 | types: 3 | - build 4 | - chore 5 | - ci 6 | - docs 7 | - feat 8 | - fix 9 | - perf 10 | - refactor 11 | - revert 12 | - style 13 | - test 14 | -------------------------------------------------------------------------------- /.github/workflows/codeql.yml: -------------------------------------------------------------------------------- 1 | # For most projects, this workflow file will not need changing; you simply need 2 | # to commit it to your repository. 3 | # 4 | # You may wish to alter this file to override the set of languages analyzed, 5 | # or to provide custom queries or build logic. 6 | # 7 | # ******** NOTE ******** 8 | # We have attempted to detect the languages in your repository. Please check 9 | # the `language` matrix defined below to confirm you have the correct set of 10 | # supported CodeQL languages. 
11 | #
12 | name: "CodeQL"
13 | 
14 | on:
15 |   push:
16 |     branches: ["main"]
17 |   pull_request:
18 |     # The branches below must be a subset of the branches above
19 |     branches: ["main"]
20 |   schedule:
21 |     - cron: "0 0 * * 1"
22 | 
23 | permissions:
24 |   contents: read
25 | 
26 | jobs:
27 |   analyze:
28 |     name: Analyze
29 |     runs-on: ubuntu-latest
30 |     permissions:
31 |       actions: read
32 |       contents: read
33 |       security-events: write
34 | 
35 |     strategy:
36 |       fail-fast: false
37 |       matrix:
38 |         language: ["go"]
39 |         # CodeQL supports [ $supported-codeql-languages ]
40 |         # Learn more about CodeQL language support at https://aka.ms/codeql-docs/language-support
41 | 
42 |     steps:
43 |       - name: Harden Runner
44 |         uses: step-security/harden-runner@f4a75cfd619ee5ce8d5b864b0d183aff3c69b55a # v2.13.1
45 |         with:
46 |           disable-sudo: true
47 |           egress-policy: audit
48 |           allowed-endpoints: >
49 |             *.github.com:443
50 |             github.com:443
51 |             *.githubusercontent.com:443
52 |             proxy.golang.org:443
53 |             storage.googleapis.com:443
54 |             sum.golang.org:443
55 | 
56 |       - name: Checkout repository
57 |         uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0
58 | 
59 |       - uses: actions/setup-go@44694675825211faa026b3c33043df3e48a5fa00 # v6.0.0
60 |         with:
61 |           go-version: "1.24"
62 |           check-latest: true
63 | 
64 |       # Initializes the CodeQL tools for scanning.
65 |       - name: Initialize CodeQL
66 |         uses: github/codeql-action/init@192325c86100d080feab897ff886c34abd4c83a3 # v3.29.5
67 |         with:
68 |           languages: ${{ matrix.language }}
69 |           # If you wish to specify custom queries, you can do so here or in a config file.
70 |           # By default, queries listed here will override any specified in a config file.
71 |           # Prefix the list here with "+" to use these queries and those in the config file.
72 | 
73 |       # Autobuild attempts to build any compiled languages (C/C++, C#, or Java).
74 |       # If this step fails, then you should remove it and run the build manually (see below)
75 |       - name: Autobuild
76 |         uses: github/codeql-action/autobuild@192325c86100d080feab897ff886c34abd4c83a3 # v3.29.5
77 | 
78 |       # ℹ️ Command-line programs to run using the OS shell.
79 |       # 📚 See https://docs.github.com/en/actions/using-workflows/workflow-syntax-for-github-actions#jobsjob_idstepsrun
80 | 
81 |       # If the Autobuild fails above, remove it and uncomment the following three lines,
82 |       # then modify them (or add more) to build your code; refer to the EXAMPLE below for guidance.
83 | 84 | # - run: | 85 | # echo "Run, Build Application using script" 86 | # ./location_of_script_within_repo/buildscript.sh 87 | 88 | - name: Perform CodeQL Analysis 89 | uses: github/codeql-action/analyze@192325c86100d080feab897ff886c34abd4c83a3 # v3.29.5 90 | with: 91 | category: "/language:${{matrix.language}}" 92 | -------------------------------------------------------------------------------- /.github/workflows/dependabot.yaml: -------------------------------------------------------------------------------- 1 | name: Dependabot auto-merge 2 | on: pull_request 3 | 4 | permissions: 5 | contents: read 6 | 7 | jobs: 8 | dependabot: 9 | permissions: 10 | contents: write 11 | pull-requests: write 12 | runs-on: ubuntu-latest 13 | if: github.event.pull_request.user.login == 'dependabot[bot]' && github.repository == 'kaito-project/aikit' 14 | steps: 15 | - name: Dependabot metadata 16 | id: metadata 17 | uses: dependabot/fetch-metadata@08eff52bf64351f401fb50d4972fa95b9f2c2d1b # v1.3.1 18 | with: 19 | github-token: "${{ secrets.GITHUB_TOKEN }}" 20 | - name: Enable auto-merge for Dependabot PRs 21 | run: gh pr merge --auto --merge "$PR_URL" 22 | env: 23 | PR_URL: ${{github.event.pull_request.html_url}} 24 | GH_TOKEN: ${{secrets.GITHUB_TOKEN}} 25 | -------------------------------------------------------------------------------- /.github/workflows/dependency-review.yml: -------------------------------------------------------------------------------- 1 | # Dependency Review Action 2 | # 3 | # This Action will scan dependency manifest files that change as part of a Pull Request, 4 | # surfacing known-vulnerable versions of the packages declared or updated in the PR. 5 | # Once installed, if the workflow run is marked as required, 6 | # PRs introducing known-vulnerable packages will be blocked from merging. 
7 | # 8 | # Source repository: https://github.com/actions/dependency-review-action 9 | name: 'Dependency Review' 10 | on: [pull_request] 11 | 12 | permissions: 13 | contents: read 14 | 15 | jobs: 16 | dependency-review: 17 | runs-on: ubuntu-latest 18 | steps: 19 | - name: Harden Runner 20 | uses: step-security/harden-runner@f4a75cfd619ee5ce8d5b864b0d183aff3c69b55a # v2.13.1 21 | with: 22 | disable-sudo: true 23 | egress-policy: audit 24 | allowed-endpoints: > 25 | api.github.com:443 26 | github.com:443 27 | *.githubusercontent.com:443 28 | api.securityscorecards.dev:443 29 | api.deps.dev:443 30 | 31 | - name: 'Checkout Repository' 32 | uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0 33 | - name: 'Dependency Review' 34 | uses: actions/dependency-review-action@595b5aeba73380359d98a5e087f648dbb0edce1b # v4.7.3 35 | -------------------------------------------------------------------------------- /.github/workflows/deploy-docs.yaml: -------------------------------------------------------------------------------- 1 | name: deploy-docs 2 | on: 3 | push: 4 | branches: 5 | - main 6 | paths: 7 | - '.github/workflows/deploy-docs.yaml' 8 | - 'website/**' 9 | pull_request: 10 | branches: 11 | - main 12 | paths: 13 | - '.github/workflows/deploy-docs.yaml' 14 | - 'website/**' 15 | 16 | permissions: 17 | contents: read 18 | 19 | jobs: 20 | deploy: 21 | name: Generate docs website to GitHub Pages 22 | runs-on: ubuntu-latest 23 | permissions: 24 | contents: write 25 | defaults: 26 | run: 27 | working-directory: website 28 | steps: 29 | - uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0 30 | 31 | - name: Harden Runner 32 | uses: step-security/harden-runner@f4a75cfd619ee5ce8d5b864b0d183aff3c69b55a 33 | with: 34 | disable-sudo: true 35 | egress-policy: audit 36 | allowed-endpoints: > 37 | github.com:443 38 | registry.yarnpkg.com:443 39 | *.githubusercontent.com:443 40 | *.blob.core.windows.net:443 41 | 42 | - name: Setup Node 43 | uses: actions/setup-node@a0853c24544627f65ddf259abe73b1d18a591444 # v5.0.0 44 | with: 45 | node-version: 20.x 46 | 47 | - name: Get yarn cache 48 | id: yarn-cache 49 | run: echo "dir=$(yarn cache dir)" > $GITHUB_OUTPUT 50 | 51 | - name: Cache dependencies 52 | uses: actions/cache@0400d5f644dc74513175e3cd8d07132dd4860809 # v4.2.4 53 | with: 54 | path: ${{ steps.yarn-cache.outputs.dir }} 55 | key: ${{ runner.os }}-website-${{ hashFiles('**/yarn.lock') }} 56 | restore-keys: | 57 | ${{ runner.os }}-website- 58 | 59 | - run: yarn install --frozen-lockfile 60 | - run: yarn build 61 | 62 | - name: Deploy to GitHub Pages 63 | if: github.ref == 'refs/heads/main' && github.event_name == 'push' && github.repository == 'kaito-project/aikit' 64 | uses: peaceiris/actions-gh-pages@4f9cc6602d3f66b9c108549d475ec49e8ef4d45e # v4.0.0 65 | with: 66 | github_token: ${{ secrets.GITHUB_TOKEN }} 67 | publish_dir: ./website/build 68 | destination_dir: ./docs 69 | -------------------------------------------------------------------------------- /.github/workflows/lint.yaml: -------------------------------------------------------------------------------- 1 | name: lint 2 | 3 | on: 4 | push: 5 | paths-ignore: 6 | - '**.md' 7 | - 'website/**' 8 | pull_request: 9 | paths-ignore: 10 | - '**.md' 11 | - 'website/**' 12 | 13 | permissions: read-all 14 | 15 | concurrency: 16 | group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }} 17 | cancel-in-progress: true 18 | 19 | jobs: 20 | lint: 21 | runs-on: ubuntu-latest-16-cores 22 | steps: 23 | - 
name: Harden Runner
24 |         uses: step-security/harden-runner@f4a75cfd619ee5ce8d5b864b0d183aff3c69b55a # v2.13.1
25 |         with:
26 |           disable-sudo: true
27 |           egress-policy: audit
28 |           allowed-endpoints: >
29 |             api.github.com:443
30 |             github.com:443
31 |             *.githubusercontent.com:443
32 |             proxy.golang.org:443
33 |             storage.googleapis.com:443
34 |             golangci-lint.run:443
35 | 
36 |       - uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0
37 | 
38 |       - uses: actions/setup-go@44694675825211faa026b3c33043df3e48a5fa00 # v6.0.0
39 |         with:
40 |           go-version: "1.24"
41 |           check-latest: true
42 | 
43 |       - name: lint
44 |         uses: golangci/golangci-lint-action@4afd733a84b1f43292c63897423277bb7f4313a9 # v8.0.0
45 |         with:
46 |           version: v2.1.6
47 | 
--------------------------------------------------------------------------------
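The workflow above pins golangci-lint v2.1.6, and the repository ships a .golangci.yaml that the linter picks up automatically. Reproducing the same check locally before pushing is a minimal sketch, assuming Go 1.24+ is on PATH and using golangci-lint's documented install script:

    # install the pinned linter version, then run it against the module root
    curl -sSfL https://raw.githubusercontent.com/golangci/golangci-lint/master/install.sh \
      | sh -s -- -b "$(go env GOPATH)/bin" v2.1.6
    "$(go env GOPATH)/bin/golangci-lint" run
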
 /.github/workflows/mirror-localai.yml: --------------------------------------------------------------------------------
1 | name: Mirror LocalAI Binary to GHCR
2 | 
3 | on:
4 |   workflow_dispatch:
5 |     inputs:
6 |       version:
7 |         description: "LocalAI version tag (e.g. v3.4.0)"
8 |         required: true
9 |         default: v3.4.0
10 | 
11 | permissions:
12 |   contents: read
13 |   actions: read
14 |   packages: write
15 | 
16 | jobs:
17 |   mirror:
18 |     runs-on: ubuntu-latest
19 |     strategy:
20 |       fail-fast: false
21 |       matrix:
22 |         arch: [linux-amd64, linux-arm64]
23 |     env:
24 |       ORAS_VERSION: v1.2.0
25 |     steps:
26 |       - name: Compute variables
27 |         id: vars
28 |         run: |
29 |           VERSION="${{ github.event.inputs.version }}"
30 |           ARCH_INPUT="${{ matrix.arch }}"
31 |           # Normalize arch for tag suffix (linux-amd64 -> amd64, linux-arm64 -> arm64)
32 |           case "${ARCH_INPUT}" in
33 |             linux-amd64) ARCH_TAG=amd64 ;;
34 |             linux-arm64) ARCH_TAG=arm64 ;;
35 |             *) echo "Unsupported arch: ${ARCH_INPUT}"; exit 1 ;;
36 |           esac
37 |           ASSET_NAME="local-ai-${VERSION}-${ARCH_INPUT}"
38 |           UPSTREAM_URL="https://github.com/mudler/LocalAI/releases/download/${VERSION}/${ASSET_NAME}"
39 |           TARGET_TAG="${VERSION}-${ARCH_TAG}"
40 |           echo "version=${VERSION}" >> $GITHUB_OUTPUT
41 |           echo "arch=${ARCH_INPUT}" >> $GITHUB_OUTPUT
42 |           echo "arch_tag=${ARCH_TAG}" >> $GITHUB_OUTPUT
43 |           echo "asset=${ASSET_NAME}" >> $GITHUB_OUTPUT
44 |           echo "url=${UPSTREAM_URL}" >> $GITHUB_OUTPUT
45 |           echo "tag=${TARGET_TAG}" >> $GITHUB_OUTPUT
46 | 
47 |       - name: Show plan
48 |         run: |
49 |           echo "Mirroring ${{ steps.vars.outputs.url }} -> ghcr.io/kaito-project/aikit/localai:${{ steps.vars.outputs.tag }}"
50 | 
51 |       - name: Install ORAS
52 |         run: |
53 |           curl -sSL https://github.com/oras-project/oras/releases/download/${ORAS_VERSION}/oras_${ORAS_VERSION#v}_linux_amd64.tar.gz | sudo tar -xz -C /usr/local/bin oras
54 |           oras version
55 | 
56 |       - name: Download LocalAI binary
57 |         run: |
58 |           curl -fL "${{ steps.vars.outputs.url }}" -o local-ai
59 |           chmod +x local-ai
60 |           echo "Downloaded binary size:" $(stat -c%s local-ai) "bytes"
61 | 
62 |       - name: Login to GHCR
63 |         run: |
64 |           echo "${{ secrets.GITHUB_TOKEN }}" | oras login ghcr.io -u "${{ github.actor }}" --password-stdin
65 | 
66 |       - name: Push OCI artifact
67 |         run: |
68 |           oras push ghcr.io/kaito-project/aikit/localai:${{ steps.vars.outputs.tag }} \
69 |             --annotation org.opencontainers.image.source=https://github.com/${{ github.repository }} \
70 |             --annotation org.opencontainers.image.title=local-ai \
71 |             --annotation org.opencontainers.image.version=${{ steps.vars.outputs.version }} \
72 |             local-ai:application/vnd.localai.binary.layer.v1+octet-stream
73 | 
74 |       - name: Summary
75 |         run: |
76 |           echo "Mirrored LocalAI ${{ steps.vars.outputs.version }} (${{ steps.vars.outputs.arch }}) to ghcr.io/kaito-project/aikit/localai:${{ steps.vars.outputs.tag }}" >> $GITHUB_STEP_SUMMARY
77 | 
--------------------------------------------------------------------------------
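Once mirrored, the binary can be fetched back from GHCR without touching the upstream GitHub release. A minimal consumer-side sketch, assuming ORAS is installed locally and the default input above (v3.4.0, amd64 leg of the matrix); ORAS restores the file name recorded at push time:

    # pull the single-layer artifact and make the binary executable
    oras pull ghcr.io/kaito-project/aikit/localai:v3.4.0-amd64
    chmod +x local-ai
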
 /.github/workflows/patch-models.yaml: --------------------------------------------------------------------------------
1 | name: patch-models
2 | on:
3 |   # patch weekly
4 |   schedule:
5 |     - cron: "0 0 * * 0"
6 |   workflow_dispatch:
7 | 
8 | permissions:
9 |   contents: read
10 | 
11 | jobs:
12 |   patch-models:
13 |     permissions:
14 |       contents: read
15 |       packages: write
16 |       id-token: write
17 |     runs-on: ubuntu-latest-16-cores
18 |     timeout-minutes: 240
19 |     strategy:
20 |       fail-fast: false
21 |       matrix:
22 |         images:
23 |           - ghcr.io/kaito-project/aikit/llama3.1:8b
24 |           - ghcr.io/kaito-project/aikit/llama3.3:70b
25 |           - ghcr.io/kaito-project/aikit/llama3.2:1b
26 |           - ghcr.io/kaito-project/aikit/llama3.2:3b
27 |           - ghcr.io/kaito-project/aikit/mixtral:8x7b
28 |           - ghcr.io/kaito-project/aikit/phi3.5:3.8b
29 |           - ghcr.io/kaito-project/aikit/gemma2:2b
30 |           - ghcr.io/kaito-project/aikit/codestral:22b
31 |           - ghcr.io/kaito-project/aikit/flux1:dev
32 |     steps:
33 |       - name: Harden Runner
34 |         uses: step-security/harden-runner@f4a75cfd619ee5ce8d5b864b0d183aff3c69b55a # v2.13.1
35 |         with:
36 |           egress-policy: audit
37 |           allowed-endpoints: >
38 |             api.github.com:443
39 |             auth.docker.io:443
40 |             fulcio.sigstore.dev:443
41 |             ghcr.io:443
42 |             github.com:443
43 |             *.githubusercontent.com:443
44 |             proxy.golang.org:443
45 |             registry-1.docker.io:443
46 |             rekor.sigstore.dev:443
47 |             storage.googleapis.com:443
48 |             tuf-repo-cdn.sigstore.dev:443
49 |             *.ubuntu.com:80
50 |             *.blob.core.windows.net:443
51 | 
52 |       - name: Set up Docker Buildx
53 |         uses: docker/setup-buildx-action@e468171a9de216ec08956ac3ada2f0791b6bd435 # v3.11.1
54 | 
55 |       - name: Login to GHCR
56 |         uses: docker/login-action@184bdaa0721073962dff0199f1fb9940f07167d1 # v3.5.0
57 |         with:
58 |           registry: ghcr.io
59 |           username: ${{ github.actor }}
60 |           password: ${{ secrets.GITHUB_TOKEN }}
61 | 
62 |       - name: Download Trivy
63 |         run: |
64 |           TRIVY_VERSION=$(
65 |             curl --silent "https://api.github.com/repos/aquasecurity/trivy/releases/latest" | \
66 |             grep '"tag_name":' | \
67 |             sed -E 's/.*"v([^"]+)".*/\1/'
68 |           )
69 |           wget https://github.com/aquasecurity/trivy/releases/download/v${TRIVY_VERSION}/trivy_${TRIVY_VERSION}_Linux-64bit.tar.gz
70 |           tar zxvf trivy_${TRIVY_VERSION}_Linux-64bit.tar.gz
71 |           mv trivy /usr/local/bin
72 |           rm trivy_${TRIVY_VERSION}_Linux-64bit.tar.gz
73 | 
74 |       - name: Download retry
75 |         run: |
76 |           wget https://github.com/joshdk/retry/releases/download/v${VERSION}/retry-linux-amd64.tar.gz
77 |           tar -xf retry-linux-amd64.tar.gz
78 |           sudo install retry /usr/bin/retry
79 |         env:
80 |           VERSION: 1.4.0
81 | 
82 |       - name: Scan with Trivy
83 |         run: |
84 |           retry -attempts ${ATTEMPTS} -max-time ${MAX_TIME} trivy image --pkg-types os --exit-code 0 --format json --output report.json --timeout ${TRIVY_TIMEOUT} --ignore-unfixed ${{ matrix.images }}
85 |         env:
86 |           ATTEMPTS: 25
87 |           MAX_TIME: 0
88 |           TRIVY_TIMEOUT: 60m
89 | 
90 |       - name: Check vulnerability count
91 |         id: vuln_count
92 |         run: |
93 |           cat report.json | jq
94 |           vuln_count=$(jq '.Results[0].Vulnerabilities | length' report.json)
95 |           echo "vuln_count=$vuln_count" >> $GITHUB_OUTPUT
96 | 
97 |       - name: Get image tag
98 |         run: |
99 |           image_tag=$(echo ${{ matrix.images }} | cut -d':' -f2)
100 |           echo $image_tag
101 |           echo "image_tag=$image_tag" >> $GITHUB_ENV
102 | 
103 |       - name: Copa Action
104 |         if: steps.vuln_count.outputs.vuln_count != '0'
105 |         id: copa
106 |         uses: project-copacetic/copa-action@3843e22efdca421adb37aa8dec103a0f1db68544 # v1.2.1
107 |         with:
108 |           image: ${{ matrix.images }}
109 |           image-report: 'report.json'
110 |           patched-tag: ${{ env.image_tag }}
111 |           timeout: 30m
112 | 
113 |       - name: Install Cosign
114 |         if: steps.copa.conclusion == 'success'
115 |         uses: sigstore/cosign-installer@d7543c93d881b35a8faa02e8e3605f69b7a1ce62 # v3.10.0
116 | 
117 |       - name: Docker Push Patched Image
118 |         id: push
119 |         if: steps.copa.conclusion == 'success'
120 |         run: |
121 |           docker tag ${{ steps.copa.outputs.patched-image }} ${{ matrix.images }}
122 |           docker images
123 |           docker push ${{ matrix.images }}
124 |           echo "DIGEST=$(cosign triangulate ${{ matrix.images }} --type digest)" >> $GITHUB_ENV
125 | 
126 |       - name: Sign the images with GitHub OIDC Token
127 |         id: sign
128 |         if: steps.push.conclusion == 'success'
129 |         run: cosign sign --yes ${DIGEST}
130 | 
131 |       - name: Verify image signature
132 |         if: steps.sign.conclusion == 'success'
133 |         run: |
134 |           cosign verify ${DIGEST} \
135 |             --certificate-oidc-issuer https://token.actions.githubusercontent.com \
136 |             --certificate-identity-regexp 'https://github\.com/kaito-project/aikit/\.github/workflows/.+'
137 | 
--------------------------------------------------------------------------------
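The same scan the workflow runs can be replayed locally to confirm a patched tag is clean. A minimal sketch, assuming Trivy is installed and using one of the matrix images above with the workflow's own flags:

    # re-scan OS packages only, ignoring vulnerabilities that have no available fix
    trivy image --pkg-types os --ignore-unfixed ghcr.io/kaito-project/aikit/llama3.2:1b
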
 /.github/workflows/pre-release.yaml: --------------------------------------------------------------------------------
1 | name: pre-release
2 | 
3 | on:
4 |   workflow_dispatch: # used for testing
5 |   push:
6 |     branches:
7 |       - main
8 | 
9 | permissions:
10 |   contents: read
11 | 
12 | jobs:
13 |   pre-release:
14 |     permissions:
15 |       contents: write
16 |       packages: write
17 |       id-token: write
18 |     runs-on: ubuntu-latest-16-cores
19 |     timeout-minutes: 360
20 |     steps:
21 |       - name: Harden Runner
22 |         uses: step-security/harden-runner@f4a75cfd619ee5ce8d5b864b0d183aff3c69b55a # v2.13.1
23 |         with:
24 |           egress-policy: audit
25 |           allowed-endpoints: >
26 |             auth.docker.io:443
27 |             fulcio.sigstore.dev:443
28 |             ghcr.io:443
29 |             github.com:443
30 |             *.githubusercontent.com:443
31 |             production.cloudflare.docker.com:443
32 |             proxy.golang.org:443
33 |             registry-1.docker.io:443
34 |             rekor.sigstore.dev:443
35 |             storage.googleapis.com:443
36 |             tuf-repo-cdn.sigstore.dev:443
37 |             sum.golang.org:443
38 | 
39 |       - uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0
40 |         with:
41 |           fetch-tags: true
42 |           fetch-depth: 0
43 | 
44 |       - name: Install Cosign
45 |         uses: sigstore/cosign-installer@d7543c93d881b35a8faa02e8e3605f69b7a1ce62 # v3.10.0
46 | 
47 |       - name: Set up Docker Buildx
48 |         uses: docker/setup-buildx-action@e468171a9de216ec08956ac3ada2f0791b6bd435 # v3.11.1
49 | 
50 |       - name: Login to GHCR
51 |         uses: docker/login-action@184bdaa0721073962dff0199f1fb9940f07167d1 # v3.5.0
52 |         with:
53 |           registry: ghcr.io
54 |           username: ${{ github.actor }}
55 |           password: ${{ secrets.GITHUB_TOKEN }}
56 | 
57 |       - name: Set LDFLAGS
58 |         run: |
59 |           set -x
60 |           GIT_COMMIT=$(git rev-list --abbrev-commit --tags --max-count=1)
61 |           GIT_TAG=$(git describe --abbrev=0 --tags ${GIT_COMMIT} 2>/dev/null)
62 |           echo LDFLAGS="-X github.com/kaito-project/aikit/pkg/version.Version=${GIT_TAG}" >> $GITHUB_ENV
63 | 
64 |       - name: Build and push
65 |         uses: docker/build-push-action@263435318d21b8e681c14492fe198d362a7d2c83 # v6.18.0
66 |         id: build-and-push
67 |         with:
68 |           push: true
69 |           tags: ghcr.io/kaito-project/aikit/aikit:dev
70 |           cache-from: type=gha,scope=aikit
71 |           cache-to: type=gha,scope=aikit,mode=max
72 |           sbom: true
73 |           provenance: true
74 |           build-args: |
75 |             LDFLAGS=${{ env.LDFLAGS }}
76 |           platforms: linux/amd64,linux/arm64
77 |         env:
78 |           LDFLAGS: ${{ env.LDFLAGS }}
79 | 
80 |       - name: Sign the images with GitHub OIDC Token
81 |         env:
82 |           DIGEST: ${{ steps.build-and-push.outputs.digest }}
83 |         run: cosign sign --yes "ghcr.io/kaito-project/aikit/aikit@${DIGEST}"
84 | 
85 |       - name: Verify image signature
86 |         env:
87 |           DIGEST: ${{ steps.build-and-push.outputs.digest }}
88 |         run: |
89 |           cosign verify ghcr.io/kaito-project/aikit/aikit@${DIGEST} \
90 |             --certificate-oidc-issuer https://token.actions.githubusercontent.com \
91 |             --certificate-identity https://github.com/kaito-project/aikit/.github/workflows/pre-release.yaml@refs/heads/main
92 | 
--------------------------------------------------------------------------------
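Consumers of the :dev image can check the signature produced above without cloning the repository. A minimal sketch, assuming cosign is installed; it mirrors the workflow's own keyless verify step against the GitHub OIDC issuer:

    cosign verify ghcr.io/kaito-project/aikit/aikit:dev \
      --certificate-oidc-issuer https://token.actions.githubusercontent.com \
      --certificate-identity https://github.com/kaito-project/aikit/.github/workflows/pre-release.yaml@refs/heads/main
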
 /.github/workflows/release-base.yaml: --------------------------------------------------------------------------------
1 | name: release-base
2 | 
3 | on:
4 |   schedule:
5 |     - cron: "0 0 * * 0"
6 |   workflow_dispatch: # used for testing
7 | 
8 | permissions:
9 |   contents: read
10 | 
11 | jobs:
12 |   release-base:
13 |     permissions:
14 |       contents: write
15 |       packages: write
16 |       id-token: write
17 |     runs-on: ubuntu-latest-16-cores
18 |     timeout-minutes: 360
19 |     strategy:
20 |       fail-fast: true
21 |       matrix:
22 |         runtime:
23 |           - base
24 |           - applesilicon
25 |     steps:
26 |       - name: Harden Runner
27 |         uses: step-security/harden-runner@f4a75cfd619ee5ce8d5b864b0d183aff3c69b55a # v2.13.1
28 |         with:
29 |           egress-policy: audit
30 | 
31 |       - uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0
32 |         with:
33 |           fetch-tags: true
34 |           fetch-depth: 0
35 | 
36 |       - name: Install Cosign
37 |         uses: sigstore/cosign-installer@d7543c93d881b35a8faa02e8e3605f69b7a1ce62 # v3.10.0
38 | 
39 |       - name: Set up Docker Buildx
40 |         uses: docker/setup-buildx-action@e468171a9de216ec08956ac3ada2f0791b6bd435 # v3.11.1
41 | 
42 |       - name: Login to GHCR
43 |         uses: docker/login-action@184bdaa0721073962dff0199f1fb9940f07167d1 # v3.5.0
44 |         with:
45 |           registry: ghcr.io
46 |           username: ${{ github.actor }}
47 |           password: ${{ secrets.GITHUB_TOKEN }}
48 | 
49 |       - name: Set runtime variables for matrix
50 |         run: |
51 |           if [ ${{ matrix.runtime }} == "base" ]; then
52 |             echo PLATFORMS="linux/amd64,linux/arm64" >> $GITHUB_ENV
53 |             echo FILE="Dockerfile.base" >> $GITHUB_ENV
54 |             echo TAG="ghcr.io/kaito-project/aikit/base:latest" >> $GITHUB_ENV
55 |             echo CACHE_FROM="type=gha,scope=base" >> $GITHUB_ENV
56 |             echo CACHE_TO="type=gha,scope=base,mode=max" >> $GITHUB_ENV
57 |           elif [ ${{ matrix.runtime }} == "applesilicon" ]; then
58 |             echo PLATFORMS="linux/arm64" >> $GITHUB_ENV
59 |             echo FILE="Dockerfile.base-applesilicon" >> $GITHUB_ENV
60 |             echo TAG="ghcr.io/kaito-project/aikit/applesilicon/base:latest" >> $GITHUB_ENV
61 |             echo CACHE_FROM="type=gha,scope=base-applesilicon" >> $GITHUB_ENV
62 |             echo CACHE_TO="type=gha,scope=base-applesilicon,mode=max" >> $GITHUB_ENV
63 |           fi
64 | 
65 |       - name: Build and push
66 |         uses: docker/build-push-action@263435318d21b8e681c14492fe198d362a7d2c83 # v6.18.0
67 |         id: build-and-push
68 |         with:
69 |           push: true
70 |           sbom: true
71 |           provenance: true
72 |           tags: ${{ env.TAG }}
73 |           cache-from: ${{ env.CACHE_FROM }}
74 |           cache-to: ${{ env.CACHE_TO }}
75 |           platforms: ${{ env.PLATFORMS }}
76 |           file: ${{ env.FILE }}
77 | 
78 |       - name: Sign the images with GitHub OIDC Token
79 |         env:
80 |           DIGEST: ${{ steps.build-and-push.outputs.digest }}
81 |         run: cosign sign --yes "${TAG%%:*}@${DIGEST}"
82 | 
83 |       - name: Verify image signature
84 |         env:
85 |           DIGEST: ${{ steps.build-and-push.outputs.digest }}
86 |         run: |
87 |           cosign verify "${TAG%%:*}@${DIGEST}" \
88 |             --certificate-oidc-issuer https://token.actions.githubusercontent.com \
89 |             --certificate-identity https://github.com/kaito-project/aikit/.github/workflows/release-base.yaml@refs/heads/main
90 | 
-------------------------------------------------------------------------------- /.github/workflows/release-pr.yaml: --------------------------------------------------------------------------------
1 | name: release-pr
2 | on:
3 |   push:
4 |     tags:
5 |       - 'v[0-9]+.[0-9]+.0' # run this workflow when a new minor version is published
6 |   workflow_dispatch:
7 |     inputs:
8 |       release_version:
9 |         description: 'Which version are we creating a release pull request for?'
10 |         required: true
11 | 
12 | permissions:
13 |   contents: read
14 | 
15 | jobs:
16 |   create-release-pull-request:
17 |     permissions:
18 |       contents: write
19 |       pull-requests: write
20 |       issues: write
21 |     runs-on: ubuntu-latest
22 |     steps:
23 |       - name: Harden Runner
24 |         uses: step-security/harden-runner@f4a75cfd619ee5ce8d5b864b0d183aff3c69b55a # v2.13.1
25 |         with:
26 |           egress-policy: audit
27 | 
28 |       - name: Set release version and target branch for main branch
29 |         if: github.event_name == 'push'
30 |         run: |
31 |           TAG="${GITHUB_REF#refs/tags/v}"
32 |           MAJOR_VERSION="$(echo "${TAG}" | cut -d '.' -f1)"
33 |           echo "MAJOR_VERSION=${MAJOR_VERSION}" >> ${GITHUB_ENV}
34 |           MINOR_VERSION="$(echo "${TAG}" | cut -d '.' -f2)"
35 |           echo "MINOR_VERSION=${MINOR_VERSION}" >> ${GITHUB_ENV}
36 | 
37 |           echo "NEWVERSION=v${MAJOR_VERSION}.${MINOR_VERSION}.0" >> ${GITHUB_ENV}
38 |           # push is always being merged to the main branch
39 |           echo "TARGET_BRANCH=main" >> ${GITHUB_ENV}
40 |           echo "TAG=${TAG}" >> ${GITHUB_ENV}
41 | 
42 |       - name: Set release version and target branch from input
43 |         if: github.event_name == 'workflow_dispatch'
44 |         run: |
45 |           NEWVERSION="${{ github.event.inputs.release_version }}"
46 |           echo "${NEWVERSION}" | grep -E '^v[0-9]+\.[0-9]+\.[0-9]+(-(beta|rc)\.[0-9]+)?$' || (echo "release_version should be in the format vX.Y.Z, vX.Y.Z-beta.A, or vX.Y.Z-rc.B" && exit 1)
47 | 
48 |           echo "NEWVERSION=${NEWVERSION}" >> ${GITHUB_ENV}
49 |           echo "TAG=${NEWVERSION}" >> ${GITHUB_ENV}
50 |           MAJOR_VERSION="$(echo "${NEWVERSION}" | cut -d '.' -f1 | tr -d 'v')"
51 |           MINOR_VERSION="$(echo "${NEWVERSION}" | cut -d '.'
-f2)" 52 | 53 | # non-beta releases should always be merged to release branches 54 | echo "TARGET_BRANCH=release-${MAJOR_VERSION}.${MINOR_VERSION}" >> ${GITHUB_ENV} 55 | 56 | # beta releases should always be merged to main 57 | if [[ "${NEWVERSION}" =~ "beta" ]]; then 58 | echo "TARGET_BRANCH=main" >> ${GITHUB_ENV} 59 | fi 60 | 61 | - uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 62 | with: 63 | fetch-depth: 0 64 | 65 | - name: Create release branch if needed 66 | run: | 67 | git checkout "${TARGET_BRANCH}" && exit 0 68 | 69 | # Create and push release branch if it doesn't exist 70 | git checkout -b "${TARGET_BRANCH}" 71 | git push --set-upstream origin "${TARGET_BRANCH}" 72 | 73 | - run: make release-manifest 74 | 75 | - name: Create release pull request 76 | uses: peter-evans/create-pull-request@271a8d0340265f705b14b6d32b9829c1cb33d45e # v7.0.8 77 | with: 78 | commit-message: "chore: Prepare ${{ env.NEWVERSION }} release" 79 | title: "chore: Prepare ${{ env.NEWVERSION }} release" 80 | branch: "release-${{ env.NEWVERSION }}" 81 | base: "${{ env.TARGET_BRANCH }}" 82 | signoff: true 83 | labels: | 84 | release-pr 85 | ${{ github.event.inputs.release_version }} 86 | -------------------------------------------------------------------------------- /.github/workflows/release.yaml: -------------------------------------------------------------------------------- 1 | name: release 2 | 3 | on: 4 | push: 5 | tags: 6 | - v* 7 | 8 | permissions: 9 | contents: read 10 | 11 | jobs: 12 | release: 13 | permissions: 14 | contents: write 15 | packages: write 16 | id-token: write 17 | runs-on: ubuntu-latest-16-cores 18 | timeout-minutes: 360 19 | steps: 20 | - name: Harden Runner 21 | uses: step-security/harden-runner@f4a75cfd619ee5ce8d5b864b0d183aff3c69b55a # v2.13.1 22 | with: 23 | egress-policy: audit 24 | allowed-endpoints: > 25 | api.github.com:443 26 | auth.docker.io:443 27 | fulcio.sigstore.dev:443 28 | ghcr.io:443 29 | github.com:443 30 | *.githubusercontent.com:443 31 | production.cloudflare.docker.com:443 32 | proxy.golang.org:443 33 | registry-1.docker.io:443 34 | rekor.sigstore.dev:443 35 | storage.googleapis.com:443 36 | tuf-repo-cdn.sigstore.dev:443 37 | get.helm.sh:443 38 | 39 | - uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0 40 | with: 41 | fetch-tags: true 42 | fetch-depth: 0 43 | 44 | - name: Install Cosign 45 | uses: sigstore/cosign-installer@d7543c93d881b35a8faa02e8e3605f69b7a1ce62 # v3.10.0 46 | 47 | - name: Set up Docker Buildx 48 | uses: docker/setup-buildx-action@e468171a9de216ec08956ac3ada2f0791b6bd435 # v3.11.1 49 | - uses: crazy-max/ghaction-github-runtime@3cb05d89e1f492524af3d41a1c98c83bc3025124 # v3.1.0 50 | 51 | - name: Login to GHCR 52 | uses: docker/login-action@184bdaa0721073962dff0199f1fb9940f07167d1 # v3.5.0 53 | with: 54 | registry: ghcr.io 55 | username: ${{ github.actor }} 56 | password: ${{ secrets.GITHUB_TOKEN }} 57 | 58 | - id: docker_meta 59 | uses: docker/metadata-action@c1e51972afc2121e065aed6d45c65596fe445f3f # v5.8.0 60 | with: 61 | images: ghcr.io/kaito-project/aikit/aikit 62 | tags: type=semver,pattern={{raw}} 63 | 64 | - name: Set LDFLAGS 65 | run: | 66 | set -x 67 | GIT_COMMIT=$(git rev-list --abbrev-commit --tags --max-count=1) 68 | GIT_TAG=$(git describe --abbrev=0 --tags ${GIT_COMMIT} 2>/dev/null) 69 | echo LDFLAGS="-X github.com/kaito-project/aikit/pkg/version.Version=${GIT_TAG}" >> $GITHUB_ENV 70 | 71 | - name: Build and push 72 | uses: docker/build-push-action@263435318d21b8e681c14492fe198d362a7d2c83 # 
v6.18.0 73 | id: build-and-push 74 | with: 75 | push: true 76 | tags: | 77 | ${{ steps.docker_meta.outputs.tags }} 78 | ghcr.io/kaito-project/aikit/aikit:latest 79 | cache-from: type=gha,scope=aikit 80 | cache-to: type=gha,scope=aikit,mode=max 81 | sbom: true 82 | provenance: true 83 | build-args: | 84 | LDFLAGS=${{ env.LDFLAGS }} 85 | platforms: linux/amd64,linux/arm64 86 | env: 87 | LDFLAGS: ${{ env.LDFLAGS }} 88 | 89 | - name: Sign the images with GitHub OIDC Token 90 | env: 91 | DIGEST: ${{ steps.build-and-push.outputs.digest }} 92 | run: cosign sign --yes "ghcr.io/kaito-project/aikit/aikit@${DIGEST}" 93 | 94 | - name: Verify image signature 95 | env: 96 | DIGEST: ${{ steps.build-and-push.outputs.digest }} 97 | run: | 98 | cosign verify ghcr.io/kaito-project/aikit/aikit@${DIGEST} \ 99 | --certificate-oidc-issuer https://token.actions.githubusercontent.com \ 100 | --certificate-identity https://github.com/kaito-project/aikit/.github/workflows/release.yaml@$GITHUB_REF 101 | 102 | - name: Create GitHub release 103 | uses: marvinpinto/action-automatic-releases@919008cf3f741b179569b7a6fb4d8860689ab7f0 # v1.2.1 104 | with: 105 | repo_token: "${{ secrets.GITHUB_TOKEN }}" 106 | prerelease: false 107 | 108 | - name: Publish Helm chart 109 | uses: stefanprodan/helm-gh-pages@0ad2bb377311d61ac04ad9eb6f252fb68e207260 # v1.7.0 110 | with: 111 | token: ${{ secrets.GITHUB_TOKEN }} 112 | charts_dir: charts 113 | target_dir: charts 114 | linting: off 115 | -------------------------------------------------------------------------------- /.github/workflows/scorecards.yml: -------------------------------------------------------------------------------- 1 | # This workflow uses actions that are not certified by GitHub. They are provided 2 | # by a third-party and are governed by separate terms of service, privacy 3 | # policy, and support documentation. 4 | 5 | name: Scorecard supply-chain security 6 | on: 7 | # For Branch-Protection check. Only the default branch is supported. See 8 | # https://github.com/ossf/scorecard/blob/main/docs/checks.md#branch-protection 9 | branch_protection_rule: 10 | # To guarantee Maintained check is occasionally updated. See 11 | # https://github.com/ossf/scorecard/blob/main/docs/checks.md#maintained 12 | schedule: 13 | - cron: '20 7 * * 2' 14 | push: 15 | branches: ["main"] 16 | 17 | # Declare default permissions as read only. 18 | permissions: read-all 19 | 20 | jobs: 21 | analysis: 22 | name: Scorecard analysis 23 | runs-on: ubuntu-latest 24 | permissions: 25 | # Needed to upload the results to code-scanning dashboard. 26 | security-events: write 27 | # Needed to publish results and get a badge (see publish_results below). 
28 | id-token: write 29 | contents: read 30 | actions: read 31 | 32 | steps: 33 | - name: Harden Runner 34 | uses: step-security/harden-runner@f4a75cfd619ee5ce8d5b864b0d183aff3c69b55a # v2.13.1 35 | with: 36 | disable-sudo: true 37 | egress-policy: audit 38 | allowed-endpoints: > 39 | api.github.com:443 40 | api.osv.dev:443 41 | api.securityscorecards.dev:443 42 | bestpractices.coreinfrastructure.org:443 43 | fulcio.sigstore.dev:443 44 | github.com:443 45 | oss-fuzz-build-logs.storage.googleapis.com:443 46 | rekor.sigstore.dev:443 47 | sigstore-tuf-root.storage.googleapis.com:443 48 | tuf-repo-cdn.sigstore.dev:443 49 | www.bestpractices.dev:443 50 | api.scorecard.dev:443 51 | api.deps.dev:443 52 | 53 | - name: "Checkout code" 54 | uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0 55 | with: 56 | persist-credentials: false 57 | 58 | - name: "Run analysis" 59 | uses: ossf/scorecard-action@05b42c624433fc40578a4040d5cf5e36ddca8cde # v2.4.2 60 | with: 61 | results_file: results.sarif 62 | results_format: sarif 63 | # (Optional) "write" PAT token. Uncomment the `repo_token` line below if: 64 | # - you want to enable the Branch-Protection check on a *public* repository, or 65 | # - you are installing Scorecards on a *private* repository 66 | # To create the PAT, follow the steps in https://github.com/ossf/scorecard-action#authentication-with-pat. 67 | # repo_token: ${{ secrets.SCORECARD_TOKEN }} 68 | 69 | # Public repositories: 70 | # - Publish results to OpenSSF REST API for easy access by consumers 71 | # - Allows the repository to include the Scorecard badge. 72 | # - See https://github.com/ossf/scorecard-action#publishing-results. 73 | # For private repositories: 74 | # - `publish_results` will always be set to `false`, regardless 75 | # of the value entered here. 76 | publish_results: true 77 | 78 | # Upload the results as artifacts (optional). Commenting out will disable uploads of run results in SARIF 79 | # format to the repository Actions tab. 80 | - name: "Upload artifact" 81 | uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # v4.6.2 82 | with: 83 | name: SARIF file 84 | path: results.sarif 85 | retention-days: 5 86 | 87 | # Upload the results to GitHub's code scanning dashboard. 
88 | - name: "Upload to code-scanning" 89 | uses: github/codeql-action/upload-sarif@192325c86100d080feab897ff886c34abd4c83a3 # v3.29.5 90 | with: 91 | sarif_file: results.sarif 92 | -------------------------------------------------------------------------------- /.github/workflows/test-docker-args.yaml: -------------------------------------------------------------------------------- 1 | name: docker-test-args 2 | 3 | on: 4 | push: 5 | paths-ignore: 6 | - '**.md' 7 | - 'website/**' 8 | pull_request: 9 | paths-ignore: 10 | - '**.md' 11 | - 'website/**' 12 | 13 | permissions: read-all 14 | 15 | concurrency: 16 | group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }} 17 | cancel-in-progress: true 18 | 19 | jobs: 20 | test: 21 | runs-on: ubuntu-latest-16-cores 22 | timeout-minutes: 240 23 | strategy: 24 | fail-fast: false 25 | matrix: 26 | protocol: 27 | - oci 28 | - huggingface 29 | - https 30 | steps: 31 | - name: Harden Runner 32 | uses: step-security/harden-runner@f4a75cfd619ee5ce8d5b864b0d183aff3c69b55a # v2.13.1 33 | with: 34 | egress-policy: audit 35 | allowed-endpoints: > 36 | auth.docker.io:443 37 | *.huggingface.co:443 38 | cdn.dl.k8s.io:443 39 | dl.k8s.io:443 40 | download.docker.com:443 41 | gcr.io:443 42 | github.com:443 43 | huggingface.co:443 44 | *.githubusercontent.com:443 45 | production.cloudflare.docker.com:443 46 | proxy.golang.org:443 47 | registry-1.docker.io:443 48 | storage.googleapis.com:443 49 | *.blob.core.windows.net:443 50 | *.azureedge.net:443 51 | *.ubuntu.com:80 52 | developer.download.nvidia.com:443 53 | ghcr.io:443 54 | 55 | - uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0 56 | 57 | # need containerd image store for testing local images 58 | - uses: crazy-max/ghaction-setup-docker@b60f85385d03ac8acfca6d9996982511d8620a19 # v4.3.0 59 | with: 60 | daemon-config: | 61 | { 62 | "debug": true, 63 | "features": { 64 | "containerd-snapshotter": true 65 | } 66 | } 67 | - uses: crazy-max/ghaction-github-runtime@3cb05d89e1f492524af3d41a1c98c83bc3025124 # v3.1.0 68 | 69 | - name: build aikit 70 | run: | 71 | docker buildx build . 
-t aikit:test \ 72 | --load --provenance=false --progress plain \ 73 | --cache-from=type=gha,scope=aikit-amd64 \ 74 | --cache-to=type=gha,scope=aikit-amd64,mode=max 75 | 76 | - name: set url 77 | run: | 78 | if [ "${{ matrix.protocol }}" = "oci" ]; then 79 | echo "MODEL_URL=oci://registry.ollama.ai/library/llama3.2:1b" >> $GITHUB_ENV 80 | echo "MODEL_NAME=llama3.2" >> $GITHUB_ENV 81 | elif [ "${{ matrix.protocol }}" = "huggingface" ]; then 82 | echo "MODEL_URL=huggingface://MaziyarPanahi/Llama-3.2-1B-Instruct-GGUF/Llama-3.2-1B-Instruct.Q4_K_M.gguf" >> $GITHUB_ENV 83 | echo "MODEL_NAME=Llama-3.2-1B-Instruct.Q4_K_M.gguf" >> $GITHUB_ENV 84 | elif [ "${{ matrix.protocol }}" = "https" ]; then 85 | echo "MODEL_URL=https://huggingface.co/MaziyarPanahi/Llama-3.2-1B-Instruct-GGUF/resolve/main/Llama-3.2-1B-Instruct.Q4_K_M.gguf" >> $GITHUB_ENV 86 | echo "MODEL_NAME=Llama-3.2-1B-Instruct.Q4_K_M.gguf" >> $GITHUB_ENV 87 | fi 88 | 89 | - name: build test model 90 | run: | 91 | docker buildx build -t testmodel:test \ 92 | --build-arg="model=$MODEL_URL" \ 93 | --load --provenance=false --progress plain \ 94 | --cache-from=type=gha,scope=testmodel-${{ matrix.protocol }} \ 95 | --cache-to=type=gha,scope=testmodel-${{ matrix.protocol }},mode=max \ 96 | "https://raw.githubusercontent.com/${REPO}/${SOURCE_BRANCH}/test/aikitfile-args.yaml" 97 | env: 98 | REPO: ${{ github.event.pull_request.head.repo.full_name || github.repository }} 99 | # github head ref is only set for pull_request targets 100 | # otherwise, get the github ref name to get the source branch 101 | SOURCE_BRANCH: ${{ github.head_ref || github.ref_name }} 102 | 103 | - name: list images 104 | run: docker images 105 | 106 | - name: run test model 107 | run: docker run --name testmodel -d -p 8080:8080 testmodel:test 108 | 109 | - name: run llama test 110 | run: | 111 | set -e 112 | result=$(curl --fail --retry 10 --retry-all-errors \ 113 | http://127.0.0.1:8080/v1/chat/completions \ 114 | -H "Content-Type: application/json" \ 115 | -d "{\"model\": \"${MODEL_NAME}\", \"messages\": [{\"role\": \"user\", \"content\": \"explain kubernetes in a sentence\"}]}") 116 | echo $result 117 | 118 | choices=$(echo "$result" | jq '.choices') 119 | if [ -z "$choices" ]; then 120 | exit 1 121 | fi 122 | 123 | - name: save logs 124 | if: always() 125 | run: docker logs testmodel > /tmp/docker-${{ matrix.protocol }}.log 126 | 127 | - name: publish test artifacts 128 | if: always() 129 | uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # v4.6.2 130 | with: 131 | name: test-${{ matrix.protocol }} 132 | path: | 133 | /tmp/*.log 134 | -------------------------------------------------------------------------------- /.github/workflows/test-docker-gpu.yaml: -------------------------------------------------------------------------------- 1 | name: docker-test-gpu 2 | 3 | on: 4 | workflow_dispatch: 5 | inputs: 6 | backend: 7 | description: 'Backend to test (leave empty to test all)' 8 | required: false 9 | type: choice 10 | default: 'all' 11 | options: 12 | - all 13 | - llama-cuda 14 | - exllama2-gptq 15 | - exllama2-exl2 16 | - diffusers 17 | 18 | permissions: read-all 19 | 20 | jobs: 21 | test: 22 | runs-on: self-hosted 23 | timeout-minutes: 240 24 | strategy: 25 | fail-fast: false 26 | max-parallel: 1 27 | matrix: 28 | backend: ${{ inputs.backend == 'all' && fromJson('["llama-cuda", "exllama2-gptq", "exllama2-exl2", "diffusers"]') || fromJson(format('["{0}"]', inputs.backend)) }} 29 | steps: 30 | - name: cleanup workspace 31 | run: | 32 | rm -rf ./* || 
true 33 | rm -rf ./.??* || true 34 | - uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0 35 | 36 | # use default docker driver builder with containerd image store for local aikit image 37 | # these must be setup before running this test 38 | - run: docker buildx use default 39 | 40 | - name: build aikit 41 | run: | 42 | docker buildx build . -t aikit:test \ 43 | --load --provenance=false --progress plain 44 | 45 | - name: build test model 46 | run: | 47 | docker buildx build . -t testmodel:test \ 48 | -f test/aikitfile-${{ matrix.backend }}.yaml \ 49 | --load --provenance=false --progress plain 50 | 51 | - name: list images 52 | run: docker images 53 | 54 | - name: run test model 55 | run: docker run --name testmodel -d --rm -p 8080:8080 --gpus all testmodel:test 56 | 57 | - name: run test (gguf) 58 | if: matrix.backend == 'llama-cuda' 59 | run: | 60 | result=$(curl --fail --retry 10 --retry-all-errors http://127.0.0.1:8080/v1/chat/completions -H "Content-Type: application/json" -d '{ 61 | "model": "llama-3.2-1b-instruct", 62 | "messages": [{"role": "user", "content": "explain kubernetes in a sentence"}] 63 | }') 64 | echo $result 65 | 66 | choices=$(echo "$result" | jq '.choices') 67 | if [ -z "$choices" ]; then 68 | exit 1 69 | fi 70 | 71 | - name: run test (exl2/gptq) 72 | if: matrix.backend == 'exllama2-gptq' || matrix.backend == 'exllama2-exl2' 73 | run: | 74 | result=$(curl --fail --retry 10 --retry-all-errors http://127.0.0.1:8080/v1/chat/completions -H "Content-Type: application/json" -d '{ 75 | "model": "llama-2-7b-chat", 76 | "messages": [{"role": "user", "content": "explain kubernetes in a sentence"}] 77 | }') 78 | echo $result 79 | 80 | choices=$(echo "$result" | jq '.choices') 81 | if [ -z "$choices" ]; then 82 | exit 1 83 | fi 84 | 85 | - name: run test (diffusers) 86 | if: matrix.backend == 'diffusers' 87 | run: | 88 | result=$(curl --fail --retry 10 --retry-all-errors http://127.0.0.1:8080/v1/images/generations -H "Content-Type: application/json" -d '{ 89 | "model": "dreamshaper", 90 | "prompt": "A cute baby llama", 91 | "size": "256x256" 92 | }') 93 | echo $result 94 | 95 | url=$(echo "$result" | jq '.data[0].url') 96 | if [ -z "$url" ]; then 97 | exit 1 98 | fi 99 | 100 | - name: save generated image 101 | if: matrix.backend == 'diffusers' 102 | run: docker cp testmodel:/tmp/generated/content/images /tmp 103 | 104 | - name: save logs 105 | if: always() 106 | run: docker logs testmodel > /tmp/docker-${{ matrix.backend }}.log 107 | 108 | - run: docker stop testmodel 109 | if: always() 110 | 111 | - run: docker system prune -a -f --volumes || true 112 | if: always() 113 | 114 | - name: publish test artifacts 115 | if: always() 116 | uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # v4.6.2 117 | with: 118 | name: test-${{ matrix.backend }} 119 | path: | 120 | /tmp/*.log 121 | /tmp/images/*.png 122 | -------------------------------------------------------------------------------- /.github/workflows/test-finetune.yaml: -------------------------------------------------------------------------------- 1 | name: docker-test-finetune 2 | 3 | on: 4 | workflow_dispatch: 5 | 6 | permissions: read-all 7 | 8 | jobs: 9 | test: 10 | runs-on: self-hosted 11 | timeout-minutes: 360 12 | strategy: 13 | fail-fast: false 14 | max-parallel: 1 15 | matrix: 16 | targets: 17 | - unsloth 18 | steps: 19 | - name: cleanup workspace 20 | run: | 21 | rm -rf ./* || true 22 | rm -rf ./.??* || true 23 | - uses: 
actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0 24 | 25 | - run: nvidia-smi 26 | 27 | # use default docker driver builder with containerd image store for local aikit image 28 | # must have insecure security entitlement for finetuning 29 | # these must be setup before running this test 30 | - run: docker buildx use default 31 | 32 | - name: build aikit 33 | run: | 34 | docker --debug build . -t aikit:test \ 35 | --load --provenance=false --progress plain 36 | 37 | - name: build finetuned model 38 | run: | 39 | docker --debug build --allow security.insecure \ 40 | --file test/aikitfile-${{ matrix.targets }}.yaml --output _output \ 41 | --target ${{ matrix.targets }} --progress plain . 42 | 43 | - name: check if finetuned model exists 44 | run: | 45 | ls -al _output 46 | test -f _output/model-q4_k_m.gguf 47 | 48 | - name: build custom model 49 | run: | 50 | docker --debug build _output --tag custommodel:test \ 51 | --file test/aikitfile-${{ matrix.targets }}-custom.yaml \ 52 | --load --provenance=false --progress plain 53 | 54 | - name: list images 55 | run: docker images 56 | 57 | - name: run test model 58 | run: docker run --name custommodel -d --rm -p 8080:8080 --gpus all custommodel:test 59 | 60 | - name: run test 61 | run: | 62 | result=$(curl --fail --retry 10 --retry-all-errors http://127.0.0.1:8080/v1/chat/completions -H "Content-Type: application/json" -d '{ 63 | "model": "custom", 64 | "messages": [{"role": "user", "content": "Compose a haiku about cats"}] 65 | }') 66 | echo $result 67 | 68 | choices=$(echo "$result" | jq '.choices') 69 | if [ -z "$choices" ]; then 70 | exit 1 71 | fi 72 | 73 | - name: save logs 74 | if: always() 75 | run: docker logs custommodel > /tmp/docker.log 76 | 77 | - run: docker stop custommodel 78 | if: always() 79 | 80 | - run: docker system prune -a -f --volumes || true 81 | if: always() 82 | 83 | - name: clean up output 84 | if: always() 85 | run: rm -rf _output 86 | 87 | - name: publish test artifacts 88 | if: always() 89 | uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # v4.6.2 90 | with: 91 | name: test 92 | path: | 93 | /tmp/*.log 94 | -------------------------------------------------------------------------------- /.github/workflows/test-helm.yaml: -------------------------------------------------------------------------------- 1 | name: helm-test 2 | 3 | on: 4 | workflow_dispatch: 5 | # push: 6 | # branches: 7 | # - main 8 | # paths-ignore: 9 | # - '**.md' 10 | # - 'website/**' 11 | # pull_request: 12 | # branches: 13 | # - main 14 | # paths-ignore: 15 | # - '**.md' 16 | # - 'website/**' 17 | 18 | permissions: read-all 19 | 20 | concurrency: 21 | group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }} 22 | cancel-in-progress: true 23 | 24 | jobs: 25 | test: 26 | runs-on: ubuntu-latest-16-cores 27 | timeout-minutes: 240 28 | steps: 29 | - name: Harden Runner 30 | uses: step-security/harden-runner@f4a75cfd619ee5ce8d5b864b0d183aff3c69b55a # v2.13.1 31 | with: 32 | egress-policy: audit 33 | allowed-endpoints: > 34 | auth.docker.io:443 35 | huggingface.co:443 36 | *.huggingface.co:443 37 | *.hf.co:443 38 | cdn.dl.k8s.io:443 39 | dl.k8s.io:443 40 | download.docker.com:443 41 | gcr.io:443 42 | github.com:443 43 | *.githubusercontent.com:443 44 | production.cloudflare.docker.com:443 45 | proxy.golang.org:443 46 | registry-1.docker.io:443 47 | storage.googleapis.com:443 48 | *.ubuntu.com:80 49 | developer.download.nvidia.com:443 50 | get.helm.sh:443 51 | *.blob.core.windows.net:443 52 | 
*.azureedge.net:443 53 | registry.k8s.io:443 54 | *.pkg.dev:443 55 | *.amazonaws.com:443 56 | dl-cdn.alpinelinux.org:443 57 | ghcr.io:443 58 | 59 | - uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0 60 | 61 | # need containerd image store for testing local images 62 | - uses: crazy-max/ghaction-setup-docker@b60f85385d03ac8acfca6d9996982511d8620a19 # v4.3.0 63 | with: 64 | daemon-config: | 65 | { 66 | "debug": true, 67 | "features": { 68 | "containerd-snapshotter": true 69 | } 70 | } 71 | - uses: crazy-max/ghaction-github-runtime@3cb05d89e1f492524af3d41a1c98c83bc3025124 # v3.1.0 72 | 73 | - name: build aikit 74 | run: | 75 | docker buildx build . -t aikit:test \ 76 | --load --provenance=false --progress plain \ 77 | --cache-from=type=gha,scope=aikit-amd64 \ 78 | --cache-to=type=gha,scope=aikit-amd64,mode=max 79 | 80 | - name: build test model 81 | run: | 82 | docker buildx build . -t testmodel:test \ 83 | -f test/aikitfile-llama.yaml \ 84 | --load --provenance=false --progress plain \ 85 | --cache-from=type=gha,scope=testmodel-amd64 \ 86 | --cache-to=type=gha,scope=testmodel-amd64,mode=max 87 | 88 | - name: list images 89 | run: docker images 90 | 91 | - name: install e2e dependencies 92 | run: make test-e2e-dependencies 93 | 94 | - name: create kind cluster 95 | run: kind create cluster --wait 5m 96 | 97 | - name: load test model image into kind cluster 98 | run: kind load docker-image testmodel:test 99 | 100 | - name: deploy test model 101 | run: | 102 | helm install charts/aikit --wait --debug \ 103 | --name-template aikit --namespace aikit --create-namespace \ 104 | --set image.repository=testmodel \ 105 | --set image.tag=test \ 106 | --set image.pullPolicy=Never 107 | kubectl port-forward -n aikit service/aikit 8080:8080 & 108 | 109 | - name: run test 110 | run: | 111 | result=$(curl --fail --retry 10 --retry-all-errors http://localhost:8080/v1/chat/completions -H "Content-Type: application/json" -d '{ 112 | "model": "llama-3.2-1b-instruct", 113 | "messages": [{"role": "user", "content": "explain kubernetes in a sentence"}] 114 | }') 115 | echo $result 116 | 117 | choices=$(echo "$result" | jq '.choices') 118 | if [ -z "$choices" ]; then 119 | exit 1 120 | fi 121 | 122 | - name: save logs 123 | if: always() 124 | run: | 125 | kind export logs /tmp/kind-logs 126 | 127 | - name: publish logs 128 | if: always() 129 | uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # v4.6.2 130 | with: 131 | name: kind-logs 132 | path: /tmp/kind-logs 133 | -------------------------------------------------------------------------------- /.github/workflows/test-kubernetes.yaml: -------------------------------------------------------------------------------- 1 | name: kubernetes-test 2 | 3 | on: 4 | push: 5 | paths-ignore: 6 | - '**.md' 7 | - 'website/**' 8 | pull_request: 9 | paths-ignore: 10 | - '**.md' 11 | - 'website/**' 12 | 13 | permissions: read-all 14 | 15 | concurrency: 16 | group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }} 17 | cancel-in-progress: true 18 | 19 | jobs: 20 | test: 21 | runs-on: ubuntu-latest-16-cores 22 | timeout-minutes: 240 23 | steps: 24 | - name: Harden Runner 25 | uses: step-security/harden-runner@f4a75cfd619ee5ce8d5b864b0d183aff3c69b55a # v2.13.1 26 | with: 27 | egress-policy: audit 28 | allowed-endpoints: > 29 | auth.docker.io:443 30 | huggingface.co:443 31 | *.huggingface.co:443 32 | *.hf.co:443 33 | cdn.dl.k8s.io:443 34 | dl.k8s.io:443 35 | download.docker.com:443 36 | gcr.io:443 37 | 
github.com:443 38 | *.githubusercontent.com:443 39 | production.cloudflare.docker.com:443 40 | proxy.golang.org:443 41 | registry-1.docker.io:443 42 | storage.googleapis.com:443 43 | *.ubuntu.com:80 44 | developer.download.nvidia.com:443 45 | get.helm.sh:443 46 | *.blob.core.windows.net:443 47 | *.azureedge.net:443 48 | ghcr.io:443 49 | 50 | - uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0 51 | 52 | # need containerd image store for testing local images 53 | - uses: crazy-max/ghaction-setup-docker@b60f85385d03ac8acfca6d9996982511d8620a19 # v4.3.0 54 | with: 55 | daemon-config: | 56 | { 57 | "debug": true, 58 | "features": { 59 | "containerd-snapshotter": true 60 | } 61 | } 62 | - uses: crazy-max/ghaction-github-runtime@3cb05d89e1f492524af3d41a1c98c83bc3025124 # v3.1.0 63 | 64 | - name: build aikit 65 | run: | 66 | docker buildx build . -t aikit:test \ 67 | --load --provenance=false --progress plain \ 68 | --cache-from=type=gha,scope=aikit-amd64 \ 69 | --cache-to=type=gha,scope=aikit-amd64,mode=max 70 | 71 | - name: build test model 72 | run: | 73 | docker buildx build . -t testmodel:test \ 74 | -f test/aikitfile-llama.yaml \ 75 | --load --provenance=false --progress plain \ 76 | --cache-from=type=gha,scope=testmodel-amd64 \ 77 | --cache-to=type=gha,scope=testmodel-amd64,mode=max 78 | 79 | - name: list images 80 | run: docker images 81 | 82 | - name: install e2e dependencies 83 | run: make test-e2e-dependencies 84 | 85 | - name: create kind cluster 86 | run: kind create cluster --wait 5m 87 | 88 | - name: load test model image into kind cluster 89 | run: kind load docker-image testmodel:test 90 | 91 | - name: deploy test model 92 | run: | 93 | kubectl create deployment test-model-deployment --image=testmodel:test --replicas 1 94 | kubectl rollout status deployment test-model-deployment 95 | kubectl expose deployment test-model-deployment --port=8080 --target-port=8080 --name=test-model-service 96 | kubectl port-forward service/test-model-service 8080:8080 & 97 | 98 | - name: run test 99 | run: | 100 | result=$(curl --fail --retry 10 --retry-all-errors http://127.0.0.1:8080/v1/chat/completions -H "Content-Type: application/json" -d '{ 101 | "model": "llama-3.2-1b-instruct", 102 | "messages": [{"role": "user", "content": "explain kubernetes in a sentence"}] 103 | }') 104 | echo $result 105 | 106 | choices=$(echo "$result" | jq '.choices') 107 | if [ -z "$choices" ]; then 108 | exit 1 109 | fi 110 | 111 | - name: save logs 112 | if: always() 113 | run: | 114 | echo "KIND_LOGS_PATH=$(kind export logs)" >> $GITHUB_ENV 115 | 116 | - name: publish logs 117 | if: always() 118 | uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # v4.6.2 119 | with: 120 | name: kind-logs 121 | path: ${{ env.KIND_LOGS_PATH }} 122 | -------------------------------------------------------------------------------- /.github/workflows/test-podman-applesilicon.yaml: -------------------------------------------------------------------------------- 1 | name: podman-test-gpu 2 | 3 | on: 4 | workflow_dispatch: 5 | 6 | permissions: read-all 7 | 8 | jobs: 9 | test: 10 | runs-on: self-hosted 11 | timeout-minutes: 240 12 | steps: 13 | - name: cleanup workspace 14 | run: | 15 | rm -rf ./* || true 16 | rm -rf ./.??* || true 17 | - uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0 18 | 19 | # use default docker driver builder with containerd image store for local aikit image 20 | # these must be set up before running this test 21 | - run: docker buildx use desktop-linux 22 | 23
| - name: build aikit 24 | run: | 25 | docker buildx build . -t aikit:test \ 26 | --load --provenance=false --progress plain 27 | 28 | - name: build and push test model 29 | run: | 30 | docker buildx build . -t sozercan/testmodel:test \ 31 | --push \ 32 | -f test/aikitfile-llama.yaml \ 33 | --provenance=false --progress plain \ 34 | --platform "linux/arm64" \ 35 | --build-arg="runtime=applesilicon" 36 | 37 | - name: list images 38 | run: docker images 39 | 40 | - name: run test model 41 | run: | 42 | podman run --name testmodel -d --rm -p 8080:8080 \ 43 | --device /dev/dri \ 44 | --pull always \ 45 | sozercan/testmodel:test 46 | 47 | - name: run test (gguf) 48 | run: | 49 | result=$(curl --fail --retry 10 --retry-all-errors http://localhost:8080/v1/chat/completions -H "Content-Type: application/json" -d '{ 50 | "model": "llama-3.2-1b-instruct", 51 | "messages": [{"role": "user", "content": "explain kubernetes in a sentence"}] 52 | }') 53 | echo $result 54 | 55 | choices=$(echo "$result" | jq '.choices') 56 | if [ -z "$choices" ]; then 57 | exit 1 58 | fi 59 | 60 | - name: save logs 61 | if: always() 62 | run: podman logs testmodel > /tmp/podman-gpu.log 63 | 64 | - run: podman stop testmodel 65 | if: always() 66 | 67 | # - name: prune 68 | # run: | 69 | # docker system prune -a -f --volumes || true 70 | # podman system prune -a -f --volumes || true 71 | # if: always() 72 | 73 | - name: publish test artifacts 74 | if: always() 75 | uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # v4.6.2 76 | with: 77 | name: test-podman-gpu 78 | path: | 79 | /tmp/*.log 80 | -------------------------------------------------------------------------------- /.github/workflows/unit-test.yaml: -------------------------------------------------------------------------------- 1 | name: unit-test 2 | 3 | on: 4 | push: 5 | paths-ignore: 6 | - '**.md' 7 | - 'website/**' 8 | pull_request: 9 | paths-ignore: 10 | - '**.md' 11 | - 'website/**' 12 | 13 | permissions: read-all 14 | 15 | concurrency: 16 | group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }} 17 | cancel-in-progress: true 18 | 19 | jobs: 20 | test: 21 | runs-on: ubuntu-latest-16-cores 22 | steps: 23 | - name: Harden Runner 24 | uses: step-security/harden-runner@f4a75cfd619ee5ce8d5b864b0d183aff3c69b55a # v2.13.1 25 | with: 26 | disable-sudo: true 27 | egress-policy: audit 28 | allowed-endpoints: > 29 | api.github.com:443 30 | github.com:443 31 | *.githubusercontent.com:443 32 | proxy.golang.org:443 33 | storage.googleapis.com:443 34 | 35 | - uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0 36 | 37 | - uses: actions/setup-go@44694675825211faa026b3c33043df3e48a5fa00 # v6.0.0 38 | with: 39 | go-version: "1.24" 40 | check-latest: true 41 | 42 | - name: go mod tidy 43 | run: | 44 | go mod tidy 45 | git diff --exit-code 46 | 47 | - name: test 48 | run: make test 49 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # If you prefer the allow list template instead of the deny list, see community template: 2 | # https://github.com/github/gitignore/blob/main/community/Golang/Go.AllowList.gitignore 3 | # 4 | # Binaries for programs and plugins 5 | *.exe 6 | *.exe~ 7 | *.dll 8 | *.so 9 | *.dylib 10 | 11 | # Test binary, built with `go test -c` 12 | *.test 13 | 14 | # Output of the go coverage tool, specifically when used with LiteIDE 15 | *.out 16 | 17 | # Dependency 
directories (remove the comment below to include it) 18 | # vendor/ 19 | 20 | # Go workspace file 21 | go.work 22 | 23 | bin 24 | coverage.txt 25 | 26 | _output 27 | .vscode 28 | -------------------------------------------------------------------------------- /.golangci.yaml: -------------------------------------------------------------------------------- 1 | version: "2" 2 | run: 3 | go: "1.24" 4 | linters: 5 | default: none 6 | enable: 7 | - copyloopvar 8 | - errcheck 9 | - errorlint 10 | - forcetypeassert 11 | - goconst 12 | - gocritic 13 | - godot 14 | - gosec 15 | - govet 16 | - ineffassign 17 | - misspell 18 | - revive 19 | - staticcheck 20 | - unconvert 21 | - unused 22 | - whitespace 23 | settings: 24 | lll: 25 | line-length: 200 26 | misspell: 27 | locale: US 28 | exclusions: 29 | generated: lax 30 | presets: 31 | - comments 32 | - common-false-positives 33 | - legacy 34 | - std-error-handling 35 | paths: 36 | - third_party$ 37 | - builtin$ 38 | - examples$ 39 | formatters: 40 | enable: 41 | - gci 42 | - gofmt 43 | - gofumpt 44 | - goimports 45 | exclusions: 46 | generated: lax 47 | paths: 48 | - third_party$ 49 | - builtin$ 50 | - examples$ 51 | -------------------------------------------------------------------------------- /.pre-commit-config.yaml: -------------------------------------------------------------------------------- 1 | repos: 2 | - repo: https://github.com/gitleaks/gitleaks 3 | rev: v8.21.2 4 | hooks: 5 | - id: gitleaks 6 | - repo: https://github.com/golangci/golangci-lint 7 | rev: v2.1.6 8 | hooks: 9 | - id: golangci-lint 10 | - repo: https://github.com/jumanjihouse/pre-commit-hooks 11 | rev: 3.0.0 12 | hooks: 13 | - id: shellcheck 14 | - repo: https://github.com/pre-commit/pre-commit-hooks 15 | rev: v5.0.0 16 | hooks: 17 | - id: end-of-file-fixer 18 | - id: trailing-whitespace 19 | - repo: https://github.com/crate-ci/typos 20 | rev: v1.27.3 21 | hooks: 22 | - id: typos 23 | -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | FROM --platform=$BUILDPLATFORM golang:1.25-bookworm@sha256:c4bc0741e3c79c0e2d47ca2505a06f5f2a44682ada94e1dba251a3854e60c2bd AS builder 2 | 3 | ARG TARGETPLATFORM 4 | ARG TARGETOS 5 | ARG TARGETARCH 6 | ARG TARGETVARIANT="" 7 | ARG LDFLAGS 8 | 9 | COPY . 
/go/src/github.com/kaito-project/aikit 10 | WORKDIR /go/src/github.com/kaito-project/aikit 11 | RUN CGO_ENABLED=0 \ 12 | GOOS=${TARGETOS} \ 13 | GOARCH=${TARGETARCH} \ 14 | GOARM=${TARGETVARIANT} \ 15 | go build -o /aikit -ldflags "${LDFLAGS} -w -s -extldflags '-static'" ./cmd/frontend 16 | 17 | FROM scratch 18 | COPY --from=builder /aikit /bin/aikit 19 | ENTRYPOINT ["/bin/aikit"] 20 | -------------------------------------------------------------------------------- /Dockerfile.base: -------------------------------------------------------------------------------- 1 | ARG UBUNTU_RELEASE=22.04 2 | 3 | FROM golang:1.25-bookworm@sha256:c4bc0741e3c79c0e2d47ca2505a06f5f2a44682ada94e1dba251a3854e60c2bd AS builder 4 | ARG UBUNTU_RELEASE 5 | ARG TARGETARCH 6 | 7 | RUN go install github.com/canonical/chisel/cmd/chisel@v1.0.0 8 | 9 | RUN mkdir -p /rootfs && chisel cut --release ubuntu-$UBUNTU_RELEASE --root /rootfs \ 10 | base-files_base \ 11 | base-files_chisel \ 12 | base-files_release-info \ 13 | ca-certificates_data \ 14 | libgcc-s1_libs \ 15 | libc6_libs \ 16 | bash_bins \ 17 | coreutils_bins \ 18 | grep_bins 19 | 20 | FROM scratch 21 | COPY --from=builder /rootfs / 22 | -------------------------------------------------------------------------------- /Dockerfile.base-applesilicon: -------------------------------------------------------------------------------- 1 | FROM fedora:41@sha256:3ec60eb34fa1a095c0c34dd37cead9fd38afb62612d43892fcf1d3425c32bc1e 2 | 3 | ARG MESA_VERSION="24.1.2-101" 4 | 5 | USER 0 6 | 7 | # Install the patched mesa-krunkit drivers 8 | RUN dnf -y install dnf-plugins-core && \ 9 | dnf -y copr enable slp/mesa-krunkit epel-9-aarch64 && \ 10 | dnf -y install \ 11 | mesa-vulkan-drivers-$MESA_VERSION.el9.aarch64 && \ 12 | dnf clean all 13 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2023 Sertaç Özercan 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | VERSION := v0.19.2 2 | 3 | REGISTRY ?= ghcr.io/kaito-project 4 | REPOSITORY ?= /aikit 5 | KIND_VERSION ?= 0.29.0 6 | KUBERNETES_VERSION ?= 1.33.2 7 | HELM_VERSION ?= 3.18.3 8 | TAG ?= test 9 | OUTPUT_TYPE ?= type=docker 10 | TEST_IMAGE_NAME ?= testmodel 11 | TEST_FILE ?= test/aikitfile-llama.yaml 12 | RUNTIME ?= "" 13 | PLATFORMS ?= linux/amd64,linux/arm64 14 | 15 | GIT_COMMIT := $(shell git rev-list --abbrev-commit --tags --max-count=1) 16 | GIT_TAG := $(shell git describe --abbrev=0 --tags ${GIT_COMMIT} 2>/dev/null || true) 17 | LDFLAGS := "-X github.com/kaito-project/aikit/pkg/version.Version=$(GIT_TAG:%=%)" 18 | 19 | .PHONY: lint 20 | lint: 21 | golangci-lint run -v ./... --timeout 5m 22 | 23 | .PHONY: build-aikit 24 | build-aikit: 25 | docker buildx build . -t ${REGISTRY}${REPOSITORY}/aikit:${TAG} \ 26 | --output=${OUTPUT_TYPE} \ 27 | --build-arg LDFLAGS=${LDFLAGS} \ 28 | --platform ${PLATFORMS} \ 29 | --progress=plain 30 | 31 | .PHONY: build-test-model 32 | build-test-model: 33 | docker buildx build . -t ${REGISTRY}${REPOSITORY}/${TEST_IMAGE_NAME}:${TAG} -f ${TEST_FILE} \ 34 | --progress=plain --provenance=false \ 35 | --output=${OUTPUT_TYPE} \ 36 | --build-arg runtime=${RUNTIME} \ 37 | --platform ${PLATFORMS} 38 | 39 | .PHONY: build-base 40 | build-base: 41 | docker buildx build . -t ${REGISTRY}${REPOSITORY}/base:latest -f Dockerfile.base \ 42 | --platform ${PLATFORMS} \ 43 | --output=${OUTPUT_TYPE} \ 44 | --sbom=true --push 45 | 46 | .PHONY: run-test-model 47 | run-test-model: 48 | docker run --rm -p 8080:8080 ${REGISTRY}${REPOSITORY}/${TEST_IMAGE_NAME}:${TAG} 49 | 50 | .PHONY: run-test-model-gpu 51 | run-test-model-gpu: 52 | docker run --rm -p 8080:8080 --gpus all ${REGISTRY}${REPOSITORY}/${TEST_IMAGE_NAME}:${TAG} 53 | 54 | .PHONY: run-test-model-applesilicon 55 | run-test-model-applesilicon: 56 | podman run --rm -p 8080:8080 --device /dev/dri ${REGISTRY}${REPOSITORY}/${TEST_IMAGE_NAME}:${TAG} 57 | 58 | .PHONY: test 59 | test: 60 | go test -v ./... 
-race -coverprofile=coverage.txt -covermode=atomic 61 | 62 | .PHONY: test-e2e-dependencies 63 | test-e2e-dependencies: 64 | mkdir -p ${GITHUB_WORKSPACE}/bin 65 | echo "${GITHUB_WORKSPACE}/bin" >> ${GITHUB_PATH} 66 | 67 | # used for kubernetes test 68 | curl -sSL https://dl.k8s.io/release/v${KUBERNETES_VERSION}/bin/linux/amd64/kubectl -o ${GITHUB_WORKSPACE}/bin/kubectl && chmod +x ${GITHUB_WORKSPACE}/bin/kubectl 69 | curl https://get.helm.sh/helm-v${HELM_VERSION}-linux-amd64.tar.gz | tar xz && mv linux-amd64/helm ${GITHUB_WORKSPACE}/bin/helm && chmod +x ${GITHUB_WORKSPACE}/bin/helm 70 | curl -sSL https://github.com/kubernetes-sigs/kind/releases/download/v${KIND_VERSION}/kind-linux-amd64 -o ${GITHUB_WORKSPACE}/bin/kind && chmod +x ${GITHUB_WORKSPACE}/bin/kind 71 | 72 | .PHONY: release-manifest 73 | release-manifest: 74 | @sed -i "s/appVersion: $(VERSION)/appVersion: ${NEWVERSION}/" ./charts/aikit/Chart.yaml 75 | @sed -i "s/version: $$(echo ${VERSION} | cut -c2-)/version: $$(echo ${NEWVERSION} | cut -c2-)/" ./charts/aikit/Chart.yaml 76 | @sed -i -e 's/^VERSION := $(VERSION)/VERSION := ${NEWVERSION}/' ./Makefile 77 | -------------------------------------------------------------------------------- /SECURITY.md: -------------------------------------------------------------------------------- 1 | # Security Policy 2 | 3 | ## Supported Versions 4 | 5 | Security updates are applied only to the most recent releases. 6 | 7 | ## Reporting a Vulnerability 8 | 9 | To securely report a vulnerability, please [open an advisory on GitHub](https://github.com/kaito-project/aikit/security/advisories/new). This form is also accessible when [submitting a new issue](https://github.com/kaito-project/aikit/issues/new/choose). 10 | -------------------------------------------------------------------------------- /charts/aikit/Chart.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v2 2 | name: aikit 3 | description: Kubernetes Helm chart to deploy AIKit LLM images 4 | 5 | # A chart can be either an 'application' or a 'library' chart. 6 | # 7 | # Application charts are a collection of templates that can be packaged into versioned archives 8 | # to be deployed. 9 | # 10 | # Library charts provide useful utilities or functions for the chart developer. They're included as 11 | # a dependency of application charts to inject those utilities and functions into the rendering 12 | # pipeline. Library charts do not define any templates and therefore cannot be deployed. 13 | type: application 14 | 15 | # This is the chart version. This version number should be incremented each time you make changes 16 | # to the chart and its templates, including the app version. 17 | # Versions are expected to follow Semantic Versioning (https://semver.org/) 18 | version: 0.19.2 19 | 20 | # This is the version number of the application being deployed. This version number should be 21 | # incremented each time you make changes to the application. Versions are not expected to 22 | # follow Semantic Versioning. They should reflect the version the application is using. 23 | # It is recommended to use it with quotes. 
24 | appVersion: v0.19.2 25 | -------------------------------------------------------------------------------- /charts/aikit/templates/NOTES.txt: -------------------------------------------------------------------------------- 1 | Access AIKit WebUI or API by running the following commands: 2 | 3 | - Port forward the service to your local machine: 4 | 5 | kubectl --namespace {{ .Release.Namespace }} port-forward service/{{ (include "aikit.fullname" .) }} 8080:{{ .Values.service.port }} & 6 | 7 | - Visit http://127.0.0.1:8080/chat to access the WebUI 8 | 9 | - Access the OpenAI API compatible endpoint with: 10 | 11 | # replace this with the model name you want to use 12 | export MODEL_NAME="llama-3-8b-instruct" 13 | curl http://127.0.0.1:8080/v1/chat/completions -H "Content-Type: application/json" -d "{\"model\": \"${MODEL_NAME}\", \"messages\": [{\"role\": \"user\", \"content\": \"what is the meaning of life?\"}]}" 14 | -------------------------------------------------------------------------------- /charts/aikit/templates/_helpers.tpl: -------------------------------------------------------------------------------- 1 | {{/* 2 | Expand the name of the chart. 3 | */}} 4 | {{- define "aikit.name" -}} 5 | {{- default .Chart.Name .Values.nameOverride | trunc 63 | trimSuffix "-" }} 6 | {{- end }} 7 | 8 | {{/* 9 | Create a default fully qualified app name. 10 | We truncate at 63 chars because some Kubernetes name fields are limited to this (by the DNS naming spec). 11 | If release name contains chart name it will be used as a full name. 12 | */}} 13 | {{- define "aikit.fullname" -}} 14 | {{- if .Values.fullnameOverride }} 15 | {{- .Values.fullnameOverride | trunc 63 | trimSuffix "-" }} 16 | {{- else }} 17 | {{- $name := default .Chart.Name .Values.nameOverride }} 18 | {{- if contains $name .Release.Name }} 19 | {{- .Release.Name | trunc 63 | trimSuffix "-" }} 20 | {{- else }} 21 | {{- printf "%s-%s" .Release.Name $name | trunc 63 | trimSuffix "-" }} 22 | {{- end }} 23 | {{- end }} 24 | {{- end }} 25 | 26 | {{/* 27 | Create chart name and version as used by the chart label. 28 | */}} 29 | {{- define "aikit.chart" -}} 30 | {{- printf "%s-%s" .Chart.Name .Chart.Version | replace "+" "_" | trunc 63 | trimSuffix "-" }} 31 | {{- end }} 32 | 33 | {{/* 34 | Common labels 35 | */}} 36 | {{- define "aikit.labels" -}} 37 | helm.sh/chart: {{ include "aikit.chart" . }} 38 | {{ include "aikit.selectorLabels" . }} 39 | {{- if .Chart.AppVersion }} 40 | app.kubernetes.io/version: {{ .Chart.AppVersion | quote }} 41 | {{- end }} 42 | app.kubernetes.io/managed-by: {{ .Release.Service }} 43 | {{- end }} 44 | 45 | {{/* 46 | Selector labels 47 | */}} 48 | {{- define "aikit.selectorLabels" -}} 49 | app.kubernetes.io/name: {{ include "aikit.name" . }} 50 | app.kubernetes.io/instance: {{ .Release.Name }} 51 | {{- end }} 52 | 53 | {{/* 54 | Create the name of the service account to use 55 | */}} 56 | {{- define "aikit.serviceAccountName" -}} 57 | {{- if .Values.serviceAccount.create }} 58 | {{- default (include "aikit.fullname" .) .Values.serviceAccount.name }} 59 | {{- else }} 60 | {{- default "default" .Values.serviceAccount.name }} 61 | {{- end }} 62 | {{- end }} 63 | -------------------------------------------------------------------------------- /charts/aikit/templates/deployment.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: apps/v1 2 | kind: Deployment 3 | metadata: 4 | name: {{ include "aikit.fullname" . }} 5 | labels: 6 | {{- include "aikit.labels" . 
| nindent 4 }} 7 | spec: 8 | {{- if not .Values.autoscaling.enabled }} 9 | replicas: {{ .Values.replicaCount }} 10 | {{- end }} 11 | selector: 12 | matchLabels: 13 | {{- include "aikit.selectorLabels" . | nindent 6 }} 14 | template: 15 | metadata: 16 | {{- with .Values.podAnnotations }} 17 | annotations: 18 | {{- toYaml . | nindent 8 }} 19 | {{- end }} 20 | labels: 21 | {{- include "aikit.labels" . | nindent 8 }} 22 | {{- with .Values.podLabels }} 23 | {{- toYaml . | nindent 8 }} 24 | {{- end }} 25 | spec: 26 | {{- with .Values.imagePullSecrets }} 27 | imagePullSecrets: 28 | {{- toYaml . | nindent 8 }} 29 | {{- end }} 30 | securityContext: 31 | {{- toYaml .Values.podSecurityContext | nindent 8 }} 32 | containers: 33 | - name: {{ .Chart.Name }} 34 | securityContext: 35 | {{- if .Values.enableRuntimeDefaultSeccompProfile }} 36 | seccompProfile: 37 | type: RuntimeDefault 38 | {{- end }} 39 | {{- toYaml .Values.securityContext | nindent 12 }} 40 | image: "{{ .Values.image.repository }}:{{ .Values.image.tag | default .Chart.AppVersion }}" 41 | imagePullPolicy: {{ .Values.image.pullPolicy }} 42 | ports: 43 | - name: http 44 | containerPort: 8080 45 | protocol: TCP 46 | livenessProbe: 47 | {{- toYaml .Values.livenessProbe | nindent 12 }} 48 | readinessProbe: 49 | {{- toYaml .Values.readinessProbe | nindent 12 }} 50 | resources: 51 | {{- toYaml .Values.resources | nindent 12 }} 52 | {{- with .Values.nodeSelector }} 53 | nodeSelector: 54 | {{- toYaml . | nindent 8 }} 55 | {{- end }} 56 | {{- with .Values.affinity }} 57 | affinity: 58 | {{- toYaml . | nindent 8 }} 59 | {{- end }} 60 | {{- with .Values.tolerations }} 61 | tolerations: 62 | {{- toYaml . | nindent 8 }} 63 | {{- end }} 64 | -------------------------------------------------------------------------------- /charts/aikit/templates/hpa.yaml: -------------------------------------------------------------------------------- 1 | {{- if .Values.autoscaling.enabled }} 2 | apiVersion: autoscaling/v2 3 | kind: HorizontalPodAutoscaler 4 | metadata: 5 | name: {{ include "aikit.fullname" . }} 6 | labels: 7 | {{- include "aikit.labels" . | nindent 4 }} 8 | spec: 9 | scaleTargetRef: 10 | apiVersion: apps/v1 11 | kind: Deployment 12 | name: {{ include "aikit.fullname" . }} 13 | minReplicas: {{ .Values.autoscaling.minReplicas }} 14 | maxReplicas: {{ .Values.autoscaling.maxReplicas }} 15 | metrics: 16 | {{- if .Values.autoscaling.targetCPUUtilizationPercentage }} 17 | - type: Resource 18 | resource: 19 | name: cpu 20 | target: 21 | type: Utilization 22 | averageUtilization: {{ .Values.autoscaling.targetCPUUtilizationPercentage }} 23 | {{- end }} 24 | {{- if .Values.autoscaling.targetMemoryUtilizationPercentage }} 25 | - type: Resource 26 | resource: 27 | name: memory 28 | target: 29 | type: Utilization 30 | averageUtilization: {{ .Values.autoscaling.targetMemoryUtilizationPercentage }} 31 | {{- end }} 32 | {{- end }} 33 | -------------------------------------------------------------------------------- /charts/aikit/templates/namespace-post-install.yaml: -------------------------------------------------------------------------------- 1 | {{- if .Values.postInstall.labelNamespace.enabled }} 2 | apiVersion: batch/v1 3 | kind: Job 4 | metadata: 5 | name: aikit-update-namespace-label 6 | namespace: {{ .Release.Namespace | quote }} 7 | labels: 8 | {{- include "aikit.labels" . 
| nindent 4 }} 9 | annotations: 10 | "helm.sh/hook": post-install 11 | "helm.sh/hook-weight": "-5" 12 | "helm.sh/hook-delete-policy": hook-succeeded,before-hook-creation 13 | {{- if .Values.postInstall.labelNamespace.extraAnnotations }} 14 | {{- toYaml .Values.postInstall.labelNamespace.extraAnnotations | trim | nindent 4 }} 15 | {{- end }} 16 | spec: 17 | template: 18 | metadata: 19 | annotations: 20 | {{- toYaml .Values.podAnnotations | trim | nindent 8 }} 21 | spec: 22 | restartPolicy: OnFailure 23 | {{- if .Values.postInstall.labelNamespace.priorityClassName }} 24 | priorityClassName: {{ .Values.postInstall.labelNamespace.priorityClassName }} 25 | {{- end }} 26 | {{- if .Values.postInstall.labelNamespace.image.pullSecrets }} 27 | imagePullSecrets: 28 | {{- .Values.postInstall.labelNamespace.image.pullSecrets | toYaml | nindent 12 }} 29 | {{- end }} 30 | serviceAccount: aikit-update-namespace-label 31 | containers: 32 | - name: kubectl-label 33 | image: "{{ .Values.postInstall.labelNamespace.image.repository }}:{{ .Values.postInstall.labelNamespace.image.tag }}" 34 | imagePullPolicy: {{ .Values.postInstall.labelNamespace.image.pullPolicy }} 35 | args: 36 | - label 37 | - ns 38 | - {{ .Release.Namespace }} 39 | {{- range .Values.postInstall.labelNamespace.podSecurity }} 40 | - {{ . }} 41 | {{- end }} 42 | - --overwrite 43 | resources: 44 | {{- toYaml .Values.postInstall.resources | nindent 12 }} 45 | securityContext: 46 | {{- if .Values.enableRuntimeDefaultSeccompProfile }} 47 | seccompProfile: 48 | type: RuntimeDefault 49 | {{- end }} 50 | {{- toYaml .Values.postInstall.securityContext | nindent 12 }} 51 | {{- with .Values.postInstall }} 52 | nodeSelector: 53 | {{- toYaml .nodeSelector | nindent 8 }} 54 | affinity: 55 | {{- toYaml .affinity | nindent 8 }} 56 | tolerations: 57 | {{- toYaml .tolerations | nindent 8 }} 58 | {{- end }} 59 | --- 60 | apiVersion: v1 61 | kind: ServiceAccount 62 | metadata: 63 | name: aikit-update-namespace-label 64 | namespace: {{ .Release.Namespace | quote }} 65 | labels: 66 | {{- include "aikit.labels" . | nindent 4 }} 67 | annotations: 68 | "helm.sh/hook": post-install 69 | "helm.sh/hook-weight": "-5" 70 | "helm.sh/hook-delete-policy": hook-succeeded,before-hook-creation 71 | --- 72 | {{- if .Values.rbac.create }} 73 | apiVersion: rbac.authorization.k8s.io/v1 74 | kind: ClusterRole 75 | metadata: 76 | name: aikit-update-namespace-label 77 | labels: 78 | {{- include "aikit.labels" . | nindent 4 }} 79 | annotations: 80 | "helm.sh/hook": post-install 81 | "helm.sh/hook-weight": "-5" 82 | "helm.sh/hook-delete-policy": hook-succeeded,before-hook-creation 83 | rules: 84 | - apiGroups: 85 | - "" 86 | resources: 87 | - namespaces 88 | verbs: 89 | - get 90 | - update 91 | - patch 92 | resourceNames: 93 | - {{ .Release.Namespace }} 94 | {{- range .Values.postInstall.labelNamespace.extraNamespaces }} 95 | - {{ . }} 96 | {{- end }} 97 | {{- with .Values.postInstall.labelNamespace.extraRules }} 98 | {{- toYaml . | nindent 2 }} 99 | {{- end }} 100 | {{- end }} 101 | --- 102 | {{- if .Values.rbac.create }} 103 | apiVersion: rbac.authorization.k8s.io/v1 104 | kind: ClusterRoleBinding 105 | metadata: 106 | name: aikit-update-namespace-label 107 | labels: 108 | {{- include "aikit.labels" . 
| nindent 4 }} 109 | annotations: 110 | "helm.sh/hook": post-install 111 | "helm.sh/hook-weight": "-5" 112 | "helm.sh/hook-delete-policy": hook-succeeded,before-hook-creation 113 | roleRef: 114 | apiGroup: rbac.authorization.k8s.io 115 | kind: ClusterRole 116 | name: aikit-update-namespace-label 117 | subjects: 118 | - kind: ServiceAccount 119 | name: aikit-update-namespace-label 120 | namespace: {{ .Release.Namespace | quote }} 121 | {{- end }} 122 | {{- end }} 123 | -------------------------------------------------------------------------------- /charts/aikit/templates/service.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | kind: Service 3 | metadata: 4 | name: {{ include "aikit.fullname" . }} 5 | labels: 6 | {{- include "aikit.labels" . | nindent 4 }} 7 | spec: 8 | type: {{ .Values.service.type }} 9 | ports: 10 | - port: {{ .Values.service.port }} 11 | targetPort: 8080 12 | protocol: TCP 13 | name: http 14 | selector: 15 | {{- include "aikit.selectorLabels" . | nindent 4 }} 16 | -------------------------------------------------------------------------------- /charts/aikit/values.yaml: -------------------------------------------------------------------------------- 1 | image: 2 | repository: ghcr.io/kaito-project/aikit/llama3.1 3 | tag: "8b" 4 | pullPolicy: IfNotPresent 5 | 6 | replicaCount: 1 7 | 8 | imagePullSecrets: [] 9 | nameOverride: "" 10 | fullnameOverride: "" 11 | podAnnotations: {} 12 | podLabels: {} 13 | 14 | podSecurityContext: 15 | fsGroup: 999 16 | supplementalGroups: 17 | - 999 18 | 19 | securityContext: 20 | allowPrivilegeEscalation: false 21 | capabilities: 22 | drop: 23 | - ALL 24 | readOnlyRootFilesystem: false # aikit extracts backends during runtime 25 | runAsGroup: 999 26 | runAsNonRoot: true 27 | runAsUser: 1000 28 | 29 | service: 30 | type: ClusterIP 31 | port: 8080 32 | 33 | resources: 34 | limits: 35 | memory: 8Gi 36 | # nvidia.com/gpu: "1" 37 | requests: 38 | cpu: 100m 39 | memory: 128Mi 40 | # nvidia.com/gpu: "1" 41 | 42 | livenessProbe: 43 | httpGet: 44 | path: / 45 | port: http 46 | readinessProbe: 47 | httpGet: 48 | path: / 49 | port: http 50 | 51 | autoscaling: 52 | enabled: false 53 | minReplicas: 1 54 | maxReplicas: 100 55 | targetCPUUtilizationPercentage: 80 56 | # targetMemoryUtilizationPercentage: 80 57 | 58 | nodeSelector: {} 59 | 60 | affinity: {} 61 | 62 | rbac: 63 | create: true 64 | 65 | enableRuntimeDefaultSeccompProfile: true 66 | postInstall: 67 | resources: {} 68 | affinity: {} 69 | tolerations: [] 70 | nodeSelector: {kubernetes.io/os: linux} 71 | securityContext: 72 | allowPrivilegeEscalation: false 73 | capabilities: 74 | drop: 75 | - ALL 76 | readOnlyRootFilesystem: true 77 | runAsGroup: 999 78 | runAsNonRoot: true 79 | runAsUser: 1000 80 | labelNamespace: 81 | enabled: true 82 | image: 83 | repository: registry.k8s.io/kubectl 84 | tag: v1.34.1 85 | pullPolicy: IfNotPresent 86 | pullSecrets: [] 87 | podSecurity: ["pod-security.kubernetes.io/audit=restricted", 88 | "pod-security.kubernetes.io/audit-version=latest", 89 | "pod-security.kubernetes.io/warn=restricted", 90 | "pod-security.kubernetes.io/warn-version=latest", 91 | "pod-security.kubernetes.io/enforce=restricted", 92 | "pod-security.kubernetes.io/enforce-version=v1.30"] 93 | extraAnnotations: {} 94 | extraRules: [] 95 | priorityClassName: "" 96 | -------------------------------------------------------------------------------- /cmd/frontend/main.go: 
-------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "os" 5 | 6 | "github.com/kaito-project/aikit/pkg/build" 7 | "github.com/moby/buildkit/frontend/gateway/grpcclient" 8 | "github.com/moby/buildkit/util/appcontext" 9 | "github.com/moby/buildkit/util/bklog" 10 | "github.com/sirupsen/logrus" 11 | "google.golang.org/grpc/grpclog" 12 | ) 13 | 14 | func main() { 15 | bklog.L.Logger.SetOutput(os.Stderr) 16 | grpclog.SetLoggerV2(grpclog.NewLoggerV2WithVerbosity(bklog.L.WriterLevel(logrus.InfoLevel), bklog.L.WriterLevel(logrus.WarnLevel), bklog.L.WriterLevel(logrus.ErrorLevel), 1)) 17 | 18 | ctx := appcontext.Context() 19 | 20 | if err := grpcclient.RunFromEnvironment(ctx, build.Build); err != nil { 21 | bklog.L.WithError(err).Fatal("error running frontend") 22 | os.Exit(1) 23 | } 24 | } 25 | -------------------------------------------------------------------------------- /demo/demo.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | . third_party/demo-magic/demo-magic.sh 4 | 5 | clear 6 | export DEMO_PROMPT="${GREEN}➜ ${COLOR_RESET}" 7 | 8 | echo "✨ In this demo, we are going to start by fine-tuning a model and then deploying the model as a minimal container!" 9 | 10 | echo "" 11 | 12 | echo "👷‍ First, we are going to create a new builder" 13 | 14 | echo "" 15 | 16 | pei "docker buildx create --name aikit-builder --use --buildkitd-flags '--allow-insecure-entitlement security.insecure'" 17 | 18 | echo "" 19 | 20 | echo "🗃️ Create a configuration for the fine-tuning. We are going to use a Mistral model and fine-tune it using the OpenHermes dataset." 21 | 22 | cat > aikit-finetune.yaml << EOF 23 | #syntax=ghcr.io/kaito-project/aikit/aikit:latest 24 | apiVersion: v1alpha1 25 | baseModel: unsloth/mistral-7b-instruct-v0.2-bnb-4bit 26 | datasets: 27 | - source: "teknium/openhermes" 28 | type: "alpaca" 29 | config: 30 | unsloth: 31 | EOF 32 | 33 | echo "" 34 | 35 | pei "bat aikit-finetune.yaml" 36 | 37 | echo "" 38 | 39 | echo "🎵 Starting the fine-tuning process using the above configuration file; the fine-tuned model will be saved in the _output folder." 40 | 41 | echo "" 42 | 43 | pei "docker buildx build --allow security.insecure --file 'aikit-finetune.yaml' --output '_output' --target unsloth --progress plain ." 44 | 45 | echo "" 46 | 47 | echo "✅ We have finished fine-tuning the model. Let's look at the output..." 48 | 49 | echo "" 50 | 51 | pei "ls -al _output" 52 | 53 | echo "" 54 | 55 | echo "📦 Now that we have a fine-tuned model, we can deploy it as a minimal container." 56 | 57 | echo "" 58 | 59 | echo "📃 We'll start by creating a basic inference configuration file for the deployment." 60 | 61 | cat > aikit-inference.yaml << EOF 62 | #syntax=ghcr.io/kaito-project/aikit/aikit:latest 63 | debug: true 64 | apiVersion: v1alpha1 65 | runtime: cuda 66 | models: 67 | - name: mistral-finetuned 68 | source: aikit-model-q4_k_m.gguf 69 | promptTemplates: 70 | - name: instruct 71 | template: | 72 | Below is an instruction that describes a task. Write a response that appropriately completes the request. Keep your responses concise.
73 | 74 | ### Instruction: 75 | {{.Input}} 76 | 77 | ### Response: 78 | config: | 79 | - name: mistral-finetuned 80 | parameters: 81 | model: aikit-model-q4_k_m.gguf 82 | context_size: 4096 83 | gpu_layers: 35 84 | f16: true 85 | mmap: true 86 | template: 87 | chat: instruct 88 | EOF 89 | 90 | pei "bat aikit-inference.yaml" 91 | 92 | echo "" 93 | 94 | echo "🏗️ We can now build a minimal container for the model using the configuration file." 95 | 96 | echo "" 97 | 98 | pei "docker buildx build -t mistral-finetuned -f aikit-inference.yaml --load --progress plain _output" 99 | 100 | echo "" 101 | 102 | echo "🏃 We have finished building the minimal container. Let's start the container and test it." 103 | 104 | echo "" 105 | 106 | pei "docker run --name mistral-finetuned -d --rm -p 8080:8080 --gpus all mistral-finetuned" 107 | 108 | echo "" 109 | 110 | echo "🧪 We can now test the container using a sample query. Since this is OpenAI API compatible, you can use it as a drop-in replacement for any application that uses the OpenAI API." 111 | 112 | echo "" 113 | 114 | pei "curl http://localhost:8080/v1/chat/completions -H \"Content-Type: application/json\" -d '{\"model\": \"mistral-finetuned\", \"messages\": [{\"role\": \"user\", \"content\": \"Generate a list of 10 words that start with ab\"}]}'" 115 | 116 | echo "" 117 | 118 | echo "🙌 We have successfully deployed the fine-tuned model as a minimal container and verified it! We can now stop the container if we wish." 119 | 120 | echo "" 121 | 122 | pei "docker stop mistral-finetuned" 123 | 124 | echo "" 125 | 126 | echo "❤️ In this demo, we have shown how to fine-tune a model and deploy it as a minimal container using AIKit. Thank you for watching!" 127 | 128 | echo "" 129 | 130 | # pei "docker buildx rm aikit-builder" 131 | -------------------------------------------------------------------------------- /demo/third_party/demo-magic/README.md: -------------------------------------------------------------------------------- 1 | # Demo Magic 2 | 3 | demo-magic.sh is a handy shell script that enables you to script repeatable demos in a bash environment so you don't have to type as you present. Rather than trying to type commands when presenting you simply script them and let demo-magic.sh run them for you. 4 | 5 | ## Features 6 | - Simulates typing. It looks like you are actually typing out commands 7 | - Allows you to actually run commands or pretend to do so. 8 | - Can hide commands from presentation. Useful for behind the scenes stuff that doesn't need to be shown. 9 | 10 | ## Functions 11 | 12 | ### pe 13 | Print and Execute. This function will simulate typing whatever you give it. It will then pause until you press <kbd>ENTER</kbd>. After your keypress it will run the command. 14 | 15 | ```bash 16 | #!/bin/bash 17 | 18 | pe "ls -l" 19 | ``` 20 | 21 | ### p 22 | Print only. This function will simulate typing whatever you give it. It will not run the command. After typing it will pause until you press <kbd>ENTER</kbd>. After your keypress it will move on to the next instruction in your script. 23 | 24 | ```bash 25 | #!/bin/bash 26 | 27 | p "ls -l" 28 | ``` 29 | 30 | ### wait 31 | Waits for the user to press <kbd>ENTER</kbd>. 32 | 33 | If `PROMPT_TIMEOUT` is defined and > 0 the demo will automatically proceed after that number of seconds has passed.
34 | 35 | ```bash 36 | #!/bin/bash 37 | 38 | # Will wait until user presses enter 39 | PROMPT_TIMEOUT=0 40 | wait 41 | 42 | # Will wait max 5 seconds until user presses 43 | PROMPT_TIMEOUT=5 44 | wait 45 | 46 | ``` 47 | 48 | ### cmd 49 | Enters script into interactive mode and allows newly typed commands to be executed within the script 50 | ``` 51 | #!/bin/bash 52 | 53 | cmd 54 | ``` 55 | 56 | ## Getting Started 57 | Create a shell script and include demo-magic.sh 58 | 59 | ```bash 60 | #!/bin/bash 61 | 62 | ######################## 63 | # include the magic 64 | ######################## 65 | . demo-magic.sh 66 | 67 | # hide the evidence 68 | clear 69 | 70 | # Put your stuff here 71 | ``` 72 | 73 | Then use the handy functions to run through your demo. 74 | 75 | ## Command line usage 76 | demo-magic.sh exposes 4 options out of the box to your script. 77 | - `-d` - disable simulated typing. Useful for debugging 78 | - `-h` - prints the usage text 79 | - `-n` - set no default waiting after `p` and `pe` functions 80 | - `-w` - set a maximum wait timeout (in seconds) after `p` and `pe` functions 81 | 82 | ```bash 83 | $ ./my-demo.sh -h 84 | 85 | Usage: ./my-demo.sh [options] 86 | 87 | Where options is one or more of: 88 | -h Prints Help text 89 | -d Debug mode. Disables simulated typing 90 | -n No wait 91 | -w Waits max the given amount of seconds before proceeding with demo (e.g. `-w5`) 92 | ``` 93 | 94 | ## Useful Tricks 95 | 96 | ### Faking network connections 97 | Network connections during demos are often unreliable. Try and fake whatever commands would rely on a network connection. For example: Instead of trying to install node modules in a node.js application you can fake it. You can install the node_modules at home on your decent network. Then rename the directory and pretend to install it later by symlinking. If you want to be thorough you can capture the output of npm install into a log file then cat it out later to simulate the install. 98 | 99 | ```bash 100 | #!/bin/bash 101 | 102 | ######################## 103 | # include the magic 104 | ######################## 105 | . demo-magic.sh 106 | 107 | # hide the evidence 108 | clear 109 | 110 | # this command is typed and executed 111 | pe "cd my-app" 112 | 113 | # this command is merely typed. Not executed 114 | p "npm install" 115 | 116 | # this command runs behind the scenes 117 | ln -s cached_node_modules node_modules 118 | 119 | # cat out a log file that captures a previous successful node modules install 120 | cat node-modules-install.log 121 | 122 | # now type and run the command to start your app 123 | pe "node index.js" 124 | ``` 125 | 126 | ### No waiting 127 | The -n _no wait_ option can be useful if you want to print and execute multiple commands. 128 | 129 | ```bash 130 | # include demo-magic 131 | . demo-magic.sh -n 132 | 133 | # add multiple commands 134 | pe 'git status' 135 | pe 'git log --oneline --decorate -n 20' 136 | ``` 137 | 138 | However, this will oblige you to define your waiting points manually, e.g. 139 | ```bash 140 | ...
141 | # define waiting points 142 | pe 'git status' 143 | pe 'git log --oneline --decorate -n 20' 144 | wait 145 | pe 'git pull' 146 | pe 'git log --oneline --decorate -n 20' 147 | wait 148 | ``` 149 | -------------------------------------------------------------------------------- /demo/third_party/demo-magic/license.txt: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | Copyright (c) 2015 Paxton Hare 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /go.mod: -------------------------------------------------------------------------------- 1 | module github.com/kaito-project/aikit 2 | 3 | go 1.24.0 4 | 5 | toolchain go1.24.4 6 | 7 | require ( 8 | github.com/containerd/platforms v1.0.0-rc.1 9 | github.com/moby/buildkit v0.23.2 10 | github.com/opencontainers/go-digest v1.0.0 11 | github.com/opencontainers/image-spec v1.1.1 12 | github.com/pkg/errors v0.9.1 13 | github.com/sirupsen/logrus v1.9.3 14 | golang.org/x/sync v0.17.0 15 | google.golang.org/grpc v1.75.1 16 | gopkg.in/yaml.v2 v2.4.0 17 | ) 18 | 19 | require ( 20 | github.com/agext/levenshtein v1.2.3 // indirect 21 | github.com/containerd/containerd/v2 v2.1.3 // indirect 22 | github.com/containerd/errdefs v1.0.0 // indirect 23 | github.com/containerd/log v0.1.0 // indirect 24 | github.com/containerd/ttrpc v1.2.7 // indirect 25 | github.com/containerd/typeurl/v2 v2.2.3 // indirect 26 | github.com/distribution/reference v0.6.0 // indirect 27 | github.com/docker/go-connections v0.5.0 // indirect 28 | github.com/docker/go-units v0.5.0 // indirect 29 | github.com/felixge/httpsnoop v1.0.4 // indirect 30 | github.com/go-logr/logr v1.4.3 // indirect 31 | github.com/go-logr/stdr v1.2.2 // indirect 32 | github.com/gogo/protobuf v1.3.2 // indirect 33 | github.com/golang/protobuf v1.5.4 // indirect 34 | github.com/google/shlex v0.0.0-20191202100458-e7afc7fbc510 // indirect 35 | github.com/google/uuid v1.6.0 // indirect 36 | github.com/hashicorp/errwrap v1.1.0 // indirect 37 | github.com/hashicorp/go-multierror v1.1.1 // indirect 38 | github.com/in-toto/in-toto-golang v0.9.0 // indirect 39 | github.com/klauspost/compress v1.18.0 // indirect 40 | github.com/moby/docker-image-spec v1.3.1 // indirect 41 | github.com/moby/locker v1.0.1 // indirect 42 | github.com/moby/patternmatcher v0.6.0 // indirect 43 | github.com/moby/sys/signal v0.7.1 // indirect 44 | 
github.com/planetscale/vtprotobuf v0.6.1-0.20240319094008-0393e58bdf10 // indirect 45 | github.com/secure-systems-lab/go-securesystemslib v0.8.0 // indirect 46 | github.com/shibumi/go-pathspec v1.3.0 // indirect 47 | github.com/tonistiigi/dchapes-mode v0.0.0-20250318174251-73d941a28323 // indirect 48 | github.com/tonistiigi/fsutil v0.0.0-20250605211040-586307ad452f // indirect 49 | github.com/tonistiigi/go-csvvalue v0.0.0-20240814133006-030d3b2625d0 // indirect 50 | go.opentelemetry.io/auto/sdk v1.1.0 // indirect 51 | go.opentelemetry.io/contrib/instrumentation/google.golang.org/grpc/otelgrpc v0.60.0 // indirect 52 | go.opentelemetry.io/contrib/instrumentation/net/http/httptrace/otelhttptrace v0.56.0 // indirect 53 | go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.60.0 // indirect 54 | go.opentelemetry.io/otel v1.37.0 // indirect 55 | go.opentelemetry.io/otel/metric v1.37.0 // indirect 56 | go.opentelemetry.io/otel/sdk v1.37.0 // indirect 57 | go.opentelemetry.io/otel/trace v1.37.0 // indirect 58 | golang.org/x/crypto v0.39.0 // indirect 59 | golang.org/x/net v0.41.0 // indirect 60 | golang.org/x/sys v0.33.0 // indirect 61 | golang.org/x/text v0.26.0 // indirect 62 | google.golang.org/genproto/googleapis/rpc v0.0.0-20250707201910-8d1bb00bc6a7 // indirect 63 | google.golang.org/protobuf v1.36.6 // indirect 64 | ) 65 | -------------------------------------------------------------------------------- /models/aikitfile.yaml: -------------------------------------------------------------------------------- 1 | #syntax=ghcr.io/kaito-project/aikit/aikit:latest 2 | apiVersion: v1alpha1 3 | debug: true 4 | -------------------------------------------------------------------------------- /models/codestral-22b.yaml: -------------------------------------------------------------------------------- 1 | #syntax=ghcr.io/kaito-project/aikit/aikit:latest 2 | apiVersion: v1alpha1 3 | debug: true 4 | runtime: cuda 5 | models: 6 | - name: codestral-22b 7 | source: https://huggingface.co/lmstudio-community/Codestral-22B-v0.1-GGUF/resolve/main/Codestral-22B-v0.1-Q4_K_M.gguf 8 | sha256: 003e48ed892850b80994fcddca2bd6b833b092a4ef2db2853c33a3144245e06c 9 | promptTemplates: 10 | - name: instruct 11 | template: | 12 | [INST]{{ if .SystemPrompt }}{{ .SystemPrompt }}{{ end }} {{ .Input }}[/INST] 13 | config: | 14 | - name: codestral-22b 15 | backend: llama 16 | parameters: 17 | model: Codestral-22B-v0.1-Q4_K_M.gguf 18 | context_size: 8192 19 | template: 20 | chat: instruct 21 | completion: instruct 22 | stopwords: 23 | - \"[INST]\" 24 | - \"[/INST]\" 25 | - \"[PREFIX]\" 26 | - \"[MIDDLE]\" 27 | - \"[SUFFIX]\" 28 | f16: true 29 | mmap: true 30 | -------------------------------------------------------------------------------- /models/flux-1-dev.yaml: -------------------------------------------------------------------------------- 1 | #syntax=ghcr.io/kaito-project/aikit/aikit:latest 2 | apiVersion: v1alpha1 3 | debug: true 4 | runtime: cuda 5 | backends: 6 | - diffusers 7 | config: | 8 | - name: flux-1-dev 9 | backend: diffusers 10 | parameters: 11 | model: ChuckMcSneed/FLUX.1-dev 12 | diffusers: 13 | cfg_scale: 0 14 | cuda: true 15 | enable_parameters: num_inference_steps 16 | pipeline_type: FluxPipeline 17 | f16: true 18 | step: 25 19 | -------------------------------------------------------------------------------- /models/gemma-2-2b-instruct.yaml: -------------------------------------------------------------------------------- 1 | #syntax=ghcr.io/kaito-project/aikit/aikit:latest 2 | apiVersion: v1alpha1 
3 | debug: true 4 | runtime: cuda 5 | models: 6 | - name: gemma-2-2b-instruct 7 | source: https://huggingface.co/lmstudio-community/gemma-2-2b-it-GGUF/resolve/main/gemma-2-2b-it-Q4_K_M.gguf 8 | sha256: e0aee85060f168f0f2d8473d7ea41ce2f3230c1bc1374847505ea599288a7787 9 | promptTemplates: 10 | - name: chatMsg 11 | template: | 12 | <start_of_turn>{{if eq .RoleName \"assistant\" }}model{{else}}{{ .RoleName }}{{end}} 13 | {{ if .Content -}} 14 | {{.Content -}} 15 | {{ end -}}<end_of_turn> 16 | - name: chat 17 | template: | 18 | {{ .Input }} 19 | <start_of_turn>model 20 | - name: completion 21 | template: | 22 | {{ .Input }} 23 | config: | 24 | - name: gemma-2-2b-instruct 25 | backend: llama 26 | parameters: 27 | model: gemma-2-2b-it-Q4_K_M.gguf 28 | context_size: 8192 29 | template: 30 | chat_message: chatMsg 31 | chat: chat 32 | completion: completion 33 | repeat_penalty: 1 34 | stopwords: 35 | - \"<start_of_turn>\" 36 | - \"<end_of_turn>\" 37 | - \"<|im_end|>\" 38 | f16: true 39 | mmap: true 40 | -------------------------------------------------------------------------------- /models/gpt-oss-120b.yaml: -------------------------------------------------------------------------------- 1 | #syntax=ghcr.io/kaito-project/aikit/aikit:latest 2 | apiVersion: v1alpha1 3 | debug: true 4 | runtime: cuda 5 | models: 6 | - name: gpt-oss-120b-1 7 | source: https://huggingface.co/ggml-org/gpt-oss-120b-GGUF/resolve/main/gpt-oss-120b-mxfp4-00001-of-00003.gguf 8 | sha256: e2865eb6c1df7b2ffbebf305cd5d9074d5ccc0fe3b862f98d343a46dad1606f9 9 | - name: gpt-oss-120b-2 10 | source: https://huggingface.co/ggml-org/gpt-oss-120b-GGUF/resolve/main/gpt-oss-120b-mxfp4-00002-of-00003.gguf 11 | sha256: "346492f65891fb27cac5c74a8c07626cbfeb4211cd391ec4de37dbbe3109a93b" 12 | - name: gpt-oss-120b-3 13 | source: https://huggingface.co/ggml-org/gpt-oss-120b-GGUF/resolve/main/gpt-oss-120b-mxfp4-00003-of-00003.gguf 14 | sha256: "66dca81040933f5a49177e82c479c51319cefb83bd22dad9f06dad45e25f1463" 15 | config: | 16 | - name: gpt-oss-120b 17 | backend: llama-cpp 18 | parameters: 19 | model: gpt-oss-120b-mxfp4-00001-of-00003.gguf 20 | context_size: 8192 21 | f16: true 22 | mmap: true 23 | template: 24 | chat_message: |- 25 | <|start|>{{ if .FunctionCall -}}functions.{{ .FunctionCall.Name }} to=assistant{{ else if eq .RoleName \"assistant\"}}assistant<|channel|>final<|message|>{{else}}{{ .RoleName }}{{end}}<|message|> 26 | {{- if .Content -}} 27 | {{- .Content -}} 28 | {{- end -}} 29 | {{- if .FunctionCall -}} 30 | {{- toJson .FunctionCall -}} 31 | {{- end -}}<|end|> 32 | 33 | function: |- 34 | <|start|>system<|message|>You are ChatGPT, a large language model trained by OpenAI. 
35 | Knowledge cutoff: 2024-06 36 | Current date: {{ now | date \"Mon Jan 2 15:04:05 MST 2006\" }} 37 | 38 | Reasoning: {{if eq .ReasoningEffort \"\"}}medium{{else}}{{.ReasoningEffort}}{{end}} 39 | 40 | # {{with .Metadata}}{{ if ne .system_prompt \"\" }}{{ .system_prompt }}{{ end }}{{else}}You are a friendly and helpful assistant.{{ end }}<|end|>{{- .Input -}}<|start|>assistant 41 | 42 | # Tools 43 | 44 | ## functions 45 | 46 | namespace functions { 47 | {{- range .Functions}} 48 | {{if .Description }} 49 | // {{ .Description }} 50 | {{- end }} 51 | {{- if and .Parameters.Properties (gt (len .Parameters.Properties) 0) }} 52 | type {{ .Name }} = (_: { 53 | {{- range $name, $prop := .Parameters.Properties }} 54 | {{- if $prop.Description }} 55 | // {{ $prop.Description }} 56 | {{- end }} 57 | {{ $name }}: {{ if gt (len $prop.Type) 1 }}{{ range $i, $t := $prop.Type }}{{ if $i }} | {{ end }}{{ $t }}{{ end }}{{ else }}{{ index $prop.Type 0 }}{{ end }}, 58 | {{- end }} 59 | }) => any; 60 | {{- else }} 61 | type {{ .Function.Name }} = () => any; 62 | {{- end }} 63 | {{- end }}{{/* end of range .Functions */}} 64 | } // namespace functions 65 | 66 | # Instructions 67 | 68 | <|end|>{{.Input -}}<|start|>assistant 69 | 70 | chat: |- 71 | <|start|>system<|message|>You are ChatGPT, a large language model trained by OpenAI. 72 | Knowledge cutoff: 2024-06 73 | Current date: {{ now | date \"Mon Jan 2 15:04:05 MST 2006\" }} 74 | 75 | Reasoning: {{if eq .ReasoningEffort \"\"}}medium{{else}}{{.ReasoningEffort}}{{end}} 76 | 77 | # {{with .Metadata}}{{ if ne .system_prompt \"\" }}{{ .system_prompt }}{{ end }}{{else}}You are a friendly and helpful assistant.{{ end }}<|end|>{{- .Input -}}<|start|>assistant 78 | 79 | completion: |- 80 | {{.Input}} 81 | stopwords: 82 | - '<|im_end|>' 83 | - '<dummy32000>' 84 | - '</s>' 85 | - '<|endoftext|>' 86 | - '<|end|>s' 87 | -------------------------------------------------------------------------------- /models/gpt-oss-20b.yaml: -------------------------------------------------------------------------------- 1 | #syntax=ghcr.io/kaito-project/aikit/aikit:latest 2 | apiVersion: v1alpha1 3 | debug: true 4 | runtime: cuda 5 | models: 6 | - name: gpt-oss-20b 7 | source: https://huggingface.co/ggml-org/gpt-oss-20b-GGUF/resolve/main/gpt-oss-20b-mxfp4.gguf 8 | sha256: "be37a636aca0fc1aae0d32325f82f6b4d21495f06823b5fbc1898ae0303e9935" 9 | config: | 10 | - name: gpt-oss-20b 11 | backend: llama-cpp 12 | parameters: 13 | model: gpt-oss-20b-mxfp4.gguf 14 | context_size: 8192 15 | f16: true 16 | mmap: true 17 | template: 18 | chat_message: |- 19 | <|start|>{{ if .FunctionCall -}}functions.{{ .FunctionCall.Name }} to=assistant{{ else if eq .RoleName \"assistant\"}}assistant<|channel|>final<|message|>{{else}}{{ .RoleName }}{{end}}<|message|> 20 | {{- if .Content -}} 21 | {{- .Content -}} 22 | {{- end -}} 23 | {{- if .FunctionCall -}} 24 | {{- toJson .FunctionCall -}} 25 | {{- end -}}<|end|> 26 | 27 | function: |- 28 | <|start|>system<|message|>You are ChatGPT, a large language model trained by OpenAI.
29 | Knowledge cutoff: 2024-06 30 | Current date: {{ now | date \"Mon Jan 2 15:04:05 MST 2006\" }} 31 | 32 | Reasoning: {{if eq .ReasoningEffort \"\"}}medium{{else}}{{.ReasoningEffort}}{{end}} 33 | 34 | # {{with .Metadata}}{{ if ne .system_prompt \"\" }}{{ .system_prompt }}{{ end }}{{else}}You are a friendly and helpful assistant.{{ end }}<|end|>{{- .Input -}}<|start|>assistant 35 | 36 | # Tools 37 | 38 | ## functions 39 | 40 | namespace functions { 41 | {{- range .Functions}} 42 | {{if .Description }} 43 | // {{ .Description }} 44 | {{- end }} 45 | {{- if and .Parameters.Properties (gt (len .Parameters.Properties) 0) }} 46 | type {{ .Name }} = (_: { 47 | {{- range $name, $prop := .Parameters.Properties }} 48 | {{- if $prop.Description }} 49 | // {{ $prop.Description }} 50 | {{- end }} 51 | {{ $name }}: {{ if gt (len $prop.Type) 1 }}{{ range $i, $t := $prop.Type }}{{ if $i }} | {{ end }}{{ $t }}{{ end }}{{ else }}{{ index $prop.Type 0 }}{{ end }}, 52 | {{- end }} 53 | }) => any; 54 | {{- else }} 55 | type {{ .Function.Name }} = () => any; 56 | {{- end }} 57 | {{- end }}{{/* end of range .Functions */}} 58 | } // namespace functions 59 | 60 | # Instructions 61 | 62 | <|end|>{{.Input -}}<|start|>assistant 63 | 64 | chat: |- 65 | <|start|>system<|message|>You are ChatGPT, a large language model trained by OpenAI. 66 | Knowledge cutoff: 2024-06 67 | Current date: {{ now | date \"Mon Jan 2 15:04:05 MST 2006\" }} 68 | 69 | Reasoning: {{if eq .ReasoningEffort \"\"}}medium{{else}}{{.ReasoningEffort}}{{end}} 70 | 71 | # {{with .Metadata}}{{ if ne .system_prompt \"\" }}{{ .system_prompt }}{{ end }}{{else}}You are a friendly and helpful assistant.{{ end }}<|end|>{{- .Input -}}<|start|>assistant 72 | 73 | completion: |- 74 | {{.Input}} 75 | stopwords: 76 | - '<|im_end|>' 77 | - '<dummy32000>' 78 | - '</s>' 79 | - '<|endoftext|>' 80 | - '<|end|>s' 81 | -------------------------------------------------------------------------------- /models/llama-3.1-8b-instruct.yaml: -------------------------------------------------------------------------------- 1 | #syntax=ghcr.io/kaito-project/aikit/aikit:latest 2 | apiVersion: v1alpha1 3 | debug: true 4 | runtime: cuda 5 | models: 6 | - name: llama-3.1-8b-instruct 7 | source: https://huggingface.co/QuantFactory/Meta-Llama-3.1-8B-Instruct-GGUF/resolve/main/Meta-Llama-3.1-8B-Instruct.Q4_K_M.gguf 8 | sha256: "6d86fb9d2910178f5c744234fdf91910e033ef7b03c5e23dcc6d25e98687e5fa" 9 | promptTemplates: 10 | - name: chatMsg 11 | template: | 12 | <|start_header_id|>{{if eq .RoleName \"assistant\"}}assistant{{else if eq .RoleName \"system\"}}system{{else if eq .RoleName \"tool\"}}tool{{else if eq .RoleName \"user\"}}user{{end}}<|end_header_id|> 13 | 14 | {{ if .FunctionCall -}} 15 | Function call: 16 | {{ else if eq .RoleName \"tool\" -}} 17 | Function response: 18 | {{ end -}} 19 | {{ if .Content -}} 20 | {{.Content -}} 21 | {{ else if .FunctionCall -}} 22 | {{ toJson .FunctionCall -}} 23 | {{ end -}} 24 | <|eot_id|> 25 | - name: function 26 | template: | 27 | <|start_header_id|>system<|end_header_id|> 28 | 29 | You have access to the following functions: 30 | 31 | {{range .Functions}} 32 | Use the function '{{.Name}}' to '{{.Description}}' 33 | {{toJson .Parameters}} 34 | {{end}} 35 | 36 | Think very carefully before calling functions.
37 | If you choose to call a function, ONLY reply in the following format with no prefix or suffix:
38 | 
39 | <function=example_function_name>{{`{{\"example_name\": \"example_value\"}}`}}</function>
40 | 
41 | Reminder:
42 | - If looking for real time information use relevant functions before falling back to searching on internet
43 | - Function calls MUST follow the specified format, start with <function= and end with </function>
44 | - Required parameters MUST be specified
45 | - Only call one function at a time
46 | - Put the entire function call reply on one line
47 | <|eot_id|>
48 | {{.Input }}
49 | <|start_header_id|>assistant<|end_header_id|>
50 | - name: chat
51 | template: |
52 | {{.Input }}
53 | <|start_header_id|>assistant<|end_header_id|>
54 | - name: completion
55 | template: |
56 | {{.Input}}
57 | config: |
58 | - name: llama-3.1-8b-instruct
59 | backend: llama
60 | function:
61 | disable_no_action: true
62 | grammar:
63 | disable: true
64 | response_regex:
65 | - <function=(?P<name>\w+)>(?P<arguments>.*)</function>
66 | parameters:
67 | model: Meta-Llama-3.1-8B-Instruct.Q4_K_M.gguf
68 | context_size: 8192
69 | f16: true
70 | template:
71 | chat_message: \"chatMsg\"
72 | function: \"function\"
73 | chat: \"chat\"
74 | completion: \"completion\"
75 | stopwords:
76 | - <|im_end|>
77 | - <dummy32000>
78 | - \"<|eot_id|>\"
79 | - <|end_of_text|>
80 | 
--------------------------------------------------------------------------------
/models/llama-3.2-1b-instruct.yaml:
--------------------------------------------------------------------------------
1 | #syntax=ghcr.io/kaito-project/aikit/aikit:latest
2 | apiVersion: v1alpha1
3 | debug: true
4 | runtime: cuda
5 | models:
6 | - name: llama-3.2-1b-instruct
7 | source: https://huggingface.co/MaziyarPanahi/Llama-3.2-1B-Instruct-GGUF/resolve/main/Llama-3.2-1B-Instruct.Q4_K_M.gguf
8 | sha256: "e4650dd6b45ef456066b11e4927f775eef4dd1e0e8473c3c0f27dd19ee13cc4e"
9 | promptTemplates:
10 | - name: chatMsg
11 | template: |
12 | <|start_header_id|>{{if eq .RoleName \"assistant\"}}assistant{{else if eq .RoleName \"system\"}}system{{else if eq .RoleName \"tool\"}}tool{{else if eq .RoleName \"user\"}}user{{end}}<|end_header_id|>
13 | 
14 | {{ if .FunctionCall -}}
15 | Function call:
16 | {{ else if eq .RoleName \"tool\" -}}
17 | Function response:
18 | {{ end -}}
19 | {{ if .Content -}}
20 | {{.Content -}}
21 | {{ else if .FunctionCall -}}
22 | {{ toJson .FunctionCall -}}
23 | {{ end -}}
24 | <|eot_id|>
25 | - name: function
26 | template: |
27 | <|start_header_id|>system<|end_header_id|>
28 | 
29 | You have access to the following functions:
30 | 
31 | {{range .Functions}}
32 | Use the function '{{.Name}}' to '{{.Description}}'
33 | {{toJson .Parameters}}
34 | {{end}}
35 | 
36 | Think very carefully before calling functions.
37 | If you choose to call a function, ONLY reply in the following format with no prefix or suffix:
38 | 
39 | <function=example_function_name>{{`{{\"example_name\": \"example_value\"}}`}}</function>
40 | 
41 | Reminder:
42 | - If looking for real time information use relevant functions before falling back to searching on internet
43 | - Function calls MUST follow the specified format, start with <function= and end with </function>
44 | - Required parameters MUST be specified
45 | - Only call one function at a time
46 | - Put the entire function call reply on one line
47 | <|eot_id|>
48 | {{.Input }}
49 | <|start_header_id|>assistant<|end_header_id|>
50 | - name: chat
51 | template: |
52 | {{.Input }}
53 | <|start_header_id|>assistant<|end_header_id|>
54 | - name: completion
55 | template: |
56 | {{.Input}}
57 | config: |
58 | - name: llama-3.2-1b-instruct
59 | backend: llama
60 | function:
61 | disable_no_action: true
62 | grammar:
63 | disable: true
64 | response_regex:
65 | - <function=(?P<name>\w+)>(?P<arguments>.*)</function>
66 | parameters:
67 | model: Llama-3.2-1B-Instruct.Q4_K_M.gguf
68 | context_size: 8192
69 | f16: true
70 | template:
71 | chat_message: \"chatMsg\"
72 | function: \"function\"
73 | chat: \"chat\"
74 | completion: \"completion\"
75 | stopwords:
76 | - <|im_end|>
77 | - <dummy32000>
78 | - <|eot_id|>
79 | - <|end_of_text|>
80 | 
--------------------------------------------------------------------------------
/models/llama-3.2-3b-instruct.yaml:
--------------------------------------------------------------------------------
1 | #syntax=ghcr.io/kaito-project/aikit/aikit:latest
2 | apiVersion: v1alpha1
3 | debug: true
4 | runtime: cuda
5 | models:
6 | - name: llama-3.2-3b-instruct
7 | source: https://huggingface.co/MaziyarPanahi/Llama-3.2-3B-Instruct-GGUF/resolve/main/Llama-3.2-3B-Instruct.Q4_K_M.gguf
8 | sha256: "1c323c8ef8b7dd877d40a4138de8bf915884a383b08097b5c20abcb2ae2896d6"
9 | promptTemplates:
10 | - name: chatMsg
11 | template: |
12 | <|start_header_id|>{{if eq .RoleName \"assistant\"}}assistant{{else if eq .RoleName \"system\"}}system{{else if eq .RoleName \"tool\"}}tool{{else if eq .RoleName \"user\"}}user{{end}}<|end_header_id|>
13 | 
14 | {{ if .FunctionCall -}}
15 | Function call:
16 | {{ else if eq .RoleName \"tool\" -}}
17 | Function response:
18 | {{ end -}}
19 | {{ if .Content -}}
20 | {{.Content -}}
21 | {{ else if .FunctionCall -}}
22 | {{ toJson .FunctionCall -}}
23 | {{ end -}}
24 | <|eot_id|>
25 | - name: function
26 | template: |
27 | <|start_header_id|>system<|end_header_id|>
28 | 
29 | You have access to the following functions:
30 | 
31 | {{range .Functions}}
32 | Use the function '{{.Name}}' to '{{.Description}}'
33 | {{toJson .Parameters}}
34 | {{end}}
35 | 
36 | Think very carefully before calling functions.
37 | If you choose to call a function, ONLY reply in the following format with no prefix or suffix:
38 | 
39 | <function=example_function_name>{{`{{\"example_name\": \"example_value\"}}`}}</function>
40 | 
41 | Reminder:
42 | - If looking for real time information use relevant functions before falling back to searching on internet
43 | - Function calls MUST follow the specified format, start with <function= and end with </function>
44 | - Required parameters MUST be specified
45 | - Only call one function at a time
46 | - Put the entire function call reply on one line
47 | <|eot_id|>
48 | {{.Input }}
49 | <|start_header_id|>assistant<|end_header_id|>
50 | - name: chat
51 | template: |
52 | {{.Input }}
53 | <|start_header_id|>assistant<|end_header_id|>
54 | - name: completion
55 | template: |
56 | {{.Input}}
57 | config: |
58 | - name: llama-3.2-3b-instruct
59 | backend: llama
60 | function:
61 | disable_no_action: true
62 | grammar:
63 | disable: true
64 | response_regex:
65 | - <function=(?P<name>\w+)>(?P<arguments>.*)</function>
66 | parameters:
67 | model: Llama-3.2-3B-Instruct.Q4_K_M.gguf
68 | context_size: 8192
69 | f16: true
70 | template:
71 | chat_message: \"chatMsg\"
72 | function: \"function\"
73 | chat: \"chat\"
74 | completion: \"completion\"
75 | stopwords:
76 | - <|im_end|>
77 | - <dummy32000>
78 | - <|eot_id|>
79 | - <|end_of_text|>
80 | 
--------------------------------------------------------------------------------
/models/llama-3.3-70b-instruct.yaml:
--------------------------------------------------------------------------------
1 | #syntax=ghcr.io/kaito-project/aikit/aikit:latest
2 | apiVersion: v1alpha1
3 | debug: true
4 | runtime: cuda
5 | models:
6 | - name: llama-3.3-70b-instruct
7 | source: https://huggingface.co/MaziyarPanahi/Llama-3.3-70B-Instruct-GGUF/resolve/main/Llama-3.3-70B-Instruct.Q4_K_M.gguf
8 | sha256: "4f3b04ecae278bdb0fd545b47c210bc5edf823e5ebf7d41e0b526c81d54b1ff3"
9 | promptTemplates:
10 | - name: chatMsg
11 | template: |
12 | <|start_header_id|>{{if eq .RoleName \"assistant\"}}assistant{{else if eq .RoleName \"system\"}}system{{else if eq .RoleName \"tool\"}}tool{{else if eq .RoleName \"user\"}}user{{end}}<|end_header_id|>
13 | 
14 | {{ if .FunctionCall -}}
15 | Function call:
16 | {{ else if eq .RoleName \"tool\" -}}
17 | Function response:
18 | {{ end -}}
19 | {{ if .Content -}}
20 | {{.Content -}}
21 | {{ else if .FunctionCall -}}
22 | {{ toJson .FunctionCall -}}
23 | {{ end -}}
24 | <|eot_id|>
25 | - name: function
26 | template: |
27 | <|start_header_id|>system<|end_header_id|>
28 | 
29 | You have access to the following functions:
30 | 
31 | {{range .Functions}}
32 | Use the function '{{.Name}}' to '{{.Description}}'
33 | {{toJson .Parameters}}
34 | {{end}}
35 | 
36 | Think very carefully before calling functions.
37 | If you choose to call a function, ONLY reply in the following format with no prefix or suffix:
38 | 
39 | <function=example_function_name>{{`{{\"example_name\": \"example_value\"}}`}}</function>
40 | 
41 | Reminder:
42 | - If looking for real time information use relevant functions before falling back to searching on internet
43 | - Function calls MUST follow the specified format, start with <function= and end with </function>
44 | - Required parameters MUST be specified
45 | - Only call one function at a time
46 | - Put the entire function call reply on one line
47 | <|eot_id|>
48 | {{.Input }}
49 | <|start_header_id|>assistant<|end_header_id|>
50 | - name: chat
51 | template: |
52 | {{.Input }}
53 | <|start_header_id|>assistant<|end_header_id|>
54 | - name: completion
55 | template: |
56 | {{.Input}}
57 | config: |
58 | - name: llama-3.3-70b-instruct
59 | backend: llama
60 | function:
61 | disable_no_action: true
62 | grammar:
63 | disable: true
64 | response_regex:
65 | - <function=(?P<name>\w+)>(?P<arguments>.*)</function>
66 | parameters:
67 | model: Llama-3.3-70B-Instruct.Q4_K_M.gguf
68 | context_size: 8192
69 | f16: true
70 | template:
71 | chat_message: \"chatMsg\"
72 | function: \"function\"
73 | chat: \"chat\"
74 | completion: \"completion\"
75 | stopwords:
76 | - <|im_end|>
77 | - <dummy32000>
78 | - <|eot_id|>
79 | - <|end_of_text|>
80 | 
--------------------------------------------------------------------------------
/models/mixtral-8x7b-instruct.yaml:
--------------------------------------------------------------------------------
1 | #syntax=ghcr.io/kaito-project/aikit/aikit:latest
2 | apiVersion: v1alpha1
3 | debug: true
4 | runtime: cuda
5 | models:
6 | - name: mixtral-8x7b-instruct
7 | source: https://huggingface.co/TheBloke/Mixtral-8x7B-Instruct-v0.1-GGUF/resolve/main/mixtral-8x7b-instruct-v0.1.Q4_K_M.gguf
8 | sha256: 9193684683657e90707087bd1ed19fd0b277ab66358d19edeadc26d6fdec4f53
9 | config: |
10 | - name: mixtral-8x7b-instruct
11 | backend: llama
12 | parameters:
13 | model: mixtral-8x7b-instruct-v0.1.Q4_K_M.gguf
14 | context_size: 4096
15 | f16: true
16 | mmap: true
17 | 
--------------------------------------------------------------------------------
/models/phi-4-14b-instruct.yaml:
--------------------------------------------------------------------------------
1 | #syntax=ghcr.io/kaito-project/aikit/aikit:latest
2 | apiVersion: v1alpha1
3 | debug: true
4 | runtime: cuda
5 | models:
6 | - name: phi-4-14b-instruct
7 | source: https://huggingface.co/unsloth/phi-4-GGUF/resolve/main/phi-4-Q4_K_M.gguf
8 | sha256: 01e1f25b3e6931054c6c2227b06f4969828434eebc299e8e171f55dab6814485
9 | config: |
10 | - name: phi-4-14b-instruct
11 | backend: llama
12 | parameters:
13 | model: phi-4-Q4_K_M.gguf
14 | context_size: 4096
15 | template:
16 | chat_message: |
17 | <|im_start|>{{ .RoleName }}<|im_sep|>
18 | {{.Content}}<|im_end|>
19 | chat: |
20 | {{.Input}}
21 | <|im_start|>assistant<|im_sep|>
22 | completion: |
23 | {{.Input}}
24 | stopwords:
25 | - <|end|>
26 | - <|endoftext|>
27 | - <|im_end|>
28 | f16: true
29 | mmap: true
30 | 
--------------------------------------------------------------------------------
/models/qwq-32b.yaml:
--------------------------------------------------------------------------------
1 | #syntax=ghcr.io/kaito-project/aikit/aikit:latest
2 | apiVersion: v1alpha1
3 | debug: true
4 | runtime: cuda
5 | models:
6 | - name: qwq-32b
7 | source: https://huggingface.co/unsloth/QwQ-32B-GGUF/resolve/main/QwQ-32B-Q4_K_M.gguf
8 | sha256: 
39dc40d34d44d66406e9ad94416585839daa5edc35425f6f6925c35e149aa5f5 9 | config: | 10 | - name: qwq-32b 11 | backend: llama-cpp 12 | parameters: 13 | model: QwQ-32B-Q4_K_M.gguf 14 | top_k: 40 15 | temperature: 0.6 16 | top_p: 0.95 17 | context_size: 8192 18 | repeat_penalty: 1 19 | template: 20 | chat_message: | 21 | <|im_start|>{{ .RoleName }} 22 | {{ if .FunctionCall -}} 23 | Function call: 24 | {{ else if eq .RoleName \"tool\" -}} 25 | Function response: 26 | {{ end -}} 27 | {{ if .Content -}} 28 | {{.Content }} 29 | {{ end -}} 30 | {{ if .FunctionCall -}} 31 | {{toJson .FunctionCall}} 32 | {{ end -}}<|im_end|> 33 | function: | 34 | <|im_start|>system 35 | # Tools 36 | 37 | You may call one or more functions to assist with the user query. 38 | 39 | You are provided with function signatures within <tools></tools> XML tags: 40 | <tools> 41 | {{range .Functions}} 42 | {'type': 'function', 'function': {'name': '{{.Name}}', 'description': '{{.Description}}', 'parameters': {{toJson .Parameters}} }} 43 | {{end}} 44 | </tools> 45 | For each function call, return a json object with function name and arguments within <tool_call></tool_call> XML tags: 46 | <tool_call> 47 | {\"name\": <function-name>, \"arguments\": <args-json-object>} 48 | </tool_call> 49 | <|im_end|> 50 | {{.Input -}} 51 | <|im_start|>assistant 52 | chat: | 53 | {{.Input -}} 54 | <|im_start|>assistant 55 | completion: | 56 | {{.Input}} 57 | f16: true 58 | mmap: true 59 | stopwords: 60 | - \"<|im_start|>\" 61 | - \"<|im_end|>\" 62 | -------------------------------------------------------------------------------- /pkg/aikit/config/finetune_specs.go: -------------------------------------------------------------------------------- 1 | package config 2 | 3 | type FineTuneConfig struct { 4 | APIVersion string `yaml:"apiVersion"` 5 | Target string `yaml:"target"` 6 | BaseModel string `yaml:"baseModel"` 7 | Datasets []Dataset `yaml:"datasets"` 8 | Config FineTuneConfigSpec `yaml:"config"` 9 | Output FineTuneOutputSpec `yaml:"output"` 10 | } 11 | 12 | type FineTuneConfigSpec struct { 13 | Unsloth FineTuneConfigUnslothSpec `yaml:"unsloth"` 14 | } 15 | 16 | type Dataset struct { 17 | Source string `yaml:"source"` 18 | Type string `yaml:"type"` 19 | } 20 | 21 | type FineTuneConfigUnslothSpec struct { 22 | Packing bool `yaml:"packing"` 23 | MaxSeqLength int `yaml:"maxSeqLength"` 24 | LoadIn4bit bool `yaml:"loadIn4bit"` 25 | BatchSize int `yaml:"batchSize"` 26 | GradientAccumulationSteps int `yaml:"gradientAccumulationSteps"` 27 | WarmupSteps int `yaml:"warmupSteps"` 28 | MaxSteps int `yaml:"maxSteps"` 29 | LearningRate float64 `yaml:"learningRate"` 30 | LoggingSteps int `yaml:"loggingSteps"` 31 | Optimizer string `yaml:"optimizer"` 32 | WeightDecay float64 `yaml:"weightDecay"` 33 | LrSchedulerType string `yaml:"lrSchedulerType"` 34 | Seed int `yaml:"seed"` 35 | } 36 | 37 | type FineTuneOutputSpec struct { 38 | Quantize string `yaml:"quantize"` 39 | Name string `yaml:"name"` 40 | } 41 | -------------------------------------------------------------------------------- /pkg/aikit/config/inference_specs.go: -------------------------------------------------------------------------------- 1 | package config 2 | 3 | type InferenceConfig struct { 4 | APIVersion string `yaml:"apiVersion"` 5 | Debug bool `yaml:"debug"` 6 | Runtime string `yaml:"runtime"` 7 | Backends []string `yaml:"backends"` 8 | Models []Model `yaml:"models"` 9 | Config string `yaml:"config"` 10 | } 11 | 12 | type Model struct { 13 | Name string `yaml:"name"` 14 | Source string 
`yaml:"source"` 15 | SHA256 string `yaml:"sha256"` 16 | PromptTemplates []PromptTemplate `yaml:"promptTemplates"` 17 | } 18 | 19 | type PromptTemplate struct { 20 | Name string `yaml:"name"` 21 | Template string `yaml:"template"` 22 | } 23 | -------------------------------------------------------------------------------- /pkg/aikit/config/specs.go: -------------------------------------------------------------------------------- 1 | package config 2 | 3 | import ( 4 | "github.com/pkg/errors" 5 | yaml "gopkg.in/yaml.v2" 6 | ) 7 | 8 | func NewFromBytes(b []byte) (*InferenceConfig, *FineTuneConfig, error) { 9 | inferenceConfig := &InferenceConfig{} 10 | fineTuneConfig := &FineTuneConfig{} 11 | var err error 12 | err = yaml.Unmarshal(b, inferenceConfig) 13 | if err == nil { 14 | return inferenceConfig, nil, nil 15 | } 16 | 17 | err = yaml.Unmarshal(b, fineTuneConfig) 18 | if err == nil { 19 | return nil, fineTuneConfig, nil 20 | } 21 | 22 | return nil, nil, errors.Wrap(err, "unmarshal config") 23 | } 24 | -------------------------------------------------------------------------------- /pkg/aikit/config/specs_test.go: -------------------------------------------------------------------------------- 1 | package config 2 | 3 | import ( 4 | "reflect" 5 | "testing" 6 | 7 | "github.com/kaito-project/aikit/pkg/utils" 8 | ) 9 | 10 | func TestNewFromBytes(t *testing.T) { 11 | type args struct { 12 | b []byte 13 | } 14 | tests := []struct { 15 | name string 16 | args args 17 | want *InferenceConfig 18 | wantErr bool 19 | }{ 20 | { 21 | name: "valid yaml", 22 | args: args{b: []byte(` 23 | apiVersion: v1alpha1 24 | runtime: cuda 25 | backends: 26 | - exllama2 27 | models: 28 | - name: test 29 | source: foo 30 | `)}, 31 | want: &InferenceConfig{ 32 | APIVersion: utils.APIv1alpha1, 33 | Runtime: utils.RuntimeNVIDIA, 34 | Backends: []string{ 35 | utils.BackendExllamaV2, 36 | }, 37 | Models: []Model{ 38 | { 39 | Name: "test", 40 | Source: "foo", 41 | }, 42 | }, 43 | }, 44 | wantErr: false, 45 | }, 46 | { 47 | name: "invalid yaml", 48 | args: args{b: []byte(` 49 | foo 50 | `)}, 51 | want: nil, 52 | wantErr: true, 53 | }, 54 | } 55 | for _, tt := range tests { 56 | t.Run(tt.name, func(t *testing.T) { 57 | infCfg, _, err := NewFromBytes(tt.args.b) 58 | if (err != nil) != tt.wantErr { 59 | t.Errorf("NewFromBytes() error = %v, wantErr %v", err, tt.wantErr) 60 | return 61 | } 62 | if !reflect.DeepEqual(infCfg, tt.want) { 63 | t.Errorf("NewFromBytes() = %v, want %v", infCfg, tt.want) 64 | } 65 | }) 66 | } 67 | } 68 | -------------------------------------------------------------------------------- /pkg/aikit2llb/finetune/convert.go: -------------------------------------------------------------------------------- 1 | package finetune 2 | 3 | import ( 4 | "fmt" 5 | 6 | "github.com/kaito-project/aikit/pkg/aikit/config" 7 | "github.com/kaito-project/aikit/pkg/utils" 8 | "github.com/kaito-project/aikit/pkg/version" 9 | "github.com/moby/buildkit/client/llb" 10 | "github.com/moby/buildkit/util/system" 11 | "gopkg.in/yaml.v2" 12 | ) 13 | 14 | const ( 15 | unslothCommitOrTag = "fb77505f8429566f5d21d6ea5318c342e8a67991" // September-2024 16 | nvidiaMknod = "mknod --mode 666 /dev/nvidiactl c 195 255 && mknod --mode 666 /dev/nvidia-uvm c 235 0 && mknod --mode 666 /dev/nvidia-uvm-tools c 235 1 && mknod --mode 666 /dev/nvidia0 c 195 0 && nvidia-smi" 17 | sourceVenv = ". 
.venv/bin/activate" 18 | ) 19 | 20 | func Aikit2LLB(c *config.FineTuneConfig) llb.State { 21 | env := map[string]string{ 22 | "PATH": system.DefaultPathEnv("linux") + ":/usr/local/cuda/bin", 23 | "NVIDIA_REQUIRE_CUDA": "cuda>=12.0", 24 | "NVIDIA_DRIVER_CAPABILITIES": "compute,utility", 25 | "NVIDIA_VISIBLE_DEVICES": "all", 26 | "LD_LIBRARY_PATH": "/usr/local/cuda/lib64", 27 | } 28 | 29 | state := llb.Image(utils.CudaDevel) 30 | for k, v := range env { 31 | state = state.AddEnv(k, v) 32 | } 33 | 34 | // installing dependencies 35 | // due to buildkit run limitations, we need to install nvidia drivers and driver version must match the host 36 | state = state.Run(utils.Sh("apt-get update && apt-get install -y --no-install-recommends python3-dev python3 python3-pip python-is-python3 git wget kmod && cd /root && VERSION=$(cat /proc/driver/nvidia/version | sed -n 's/.*NVIDIA UNIX x86_64 Kernel Module \\([0-9]\\+\\.[0-9]\\+\\.[0-9]\\+\\).*/\\1/p') && wget --no-verbose https://download.nvidia.com/XFree86/Linux-x86_64/$VERSION/NVIDIA-Linux-x86_64-$VERSION.run && chmod +x NVIDIA-Linux-x86_64-$VERSION.run && ./NVIDIA-Linux-x86_64-$VERSION.run -x && rm NVIDIA-Linux-x86_64-$VERSION.run && /root/NVIDIA-Linux-x86_64-$VERSION/nvidia-installer -a -s --skip-depmod --no-dkms --no-nvidia-modprobe --no-questions --no-systemd --no-x-check --no-kernel-modules --no-kernel-module-source && rm -rf /root/NVIDIA-Linux-x86_64-$VERSION")).Root() 37 | 38 | // write config to /config.yaml 39 | cfg, err := yaml.Marshal(c) 40 | if err != nil { 41 | panic(err) 42 | } 43 | state = state.Run(utils.Shf("echo -n \"%s\" > /config.yaml", string(cfg))).Root() 44 | 45 | var scratch llb.State 46 | if c.Target == utils.TargetUnsloth { 47 | // installing unsloth and its dependencies 48 | // uv does not support installing xformers via unsloth pyproject 49 | state = state.Run(utils.Shf("pip install --upgrade pip uv && uv venv --system-site-packages && %[1]s && uv pip install --upgrade --force-reinstall packaging torch==2.4.0 ipython ninja packaging bitsandbytes setuptools==69.5.1 wheel psutil transformers==4.44.2 numpy==2.0.2 && uv pip install flash-attn --no-build-isolation && python -m pip install 'unsloth[cu121_ampere_torch240] @ git+https://github.com/unslothai/unsloth.git@%[2]s'", sourceVenv, unslothCommitOrTag)).Root() 50 | 51 | version := version.Version 52 | if version == "" { 53 | version = "main" 54 | } 55 | unslothScriptURL := fmt.Sprintf("https://raw.githubusercontent.com/kaito-project/aikit/%s/pkg/finetune/target_unsloth.py", version) 56 | var opts []llb.HTTPOption 57 | opts = append(opts, llb.Chmod(0o755)) 58 | unslothScript := llb.HTTP(unslothScriptURL, opts...) 
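// Copy the downloaded fine-tuning script to the image root so it can be invoked
// below with "python -m target_unsloth".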
59 | state = state.File( 60 | llb.Copy(unslothScript, utils.FileNameFromURL(unslothScriptURL), "/"), 61 | llb.WithCustomName("Copying "+utils.FileNameFromURL(unslothScriptURL)), 62 | ) 63 | 64 | // setup nvidia devices and run unsloth 65 | // due to buildkit run limitations, we need to create the devices manually and run unsloth in the same command 66 | state = state.Run(utils.Shf("%[1]s && %[2]s && python -m target_unsloth", nvidiaMknod, sourceVenv), llb.Security(llb.SecurityModeInsecure)).Root() 67 | 68 | // copy gguf to scratch which will be the output 69 | const inputFile = "model/*.gguf" 70 | copyOpts := []llb.CopyOption{} 71 | copyOpts = append(copyOpts, &llb.CopyInfo{AllowWildcard: true}) 72 | outputFile := fmt.Sprintf("%s-%s.gguf", c.Output.Name, c.Output.Quantize) 73 | scratch = llb.Scratch().File(llb.Copy(state, inputFile, outputFile, copyOpts...)) 74 | } 75 | 76 | return scratch 77 | } 78 | -------------------------------------------------------------------------------- /pkg/aikit2llb/inference/diffusers.go: -------------------------------------------------------------------------------- 1 | package inference 2 | 3 | import ( 4 | "github.com/moby/buildkit/client/llb" 5 | ) 6 | 7 | // installDiffusersDependencies installs minimal Python dependencies required for diffusers backend. 8 | // Diffusers only needs basic Python tools, no build dependencies. 9 | func installDiffusersDependencies(s llb.State, merge llb.State) llb.State { 10 | return installPythonBaseDependencies(s, merge) 11 | } 12 | -------------------------------------------------------------------------------- /pkg/aikit2llb/inference/diffusers_test.go: -------------------------------------------------------------------------------- 1 | package inference 2 | 3 | import ( 4 | "testing" 5 | 6 | "github.com/moby/buildkit/client/llb" 7 | ) 8 | 9 | func TestInstallDiffusersDependencies(t *testing.T) { 10 | // Create a simple base state for testing 11 | baseState := llb.Image("ubuntu:22.04") 12 | mergeState := baseState 13 | 14 | // Call the function to install dependencies 15 | // This should execute without panicking 16 | defer func() { 17 | if r := recover(); r != nil { 18 | t.Errorf("installDiffusersDependencies panicked: %v", r) 19 | } 20 | }() 21 | 22 | result := installDiffusersDependencies(baseState, mergeState) 23 | 24 | // The function should return a valid LLB state 25 | // We can't easily test the actual installation without running BuildKit, 26 | // but we can verify the function executes without panicking 27 | _ = result // Use the result to avoid unused variable warning 28 | } 29 | -------------------------------------------------------------------------------- /pkg/aikit2llb/inference/exllama.go: -------------------------------------------------------------------------------- 1 | package inference 2 | 3 | import ( 4 | "github.com/kaito-project/aikit/pkg/utils" 5 | "github.com/moby/buildkit/client/llb" 6 | ) 7 | 8 | // installPythonBaseDependencies installs minimal Python dependencies common to all Python backends. 
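// It installs packages on top of the input state, then diffs against the saved
// state and merges only that layer, so base-image content is not duplicated.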
9 | func installPythonBaseDependencies(s llb.State, merge llb.State) llb.State { 10 | savedState := s 11 | 12 | // Install minimal Python dependencies common to all Python backends 13 | s = s.Run(utils.Sh("apt-get update && apt-get install --no-install-recommends -y git python3 python3-pip python3-venv python-is-python3 && pip install uv && pip install grpcio-tools==1.71.0 --no-dependencies && apt-get clean"), llb.IgnoreCache).Root() 14 | 15 | diff := llb.Diff(savedState, s) 16 | return llb.Merge([]llb.State{merge, diff}) 17 | } 18 | 19 | // installExllamaDependencies installs Python and other dependencies required for exllama2 backend. 20 | // ExLLama2 needs additional build tools for compilation. 21 | func installExllamaDependencies(s llb.State, merge llb.State) llb.State { 22 | savedState := s 23 | 24 | // Install Python and build dependencies needed for exllama2 25 | s = s.Run(utils.Sh("apt-get update && apt-get install --no-install-recommends -y bash git ca-certificates python3-pip python3-dev python3-venv python-is-python3 make g++ curl && pip install uv ninja && pip install grpcio-tools==1.71.0 --no-dependencies && apt-get clean"), llb.IgnoreCache).Root() 26 | 27 | diff := llb.Diff(savedState, s) 28 | return llb.Merge([]llb.State{merge, diff}) 29 | } 30 | -------------------------------------------------------------------------------- /pkg/aikit2llb/inference/exllama_test.go: -------------------------------------------------------------------------------- 1 | package inference 2 | 3 | import ( 4 | "testing" 5 | 6 | "github.com/moby/buildkit/client/llb" 7 | ) 8 | 9 | func TestInstallExllamaDependencies(t *testing.T) { 10 | // Create a simple base state for testing 11 | baseState := llb.Image("ubuntu:22.04") 12 | mergeState := baseState 13 | 14 | // Call the function to install dependencies 15 | // This should execute without panicking 16 | defer func() { 17 | if r := recover(); r != nil { 18 | t.Errorf("installExllamaDependencies panicked: %v", r) 19 | } 20 | }() 21 | 22 | result := installExllamaDependencies(baseState, mergeState) 23 | 24 | // The function should return a valid LLB state 25 | // We can't easily test the actual installation without running BuildKit, 26 | // but we can verify the function executes without panicking 27 | _ = result // Use the result to avoid unused variable warning 28 | } 29 | 30 | func TestInstallPythonBaseDependencies(t *testing.T) { 31 | // Create a simple base state for testing 32 | baseState := llb.Image("ubuntu:22.04") 33 | mergeState := baseState 34 | 35 | // Call the function to install dependencies 36 | // This should execute without panicking 37 | defer func() { 38 | if r := recover(); r != nil { 39 | t.Errorf("installPythonBaseDependencies panicked: %v", r) 40 | } 41 | }() 42 | 43 | result := installPythonBaseDependencies(baseState, mergeState) 44 | 45 | // The function should return a valid LLB state 46 | // We can't easily test the actual installation without running BuildKit, 47 | // but we can verify the function executes without panicking 48 | _ = result // Use the result to avoid unused variable warning 49 | } 50 | -------------------------------------------------------------------------------- /pkg/aikit2llb/inference/image.go: -------------------------------------------------------------------------------- 1 | package inference 2 | 3 | import ( 4 | "github.com/kaito-project/aikit/pkg/aikit/config" 5 | "github.com/kaito-project/aikit/pkg/utils" 6 | "github.com/moby/buildkit/util/system" 7 | specs 
"github.com/opencontainers/image-spec/specs-go/v1" 8 | ) 9 | 10 | func NewImageConfig(c *config.InferenceConfig, platform *specs.Platform) *specs.Image { 11 | img := emptyImage(c, platform) 12 | cmd := []string{} 13 | if c.Debug { 14 | cmd = append(cmd, "--debug") 15 | } 16 | if c.Config != "" { 17 | cmd = append(cmd, "--config-file=/config.yaml") 18 | } 19 | 20 | img.Config.Entrypoint = []string{"local-ai"} 21 | img.Config.Cmd = cmd 22 | return img 23 | } 24 | 25 | func emptyImage(c *config.InferenceConfig, platform *specs.Platform) *specs.Image { 26 | img := &specs.Image{ 27 | Platform: specs.Platform{ 28 | Architecture: platform.Architecture, 29 | OS: utils.PlatformLinux, 30 | }, 31 | } 32 | img.RootFS.Type = "layers" 33 | img.Config.WorkingDir = "/" 34 | 35 | img.Config.Env = []string{ 36 | "PATH=" + system.DefaultPathEnv(utils.PlatformLinux), 37 | "CONFIG_FILE=/config.yaml", 38 | } 39 | 40 | cudaEnv := []string{ 41 | "PATH=" + system.DefaultPathEnv(utils.PlatformLinux) + ":/usr/local/cuda/bin", 42 | "NVIDIA_REQUIRE_CUDA=cuda>=12.0", 43 | "NVIDIA_DRIVER_CAPABILITIES=compute,utility", 44 | "NVIDIA_VISIBLE_DEVICES=all", 45 | "LD_LIBRARY_PATH=/usr/local/cuda/lib64", 46 | "BUILD_TYPE=cublas", 47 | "CUDA_HOME=/usr/local/cuda", 48 | } 49 | if c.Runtime == utils.RuntimeNVIDIA { 50 | img.Config.Env = append(img.Config.Env, cudaEnv...) 51 | } 52 | 53 | return img 54 | } 55 | -------------------------------------------------------------------------------- /pkg/build/args.go: -------------------------------------------------------------------------------- 1 | package build 2 | 3 | import ( 4 | "fmt" 5 | "path" 6 | "strings" 7 | 8 | "github.com/kaito-project/aikit/pkg/aikit/config" 9 | "github.com/kaito-project/aikit/pkg/aikit2llb/inference" 10 | "github.com/kaito-project/aikit/pkg/utils" 11 | ) 12 | 13 | // parseBuildArgs parses the build arguments and configures inference settings. 
14 | func parseBuildArgs(opts map[string]string, inferenceCfg *config.InferenceConfig) error { 15 | if inferenceCfg == nil { 16 | return nil 17 | } 18 | 19 | // Get model and runtime arguments 20 | modelArg := getBuildArg(opts, "model") 21 | runtimeArg := getBuildArg(opts, "runtime") 22 | 23 | // Set the runtime if provided 24 | if runtimeArg != "" { 25 | inferenceCfg.Runtime = runtimeArg 26 | } 27 | 28 | // Set the model if provided 29 | if modelArg != "" { 30 | var modelName, modelSource string 31 | var err error 32 | 33 | // Handle based on the URL prefix 34 | switch { 35 | case strings.HasPrefix(modelArg, "huggingface://"): 36 | // Handle Hugging Face URLs with optional branch 37 | modelSource, modelName, err = inference.ParseHuggingFaceURL(modelArg) 38 | if err != nil { 39 | return err 40 | } 41 | 42 | case strings.HasPrefix(modelArg, "http://"), strings.HasPrefix(modelArg, "https://"): 43 | // Handle HTTP(S) URLs directly 44 | modelName = utils.FileNameFromURL(modelArg) 45 | modelSource = modelArg 46 | 47 | case strings.HasPrefix(modelArg, "oci://"): 48 | // Handle OCI URLs 49 | modelName = parseOCIURL(modelArg) 50 | modelSource = modelArg 51 | 52 | default: 53 | // Assume it's a local file path 54 | modelName = path.Base(modelArg) 55 | modelSource = modelArg 56 | } 57 | 58 | // Set the inference configuration 59 | inferenceCfg.Models = []config.Model{ 60 | { 61 | Name: modelName, 62 | Source: modelSource, 63 | }, 64 | } 65 | inferenceCfg.Config = generateInferenceConfig(modelName) 66 | } 67 | 68 | return nil 69 | } 70 | 71 | // generateInferenceConfig generates the inference configuration for the given model name. 72 | func generateInferenceConfig(modelName string) string { 73 | return fmt.Sprintf(` 74 | - name: %[1]s 75 | backend: llama 76 | parameters: 77 | model: %[1]s`, modelName) 78 | } 79 | 80 | // parseOCIURL extracts model name for OCI-based models. 
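// For example, "oci://registry.ollama.ai/library/llama3.2:1b" yields "llama3.2";
// generic OCI artifacts fall back to the path base with any tag or digest stripped.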
81 | func parseOCIURL(source string) string { 82 | const ollamaRegistryURL = "registry.ollama.ai" 83 | artifactURL := strings.TrimPrefix(source, "oci://") 84 | var modelName string 85 | 86 | if strings.HasPrefix(artifactURL, ollamaRegistryURL) { 87 | // Special handling for Ollama registry 88 | artifactURLWithoutTag := strings.Split(artifactURL, ":")[0] 89 | modelName = strings.Split(artifactURLWithoutTag, "/")[2] 90 | } else { 91 | // Generic OCI artifact 92 | modelName = path.Base(artifactURL) 93 | modelName = strings.Split(modelName, ":")[0] 94 | modelName = strings.Split(modelName, "@")[0] 95 | } 96 | 97 | return modelName 98 | } 99 | -------------------------------------------------------------------------------- /pkg/finetune/target_unsloth.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | from unsloth import is_bfloat16_supported 4 | from transformers import TrainingArguments, DataCollatorForSeq2Seq 5 | from unsloth import FastLanguageModel 6 | import torch 7 | from trl import SFTTrainer 8 | from transformers import TrainingArguments 9 | from datasets import load_dataset 10 | import yaml 11 | 12 | with open('config.yaml', 'r') as config_file: 13 | try: 14 | data = yaml.safe_load(config_file) 15 | print(data) 16 | except yaml.YAMLError as exc: 17 | print(exc) 18 | 19 | cfg = data.get('config').get('unsloth') 20 | max_seq_length = cfg.get('maxSeqLength') 21 | 22 | model, tokenizer = FastLanguageModel.from_pretrained( 23 | model_name=data.get('baseModel'), 24 | max_seq_length=max_seq_length, 25 | dtype=None, 26 | load_in_4bit=True, 27 | ) 28 | 29 | model = FastLanguageModel.get_peft_model( 30 | model, 31 | r = 16, # Choose any number > 0 ! Suggested 8, 16, 32, 64, 128 32 | target_modules = ["q_proj", "k_proj", "v_proj", "o_proj", 33 | "gate_proj", "up_proj", "down_proj",], 34 | lora_alpha = 16, 35 | lora_dropout = 0, # Supports any, but = 0 is optimized 36 | bias = "none", # Supports any, but = "none" is optimized 37 | use_gradient_checkpointing="unsloth", 38 | random_state = 3407, 39 | use_rslora = False, # We support rank stabilized LoRA 40 | loftq_config = None, # And LoftQ 41 | ) 42 | 43 | # TODO: right now, this is hardcoded for alpaca. use the dataset type here in the future to make this customizable 44 | alpaca_prompt = """Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request. 45 | 46 | ### Instruction: 47 | {} 48 | 49 | ### Input: 50 | {} 51 | 52 | ### Response: 53 | {}""" 54 | 55 | EOS_TOKEN = tokenizer.eos_token 56 | def formatting_prompts_func(examples): 57 | instructions = examples["instruction"] 58 | inputs = examples["input"] 59 | outputs = examples["output"] 60 | texts = [] 61 | for instruction, input, output in zip(instructions, inputs, outputs): 62 | # Must add EOS_TOKEN, otherwise your generation will go on forever! 
63 | text = alpaca_prompt.format(instruction, input, output) + EOS_TOKEN 64 | texts.append(text) 65 | return { "text" : texts, } 66 | pass 67 | 68 | from datasets import load_dataset 69 | source = data.get('datasets')[0]['source'] 70 | 71 | if source.startswith('http'): 72 | dataset = load_dataset("json", data_files={"train": source}, split="train") 73 | else: 74 | dataset = load_dataset(source, split = "train") 75 | 76 | dataset = dataset.map(formatting_prompts_func, batched=True) 77 | 78 | trainer = SFTTrainer( 79 | model=model, 80 | train_dataset=dataset, 81 | dataset_text_field="text", 82 | max_seq_length=max_seq_length, 83 | data_collator=DataCollatorForSeq2Seq(tokenizer=tokenizer), 84 | tokenizer=tokenizer, 85 | dataset_num_proc = 2, 86 | packing = cfg.get('packing'), # Can make training 5x faster for short sequences. 87 | args=TrainingArguments( 88 | per_device_train_batch_size=cfg.get('batchSize'), 89 | gradient_accumulation_steps=cfg.get('gradientAccumulationSteps'), 90 | warmup_steps=cfg.get('warmupSteps'), 91 | max_steps=cfg.get('maxSteps'), 92 | learning_rate = cfg.get('learningRate'), 93 | fp16=not is_bfloat16_supported(), 94 | bf16=is_bfloat16_supported(), 95 | logging_steps=cfg.get('loggingSteps'), 96 | optim=cfg.get('optimizer'), 97 | weight_decay = cfg.get('weightDecay'), 98 | lr_scheduler_type = cfg.get('lrSchedulerType'), 99 | seed=cfg.get('seed'), 100 | output_dir="outputs", 101 | ), 102 | ) 103 | trainer.train() 104 | 105 | output = data.get('output') 106 | model.save_pretrained_gguf(output.get('name'), tokenizer, 107 | quantization_method=output.get('quantize')) 108 | -------------------------------------------------------------------------------- /pkg/utils/const.go: -------------------------------------------------------------------------------- 1 | package utils // nolint:revive 2 | 3 | const ( 4 | RuntimeNVIDIA = "cuda" 5 | RuntimeAppleSilicon = "applesilicon" // experimental apple silicon runtime with vulkan arm64 support 6 | 7 | BackendExllamaV2 = "exllama2" 8 | BackendDiffusers = "diffusers" 9 | BackendLlamaCpp = "llama-cpp" 10 | 11 | BackendOCIRegistry = "quay.io/go-skynet/local-ai-backends" 12 | 13 | TargetUnsloth = "unsloth" 14 | 15 | DatasetAlpaca = "alpaca" 16 | 17 | APIv1alpha1 = "v1alpha1" 18 | 19 | UbuntuBase = "docker.io/library/ubuntu:22.04" 20 | AppleSiliconBase = "ghcr.io/kaito-project/aikit/applesilicon/base:latest" 21 | CudaDevel = "nvcr.io/nvidia/cuda:12.3.2-devel-ubuntu22.04" 22 | 23 | PlatformLinux = "linux" 24 | PlatformAMD64 = "amd64" 25 | PlatformARM64 = "arm64" 26 | ) 27 | -------------------------------------------------------------------------------- /pkg/utils/util.go: -------------------------------------------------------------------------------- 1 | package utils 2 | 3 | import ( 4 | "fmt" 5 | "net/url" 6 | "path" 7 | 8 | "github.com/moby/buildkit/client/llb" 9 | ) 10 | 11 | func FileNameFromURL(urlString string) string { 12 | parsedURL, err := url.Parse(urlString) 13 | if err != nil { 14 | panic(err) 15 | } 16 | return path.Base(parsedURL.Path) 17 | } 18 | 19 | func Sh(cmd string) llb.RunOption { 20 | return llb.Args([]string{"/bin/sh", "-c", cmd}) 21 | } 22 | 23 | func Shf(cmd string, v ...interface{}) llb.RunOption { 24 | return llb.Args([]string{"/bin/sh", "-c", fmt.Sprintf(cmd, v...)}) 25 | } 26 | 27 | func Bashf(cmd string, v ...interface{}) llb.RunOption { 28 | return llb.Args([]string{"/bin/bash", "-c", fmt.Sprintf(cmd, v...)}) 29 | } 30 | -------------------------------------------------------------------------------- 
/pkg/utils/util_test.go: -------------------------------------------------------------------------------- 1 | package utils // nolint:revive 2 | 3 | import ( 4 | "testing" 5 | ) 6 | 7 | func Test_FileNameFromURL(t *testing.T) { 8 | type args struct { 9 | urlString string 10 | } 11 | tests := []struct { 12 | name string 13 | args args 14 | want string 15 | }{ 16 | { 17 | name: "simple", 18 | args: args{urlString: "http://foo.bar/baz"}, 19 | want: "baz", 20 | }, 21 | { 22 | name: "complex", 23 | args: args{urlString: "http://foo.bar/baz.tar.gz"}, 24 | want: "baz.tar.gz", 25 | }, 26 | { 27 | name: "complex with path", 28 | args: args{urlString: "http://foo.bar/baz.tar.gz?foo=bar"}, 29 | want: "baz.tar.gz", 30 | }, 31 | } 32 | for _, tt := range tests { 33 | t.Run(tt.name, func(t *testing.T) { 34 | if got := FileNameFromURL(tt.args.urlString); got != tt.want { 35 | t.Errorf("FileNameFromURL() = %v, want %v", got, tt.want) 36 | } 37 | }) 38 | } 39 | } 40 | -------------------------------------------------------------------------------- /pkg/version/version.go: -------------------------------------------------------------------------------- 1 | package version 2 | 3 | // Version is the aikit version. 4 | var Version string 5 | -------------------------------------------------------------------------------- /scripts/parse-models.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | extract_model_name() { 4 | echo "$1" | sed -E ' 5 | s/^llama-(3\.[1-9])-([0-9]+\.?[0-9]*b)-.*/llama\1/;t; 6 | s/^flux-([0-9]+)-dev$/flux\1/;t; 7 | s/^phi-(3\.5)-([0-9]+\.?[0-9]*b)-.*/phi\1/;t; 8 | s/^([a-z]+-[a-z]+)-([0-9]+\.?[0-9]*b)$/\1/;t; 9 | s/^([a-z]+)-([0-9]+x[0-9]+b|[0-9]+\.?[0-9]*b)-.*/\1/; 10 | s/^([a-z]+)-([0-9]+)-.*/\1\2/; 11 | s/^([a-z]+)-([0-9]+\.?[0-9]*b)$/\1/' 12 | } 13 | 14 | extract_model_size() { 15 | echo "$1" | sed -E ' 16 | s/^llama-(3\.[1-9])-([0-9]+\.?[0-9]*b)-.*/\2/;t; 17 | s/^flux-[0-9]+-dev$/dev/;t; 18 | s/^[a-z]+-[a-z]+-([0-9]+\.?[0-9]*b)$/\1/;t; 19 | s/^[a-z]+-([0-9]+x[0-9]+b|[0-9]+\.?[0-9]*b)-.*/\1/; 20 | s/^[a-z]+-[0-9]+(\.[0-9]+)?-([0-9]+\.?[0-9]*b).*/\2/; 21 | s/^[a-z]+-([0-9]+\.?[0-9]*b)$/\1/' 22 | } 23 | 24 | extract_model_type() { 25 | echo "$1" | sed -n -e 's/^flux-[0-9]+-\(dev\)$/\1/p' -e 's/.*\(chat\).*/\1/p' -e 's/.*\(instruct\).*/\1/p' 26 | } 27 | 28 | for MODEL in "llama-2-7b-chat" "llama-2-13b-chat" "llama-3-8b-instruct" "llama-3.1-8b-instruct" "llama-3.2-1b-instruct" "llama-3.2-3b-instruct" "phi-3-3.8b" "phi-3.5-3.8b-instruct" "gemma-2b-instruct" "gemma-2-2b-instruct" "codestral-22b" "llama-3-70b-instruct" "llama-3.3-70b-instruct" "mixtral-8x7b-instruct" "flux-1-dev" "qwq-32b" "phi-4-14b-instruct" "gpt-oss-20b" "gpt-oss-120b"; do 29 | echo "Model: $MODEL" 30 | echo " Name: $(extract_model_name "$MODEL")" 31 | echo " Size: $(extract_model_size "$MODEL")" 32 | echo " Type: $(extract_model_type "$MODEL")" 33 | echo 34 | done 35 | -------------------------------------------------------------------------------- /test/aikitfile-args.yaml: -------------------------------------------------------------------------------- 1 | #syntax=aikit:test 2 | apiVersion: v1alpha1 3 | debug: true 4 | -------------------------------------------------------------------------------- /test/aikitfile-cpu-exllama2.yaml: -------------------------------------------------------------------------------- 1 | #syntax=aikit:test 2 | apiVersion: v1alpha1 3 | debug: true 4 | backends: 5 | - exllama2 6 | models: 7 | - name: 
Llama2-7B-chat-exl2/output.safetensors 8 | source: https://huggingface.co/turboderp/Llama2-7B-chat-exl2/resolve/2.5bpw/output.safetensors 9 | - name: Llama2-7B-chat-exl2/tokenizer.model 10 | source: https://huggingface.co/turboderp/Llama2-7B-chat-exl2/resolve/2.5bpw/tokenizer.model 11 | - name: Llama2-7B-chat-exl2/config.json 12 | source: https://huggingface.co/turboderp/Llama2-7B-chat-exl2/raw/2.5bpw/config.json 13 | config: | 14 | - name: llama-2-7b-chat 15 | backend: exllama2 16 | context_size: 4096 17 | parameters: 18 | model: "Llama2-7B-chat-exl2" 19 | temperature: 0.2 20 | -------------------------------------------------------------------------------- /test/aikitfile-dev.yaml: -------------------------------------------------------------------------------- 1 | #syntax=ghcr.io/kaito-project/aikit/aikit:dev 2 | apiVersion: v1alpha1 3 | debug: true 4 | models: 5 | - name: llama-2-7b-chat 6 | source: https://huggingface.co/TheBloke/Llama-2-7B-Chat-GGUF/resolve/main/llama-2-7b-chat.Q4_K_M.gguf 7 | config: | 8 | - name: llama-2-7b-chat 9 | backend: llama 10 | parameters: 11 | top_k: 80 12 | temperature: 0.2 13 | top_p: 0.7 14 | model: llama-2-7b-chat.Q4_K_M.gguf 15 | context_size: 4096 16 | -------------------------------------------------------------------------------- /test/aikitfile-diffusers.yaml: -------------------------------------------------------------------------------- 1 | #syntax=aikit:test 2 | apiVersion: v1alpha1 3 | debug: true 4 | runtime: cuda 5 | backends: 6 | - diffusers 7 | models: 8 | - name: "dreamshaper_assets/DreamShaper_8_pruned.safetensors" 9 | sha256: 879db523c30d3b9017143d56705015e15a2cb5628762c11d086fed9538abd7fd 10 | source: "https://huggingface.co/Lykon/DreamShaper/resolve/main/DreamShaper_8_pruned.safetensors" 11 | config: | 12 | - name: dreamshaper 13 | backend: diffusers 14 | parameters: 15 | model: dreamshaper_assets/DreamShaper_8_pruned.safetensors 16 | diffusers: 17 | pipeline_type: StableDiffusionPipeline 18 | cuda: true 19 | enable_parameters: "negative_prompt,num_inference_steps" 20 | scheduler_type: "k_dpmpp_2m" 21 | step: 25 22 | f16: true 23 | -------------------------------------------------------------------------------- /test/aikitfile-exllama2-exl2.yaml: -------------------------------------------------------------------------------- 1 | #syntax=aikit:test 2 | apiVersion: v1alpha1 3 | debug: true 4 | runtime: cuda 5 | backends: 6 | - exllama2 7 | models: 8 | - name: Llama2-7B-chat-exl2/output.safetensors 9 | source: https://huggingface.co/turboderp/Llama2-7B-chat-exl2/resolve/2.5bpw/output.safetensors 10 | - name: Llama2-7B-chat-exl2/tokenizer.model 11 | source: https://huggingface.co/turboderp/Llama2-7B-chat-exl2/resolve/2.5bpw/tokenizer.model 12 | - name: Llama2-7B-chat-exl2/config.json 13 | source: https://huggingface.co/turboderp/Llama2-7B-chat-exl2/raw/2.5bpw/config.json 14 | config: | 15 | - name: llama-2-7b-chat 16 | backend: exllama2 17 | context_size: 4096 18 | parameters: 19 | model: "Llama2-7B-chat-exl2" 20 | temperature: 0.2 21 | -------------------------------------------------------------------------------- /test/aikitfile-exllama2-gptq.yaml: -------------------------------------------------------------------------------- 1 | #syntax=aikit:test 2 | apiVersion: v1alpha1 3 | debug: true 4 | runtime: cuda 5 | backends: 6 | - exllama2 7 | models: 8 | - name: Llama-2-7B-Chat-GPTQ/model.safetensors 9 | source: https://huggingface.co/TheBloke/Llama-2-7B-Chat-GPTQ/resolve/main/model.safetensors 10 | - name: 
Llama-2-7B-Chat-GPTQ/tokenizer.model 11 | source: https://huggingface.co/TheBloke/Llama-2-7B-Chat-GPTQ/resolve/main/tokenizer.model 12 | - name: Llama-2-7B-Chat-GPTQ/config.json 13 | source: https://huggingface.co/TheBloke/Llama-2-7B-Chat-GPTQ/resolve/main/config.json 14 | config: | 15 | - name: llama-2-7b-chat 16 | backend: exllama2 17 | context_size: 4096 18 | parameters: 19 | model: "Llama-2-7B-Chat-GPTQ" 20 | temperature: 0.2 21 | -------------------------------------------------------------------------------- /test/aikitfile-flux-schnell.yaml: -------------------------------------------------------------------------------- 1 | #syntax=aikit:test 2 | apiVersion: v1alpha1 3 | debug: true 4 | runtime: cuda 5 | backends: 6 | - diffusers 7 | config: | 8 | - name: flux-1-schnell 9 | backend: diffusers 10 | parameters: 11 | model: black-forest-labs/FLUX.1-schnell 12 | diffusers: 13 | cfg_scale: 0 14 | cuda: true 15 | enable_parameters: num_inference_steps 16 | pipeline_type: FluxPipeline 17 | f16: true 18 | step: 25 19 | low_vram: true 20 | -------------------------------------------------------------------------------- /test/aikitfile-hf.yaml: -------------------------------------------------------------------------------- 1 | #syntax=aikit:test 2 | apiVersion: v1alpha1 3 | debug: true 4 | models: 5 | - name: llama-3.2-1b-instruct 6 | source: huggingface://MaziyarPanahi/Llama-3.2-1B-Instruct-GGUF/Llama-3.2-1B-Instruct.Q4_K_M.gguf 7 | promptTemplates: 8 | - name: chatMsg 9 | template: | 10 | <|start_header_id|>{{if eq .RoleName \"assistant\"}}assistant{{else if eq .RoleName \"system\"}}system{{else if eq .RoleName \"tool\"}}tool{{else if eq .RoleName \"user\"}}user{{end}}<|end_header_id|> 11 | 12 | {{ if .FunctionCall -}} 13 | Function call: 14 | {{ else if eq .RoleName \"tool\" -}} 15 | Function response: 16 | {{ end -}} 17 | {{ if .Content -}} 18 | {{.Content -}} 19 | {{ else if .FunctionCall -}} 20 | {{ toJson .FunctionCall -}} 21 | {{ end -}} 22 | <|eot_id|> 23 | - name: function 24 | template: | 25 | <|start_header_id|>system<|end_header_id|> 26 | 27 | You are a function calling AI model. You are provided with function signatures within <tools></tools> XML tags. You may call one or more functions to assist with the user query. Don't make assumptions about what values to plug into functions. 
Here are the available tools: 28 | <tools> 29 | {{range .Functions}} 30 | {'type': 'function', 'function': {'name': '{{.Name}}', 'description': '{{.Description}}', 'parameters': {{toJson .Parameters}} }} 31 | {{end}} 32 | </tools> 33 | Use the following pydantic model json schema for each tool call you will make: 34 | {'title': 'FunctionCall', 'type': 'object', 'properties': {'arguments': {'title': 'Arguments', 'type': 'object'}, 'name': {'title': 'Name', 'type': 'string'}}, 'required': ['arguments', 'name']}<|eot_id|><|start_header_id|>assistant<|end_header_id|> 35 | Function call: 36 | - name: chat 37 | template: | 38 | <|begin_of_text|>{{.Input }} 39 | <|start_header_id|>assistant<|end_header_id|> 40 | - name: completion 41 | template: | 42 | {{.Input}} 43 | config: | 44 | - name: llama-3.2-1b-instruct 45 | backend: llama 46 | parameters: 47 | model: Llama-3.2-1B-Instruct.Q4_K_M.gguf 48 | context_size: 8192 49 | template: 50 | chat_message: \"chatMsg\" 51 | function: \"function\" 52 | chat: \"chat\" 53 | completion: \"completion\" 54 | stopwords: 55 | - <|im_end|> 56 | - <dummy32000> 57 | - <|eot_id|> 58 | - <|end_of_text|> 59 | -------------------------------------------------------------------------------- /test/aikitfile-llama-cpp.yaml: -------------------------------------------------------------------------------- 1 | #syntax=aikit:test 2 | apiVersion: v1alpha1 3 | debug: true 4 | backends: 5 | - llama-cpp 6 | models: 7 | - name: llama-3.2-1b-instruct 8 | source: https://huggingface.co/MaziyarPanahi/Llama-3.2-1B-Instruct-GGUF/resolve/main/Llama-3.2-1B-Instruct.Q4_K_M.gguf 9 | sha256: "e4650dd6b45ef456066b11e4927f775eef4dd1e0e8473c3c0f27dd19ee13cc4e" 10 | promptTemplates: 11 | - name: chatMsg 12 | template: | 13 | <|start_header_id|>{{if eq .RoleName \"assistant\"}}assistant{{else if eq .RoleName \"system\"}}system{{else if eq .RoleName \"tool\"}}tool{{else if eq .RoleName \"user\"}}user{{end}}<|end_header_id|> 14 | 15 | {{ if .FunctionCall -}} 16 | Function call: 17 | {{ else if eq .RoleName \"tool\" -}} 18 | Function response: 19 | {{ end -}} 20 | {{ if .Content -}} 21 | {{.Content -}} 22 | {{ else if .FunctionCall -}} 23 | {{ toJson .FunctionCall -}} 24 | {{ end -}} 25 | <|eot_id|> 26 | - name: function 27 | template: | 28 | <|start_header_id|>system<|end_header_id|> 29 | 30 | You have access to the following functions: 31 | 32 | {{range .Functions}} 33 | Use the function '{{.Name}}' to '{{.Description}}' 34 | {{toJson .Parameters}} 35 | {{end}} 36 | 37 | Think very carefully before calling functions. 
38 | If you choose to call a function, ONLY reply in the following format with no prefix or suffix:
39 | 
40 | <function=example_function_name>{{`{{\"example_name\": \"example_value\"}}`}}</function>
41 | 
42 | Reminder:
43 | - If looking for real time information use relevant functions before falling back to searching on internet
44 | - Function calls MUST follow the specified format, start with <function= and end with </function>
45 | - Required parameters MUST be specified
46 | - Only call one function at a time
47 | - Put the entire function call reply on one line
48 | <|eot_id|>
49 | {{.Input }}
50 | <|start_header_id|>assistant<|end_header_id|>
51 | - name: chat
52 | template: |
53 | {{.Input }}
54 | <|start_header_id|>assistant<|end_header_id|>
55 | - name: completion
56 | template: |
57 | {{.Input}}
58 | config: |
59 | - name: llama-3.2-1b-instruct
60 | backend: llama-cpp
61 | function:
62 | disable_no_action: true
63 | grammar:
64 | disable: true
65 | response_regex:
66 | - <function=(?P<name>\w+)>(?P<arguments>.*)</function>
67 | parameters:
68 | model: Llama-3.2-1B-Instruct.Q4_K_M.gguf
69 | context_size: 8192
70 | template:
71 | chat_message: \"chatMsg\"
72 | function: \"function\"
73 | chat: \"chat\"
74 | completion: \"completion\"
75 | stopwords:
76 | - <|im_end|>
77 | - <dummy32000>
78 | - <|eot_id|>
79 | - <|end_of_text|>
80 | 
--------------------------------------------------------------------------------
/test/aikitfile-llama-cuda.yaml:
--------------------------------------------------------------------------------
1 | #syntax=aikit:test
2 | apiVersion: v1alpha1
3 | debug: true
4 | runtime: cuda
5 | models:
6 | - name: llama-3.2-1b-instruct
7 | source: https://huggingface.co/MaziyarPanahi/Llama-3.2-1B-Instruct-GGUF/resolve/main/Llama-3.2-1B-Instruct.Q4_K_M.gguf
8 | sha256: "e4650dd6b45ef456066b11e4927f775eef4dd1e0e8473c3c0f27dd19ee13cc4e"
9 | promptTemplates:
10 | - name: chatMsg
11 | template: |
12 | <|start_header_id|>{{if eq .RoleName \"assistant\"}}assistant{{else if eq .RoleName \"system\"}}system{{else if eq .RoleName \"tool\"}}tool{{else if eq .RoleName \"user\"}}user{{end}}<|end_header_id|>
13 | 
14 | {{ if .FunctionCall -}}
15 | Function call:
16 | {{ else if eq .RoleName \"tool\" -}}
17 | Function response:
18 | {{ end -}}
19 | {{ if .Content -}}
20 | {{.Content -}}
21 | {{ else if .FunctionCall -}}
22 | {{ toJson .FunctionCall -}}
23 | {{ end -}}
24 | <|eot_id|>
25 | - name: function
26 | template: |
27 | <|start_header_id|>system<|end_header_id|>
28 | 
29 | You have access to the following functions:
30 | 
31 | {{range .Functions}}
32 | Use the function '{{.Name}}' to '{{.Description}}'
33 | {{toJson .Parameters}}
34 | {{end}}
35 | 
36 | Think very carefully before calling functions.
37 | If you choose to call a function, ONLY reply in the following format with no prefix or suffix:
38 | 
39 | <function=example_function_name>{{`{{\"example_name\": \"example_value\"}}`}}</function>
40 | 
41 | Reminder:
42 | - If looking for real time information use relevant functions before falling back to searching on internet
43 | - Function calls MUST follow the specified format, start with <function= and end with </function>
44 | - Required parameters MUST be specified
45 | - Only call one function at a time
46 | - Put the entire function call reply on one line
47 | <|eot_id|>
48 | {{.Input }}
49 | <|start_header_id|>assistant<|end_header_id|>
50 | - name: chat
51 | template: |
52 | {{.Input }}
53 | <|start_header_id|>assistant<|end_header_id|>
54 | - name: completion
55 | template: |
56 | {{.Input}}
57 | config: |
58 | - name: llama-3.2-1b-instruct
59 | backend: llama
60 | function:
61 | disable_no_action: true
62 | grammar:
63 | disable: true
64 | response_regex:
65 | - <function=(?P<name>\w+)>(?P<arguments>.*)</function>
66 | parameters:
67 | model: Llama-3.2-1B-Instruct.Q4_K_M.gguf
68 | context_size: 8192
69 | f16: true
70 | template:
71 | chat_message: \"chatMsg\"
72 | function: \"function\"
73 | chat: \"chat\"
74 | completion: \"completion\"
75 | stopwords:
76 | - <|im_end|>
77 | - <dummy32000>
78 | - <|eot_id|>
79 | - <|end_of_text|>
80 | 
--------------------------------------------------------------------------------
/test/aikitfile-llama.yaml:
--------------------------------------------------------------------------------
1 | #syntax=aikit:test
2 | apiVersion: v1alpha1
3 | debug: true
4 | backends:
5 | - llama-cpp
6 | models:
7 | - name: llama-3.2-1b-instruct
8 | source: https://huggingface.co/MaziyarPanahi/Llama-3.2-1B-Instruct-GGUF/resolve/main/Llama-3.2-1B-Instruct.Q4_K_M.gguf
9 | sha256: "e4650dd6b45ef456066b11e4927f775eef4dd1e0e8473c3c0f27dd19ee13cc4e"
10 | promptTemplates:
11 | - name: chatMsg
12 | template: |
13 | <|start_header_id|>{{if eq .RoleName \"assistant\"}}assistant{{else if eq .RoleName \"system\"}}system{{else if eq .RoleName \"tool\"}}tool{{else if eq .RoleName \"user\"}}user{{end}}<|end_header_id|>
14 | 
15 | {{ if .FunctionCall -}}
16 | Function call:
17 | {{ else if eq .RoleName \"tool\" -}}
18 | Function response:
19 | {{ end -}}
20 | {{ if .Content -}}
21 | {{.Content -}}
22 | {{ else if .FunctionCall -}}
23 | {{ toJson .FunctionCall -}}
24 | {{ end -}}
25 | <|eot_id|>
26 | - name: function
27 | template: |
28 | <|start_header_id|>system<|end_header_id|>
29 | 
30 | You have access to the following functions:
31 | 
32 | {{range .Functions}}
33 | Use the function '{{.Name}}' to '{{.Description}}'
34 | {{toJson .Parameters}}
35 | {{end}}
36 | 
37 | Think very carefully before calling functions.
38 | If you choose to call a function, ONLY reply in the following format with no prefix or suffix:
39 | 
40 | <function=example_function_name>{{`{{\"example_name\": \"example_value\"}}`}}</function>
41 | 
42 | Reminder:
43 | - If looking for real time information use relevant functions before falling back to searching on internet
44 | - Function calls MUST follow the specified format, start with <function= and end with </function>
45 | - Required parameters MUST be specified
46 | - Only call one function at a time
47 | - Put the entire function call reply on one line
48 | <|eot_id|>
49 | {{.Input }}
50 | <|start_header_id|>assistant<|end_header_id|>
51 | - name: chat
52 | template: |
53 | {{.Input }}
54 | <|start_header_id|>assistant<|end_header_id|>
55 | - name: completion
56 | template: |
57 | {{.Input}}
58 | config: |
59 | - name: llama-3.2-1b-instruct
60 | backend: llama
61 | function:
62 | disable_no_action: true
63 | grammar:
64 | disable: true
65 | response_regex:
66 | - <function=(?P<name>\w+)>(?P<arguments>.*)</function>
67 | parameters:
68 | model: Llama-3.2-1B-Instruct.Q4_K_M.gguf
69 | context_size: 8192
70 | template:
71 | chat_message: \"chatMsg\"
72 | function: \"function\"
73 | chat: \"chat\"
74 | completion: \"completion\"
75 | stopwords:
76 | - <|im_end|>
77 | - <dummy32000>
78 | - <|eot_id|>
79 | - <|end_of_text|>
80 | 
--------------------------------------------------------------------------------
/test/aikitfile-llava.yaml:
--------------------------------------------------------------------------------
1 | #syntax=aikit:test
2 | apiVersion: v1alpha1
3 | debug: true
4 | models:
5 | - name: ggml-model-q4_k.gguf
6 | source: https://huggingface.co/mys/ggml_bakllava-1/resolve/main/ggml-model-q4_k.gguf
7 | sha256: 5be58c339d8762e72d482a3e071a58d2df07df4a7aaabf8869415ae2b0f088d6
8 | promptTemplates:
9 | - name: chat-simple
10 | template: |
11 | A chat between a curious human and an artificial intelligence assistant. The assistant gives helpful, detailed, and polite answers to the human's questions.
12 | {{.Input}} 13 | ASSISTANT: 14 | - name: mmproj-model-f16.gguf 15 | source: https://huggingface.co/mys/ggml_bakllava-1/resolve/main/mmproj-model-f16.gguf 16 | sha256: 2e467eba710002839e0966d5e329942bb836eabd4e787bc713b07eff1d8ea13b 17 | config: | 18 | - name: llava 19 | backend: llama-cpp 20 | context_size: 4096 21 | threads: 4 22 | mmap: true 23 | mmproj: mmproj-model-f16.gguf 24 | roles: 25 | user: \"USER:\" 26 | assistant: \"ASSISTANT:\" 27 | system: \"SYSTEM:\" 28 | parameters: 29 | model: ggml-model-q4_k.gguf 30 | temperature: 0.2 31 | top_k: 40 32 | top_p: 0.95 33 | template: 34 | chat: chat-simple 35 | -------------------------------------------------------------------------------- /test/aikitfile-oci.yaml: -------------------------------------------------------------------------------- 1 | #syntax=aikit:test 2 | apiVersion: v1alpha1 3 | debug: true 4 | models: 5 | - name: llama-3.2-1b-instruct 6 | source: oci://registry.ollama.ai/library/llama3.2:1b 7 | promptTemplates: 8 | - name: chatMsg 9 | template: | 10 | <|start_header_id|>{{if eq .RoleName \"assistant\"}}assistant{{else if eq .RoleName \"system\"}}system{{else if eq .RoleName \"tool\"}}tool{{else if eq .RoleName \"user\"}}user{{end}}<|end_header_id|> 11 | 12 | {{ if .FunctionCall -}} 13 | Function call: 14 | {{ else if eq .RoleName \"tool\" -}} 15 | Function response: 16 | {{ end -}} 17 | {{ if .Content -}} 18 | {{.Content -}} 19 | {{ else if .FunctionCall -}} 20 | {{ toJson .FunctionCall -}} 21 | {{ end -}} 22 | <|eot_id|> 23 | - name: function 24 | template: | 25 | <|start_header_id|>system<|end_header_id|> 26 | 27 | You are a function calling AI model. You are provided with function signatures within <tools></tools> XML tags. You may call one or more functions to assist with the user query. Don't make assumptions about what values to plug into functions. Here are the available tools: 28 | <tools> 29 | {{range .Functions}} 30 | {'type': 'function', 'function': {'name': '{{.Name}}', 'description': '{{.Description}}', 'parameters': {{toJson .Parameters}} }} 31 | {{end}} 32 | </tools> 33 | Use the following pydantic model json schema for each tool call you will make: 34 | {'title': 'FunctionCall', 'type': 'object', 'properties': {'arguments': {'title': 'Arguments', 'type': 'object'}, 'name': {'title': 'Name', 'type': 'string'}}, 'required': ['arguments', 'name']}<|eot_id|><|start_header_id|>assistant<|end_header_id|> 35 | Function call: 36 | - name: chat 37 | template: | 38 | <|begin_of_text|>{{.Input }} 39 | <|start_header_id|>assistant<|end_header_id|> 40 | - name: completion 41 | template: | 42 | {{.Input}} 43 | config: | 44 | - name: llama-3.2-1b-instruct 45 | backend: llama 46 | parameters: 47 | model: llama3.2 48 | context_size: 8192 49 | template: 50 | chat_message: \"chatMsg\" 51 | function: \"function\" 52 | chat: \"chat\" 53 | completion: \"completion\" 54 | stopwords: 55 | - <|im_end|> 56 | - <dummy32000> 57 | - <|eot_id|> 58 | - <|end_of_text|> 59 | -------------------------------------------------------------------------------- /test/aikitfile-unsloth-custom.yaml: -------------------------------------------------------------------------------- 1 | #syntax=aikit:test 2 | apiVersion: v1alpha1 3 | debug: true 4 | runtime: cuda 5 | models: 6 | - name: custom 7 | source: model-q4_k_m.gguf 8 | promptTemplates: 9 | - name: instruct 10 | template: | 11 | Below is an instruction that describes a task. Write a response that appropriately completes the request. 
12 | 13 | ### Instruction: 14 | {{.Input}} 15 | 16 | ### Response: 17 | config: | 18 | - name: custom 19 | backend: llama 20 | parameters: 21 | model: model-q4_k_m.gguf 22 | context_size: 4096 23 | f16: true 24 | mmap: true 25 | template: 26 | chat: instruct 27 | -------------------------------------------------------------------------------- /test/aikitfile-unsloth.yaml: -------------------------------------------------------------------------------- 1 | #syntax=aikit:test 2 | apiVersion: v1alpha1 3 | baseModel: unsloth/Meta-Llama-3.1-8B 4 | datasets: 5 | - source: "yahma/alpaca-cleaned" 6 | type: alpaca 7 | config: 8 | unsloth: 9 | packing: false 10 | maxSeqLength: 2048 11 | loadIn4bit: true 12 | batchSize: 2 13 | gradientAccumulationSteps: 4 14 | warmupSteps: 10 15 | maxSteps: 60 16 | learningRate: 0.0002 17 | loggingSteps: 1 18 | optimizer: adamw_8bit 19 | weightDecay: 0.01 20 | lrSchedulerType: linear 21 | seed: 42 22 | output: 23 | quantize: q4_k_m 24 | name: model 25 | -------------------------------------------------------------------------------- /website/.gitignore: -------------------------------------------------------------------------------- 1 | # Dependencies 2 | /node_modules 3 | 4 | # Production 5 | /build 6 | 7 | # Generated files 8 | .docusaurus 9 | .cache-loader 10 | 11 | # Misc 12 | .DS_Store 13 | .env.local 14 | .env.development.local 15 | .env.test.local 16 | .env.production.local 17 | 18 | npm-debug.log* 19 | yarn-debug.log* 20 | yarn-error.log* 21 | -------------------------------------------------------------------------------- /website/README.md: -------------------------------------------------------------------------------- 1 | # Website 2 | 3 | This website is built using [Docusaurus](https://docusaurus.io/), a modern static website generator. 4 | 5 | ### Installation 6 | 7 | ``` 8 | $ yarn 9 | ``` 10 | 11 | ### Local Development 12 | 13 | ``` 14 | $ yarn start 15 | ``` 16 | 17 | This command starts a local development server and opens up a browser window. Most changes are reflected live without having to restart the server. 18 | 19 | ### Build 20 | 21 | ``` 22 | $ yarn build 23 | ``` 24 | 25 | This command generates static content into the `build` directory and can be served using any static contents hosting service. 26 | 27 | ### Deployment 28 | 29 | Using SSH: 30 | 31 | ``` 32 | $ USE_SSH=true yarn deploy 33 | ``` 34 | 35 | Not using SSH: 36 | 37 | ``` 38 | $ GIT_USER=<Your GitHub username> yarn deploy 39 | ``` 40 | 41 | If you are using GitHub pages for hosting, this command is a convenient way to build the website and push to the `gh-pages` branch. 
42 | -------------------------------------------------------------------------------- /website/babel.config.js: -------------------------------------------------------------------------------- 1 | module.exports = { 2 | presets: [require.resolve('@docusaurus/core/lib/babel/preset')], 3 | }; 4 | -------------------------------------------------------------------------------- /website/docs/architecture.md: -------------------------------------------------------------------------------- 1 | --- 2 | title: Architecture 3 | --- 4 | 5 | Below is the architecture diagram for AIKit: 6 | 7 | [![AIKit Architecture](../static/img/architecture.png)](../static/img/architecture.png) 8 | -------------------------------------------------------------------------------- /website/docs/demo.md: -------------------------------------------------------------------------------- 1 | --- 2 | title: Demos 3 | --- 4 | 5 | Below are various demos to help you get started with AIKit. 6 | 7 | ## Inference 8 | 9 | ### Running inference with CUDA 10 | 11 | https://www.youtube.com/watch?v=yFh_Zfk34PE 12 | 13 | ### Running inference (on ARM64) 14 | 15 | https://www.youtube.com/watch?v=O0AOnxXp-o4 16 | 17 | ### Using Flux-1 Dev to generate images 18 | 19 | https://www.youtube.com/watch?v=m38h4afJlBE 20 | 21 | ## Building a model image 22 | 23 | ### Building a model image from a model on Hugging Face and running inference 24 | 25 | https://www.youtube.com/watch?v=DI5NbdEFLC8 26 | 27 | ### Building a model image from a model in ollama (OCI artifacts) and running inference 28 | 29 | https://www.youtube.com/watch?v=G6PrzhEe_p8 30 | 31 | ### Building a model image from configuration and running inference 32 | 33 | https://www.youtube.com/watch?v=5AQfG5VwN2c 34 | 35 | ## Fine tuning 36 | 37 | ### Fine Tuning Mistral 7b with the OpenHermes dataset and building a model image for inference 38 | 39 | https://www.youtube.com/watch?v=FZuVb-9i-94 40 | -------------------------------------------------------------------------------- /website/docs/diffusion.md: -------------------------------------------------------------------------------- 1 | --- 2 | title: Diffusion 3 | --- 4 | 5 | AIKit supports the [`diffusers`](#diffusers) backend. 6 | 7 | ## diffusers 8 | 9 | The `diffusers` backend uses the Hugging Face [`diffusers`](https://huggingface.co/docs/diffusers/en/index) library to generate images. This backend only supports the CUDA runtime. 10 | 11 | ### Example 12 | 13 | :::warning 14 | Please make sure to change syntax to `#syntax=ghcr.io/kaito-project/aikit/aikit:latest` in the examples below. 15 | ::: 16 | 17 | https://github.com/kaito-project/aikit/blob/main/test/aikitfile-diffusers.yaml 18 | 19 | ## stablediffusion NCNN 20 | 21 | https://github.com/EdVince/Stable-Diffusion-NCNN 22 | 23 | This backend: 24 | - provides support for Stable Diffusion models 25 | - does not support CUDA runtime yet 26 | 27 | :::note 28 | This has been deprecated as of the `v0.18.0` release. 29 | ::: 30 | 31 | ### Example 32 | 33 | :::warning 34 | Please make sure to change syntax to `#syntax=ghcr.io/kaito-project/aikit/aikit:latest` in the examples below.
35 | ::: 36 | 37 | https://github.com/kaito-project/aikit/blob/main/test/aikitfile-stablediffusion.yaml 38 | 39 | ### Demo 40 | 41 | https://www.youtube.com/watch?v=gh7b-rt70Ug 42 | -------------------------------------------------------------------------------- /website/docs/exllama2.md: -------------------------------------------------------------------------------- 1 | --- 2 | title: Exllama v2 (GPTQ and EXL2) 3 | --- 4 | 5 | [ExLlamaV2](https://github.com/turboderp/exllamav2) is an inference library for running local LLMs on modern consumer GPUs. 6 | 7 | This backend: 8 | - provides support for GPTQ and EXL2 models 9 | - requires CUDA runtime 10 | 11 | :::note 12 | This is an experimental backend and it may change in the future. 13 | ::: 14 | 15 | ## Example 16 | 17 | :::warning 18 | Please make sure to change syntax to `#syntax=ghcr.io/kaito-project/aikit/aikit:latest` in the examples below. 19 | ::: 20 | 21 | ### EXL2 22 | https://github.com/kaito-project/aikit/blob/main/test/aikitfile-exllama2-exl2.yaml 23 | 24 | ### GPTQ 25 | https://github.com/kaito-project/aikit/blob/main/test/aikitfile-exllama2-gptq.yaml 26 | -------------------------------------------------------------------------------- /website/docs/fine-tune.md: -------------------------------------------------------------------------------- 1 | --- 2 | title: Fine Tuning 3 | --- 4 | 5 | The fine tuning process allows the adaptation of pre-trained models to domain-specific data. At this time, the AIKit fine tuning process is only supported with NVIDIA GPUs. 6 | 7 | :::note 8 | Due to limitations with BuildKit and NVIDIA, it is essential that the GPU driver version on your host matches the version AIKit will install in the container during the build process. 9 | 10 | To determine your host GPU driver version, you can execute `nvidia-smi` or `cat /proc/driver/nvidia/version`. 11 | 12 | For information on the GPU driver versions supported by AIKit, please visit https://download.nvidia.com/XFree86/Linux-x86_64/. 13 | 14 | Should your host GPU driver version not be listed, you will need to update to a compatible version available in the NVIDIA downloads mentioned above. It's important to note that there's no need to directly install drivers from the NVIDIA downloads; the versions simply need to be consistent. 15 | 16 | We hope to optimize this process in the future to eliminate this requirement. 17 | ::: 18 | 19 | ## Getting Started 20 | 21 | To get started, you need to create a builder to be able to access host GPU devices. 22 | 23 | Create a builder with the following configuration: 24 | 25 | ```bash 26 | docker buildx create --name aikit-builder --use --buildkitd-flags '--allow-insecure-entitlement security.insecure' 27 | ``` 28 | 29 | :::tip 30 | Additionally, you can build using other BuildKit drivers, such as the [Kubernetes driver](https://docs.docker.com/build/drivers/kubernetes/) by setting `--driver=kubernetes`, if you are interested in building using a Kubernetes cluster. Please see [BuildKit Drivers](https://docs.docker.com/build/drivers/) for more information. 31 | ::: 32 | 33 | ## Targets and Configuration 34 | 35 | AIKit is capable of supporting multiple fine tuning implementation targets. At this time, [Unsloth](https://github.com/unslothai/unsloth) is the only supported target, but this can be extended to other fine tuning implementations in the future. 36 | 37 | ### Unsloth 38 | 39 | Create a YAML file with your configuration.
For example, a minimal config looks like: 40 | 41 | ```yaml 42 | #syntax=ghcr.io/kaito-project/aikit/aikit:latest 43 | apiVersion: v1alpha1 44 | baseModel: "unsloth/llama-2-7b-bnb-4bit" # base model to be fine tuned. this can be any model from Huggingface. For unsloth optimized base models, see https://huggingface.co/unsloth 45 | datasets: 46 | - source: "yahma/alpaca-cleaned" # data set to be used for fine tuning. This can be a Huggingface dataset or a URL pointing to a JSON file 47 | type: "alpaca" # type of dataset. only alpaca is supported at this time. 48 | config: 49 | unsloth: 50 | ``` 51 | 52 | For full configuration, please refer to [Fine Tune API Specifications](./specs-finetune.md). 53 | 54 | :::note 55 | Please refer to [Unsloth documentation](https://github.com/unslothai/unsloth) for more information about Unsloth configuration. 56 | ::: 57 | 58 | #### Example Configuration 59 | 60 | :::warning 61 | Please make sure to change syntax to `#syntax=ghcr.io/kaito-project/aikit/aikit:latest` in the example below. 62 | ::: 63 | 64 | https://github.com/kaito-project/aikit/blob/main/test/aikitfile-unsloth.yaml 65 | 66 | 67 | ## Build 68 | 69 | Build using the following command, and make sure to replace `--target` with the fine-tuning implementation of your choice (`unsloth` is the only option supported at this time), `--file` with the path to your configuration YAML, and `--output` with the output directory for the fine-tuned model. 70 | 71 | ```bash 72 | docker buildx build --builder aikit-builder --allow security.insecure --file "/path/to/config.yaml" --output "/path/to/output" --target unsloth --progress plain . 73 | ``` 74 | 75 | Depending on your setup and configuration, the build process may take some time. At the end of the build, the fine-tuned model will automatically be quantized with the specified format and output to the path specified in `--output`. 76 | 77 | The output will be a `GGUF` model file with the name and quantization format from the configuration. For example: 78 | 79 | ```bash 80 | $ ls -al _output 81 | -rw-r--r-- 1 kaito-project kaito-project 7161089856 Mar 3 00:19 aikit-model-q4_k_m.gguf 82 | ``` 83 | 84 | ## Demo 85 | 86 | https://www.youtube.com/watch?v=FZuVb-9i-94 87 | 88 | ## What's next? 89 | 90 | 👉 Now that you have a fine-tuned model output as a GGUF file, you can refer to [Creating Model Images](./create-images.md) on how to create an image with AIKit to serve your fine-tuned model! 91 | 92 | ## Troubleshooting 93 | 94 | ### Build fails with `failed to solve: DeadlineExceeded: context deadline exceeded` 95 | 96 | This is a known issue with BuildKit and might be related to disk speed. For more information, please see https://github.com/moby/buildkit/issues/4327 97 | 98 | ### Build fails with `ERROR 404: Not Found.` 99 | 100 | This issue arises from a discrepancy between the GPU driver versions on your host and the container. Unfortunately, a matching version for your host driver is not available in the NVIDIA downloads at this time. For further details, please consult the note provided at the beginning of this page. 101 | 102 | If you are on Windows Subsystem for Linux (WSL), WSL doesn't expose the host driver version information on `/proc/driver/nvidia/version`. Due to this limitation, WSL is not supported at this time.
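On supported (non-WSL) hosts, a quick sanity check is to print the host driver version and confirm it is listed in the NVIDIA downloads referenced in the note at the top of this page. The commands below are illustrative sketches of that check; the exact output format varies by driver version:

```bash
# Print the host GPU driver version (this must match the driver version
# AIKit installs in the container during the build).
cat /proc/driver/nvidia/version

# Alternatively, query only the driver version via nvidia-smi.
nvidia-smi --query-gpu=driver_version --format=csv,noheader
```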
103 | -------------------------------------------------------------------------------- /website/docs/gpu.md: -------------------------------------------------------------------------------- 1 | --- 2 | title: GPU Acceleration 3 | --- 4 | 5 | :::note 6 | At this time, only NVIDIA GPU acceleration is supported, with experimental support for Apple Silicon. Please open an issue if you'd like to see support for other GPU vendors. 7 | ::: 8 | 9 | ## NVIDIA 10 | 11 | AIKit supports GPU-accelerated inferencing with the [NVIDIA Container Toolkit](https://github.com/NVIDIA/nvidia-container-toolkit). You must also have [NVIDIA Drivers](https://www.nvidia.com/en-us/drivers/unix/) installed on your host machine. 12 | 13 | For Kubernetes, the [NVIDIA GPU Operator](https://github.com/NVIDIA/gpu-operator) provides a streamlined way to install the NVIDIA drivers and container toolkit to configure your cluster to use GPUs. 14 | 15 | To get started with GPU-accelerated inferencing, make sure to set the following in your `aikitfile` and build your model. 16 | 17 | ```yaml 18 | runtime: cuda # use NVIDIA CUDA runtime 19 | ``` 20 | 21 | For the `llama` backend, set the following in your `config`: 22 | 23 | ```yaml 24 | f16: true # use float16 precision 25 | gpu_layers: 35 # number of layers to offload to GPU 26 | low_vram: true # for devices with low VRAM 27 | ``` 28 | 29 | :::tip 30 | Make sure to customize these values based on your model and GPU specs. 31 | ::: 32 | 33 | :::note 34 | For the `exllama2` backend, GPU acceleration is enabled by default and cannot be disabled. 35 | ::: 36 | 37 | After building the model, you can run it with the [`--gpus all`](https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/latest/docker-specialized.html#gpu-enumeration) flag to enable GPU support: 38 | 39 | ```bash 40 | # for pre-made models, replace "my-model" with the image name 41 | docker run --rm --gpus all -p 8080:8080 my-model 42 | ``` 43 | 44 | If GPU acceleration is working, you'll see output similar to the following in the debug logs: 45 | 46 | ```bash 47 | 5:32AM DBG GRPC(llama-2-7b-chat.Q4_K_M.gguf-127.0.0.1:43735): stderr ggml_init_cublas: found 1 CUDA devices: 48 | 5:32AM DBG GRPC(llama-2-7b-chat.Q4_K_M.gguf-127.0.0.1:43735): stderr Device 0: Tesla T4, compute capability 7.5 49 | ... 50 | 5:32AM DBG GRPC(llama-2-7b-chat.Q4_K_M.gguf-127.0.0.1:43735): stderr llm_load_tensors: using CUDA for GPU acceleration 51 | 5:32AM DBG GRPC(llama-2-7b-chat.Q4_K_M.gguf-127.0.0.1:43735): stderr llm_load_tensors: mem required = 70.41 MB (+ 2048.00 MB per state) 52 | 5:32AM DBG GRPC(llama-2-7b-chat.Q4_K_M.gguf-127.0.0.1:43735): stderr llm_load_tensors: offloading 32 repeating layers to GPU 53 | 5:32AM DBG GRPC(llama-2-7b-chat.Q4_K_M.gguf-127.0.0.1:43735): stderr llm_load_tensors: offloading non-repeating layers to GPU 54 | 5:32AM DBG GRPC(llama-2-7b-chat.Q4_K_M.gguf-127.0.0.1:43735): stderr llm_load_tensors: offloading v cache to GPU 55 | 5:32AM DBG GRPC(llama-2-7b-chat.Q4_K_M.gguf-127.0.0.1:43735): stderr llm_load_tensors: offloading k cache to GPU 56 | 5:32AM DBG GRPC(llama-2-7b-chat.Q4_K_M.gguf-127.0.0.1:43735): stderr llm_load_tensors: offloaded 35/35 layers to GPU 57 | 5:32AM DBG GRPC(llama-2-7b-chat.Q4_K_M.gguf-127.0.0.1:43735): stderr llm_load_tensors: VRAM used: 5869 MB 58 | ``` 59 | 60 | ### Demo 61 | 62 | https://www.youtube.com/watch?v=yFh_Zfk34PE 63 | 64 | ## Apple Silicon (experimental) 65 | 66 | :::note 67 | Apple Silicon is an experimental runtime and it may change in the future.
This runtime is specific to Apple Silicon only, and it will not work as expected on other architectures, including Intel Macs. 68 | ::: 69 | 70 | AIKit supports Apple Silicon GPU acceleration with Podman Desktop for Mac via [`libkrun`](https://github.com/containers/libkrun). Please see the [Podman Desktop documentation](https://podman-desktop.io/docs/podman/gpu) on how to enable GPU support. 71 | 72 | To get started with Apple Silicon GPU-accelerated inferencing, make sure to set the following in your `aikitfile` and build your model. 73 | 74 | ```yaml 75 | runtime: applesilicon # use Apple Silicon runtime 76 | ``` 77 | 78 | Please note that only the default `llama.cpp` backend with `gguf` models is supported for Apple Silicon. 79 | 80 | After building the model, you can run it with: 81 | 82 | ```bash 83 | # for pre-made models, replace "my-model" with the image name 84 | podman run --rm --device /dev/dri -p 8080:8080 my-model 85 | ``` 86 | 87 | If GPU acceleration is working, you'll see output similar to the following in the debug logs: 88 | 89 | ```bash 90 | 6:16AM DBG GRPC(phi-3.5-3.8b-instruct-127.0.0.1:39883): stderr ggml_vulkan: Found 1 Vulkan devices: 91 | 6:16AM DBG GRPC(phi-3.5-3.8b-instruct-127.0.0.1:39883): stderr Vulkan0: Virtio-GPU Venus (Apple M1 Max) (venus) | uma: 1 | fp16: 1 | warp size: 32 92 | 6:16AM DBG GRPC(phi-3.5-3.8b-instruct-127.0.0.1:39883): stderr llama_load_model_from_file: using device Vulkan0 (Virtio-GPU Venus (Apple M1 Max)) - 65536 MiB free 93 | ... 94 | 6:16AM DBG GRPC(phi-3.5-3.8b-instruct-127.0.0.1:39883): stderr llm_load_tensors: offloading 32 repeating layers to GPU 95 | 6:16AM DBG GRPC(phi-3.5-3.8b-instruct-127.0.0.1:39883): stderr llm_load_tensors: offloading output layer to GPU 96 | 6:16AM DBG GRPC(phi-3.5-3.8b-instruct-127.0.0.1:39883): stderr llm_load_tensors: offloaded 33/33 layers to GPU 97 | 6:16AM DBG GRPC(phi-3.5-3.8b-instruct-127.0.0.1:39883): stderr llm_load_tensors: CPU_Mapped model buffer size = 52.84 MiB 98 | 6:16AM DBG GRPC(phi-3.5-3.8b-instruct-127.0.0.1:39883): stderr llm_load_tensors: Vulkan0 model buffer size = 2228.82 MiB 99 | ``` 100 | -------------------------------------------------------------------------------- /website/docs/intro.md: -------------------------------------------------------------------------------- 1 | --- 2 | title: Introduction 3 | slug: / 4 | --- 5 | 6 | AIKit is a comprehensive platform for quickly getting started with hosting, deploying, building, and fine-tuning large language models (LLMs). 7 | 8 | AIKit offers two main capabilities: 9 | 10 | - **Inference**: AIKit uses [LocalAI](https://localai.io/), which supports a wide range of inference capabilities and formats. LocalAI provides a drop-in replacement REST API that is OpenAI API compatible, so you can use any OpenAI API compatible client, such as [Kubectl AI](https://github.com/sozercan/kubectl-ai), [Chatbot-UI](https://github.com/sozercan/chatbot-ui) and many more, to send requests to open LLMs! 11 | 12 | - **[Fine Tuning](fine-tune.md)**: AIKit offers an extensible fine tuning interface. It supports [Unsloth](https://github.com/unslothai/unsloth) for a fast, memory-efficient, and easy fine-tuning experience. 13 | 14 | 👉 To get started, please see [Quick Start](quick-start.md)! 15 | 16 | ## Features 17 | 18 | - 💡 No GPU or internet access is required for inference! 19 | - 🐳 No additional tools are needed except for [Docker](https://docs.docker.com/desktop/install/linux-install/) or [Podman](https://podman.io)!
20 | - 🤏 Minimal image size, resulting in fewer vulnerabilities and a smaller attack surface with a custom [chiseled](https://ubuntu.com/containers/chiseled) image 21 | - 🎵 [Fine tune support](fine-tune.md) 22 | - 🚀 Easy to use declarative configuration for [inference](specs-inference.md) and [fine tuning](specs-finetune.md) 23 | - ✨ OpenAI API compatible to use with any OpenAI API compatible client 24 | - 📸 [Multi-modal model support](vision.md) 25 | - 🖼️ [Image generation support](diffusion.md) 26 | - 🦙 Support for GGUF ([`llama`](https://github.com/ggerganov/llama.cpp)), GPTQ or EXL2 ([`exllama2`](https://github.com/turboderp/exllamav2)), and GGML ([`llama-ggml`](https://github.com/ggerganov/llama.cpp)) models 27 | - 🚢 [Kubernetes deployment ready](#kubernetes-deployment) 28 | - 📦 Supports multiple models with a single image 29 | - 🖥️ Supports [AMD64 and ARM64](create-images.md#multi-platform-support) CPUs and [GPU-accelerated inferencing with NVIDIA GPUs](gpu.md) 30 | - 🔐 Ensure [supply chain security](security.md) with SBOMs, Provenance attestations, and signed images 31 | - 🌈 Support for non-proprietary and self-hosted container registries to store model images 32 | -------------------------------------------------------------------------------- /website/docs/llama-cpp.md: -------------------------------------------------------------------------------- 1 | --- 2 | title: llama.cpp (GGUF and GGML) 3 | --- 4 | 5 | For the `llama` backend, AIKit utilizes and depends on [llama.cpp](https://github.com/ggerganov/llama.cpp), which provides inference of Meta's LLaMA model (and others) in pure C/C++. 6 | 7 | This is the default backend for `aikit`. No additional configuration is required. 8 | 9 | This backend: 10 | - provides support for GGUF (recommended) and GGML models 11 | - supports both CPU (`avx2`, `avx` or `fallback`) and CUDA runtimes 12 | 13 | ## Example 14 | 15 | :::warning 16 | Please make sure to change syntax to `#syntax=ghcr.io/kaito-project/aikit/aikit:latest` in the examples below. 17 | ::: 18 | 19 | ### CPU 20 | https://github.com/kaito-project/aikit/blob/main/test/aikitfile-llama.yaml 21 | 22 | ### GPU (CUDA) 23 | https://github.com/kaito-project/aikit/blob/main/test/aikitfile-llama-cuda.yaml 24 | -------------------------------------------------------------------------------- /website/docs/quick-start.md: -------------------------------------------------------------------------------- 1 | --- 2 | title: Quick Start 3 | --- 4 | 5 | You can get started with AIKit quickly on your local machine without a GPU! 6 | 7 | ```bash 8 | docker run -d --rm -p 8080:8080 ghcr.io/kaito-project/aikit/llama3.1:8b 9 | ``` 10 | 11 | After running this, navigate to [http://localhost:8080/chat](http://localhost:8080/chat) to access the WebUI. 12 | 13 | ## API 14 | 15 | AIKit provides an OpenAI API compatible endpoint, so you can use any OpenAI API compatible client to send requests to open LLMs! 16 | 17 | For example: 18 | 19 | ```bash 20 | curl http://localhost:8080/v1/chat/completions -H "Content-Type: application/json" -d '{ 21 | "model": "llama-3.1-8b-instruct", 22 | "messages": [{"role": "user", "content": "explain kubernetes in a sentence"}] 23 | }' 24 | ``` 25 | 26 | Output should be similar to: 27 | 28 | ```jsonc 29 | { 30 | // ...
31 | "model": "llama-3.1-8b-instruct", 32 | "choices": [ 33 | { 34 | "index": 0, 35 | "finish_reason": "stop", 36 | "message": { 37 | "role": "assistant", 38 | "content": "Kubernetes is an open-source container orchestration system that automates the deployment, scaling, and management of applications and services, allowing developers to focus on writing code rather than managing infrastructure." 39 | } 40 | } 41 | ], 42 | // ... 43 | } 44 | ``` 45 | 46 | That's it! 🎉 API is OpenAI compatible so this is a drop-in replacement for any OpenAI API compatible client. 47 | 48 | ## Demo 49 | 50 | https://www.youtube.com/watch?v=O0AOnxXp-o4 51 | 52 | ## What's next? 53 | 54 | 👉 If you are interested in other pre-made models (such as Gemma, Mixtral or Phi), please refer to [Pre-made models](./premade-models.md). 55 | 56 | 👉 If you are interested in learning more about how to create your own custom model images, please refer to [Creating Model Images](./create-images.md). 57 | 58 | 👉 If you are interested in fine tuning a model with domain-specific knowledge, please refer to [Fine Tuning](./fine-tune.md). 59 | -------------------------------------------------------------------------------- /website/docs/release.md: -------------------------------------------------------------------------------- 1 | --- 2 | title: Release Process 3 | --- 4 | 5 | The release process is as follows: 6 | 7 | - Trigger the [release-pr action](https://github.com/kaito-project/aikit/actions/workflows/release-pr.yaml) with the version to release to create a release PR. Merge the PR to the applicable `release-X.Y` branch. 8 | 9 | - Tag the `release-X.Y` branch with a version number that's semver compliant (vMAJOR.MINOR.PATCH), and push the tag to GitHub. 10 | 11 | ```bash 12 | git tag v0.1.0 13 | git push origin v0.1.0 14 | ``` 15 | 16 | - GitHub Actions will automatically build the AIKit image and push the versioned and `latest` tag to GitHub Container Registry (GHCR) using [release action](https://github.com/kaito-project/aikit/actions/workflows/release.yaml). 17 | 18 | - Once release is done, trigger [update models](https://github.com/kaito-project/aikit/actions/workflows/update-models.yaml) action to update the pre-built models. 19 | 20 | - Mixtral 8x7b and Llama 3 70b models does not fit into GitHub runners due to their size. Trigger self-hosted [update models](https://github.com/kaito-project/aikit/actions/workflows/update-models-self.yaml) action to update these pre-built models. 21 | -------------------------------------------------------------------------------- /website/docs/security.md: -------------------------------------------------------------------------------- 1 | --- 2 | title: Supply Chain Security 3 | --- 4 | 5 | AIKit is designed with security in mind. Our approach to supply chain security includes detailed tracking of software components, transparent build processes, and proactive vulnerability management. This ensures that every part of our software ecosystem remains secure and trustworthy. 6 | 7 | ## SBOM (Software Bill of Materials) 8 | 9 | AIKit publishes [Software Bill of Materials (SBOM)](https://www.cisa.gov/sbom) for each release and for all [pre-made models](premade-models.md). The SBOM is a comprehensive list of all the components and dependencies used in the project, detailing their versions, licenses, and sources. This transparency helps users and stakeholders understand what software is included, facilitating better risk management and compliance with security and licensing requirements. 
10 | 11 | To access the SBOM for a specific AIKit image, use the following command: 12 | 13 | ```bash 14 | # update this with the image you want to inspect 15 | IMAGE=ghcr.io/kaito-project/aikit/llama3.1:8b 16 | docker buildx imagetools inspect $IMAGE --format "{{ json .SBOM.SPDX }}" 17 | ``` 18 | 19 | The output will provide a detailed JSON document listing all the software components in the image, including direct and transitive dependencies. For more information, please visit the [Docker SBOM documentation](https://docs.docker.com/build/attestations/sbom/). 20 | 21 | ## Provenance attestation 22 | 23 | Provenance attestation provides a detailed record of how and where an image was built, offering transparency and trust in the build process. AIKit uses BuildKit to generate and publish provenance data for each of its images. This data includes information about the build environment, the build process, and the source control context, ensuring that the images are traceable and verifiable from their origins to their final state. 24 | 25 | To inspect the provenance attestation for an AIKit image, you can use the following command: 26 | 27 | ```bash 28 | # update this with the image you want to inspect 29 | IMAGE=ghcr.io/kaito-project/aikit/llama3.1:8b 30 | docker buildx imagetools inspect $IMAGE --format "{{ json .Provenance.SLSA }}" 31 | ``` 32 | 33 | This command will output a JSON file containing the build provenance details, including the source repository, commit hash, build configuration, and more. This helps verify that the image was built from trusted sources and has not been tampered with. For more information, please visit the [Docker Provenance documentation](https://docs.docker.com/build/attestations/slsa-provenance/). 34 | 35 | ## Vulnerability Patching 36 | 37 | Ensuring that our images are free from known vulnerabilities is crucial. Not only does AIKit use a custom distroless-based base image to reduce the number of vulnerabilities, the attack surface, and image size, it also uses [Copacetic](https://github.com/project-copacetic/copacetic) to scan and patch OS-based vulnerabilities for all [pre-made models](premade-models.md) on a weekly basis. Copacetic automates the process of identifying and remediating security issues, helping us maintain a robust and secure software supply chain. 38 | 39 | Every week, this automated pipeline performs the following actions: 40 | 41 | - Scan: It analyzes the images for vulnerabilities using [Trivy](https://github.com/aquasecurity/trivy) against a comprehensive database of known security issues. 42 | - Patch: It automatically applies patches or updates to mitigate any identified vulnerabilities using [Copacetic](https://github.com/project-copacetic/copacetic). 43 | - Publish: It updates the images with the latest security fixes and publishes them to our container registry. 44 | 45 | This automated and regular process ensures that our users always have access to the most secure and up-to-date images. You can monitor the status and results of these scans on our security dashboard. 46 | 47 | ## Image Signature Verification 48 | 49 | AIKit and pre-made models are signed keylessly with OIDC in GitHub Actions using [cosign](https://github.com/sigstore/cosign).
You can verify the images with the following commands: 50 | 51 | ```bash 52 | IMAGE=ghcr.io/kaito-project/aikit/llama2:7b # update this with the image you want to verify 53 | DIGEST=$(cosign triangulate ${IMAGE} --type digest) 54 | cosign verify ${DIGEST} \ 55 | --certificate-oidc-issuer https://token.actions.githubusercontent.com \ 56 | --certificate-identity-regexp 'https://github\.com/kaito-project/aikit/\.github/workflows/.+' 57 | ``` 58 | 59 | You should see output similar to the following: 60 | 61 | ```bash 62 | Verification for ghcr.io/kaito-project/aikit/llama2@sha256:d47fdba491a9a47ce4911539a77e0c0a12b2e14f5beed88cb8072924b02130b4 -- 63 | The following checks were performed on each of these signatures: 64 | - The cosign claims were validated 65 | - Existence of the claims in the transparency log was verified offline 66 | - The code-signing certificate was verified using trusted certificate authority certificates 67 | ... 68 | ``` 69 | -------------------------------------------------------------------------------- /website/docs/specs-finetune.md: -------------------------------------------------------------------------------- 1 | --- 2 | title: Fine Tuning API Specifications 3 | --- 4 | 5 | ## v1alpha1 6 | 7 | ```yaml 8 | #syntax=ghcr.io/kaito-project/aikit/aikit:latest 9 | apiVersion: # required. only v1alpha1 is supported at the moment 10 | baseModel: # required. any base model from Huggingface. for unsloth, see https://huggingface.co/unsloth for 4-bit pre-quantized models 11 | datasets: 12 | - source: # required. this can be a Huggingface dataset repo or a URL pointing to a JSON file 13 | type: # required. can be "alpaca". only alpaca is supported at the moment 14 | config: 15 | unsloth: 16 | packing: # optional. defaults to false. can make training 5x faster for short sequences. 17 | maxSeqLength: # optional. defaults to 2048 18 | loadIn4bit: # optional. defaults to true 19 | batchSize: # optional. defaults to 2 20 | gradientAccumulationSteps: # optional. defaults to 4 21 | warmupSteps: # optional. defaults to 10 22 | maxSteps: # optional. defaults to 60 23 | learningRate: # optional. defaults to 0.0002 24 | loggingSteps: # optional. defaults to 1 25 | optimizer: # optional. defaults to adamw_8bit 26 | weightDecay: # optional. defaults to 0.01 27 | lrSchedulerType: # optional. defaults to linear 28 | seed: # optional. defaults to 42 29 | output: 30 | quantize: # optional. defaults to q4_k_m. for unsloth, see https://github.com/unslothai/unsloth/wiki#saving-to-gguf for allowed quantization methods. 31 | name: # optional.
defaults to "aikit-model" 32 | ``` 33 | 34 | Example: 35 | 36 | ```yaml 37 | #syntax=ghcr.io/kaito-project/aikit/aikit:latest 38 | apiVersion: v1alpha1 39 | baseModel: unsloth/mistral-7b-instruct-v0.2-bnb-4bit 40 | datasets: 41 | - source: yahma/alpaca-cleaned 42 | type: alpaca 43 | config: 44 | unsloth: 45 | packing: false 46 | maxSeqLength: 2048 47 | loadIn4bit: true 48 | batchSize: 2 49 | gradientAccumulationSteps: 4 50 | warmupSteps: 10 51 | maxSteps: 60 52 | learningRate: 0.0002 53 | loggingSteps: 1 54 | optimizer: adamw_8bit 55 | weightDecay: 0.01 56 | lrSchedulerType: linear 57 | seed: 42 58 | output: 59 | quantize: q4_k_m 60 | name: model 61 | ``` 62 | -------------------------------------------------------------------------------- /website/docs/specs-inference.md: -------------------------------------------------------------------------------- 1 | --- 2 | title: Inference API Specifications 3 | --- 4 | 5 | ## v1alpha1 6 | 7 | ```yaml 8 | apiVersion: # required. only v1alpha1 is supported at the moment 9 | debug: # optional. if set to true, debug logs will be printed 10 | runtime: # optional. defaults to avx. can be "avx", "avx2", "avx512", "cuda" 11 | backends: # optional. list of additional backends. can be "llama-cpp" (default), "exllama2", "diffusers" 12 | models: # required. list of models to build 13 | - name: # required. name of the model 14 | source: # required. source of the model. can be a url or a local file 15 | sha256: # optional. sha256 hash of the model file 16 | promptTemplates: # optional. list of prompt templates for a model 17 | - name: # required. name of the template 18 | template: # required. template string 19 | config: # optional. list of config files 20 | ``` 21 | 22 | Example: 23 | 24 | ```yaml 25 | #syntax=ghcr.io/kaito-project/aikit/aikit:latest 26 | apiVersion: v1alpha1 27 | debug: true 28 | runtime: cuda 29 | models: 30 | - name: llama-2-7b-chat 31 | source: https://huggingface.co/TheBloke/Llama-2-7B-Chat-GGUF/resolve/main/llama-2-7b-chat.Q4_K_M.gguf 32 | sha256: "08a5566d61d7cb6b420c3e4387a39e0078e1f2fe5f055f3a03887385304d4bfa" 33 | promptTemplates: 34 | - name: "llama-2-7b-chat" 35 | template: | 36 | {{if eq .RoleName \"assistant\"}}{{.Content}}{{else}} 37 | [INST] 38 | {{if .SystemPrompt}}{{.SystemPrompt}}{{else if eq .RoleName \"system\"}}<<SYS>>{{.Content}}<</SYS>> 39 | 40 | {{else if .Content}}{{.Content}}{{end}} 41 | [/INST] 42 | {{end}} 43 | config: | 44 | - name: \"llama-2-7b-chat\" 45 | backend: \"llama\" 46 | parameters: 47 | top_k: 80 48 | temperature: 0.2 49 | top_p: 0.7 50 | model: \"llama-2-7b-chat.Q4_K_M.gguf\" 51 | context_size: 4096 52 | roles: 53 | function: 'Function Result:' 54 | assistant_function_call: 'Function Call:' 55 | assistant: 'Assistant:' 56 | user: 'User:' 57 | system: 'System:' 58 | template: 59 | chat_message: \"llama-2-7b-chat\" 60 | system_prompt: \"You are a helpful assistant, below is a conversation, please respond with the next message and do not ask follow-up questions\" 61 | ``` 62 | -------------------------------------------------------------------------------- /website/docs/vision.md: -------------------------------------------------------------------------------- 1 | --- 2 | title: Vision 3 | --- 4 | 5 | Vision is supported through [llama-cpp](llama-cpp.md) and [llava](https://llava.hliu.cc/). 6 | 7 | 8 | ## Example 9 | 10 | :::warning 11 | Please make sure to change syntax to `#syntax=ghcr.io/kaito-project/aikit/aikit:latest` in the examples below. 
12 | ::: 13 | 14 | https://github.com/kaito-project/aikit/blob/main/test/aikitfile-llava.yaml 15 | 16 | ## Demo 17 | 18 | [![Vision with LLaVA 19 | ](https://asciinema.org/a/626553.svg 'Vision with LLaVA')](https://asciinema.org/a/626553) 20 | -------------------------------------------------------------------------------- /website/docusaurus.config.js: -------------------------------------------------------------------------------- 1 | // @ts-check 2 | // `@type` JSDoc annotations allow editor autocompletion and type checking 3 | // (when paired with `@ts-check`). 4 | // There are various equivalent ways to declare your Docusaurus config. 5 | // See: https://docusaurus.io/docs/api/docusaurus-config 6 | 7 | import {themes as prismThemes} from 'prism-react-renderer'; 8 | 9 | /** @type {import('@docusaurus/types').Config} */ 10 | const config = { 11 | title: 'AIKit', 12 | tagline: 'Fine-tune, build, and deploy open-source LLMs easily!', 13 | favicon: 'img/favicon.ico', 14 | headTags: [ 15 | { 16 | tagName: "meta", 17 | attributes: { 18 | // Allow Algolia crawler to index the site 19 | // See https://www.algolia.com/doc/tools/crawler/getting-started/create-crawler/#verify-your-domain. 20 | name: "algolia-site-verification", 21 | content: "58101301D914B63C", 22 | } 23 | }, 24 | ], 25 | 26 | // Set the production url of your site here 27 | url: 'https://kaito-project.github.io', 28 | // Set the /<baseUrl>/ pathname under which your site is served 29 | // For GitHub pages deployment, it is often '/<projectName>/' 30 | baseUrl: '/aikit/docs/', 31 | 32 | // GitHub pages deployment config. 33 | // If you aren't using GitHub pages, you don't need these. 34 | organizationName: 'kaito-project', // Usually your GitHub org/user name. 35 | projectName: 'aikit', // Usually your repo name. 36 | 37 | onBrokenLinks: 'throw', // throw 38 | onBrokenMarkdownLinks: 'warn', 39 | 40 | // Even if you don't use internationalization, you can use this field to set 41 | // useful metadata like html lang. For example, if your site is Chinese, you 42 | // may want to replace "en" with "zh-Hans". 43 | i18n: { 44 | defaultLocale: 'en', 45 | locales: ['en'], 46 | }, 47 | 48 | presets: [ 49 | [ 50 | 'classic', 51 | /** @type {import('@docusaurus/preset-classic').Options} */ 52 | ({ 53 | docs: { 54 | routeBasePath: '/', 55 | sidebarPath: './sidebars.js', 56 | // Please change this to your repo. 57 | // Remove this to remove the "edit this page" links. 
58 | editUrl: 59 | 'https://github.com/kaito-project/aikit/blob/main/website/docs/', 60 | }, 61 | blog: false, 62 | theme: { 63 | customCss: './src/css/custom.css', 64 | }, 65 | }), 66 | ], 67 | ], 68 | 69 | themeConfig: 70 | /** @type {import('@docusaurus/preset-classic').ThemeConfig} */ 71 | ({ 72 | // Replace with your project's social card 73 | image: 'img/logo.png', 74 | navbar: { 75 | title: 'AIKit', 76 | logo: { 77 | alt: 'AIKit Logo', 78 | src: 'img/logo.svg', 79 | }, 80 | items: [ 81 | { 82 | href: 'https://github.com/kaito-project/aikit', 83 | position: 'right', 84 | className: 'header-github-link', 85 | 'aria-label': 'GitHub repository', 86 | }, 87 | ], 88 | }, 89 | footer: { 90 | style: 'dark', 91 | copyright: `Copyright © ${new Date().getFullYear()} Sertac Ozercan`, 92 | }, 93 | prism: { 94 | theme: prismThemes.github, 95 | darkTheme: prismThemes.dracula, 96 | additionalLanguages: ['bash', 'json', 'yaml'], 97 | }, 98 | colorMode: { 99 | defaultMode: 'light', 100 | disableSwitch: false, 101 | respectPrefersColorScheme: true, 102 | }, 103 | announcementBar: { 104 | id: 'announcementBar-1', // Increment on change 105 | content: `⭐️ If you like AIKit, please give it a star on <a target="_blank" rel="noopener noreferrer" href="https://github.com/kaito-project/aikit">GitHub</a>!</a>`, 106 | }, 107 | algolia: { 108 | appId: 'BWYV6PMJ5K', 109 | apiKey: 'e2cfa004b0a812062660e0039aca0bda', 110 | indexName: 'aikit-crawler', 111 | }, 112 | }), 113 | }; 114 | 115 | export default config; 116 | -------------------------------------------------------------------------------- /website/osv-scanner.toml: -------------------------------------------------------------------------------- 1 | [[IgnoredVulns]] 2 | id = "GHSA-pxg6-pf52-xh8x" 3 | reason = "Not applicable to core AIKit; used in static website generation only." 4 | 5 | [[IgnoredVulns]] 6 | id = "GHSA-76c9-3jph-rj3q" 7 | reason = "Not applicable to core AIKit; used in static website generation only." 8 | 9 | [[IgnoredVulns]] 10 | id = "GHSA-rhx6-c78j-4q9w" 11 | reason = "Not applicable to core AIKit; used in static website generation only." 12 | 13 | [[IgnoredVulns]] 14 | id = "GHSA-9wv6-86v2-598j" 15 | reason = "Not applicable to core AIKit; used in static website generation only." 16 | 17 | [[IgnoredVulns]] 18 | id = "GHSA-4v9v-hfq4-rm2v" 19 | reason = "Not applicable to core AIKit; used in static website generation only." 20 | 21 | [[IgnoredVulns]] 22 | id = "GHSA-9jgg-88mc-972h" 23 | reason = "Not applicable to core AIKit; used in static website generation only." 
24 | -------------------------------------------------------------------------------- /website/package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "website", 3 | "version": "0.0.0", 4 | "private": true, 5 | "scripts": { 6 | "docusaurus": "docusaurus", 7 | "start": "docusaurus start", 8 | "build": "docusaurus build", 9 | "swizzle": "docusaurus swizzle", 10 | "deploy": "docusaurus deploy", 11 | "clear": "docusaurus clear", 12 | "serve": "docusaurus serve", 13 | "write-translations": "docusaurus write-translations", 14 | "write-heading-ids": "docusaurus write-heading-ids" 15 | }, 16 | "dependencies": { 17 | "@docusaurus/core": "3.1.0", 18 | "@docusaurus/preset-classic": "3.1.0", 19 | "@mdx-js/react": "^3.0.0", 20 | "clsx": "^2.0.0", 21 | "prism-react-renderer": "^2.3.0", 22 | "react": "^18.0.0", 23 | "react-dom": "^18.0.0" 24 | }, 25 | "devDependencies": { 26 | "@docusaurus/module-type-aliases": "3.1.0", 27 | "@docusaurus/types": "3.1.0" 28 | }, 29 | "browserslist": { 30 | "production": [ 31 | ">0.5%", 32 | "not dead", 33 | "not op_mini all" 34 | ], 35 | "development": [ 36 | "last 3 chrome version", 37 | "last 3 firefox version", 38 | "last 5 safari version" 39 | ] 40 | }, 41 | "engines": { 42 | "node": ">=18.0" 43 | } 44 | } 45 | -------------------------------------------------------------------------------- /website/sidebars.js: -------------------------------------------------------------------------------- 1 | /** 2 | * Creating a sidebar enables you to: 3 | - create an ordered group of docs 4 | - render a sidebar for each doc of that group 5 | - provide next/previous navigation 6 | 7 | The sidebars can be generated from the filesystem, or explicitly defined here. 8 | 9 | Create as many sidebars as you want. 10 | */ 11 | 12 | // @ts-check 13 | 14 | /** @type {import('@docusaurus/plugin-content-docs').SidebarsConfig} */ 15 | const sidebars = { 16 | sidebar: [ 17 | { 18 | type: 'category', 19 | label: 'Getting Started', 20 | collapsed: false, 21 | items: [ 22 | 'intro', 23 | 'quick-start', 24 | 'premade-models', 25 | 'demo', 26 | ], 27 | }, 28 | { 29 | type: 'category', 30 | label: 'Features', 31 | collapsed: false, 32 | items: [ 33 | 'create-images', 34 | 'fine-tune', 35 | 'vision', 36 | 'gpu', 37 | 'kubernetes', 38 | 'security', 39 | ], 40 | }, 41 | { 42 | type: 'category', 43 | label: 'Specifications', 44 | collapsed: false, 45 | items: [ 46 | 'specs-inference', 47 | 'specs-finetune', 48 | ], 49 | }, 50 | { 51 | type: 'category', 52 | label: 'Inference Supported Backends', 53 | collapsed: false, 54 | items: [ 55 | 'llama-cpp', 56 | 'exllama2', 57 | 'diffusion', 58 | ], 59 | }, 60 | { 61 | type: 'category', 62 | label: 'Contributing', 63 | collapsed: false, 64 | items: [ 65 | 'architecture', 66 | 'release', 67 | ], 68 | }, 69 | ], 70 | }; 71 | 72 | export default sidebars; 73 | -------------------------------------------------------------------------------- /website/src/css/custom.css: -------------------------------------------------------------------------------- 1 | /** 2 | * Any CSS included here will be global. The classic template 3 | * bundles Infima by default. Infima is a CSS framework designed to 4 | * work well for content-centric websites. 5 | */ 6 | 7 | /* You can override the default Infima variables here. 
*/ 8 | :root { 9 | --ifm-color-primary: #2e8555; 10 | --ifm-color-primary-dark: #29784c; 11 | --ifm-color-primary-darker: #277148; 12 | --ifm-color-primary-darkest: #205d3b; 13 | --ifm-color-primary-light: #33925d; 14 | --ifm-color-primary-lighter: #359962; 15 | --ifm-color-primary-lightest: #3cad6e; 16 | --ifm-code-font-size: 95%; 17 | --docusaurus-highlighted-code-line-bg: rgba(0, 0, 0, 0.1); 18 | } 19 | 20 | /* For readability concerns, you should choose a lighter palette in dark mode. */ 21 | [data-theme='dark'] { 22 | --ifm-color-primary: #25c2a0; 23 | --ifm-color-primary-dark: #21af90; 24 | --ifm-color-primary-darker: #1fa588; 25 | --ifm-color-primary-darkest: #1a8870; 26 | --ifm-color-primary-light: #29d5b0; 27 | --ifm-color-primary-lighter: #32d8b4; 28 | --ifm-color-primary-lightest: #4fddbf; 29 | --docusaurus-highlighted-code-line-bg: rgba(0, 0, 0, 0.3); 30 | } 31 | 32 | .header-github-link::before { 33 | content: ''; 34 | width: 24px; 35 | height: 24px; 36 | display: flex; 37 | background-color: var(--ifm-navbar-link-color); 38 | mask-image: url("data:image/svg+xml,%3Csvg viewBox='0 0 24 24' xmlns='http://www.w3.org/2000/svg'%3E%3Cpath d='M12 .297c-6.63 0-12 5.373-12 12 0 5.303 3.438 9.8 8.205 11.385.6.113.82-.258.82-.577 0-.285-.01-1.04-.015-2.04-3.338.724-4.042-1.61-4.042-1.61C4.422 18.07 3.633 17.7 3.633 17.7c-1.087-.744.084-.729.084-.729 1.205.084 1.838 1.236 1.838 1.236 1.07 1.835 2.809 1.305 3.495.998.108-.776.417-1.305.76-1.605-2.665-.3-5.466-1.332-5.466-5.93 0-1.31.465-2.38 1.235-3.22-.135-.303-.54-1.523.105-3.176 0 0 1.005-.322 3.3 1.23.96-.267 1.98-.399 3-.405 1.02.006 2.04.138 3 .405 2.28-1.552 3.285-1.23 3.285-1.23.645 1.653.24 2.873.12 3.176.765.84 1.23 1.91 1.23 3.22 0 4.61-2.805 5.625-5.475 5.92.42.36.81 1.096.81 2.22 0 1.606-.015 2.896-.015 3.286 0 .315.21.69.825.57C20.565 22.092 24 17.592 24 12.297c0-6.627-5.373-12-12-12'/%3E%3C/svg%3E"); 39 | transition: background-color var(--ifm-transition-fast) var(--ifm-transition-timing-default); 40 | } 41 | 42 | .header-github-link:hover::before { 43 | background-color: var(--ifm-navbar-link-hover-color); 44 | } -------------------------------------------------------------------------------- /website/static/.nojekyll: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kaito-project/aikit/1a0aa94bb930508fb34e65a3bf76d4ad3b174642/website/static/.nojekyll -------------------------------------------------------------------------------- /website/static/img/architecture.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kaito-project/aikit/1a0aa94bb930508fb34e65a3bf76d4ad3b174642/website/static/img/architecture.png -------------------------------------------------------------------------------- /website/static/img/favicon.ico: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kaito-project/aikit/1a0aa94bb930508fb34e65a3bf76d4ad3b174642/website/static/img/favicon.ico -------------------------------------------------------------------------------- /website/static/img/logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kaito-project/aikit/1a0aa94bb930508fb34e65a3bf76d4ad3b174642/website/static/img/logo.png --------------------------------------------------------------------------------