├── .github ├── CODEOWNERS ├── ISSUE_TEMPLATE │ ├── bug-report.yml │ └── feature-request.yml ├── PULL_REQUEST_TEMPLATE.md ├── copilot-instructions.md ├── dependabot.yaml ├── semantic.yml └── workflows │ ├── codeql.yml │ ├── dependabot.yaml │ ├── dependency-review.yml │ ├── deploy-docs.yaml │ ├── lint.yaml │ ├── mirror-localai.yml │ ├── patch-models.yaml │ ├── pre-release.yaml │ ├── release-base.yaml │ ├── release-pr.yaml │ ├── release.yaml │ ├── scorecards.yml │ ├── test-docker-args.yaml │ ├── test-docker-gpu.yaml │ ├── test-docker.yaml │ ├── test-finetune.yaml │ ├── test-helm.yaml │ ├── test-kubernetes.yaml │ ├── test-podman-applesilicon.yaml │ ├── unit-test.yaml │ └── update-models.yaml ├── .gitignore ├── .golangci.yaml ├── .pre-commit-config.yaml ├── AGENTS.md ├── CONTRIBUTING.md ├── Dockerfile ├── Dockerfile.base ├── Dockerfile.base-applesilicon ├── LICENSE ├── Makefile ├── README.md ├── SECURITY.md ├── charts └── aikit │ ├── Chart.yaml │ ├── templates │ ├── NOTES.txt │ ├── _helpers.tpl │ ├── deployment.yaml │ ├── hpa.yaml │ ├── namespace-post-install.yaml │ └── service.yaml │ └── values.yaml ├── cmd └── frontend │ └── main.go ├── demo ├── demo.sh └── third_party │ └── demo-magic │ ├── README.md │ ├── demo-magic.sh │ └── license.txt ├── go.mod ├── go.sum ├── models ├── aikitfile.yaml ├── codestral-22b.yaml ├── flux-1-dev.yaml ├── gemma-2-2b-instruct.yaml ├── gpt-oss-120b.yaml ├── gpt-oss-20b.yaml ├── llama-3.1-8b-instruct.yaml ├── llama-3.2-1b-instruct.yaml ├── llama-3.2-3b-instruct.yaml ├── llama-3.3-70b-instruct.yaml ├── mixtral-8x7b-instruct.yaml ├── phi-4-14b-instruct.yaml └── qwq-32b.yaml ├── pkg ├── aikit │ └── config │ │ ├── finetune_specs.go │ │ ├── inference_specs.go │ │ ├── specs.go │ │ └── specs_test.go ├── aikit2llb │ ├── finetune │ │ └── convert.go │ └── inference │ │ ├── backend.go │ │ ├── backend_test.go │ │ ├── convert.go │ │ ├── diffusers.go │ │ ├── diffusers_test.go │ │ ├── download.go │ │ ├── exllama.go │ │ ├── exllama_test.go │ │ └── image.go ├── build │ ├── args.go │ ├── build.go │ └── build_test.go ├── finetune │ └── target_unsloth.py ├── utils │ ├── const.go │ ├── util.go │ └── util_test.go └── version │ └── version.go ├── scripts └── parse-models.sh ├── test ├── aikitfile-args.yaml ├── aikitfile-cpu-exllama2.yaml ├── aikitfile-dev.yaml ├── aikitfile-diffusers.yaml ├── aikitfile-exllama2-exl2.yaml ├── aikitfile-exllama2-gptq.yaml ├── aikitfile-flux-schnell.yaml ├── aikitfile-hf.yaml ├── aikitfile-llama-cpp.yaml ├── aikitfile-llama-cuda.yaml ├── aikitfile-llama.yaml ├── aikitfile-llava.yaml ├── aikitfile-oci.yaml ├── aikitfile-unsloth-custom.yaml └── aikitfile-unsloth.yaml └── website ├── .gitignore ├── README.md ├── babel.config.js ├── docs ├── architecture.md ├── create-images.md ├── demo.md ├── diffusion.md ├── exllama2.md ├── fine-tune.md ├── gpu.md ├── intro.md ├── kubernetes.md ├── llama-cpp.md ├── premade-models.md ├── quick-start.md ├── release.md ├── security.md ├── specs-finetune.md ├── specs-inference.md └── vision.md ├── docusaurus.config.js ├── osv-scanner.toml ├── package.json ├── sidebars.js ├── src └── css │ └── custom.css ├── static ├── .nojekyll └── img │ ├── architecture.png │ ├── favicon.ico │ ├── logo.png │ └── logo.svg └── yarn.lock /.github/CODEOWNERS: -------------------------------------------------------------------------------- 1 | * @sozercan 2 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/bug-report.yml: 
-------------------------------------------------------------------------------- 1 | name: Bug Report 2 | description: Report a bug in AIKit 3 | title: "[BUG] " 4 | labels: 5 | - "bug" 6 | body: 7 | - type: markdown 8 | attributes: 9 | value: | 10 | Please search to see if an issue already exists for your bug before continuing. 11 | > If you need to report a security issue please see https://github.com/kaito-project/aikit/security/policy instead. 12 | - type: textarea 13 | attributes: 14 | label: Expected Behavior 15 | description: Briefly describe what you expect to happen. 16 | - type: textarea 17 | attributes: 18 | label: Actual Behavior 19 | description: Briefly describe what is actually happening. 20 | - type: textarea 21 | attributes: 22 | label: Steps To Reproduce 23 | description: Detailed steps to reproduce the behavior. 24 | placeholder: | 25 | 1. In ... 26 | 2. With this config... 27 | 3. Run '...' 28 | 4. See error... 29 | - type: checkboxes 30 | id: idea 31 | attributes: 32 | label: "Are you willing to submit PRs to contribute to this bug fix?" 33 | options: 34 | - label: Yes, I am willing to implement it. 35 | - type: markdown 36 | attributes: 37 | value: | 38 | Thanks for taking the time to fill out a bug report! 39 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/feature-request.yml: -------------------------------------------------------------------------------- 1 | name: Request 2 | description: Request a new feature or propose an enhancement to AIKit 3 | title: "[REQ] <title>" 4 | labels: 5 | - "enhancement" 6 | body: 7 | - type: markdown 8 | attributes: 9 | value: | 10 | Please search to see if an issue already exists for your request before continuing. 11 | - type: dropdown 12 | attributes: 13 | label: What kind of request is this? 14 | multiple: false 15 | options: 16 | - New feature 17 | - Improvement of existing experience 18 | - Other 19 | - type: textarea 20 | attributes: 21 | label: What is your request or suggestion? 22 | placeholder: | 23 | e.g. I would like AIKit to add this <feature> so that I can use it in my <scenario>. 24 | e.g. When using AIKit the <current behavior> has this <limitation> and it would be better if it has this <improvement>. 25 | - type: checkboxes 26 | id: idea 27 | attributes: 28 | label: "Are you willing to submit PRs to contribute to this feature request?" 29 | options: 30 | - label: Yes, I am willing to implement it. 31 | - type: markdown 32 | attributes: 33 | value: | 34 | Thanks for taking the time to fill out a request! 
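The issue forms above are rendered by GitHub's web UI. For quick filing from a terminal, a minimal sketch using the GitHub CLI (the placeholder title/body text here is illustrative, not part of the templates):

    # file a bug report against the repository; assumes `gh` is installed and authenticated
    gh issue create --repo kaito-project/aikit \
      --title "[BUG] <title>" \
      --label bug \
      --body "Expected Behavior: ... / Actual Behavior: ... / Steps To Reproduce: ..."
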
-------------------------------------------------------------------------------- /.github/PULL_REQUEST_TEMPLATE.md: -------------------------------------------------------------------------------- 1 | **What this PR does / why we need it**: 2 | 3 | **Which issue(s) this PR fixes** *(optional, using `fixes #<issue number>(, fixes #<issue_number>, ...)` format, will close the issue(s) when the PR gets merged)*: 4 | Fixes # 5 | 6 | **Special notes for your reviewer**: 7 | -------------------------------------------------------------------------------- /.github/copilot-instructions.md: -------------------------------------------------------------------------------- 1 | ../AGENTS.md -------------------------------------------------------------------------------- /.github/dependabot.yaml: -------------------------------------------------------------------------------- 1 | version: 2 2 | updates: 3 | - package-ecosystem: "gomod" 4 | directory: "/" 5 | schedule: 6 | interval: "weekly" 7 | 8 | - package-ecosystem: "github-actions" 9 | directory: "/" 10 | schedule: 11 | interval: "weekly" 12 | groups: 13 | actions-all: 14 | patterns: 15 | - "*" 16 | 17 | - package-ecosystem: docker 18 | directory: / 19 | schedule: 20 | interval: "weekly" 21 | 22 | - package-ecosystem: docker 23 | directory: /charts/aikit 24 | schedule: 25 | interval: "weekly" 26 | -------------------------------------------------------------------------------- /.github/semantic.yml: -------------------------------------------------------------------------------- 1 | titleOnly: true 2 | types: 3 | - build 4 | - chore 5 | - ci 6 | - docs 7 | - feat 8 | - fix 9 | - perf 10 | - refactor 11 | - revert 12 | - style 13 | - test 14 | -------------------------------------------------------------------------------- /.github/workflows/codeql.yml: -------------------------------------------------------------------------------- 1 | # For most projects, this workflow file will not need changing; you simply need 2 | # to commit it to your repository. 3 | # 4 | # You may wish to alter this file to override the set of languages analyzed, 5 | # or to provide custom queries or build logic. 6 | # 7 | # ******** NOTE ******** 8 | # We have attempted to detect the languages in your repository. Please check 9 | # the `language` matrix defined below to confirm you have the correct set of 10 | # supported CodeQL languages. 
11 | #
12 | name: "CodeQL"
13 | 
14 | on:
15 |   push:
16 |     branches: ["main"]
17 |   pull_request:
18 |     # The branches below must be a subset of the branches above
19 |     branches: ["main"]
20 |   schedule:
21 |     - cron: "0 0 * * 1"
22 | 
23 | permissions:
24 |   contents: read
25 | 
26 | jobs:
27 |   analyze:
28 |     name: Analyze
29 |     runs-on: ubuntu-latest
30 |     permissions:
31 |       actions: read
32 |       contents: read
33 |       security-events: write
34 | 
35 |     strategy:
36 |       fail-fast: false
37 |       matrix:
38 |         language: ["go"]
39 |         # CodeQL supports [ $supported-codeql-languages ]
40 |         # Learn more about CodeQL language support at https://aka.ms/codeql-docs/language-support
41 | 
42 |     steps:
43 |       - name: Harden Runner
44 |         uses: step-security/harden-runner@f4a75cfd619ee5ce8d5b864b0d183aff3c69b55a # v2.13.1
45 |         with:
46 |           disable-sudo: true
47 |           egress-policy: audit
48 |           allowed-endpoints: >
49 |             *.github.com:443
50 |             github.com:443
51 |             *.githubusercontent.com:443
52 |             proxy.golang.org:443
53 |             storage.googleapis.com:443
54 |             sum.golang.org:443
55 | 
56 |       - name: Checkout repository
57 |         uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0
58 | 
59 |       - uses: actions/setup-go@44694675825211faa026b3c33043df3e48a5fa00 # v6.0.0
60 |         with:
61 |           go-version: "1.24"
62 |           check-latest: true
63 | 
64 |       # Initializes the CodeQL tools for scanning.
65 |       - name: Initialize CodeQL
66 |         uses: github/codeql-action/init@192325c86100d080feab897ff886c34abd4c83a3 # v3.29.5
67 |         with:
68 |           languages: ${{ matrix.language }}
69 |           # If you wish to specify custom queries, you can do so here or in a config file.
70 |           # By default, queries listed here will override any specified in a config file.
71 |           # Prefix the list here with "+" to use these queries and those in the config file.
72 | 
73 |       # Autobuild attempts to build any compiled languages (C/C++, C#, or Java).
74 |       # If this step fails, then you should remove it and run the build manually (see below)
75 |       - name: Autobuild
76 |         uses: github/codeql-action/autobuild@192325c86100d080feab897ff886c34abd4c83a3 # v3.29.5
77 | 
78 |       # ℹ️ Command-line programs to run using the OS shell.
79 |       # 📚 See https://docs.github.com/en/actions/using-workflows/workflow-syntax-for-github-actions#jobsjob_idstepsrun
80 | 
81 |       # If the Autobuild fails above, remove it and uncomment the following three lines,
82 |       # then modify them (or add more) to build your code; refer to the EXAMPLE below for guidance.
83 | 84 | # - run: | 85 | # echo "Run, Build Application using script" 86 | # ./location_of_script_within_repo/buildscript.sh 87 | 88 | - name: Perform CodeQL Analysis 89 | uses: github/codeql-action/analyze@192325c86100d080feab897ff886c34abd4c83a3 # v3.29.5 90 | with: 91 | category: "/language:${{matrix.language}}" 92 | -------------------------------------------------------------------------------- /.github/workflows/dependabot.yaml: -------------------------------------------------------------------------------- 1 | name: Dependabot auto-merge 2 | on: pull_request 3 | 4 | permissions: 5 | contents: read 6 | 7 | jobs: 8 | dependabot: 9 | permissions: 10 | contents: write 11 | pull-requests: write 12 | runs-on: ubuntu-latest 13 | if: github.event.pull_request.user.login == 'dependabot[bot]' && github.repository == 'kaito-project/aikit' 14 | steps: 15 | - name: Dependabot metadata 16 | id: metadata 17 | uses: dependabot/fetch-metadata@08eff52bf64351f401fb50d4972fa95b9f2c2d1b # v1.3.1 18 | with: 19 | github-token: "${{ secrets.GITHUB_TOKEN }}" 20 | - name: Enable auto-merge for Dependabot PRs 21 | run: gh pr merge --auto --merge "$PR_URL" 22 | env: 23 | PR_URL: ${{github.event.pull_request.html_url}} 24 | GH_TOKEN: ${{secrets.GITHUB_TOKEN}} 25 | -------------------------------------------------------------------------------- /.github/workflows/dependency-review.yml: -------------------------------------------------------------------------------- 1 | # Dependency Review Action 2 | # 3 | # This Action will scan dependency manifest files that change as part of a Pull Request, 4 | # surfacing known-vulnerable versions of the packages declared or updated in the PR. 5 | # Once installed, if the workflow run is marked as required, 6 | # PRs introducing known-vulnerable packages will be blocked from merging. 
7 | # 8 | # Source repository: https://github.com/actions/dependency-review-action 9 | name: 'Dependency Review' 10 | on: [pull_request] 11 | 12 | permissions: 13 | contents: read 14 | 15 | jobs: 16 | dependency-review: 17 | runs-on: ubuntu-latest 18 | steps: 19 | - name: Harden Runner 20 | uses: step-security/harden-runner@f4a75cfd619ee5ce8d5b864b0d183aff3c69b55a # v2.13.1 21 | with: 22 | disable-sudo: true 23 | egress-policy: audit 24 | allowed-endpoints: > 25 | api.github.com:443 26 | github.com:443 27 | *.githubusercontent.com:443 28 | api.securityscorecards.dev:443 29 | api.deps.dev:443 30 | 31 | - name: 'Checkout Repository' 32 | uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0 33 | - name: 'Dependency Review' 34 | uses: actions/dependency-review-action@595b5aeba73380359d98a5e087f648dbb0edce1b # v4.7.3 35 | -------------------------------------------------------------------------------- /.github/workflows/deploy-docs.yaml: -------------------------------------------------------------------------------- 1 | name: deploy-docs 2 | on: 3 | push: 4 | branches: 5 | - main 6 | paths: 7 | - '.github/workflows/deploy-docs.yaml' 8 | - 'website/**' 9 | pull_request: 10 | branches: 11 | - main 12 | paths: 13 | - '.github/workflows/deploy-docs.yaml' 14 | - 'website/**' 15 | 16 | permissions: 17 | contents: read 18 | 19 | jobs: 20 | deploy: 21 | name: Generate docs website to GitHub Pages 22 | runs-on: ubuntu-latest 23 | permissions: 24 | contents: write 25 | defaults: 26 | run: 27 | working-directory: website 28 | steps: 29 | - uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0 30 | 31 | - name: Harden Runner 32 | uses: step-security/harden-runner@f4a75cfd619ee5ce8d5b864b0d183aff3c69b55a 33 | with: 34 | disable-sudo: true 35 | egress-policy: audit 36 | allowed-endpoints: > 37 | github.com:443 38 | registry.yarnpkg.com:443 39 | *.githubusercontent.com:443 40 | *.blob.core.windows.net:443 41 | 42 | - name: Setup Node 43 | uses: actions/setup-node@a0853c24544627f65ddf259abe73b1d18a591444 # v5.0.0 44 | with: 45 | node-version: 20.x 46 | 47 | - name: Get yarn cache 48 | id: yarn-cache 49 | run: echo "dir=$(yarn cache dir)" > $GITHUB_OUTPUT 50 | 51 | - name: Cache dependencies 52 | uses: actions/cache@0400d5f644dc74513175e3cd8d07132dd4860809 # v4.2.4 53 | with: 54 | path: ${{ steps.yarn-cache.outputs.dir }} 55 | key: ${{ runner.os }}-website-${{ hashFiles('**/yarn.lock') }} 56 | restore-keys: | 57 | ${{ runner.os }}-website- 58 | 59 | - run: yarn install --frozen-lockfile 60 | - run: yarn build 61 | 62 | - name: Deploy to GitHub Pages 63 | if: github.ref == 'refs/heads/main' && github.event_name == 'push' && github.repository == 'kaito-project/aikit' 64 | uses: peaceiris/actions-gh-pages@4f9cc6602d3f66b9c108549d475ec49e8ef4d45e # v4.0.0 65 | with: 66 | github_token: ${{ secrets.GITHUB_TOKEN }} 67 | publish_dir: ./website/build 68 | destination_dir: ./docs 69 | -------------------------------------------------------------------------------- /.github/workflows/lint.yaml: -------------------------------------------------------------------------------- 1 | name: lint 2 | 3 | on: 4 | push: 5 | paths-ignore: 6 | - '**.md' 7 | - 'website/**' 8 | pull_request: 9 | paths-ignore: 10 | - '**.md' 11 | - 'website/**' 12 | 13 | permissions: read-all 14 | 15 | concurrency: 16 | group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }} 17 | cancel-in-progress: true 18 | 19 | jobs: 20 | lint: 21 | runs-on: ubuntu-latest-16-cores 22 | steps: 23 | - 
name: Harden Runner
24 |         uses: step-security/harden-runner@f4a75cfd619ee5ce8d5b864b0d183aff3c69b55a # v2.13.1
25 |         with:
26 |           disable-sudo: true
27 |           egress-policy: audit
28 |           allowed-endpoints: >
29 |             api.github.com:443
30 |             github.com:443
31 |             *.githubusercontent.com:443
32 |             proxy.golang.org:443
33 |             storage.googleapis.com:443
34 |             golangci-lint.run:443
35 | 
36 |       - uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0
37 | 
38 |       - uses: actions/setup-go@44694675825211faa026b3c33043df3e48a5fa00 # v6.0.0
39 |         with:
40 |           go-version: "1.24"
41 |           check-latest: true
42 | 
43 |       - name: lint
44 |         uses: golangci/golangci-lint-action@4afd733a84b1f43292c63897423277bb7f4313a9 # v8.0.0
45 |         with:
46 |           version: v2.1.6
47 | 
--------------------------------------------------------------------------------
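The workflow above pins golangci-lint v2.1.6, and the repository ships a .golangci.yaml that the linter picks up automatically. Reproducing the same check locally before pushing is a minimal sketch, assuming Go 1.24+ is on PATH and using golangci-lint's documented install script:

    # install the pinned linter version, then run it against the module root
    curl -sSfL https://raw.githubusercontent.com/golangci/golangci-lint/master/install.sh \
      | sh -s -- -b "$(go env GOPATH)/bin" v2.1.6
    "$(go env GOPATH)/bin/golangci-lint" run
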
 /.github/workflows/mirror-localai.yml: --------------------------------------------------------------------------------
1 | name: Mirror LocalAI Binary to GHCR
2 | 
3 | on:
4 |   workflow_dispatch:
5 |     inputs:
6 |       version:
7 |         description: "LocalAI version tag (e.g. v3.4.0)"
8 |         required: true
9 |         default: v3.4.0
10 | 
11 | permissions:
12 |   contents: read
13 |   actions: read
14 |   packages: write
15 | 
16 | jobs:
17 |   mirror:
18 |     runs-on: ubuntu-latest
19 |     strategy:
20 |       fail-fast: false
21 |       matrix:
22 |         arch: [linux-amd64, linux-arm64]
23 |     env:
24 |       ORAS_VERSION: v1.2.0
25 |     steps:
26 |       - name: Compute variables
27 |         id: vars
28 |         run: |
29 |           VERSION="${{ github.event.inputs.version }}"
30 |           ARCH_INPUT="${{ matrix.arch }}"
31 |           # Normalize arch for tag suffix (linux-amd64 -> amd64, linux-arm64 -> arm64)
32 |           case "${ARCH_INPUT}" in
33 |             linux-amd64) ARCH_TAG=amd64 ;;
34 |             linux-arm64) ARCH_TAG=arm64 ;;
35 |             *) echo "Unsupported arch: ${ARCH_INPUT}"; exit 1 ;;
36 |           esac
37 |           ASSET_NAME="local-ai-${VERSION}-${ARCH_INPUT}"
38 |           UPSTREAM_URL="https://github.com/mudler/LocalAI/releases/download/${VERSION}/${ASSET_NAME}"
39 |           TARGET_TAG="${VERSION}-${ARCH_TAG}"
40 |           echo "version=${VERSION}" >> $GITHUB_OUTPUT
41 |           echo "arch=${ARCH_INPUT}" >> $GITHUB_OUTPUT
42 |           echo "arch_tag=${ARCH_TAG}" >> $GITHUB_OUTPUT
43 |           echo "asset=${ASSET_NAME}" >> $GITHUB_OUTPUT
44 |           echo "url=${UPSTREAM_URL}" >> $GITHUB_OUTPUT
45 |           echo "tag=${TARGET_TAG}" >> $GITHUB_OUTPUT
46 | 
47 |       - name: Show plan
48 |         run: |
49 |           echo "Mirroring ${{ steps.vars.outputs.url }} -> ghcr.io/kaito-project/aikit/localai:${{ steps.vars.outputs.tag }}"
50 | 
51 |       - name: Install ORAS
52 |         run: |
53 |           curl -sSL https://github.com/oras-project/oras/releases/download/${ORAS_VERSION}/oras_${ORAS_VERSION#v}_linux_amd64.tar.gz | sudo tar -xz -C /usr/local/bin oras
54 |           oras version
55 | 
56 |       - name: Download LocalAI binary
57 |         run: |
58 |           curl -fL "${{ steps.vars.outputs.url }}" -o local-ai
59 |           chmod +x local-ai
60 |           echo "Downloaded binary size:" $(stat -c%s local-ai) "bytes"
61 | 
62 |       - name: Login to GHCR
63 |         run: |
64 |           echo "${{ secrets.GITHUB_TOKEN }}" | oras login ghcr.io -u "${{ github.actor }}" --password-stdin
65 | 
66 |       - name: Push OCI artifact
67 |         run: |
68 |           oras push ghcr.io/kaito-project/aikit/localai:${{ steps.vars.outputs.tag }} \
69 |             --annotation org.opencontainers.image.source=https://github.com/${{ github.repository }} \
70 |             --annotation org.opencontainers.image.title=local-ai \
71 |             --annotation org.opencontainers.image.version=${{ steps.vars.outputs.version }} \
72 |             local-ai:application/vnd.localai.binary.layer.v1+octet-stream
73 | 
74 |       - name: Summary
75 |         run: |
76 |           echo "Mirrored LocalAI ${{ steps.vars.outputs.version }} (${{ steps.vars.outputs.arch }}) to ghcr.io/kaito-project/aikit/localai:${{ steps.vars.outputs.tag }}" >> $GITHUB_STEP_SUMMARY
77 | 
--------------------------------------------------------------------------------
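Once mirrored, the binary can be fetched back from GHCR without touching the upstream GitHub release. A minimal consumer-side sketch, assuming ORAS is installed locally and the default input above (v3.4.0, amd64 leg of the matrix); ORAS restores the file name recorded at push time:

    # pull the single-layer artifact and make the binary executable
    oras pull ghcr.io/kaito-project/aikit/localai:v3.4.0-amd64
    chmod +x local-ai
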
 /.github/workflows/patch-models.yaml: --------------------------------------------------------------------------------
1 | name: patch-models
2 | on:
3 |   # patch weekly
4 |   schedule:
5 |     - cron: "0 0 * * 0"
6 |   workflow_dispatch:
7 | 
8 | permissions:
9 |   contents: read
10 | 
11 | jobs:
12 |   patch-models:
13 |     permissions:
14 |       contents: read
15 |       packages: write
16 |       id-token: write
17 |     runs-on: ubuntu-latest-16-cores
18 |     timeout-minutes: 240
19 |     strategy:
20 |       fail-fast: false
21 |       matrix:
22 |         images:
23 |           - ghcr.io/kaito-project/aikit/llama3.1:8b
24 |           - ghcr.io/kaito-project/aikit/llama3.3:70b
25 |           - ghcr.io/kaito-project/aikit/llama3.2:1b
26 |           - ghcr.io/kaito-project/aikit/llama3.2:3b
27 |           - ghcr.io/kaito-project/aikit/mixtral:8x7b
28 |           - ghcr.io/kaito-project/aikit/phi3.5:3.8b
29 |           - ghcr.io/kaito-project/aikit/gemma2:2b
30 |           - ghcr.io/kaito-project/aikit/codestral:22b
31 |           - ghcr.io/kaito-project/aikit/flux1:dev
32 |     steps:
33 |       - name: Harden Runner
34 |         uses: step-security/harden-runner@f4a75cfd619ee5ce8d5b864b0d183aff3c69b55a # v2.13.1
35 |         with:
36 |           egress-policy: audit
37 |           allowed-endpoints: >
38 |             api.github.com:443
39 |             auth.docker.io:443
40 |             fulcio.sigstore.dev:443
41 |             ghcr.io:443
42 |             github.com:443
43 |             *.githubusercontent.com:443
44 |             proxy.golang.org:443
45 |             registry-1.docker.io:443
46 |             rekor.sigstore.dev:443
47 |             storage.googleapis.com:443
48 |             tuf-repo-cdn.sigstore.dev:443
49 |             *.ubuntu.com:80
50 |             *.blob.core.windows.net:443
51 | 
52 |       - name: Set up Docker Buildx
53 |         uses: docker/setup-buildx-action@e468171a9de216ec08956ac3ada2f0791b6bd435 # v3.11.1
54 | 
55 |       - name: Login to GHCR
56 |         uses: docker/login-action@184bdaa0721073962dff0199f1fb9940f07167d1 # v3.5.0
57 |         with:
58 |           registry: ghcr.io
59 |           username: ${{ github.actor }}
60 |           password: ${{ secrets.GITHUB_TOKEN }}
61 | 
62 |       - name: Download Trivy
63 |         run: |
64 |           TRIVY_VERSION=$(
65 |             curl --silent "https://api.github.com/repos/aquasecurity/trivy/releases/latest" | \
66 |             grep '"tag_name":' | \
67 |             sed -E 's/.*"v([^"]+)".*/\1/'
68 |           )
69 |           wget https://github.com/aquasecurity/trivy/releases/download/v${TRIVY_VERSION}/trivy_${TRIVY_VERSION}_Linux-64bit.tar.gz
70 |           tar zxvf trivy_${TRIVY_VERSION}_Linux-64bit.tar.gz
71 |           mv trivy /usr/local/bin
72 |           rm trivy_${TRIVY_VERSION}_Linux-64bit.tar.gz
73 | 
74 |       - name: Download retry
75 |         run: |
76 |           wget https://github.com/joshdk/retry/releases/download/v${VERSION}/retry-linux-amd64.tar.gz
77 |           tar -xf retry-linux-amd64.tar.gz
78 |           sudo install retry /usr/bin/retry
79 |         env:
80 |           VERSION: 1.4.0
81 | 
82 |       - name: Scan with Trivy
83 |         run: |
84 |           retry -attempts ${ATTEMPTS} -max-time ${MAX_TIME} trivy image --pkg-types os --exit-code 0 --format json --output report.json --timeout ${TRIVY_TIMEOUT} --ignore-unfixed ${{ matrix.images }}
85 |         env:
86 |           ATTEMPTS: 25
87 |           MAX_TIME: 0
88 |           TRIVY_TIMEOUT: 60m
89 | 
90 |       - name: Check vulnerability count
91 |         id: vuln_count
92 |         run: |
93 |           cat report.json | jq
94 |           vuln_count=$(jq '.Results[0].Vulnerabilities | length' report.json)
95 |           echo "vuln_count=$vuln_count" >> $GITHUB_OUTPUT
96 | 
97 |       - name: Get image tag
98 |         run: |
99 |           image_tag=$(echo ${{ matrix.images }} | cut -d':' -f2)
100 |           echo $image_tag
101 |           echo "image_tag=$image_tag" >> $GITHUB_ENV
102 | 
103 |       - name: Copa Action
104 |         if: steps.vuln_count.outputs.vuln_count != '0'
105 |         id: copa
106 |         uses: project-copacetic/copa-action@3843e22efdca421adb37aa8dec103a0f1db68544 # v1.2.1
107 |         with:
108 |           image: ${{ matrix.images }}
109 |           image-report: 'report.json'
110 |           patched-tag: ${{ env.image_tag }}
111 |           timeout: 30m
112 | 
113 |       - name: Install Cosign
114 |         if: steps.copa.conclusion == 'success'
115 |         uses: sigstore/cosign-installer@d7543c93d881b35a8faa02e8e3605f69b7a1ce62 # v3.10.0
116 | 
117 |       - name: Docker Push Patched Image
118 |         id: push
119 |         if: steps.copa.conclusion == 'success'
120 |         run: |
121 |           docker tag ${{ steps.copa.outputs.patched-image }} ${{ matrix.images }}
122 |           docker images
123 |           docker push ${{ matrix.images }}
124 |           echo "DIGEST=$(cosign triangulate ${{ matrix.images }} --type digest)" >> $GITHUB_ENV
125 | 
126 |       - name: Sign the images with GitHub OIDC Token
127 |         id: sign
128 |         if: steps.push.conclusion == 'success'
129 |         run: cosign sign --yes ${DIGEST}
130 | 
131 |       - name: Verify image signature
132 |         if: steps.sign.conclusion == 'success'
133 |         run: |
134 |           cosign verify ${DIGEST} \
135 |             --certificate-oidc-issuer https://token.actions.githubusercontent.com \
136 |             --certificate-identity-regexp 'https://github\.com/kaito-project/aikit/\.github/workflows/.+'
137 | 
--------------------------------------------------------------------------------
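The same scan the workflow runs can be replayed locally to confirm a patched tag is clean. A minimal sketch, assuming Trivy is installed and using one of the matrix images above with the workflow's own flags:

    # re-scan OS packages only, ignoring vulnerabilities that have no available fix
    trivy image --pkg-types os --ignore-unfixed ghcr.io/kaito-project/aikit/llama3.2:1b
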
 /.github/workflows/pre-release.yaml: --------------------------------------------------------------------------------
1 | name: pre-release
2 | 
3 | on:
4 |   workflow_dispatch: # used for testing
5 |   push:
6 |     branches:
7 |       - main
8 | 
9 | permissions:
10 |   contents: read
11 | 
12 | jobs:
13 |   pre-release:
14 |     permissions:
15 |       contents: write
16 |       packages: write
17 |       id-token: write
18 |     runs-on: ubuntu-latest-16-cores
19 |     timeout-minutes: 360
20 |     steps:
21 |       - name: Harden Runner
22 |         uses: step-security/harden-runner@f4a75cfd619ee5ce8d5b864b0d183aff3c69b55a # v2.13.1
23 |         with:
24 |           egress-policy: audit
25 |           allowed-endpoints: >
26 |             auth.docker.io:443
27 |             fulcio.sigstore.dev:443
28 |             ghcr.io:443
29 |             github.com:443
30 |             *.githubusercontent.com:443
31 |             production.cloudflare.docker.com:443
32 |             proxy.golang.org:443
33 |             registry-1.docker.io:443
34 |             rekor.sigstore.dev:443
35 |             storage.googleapis.com:443
36 |             tuf-repo-cdn.sigstore.dev:443
37 |             sum.golang.org:443
38 | 
39 |       - uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0
40 |         with:
41 |           fetch-tags: true
42 |           fetch-depth: 0
43 | 
44 |       - name: Install Cosign
45 |         uses: sigstore/cosign-installer@d7543c93d881b35a8faa02e8e3605f69b7a1ce62 # v3.10.0
46 | 
47 |       - name: Set up Docker Buildx
48 |         uses: docker/setup-buildx-action@e468171a9de216ec08956ac3ada2f0791b6bd435 # v3.11.1
49 | 
50 |       - name: Login to GHCR
51 |         uses: docker/login-action@184bdaa0721073962dff0199f1fb9940f07167d1 # v3.5.0
52 |         with:
53 |           registry: ghcr.io
54 |           username: ${{ github.actor }}
55 |           password: ${{ secrets.GITHUB_TOKEN }}
56 | 
57 |       - name: Set LDFLAGS
58 |         run: |
59 |           set -x
60 |           GIT_COMMIT=$(git rev-list --abbrev-commit --tags --max-count=1)
61 |           GIT_TAG=$(git describe --abbrev=0 --tags ${GIT_COMMIT} 2>/dev/null)
62 |           echo LDFLAGS="-X github.com/kaito-project/aikit/pkg/version.Version=${GIT_TAG}" >> $GITHUB_ENV
63 | 
64 |       - name: Build and push
65 |         uses: docker/build-push-action@263435318d21b8e681c14492fe198d362a7d2c83 # v6.18.0
66 |         id: build-and-push
67 |         with:
68 |           push: true
69 |           tags: ghcr.io/kaito-project/aikit/aikit:dev
70 |           cache-from: type=gha,scope=aikit
71 |           cache-to: type=gha,scope=aikit,mode=max
72 |           sbom: true
73 |           provenance: true
74 |           build-args: |
75 |             LDFLAGS=${{ env.LDFLAGS }}
76 |           platforms: linux/amd64,linux/arm64
77 |         env:
78 |           LDFLAGS: ${{ env.LDFLAGS }}
79 | 
80 |       - name: Sign the images with GitHub OIDC Token
81 |         env:
82 |           DIGEST: ${{ steps.build-and-push.outputs.digest }}
83 |         run: cosign sign --yes "ghcr.io/kaito-project/aikit/aikit@${DIGEST}"
84 | 
85 |       - name: Verify image signature
86 |         env:
87 |           DIGEST: ${{ steps.build-and-push.outputs.digest }}
88 |         run: |
89 |           cosign verify ghcr.io/kaito-project/aikit/aikit@${DIGEST} \
90 |             --certificate-oidc-issuer https://token.actions.githubusercontent.com \
91 |             --certificate-identity https://github.com/kaito-project/aikit/.github/workflows/pre-release.yaml@refs/heads/main
92 | 
--------------------------------------------------------------------------------
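Consumers of the :dev image can check the signature produced above without cloning the repository. A minimal sketch, assuming cosign is installed; it mirrors the workflow's own keyless verify step against the GitHub OIDC issuer:

    cosign verify ghcr.io/kaito-project/aikit/aikit:dev \
      --certificate-oidc-issuer https://token.actions.githubusercontent.com \
      --certificate-identity https://github.com/kaito-project/aikit/.github/workflows/pre-release.yaml@refs/heads/main
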
 /.github/workflows/release-base.yaml: --------------------------------------------------------------------------------
1 | name: release-base
2 | 
3 | on:
4 |   schedule:
5 |     - cron: "0 0 * * 0"
6 |   workflow_dispatch: # used for testing
7 | 
8 | permissions:
9 |   contents: read
10 | 
11 | jobs:
12 |   release-base:
13 |     permissions:
14 |       contents: write
15 |       packages: write
16 |       id-token: write
17 |     runs-on: ubuntu-latest-16-cores
18 |     timeout-minutes: 360
19 |     strategy:
20 |       fail-fast: true
21 |       matrix:
22 |         runtime:
23 |           - base
24 |           - applesilicon
25 |     steps:
26 |       - name: Harden Runner
27 |         uses: step-security/harden-runner@f4a75cfd619ee5ce8d5b864b0d183aff3c69b55a # v2.13.1
28 |         with:
29 |           egress-policy: audit
30 | 
31 |       - uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0
32 |         with:
33 |           fetch-tags: true
34 |           fetch-depth: 0
35 | 
36 |       - name: Install Cosign
37 |         uses: sigstore/cosign-installer@d7543c93d881b35a8faa02e8e3605f69b7a1ce62 # v3.10.0
38 | 
39 |       - name: Set up Docker Buildx
40 |         uses: docker/setup-buildx-action@e468171a9de216ec08956ac3ada2f0791b6bd435 # v3.11.1
41 | 
42 |       - name: Login to GHCR
43 |         uses: docker/login-action@184bdaa0721073962dff0199f1fb9940f07167d1 # v3.5.0
44 |         with:
45 |           registry: ghcr.io
46 |           username: ${{ github.actor }}
47 |           password: ${{ secrets.GITHUB_TOKEN }}
48 | 
49 |       - name: Set runtime variables for matrix
50 |         run: |
51 |           if [ ${{ matrix.runtime }} == "base" ]; then
52 |             echo PLATFORMS="linux/amd64,linux/arm64" >> $GITHUB_ENV
53 |             echo FILE="Dockerfile.base" >> $GITHUB_ENV
54 |             echo TAG="ghcr.io/kaito-project/aikit/base:latest" >> $GITHUB_ENV
55 |             echo CACHE_FROM="type=gha,scope=base" >> $GITHUB_ENV
56 |             echo CACHE_TO="type=gha,scope=base,mode=max" >> $GITHUB_ENV
57 |           elif [ ${{ matrix.runtime }} == "applesilicon" ]; then
58 |             echo PLATFORMS="linux/arm64" >> $GITHUB_ENV
59 |             echo FILE="Dockerfile.base-applesilicon" >> $GITHUB_ENV
60 |             echo TAG="ghcr.io/kaito-project/aikit/applesilicon/base:latest" >> $GITHUB_ENV
61 |             echo CACHE_FROM="type=gha,scope=base-applesilicon" >> $GITHUB_ENV
62 |             echo CACHE_TO="type=gha,scope=base-applesilicon,mode=max" >> $GITHUB_ENV
63 |           fi
64 | 
65 |       - name: Build and push
66 |         uses: docker/build-push-action@263435318d21b8e681c14492fe198d362a7d2c83 # v6.18.0
67 |         id: build-and-push
68 |         with:
69 |           push: true
70 |           sbom: true
71 |           provenance: true
72 |           tags: ${{ env.TAG }}
73 |           cache-from: ${{ env.CACHE_FROM }}
74 |           cache-to: ${{ env.CACHE_TO }}
75 |           platforms: ${{ env.PLATFORMS }}
76 |           file: ${{ env.FILE }}
77 | 
78 |       - name: Sign the images with GitHub OIDC Token
79 |         env:
80 |           DIGEST: ${{ steps.build-and-push.outputs.digest }}
81 |         run: cosign sign --yes "${TAG%%:*}@${DIGEST}"
82 | 
83 |       - name: Verify image signature
84 |         env:
85 |           DIGEST: ${{ steps.build-and-push.outputs.digest }}
86 |         run: |
87 |           cosign verify "${TAG%%:*}@${DIGEST}" \
88 |             --certificate-oidc-issuer https://token.actions.githubusercontent.com \
89 |             --certificate-identity https://github.com/kaito-project/aikit/.github/workflows/release-base.yaml@refs/heads/main
90 | 
-------------------------------------------------------------------------------- /.github/workflows/release-pr.yaml: --------------------------------------------------------------------------------
1 | name: release-pr
2 | on:
3 |   push:
4 |     tags:
5 |       - 'v[0-9]+.[0-9]+.0' # run this workflow when a new minor version is published
6 |   workflow_dispatch:
7 |     inputs:
8 |       release_version:
9 |         description: 'Which version are we creating a release pull request for?'
10 |         required: true
11 | 
12 | permissions:
13 |   contents: read
14 | 
15 | jobs:
16 |   create-release-pull-request:
17 |     permissions:
18 |       contents: write
19 |       pull-requests: write
20 |       issues: write
21 |     runs-on: ubuntu-latest
22 |     steps:
23 |       - name: Harden Runner
24 |         uses: step-security/harden-runner@f4a75cfd619ee5ce8d5b864b0d183aff3c69b55a # v2.13.1
25 |         with:
26 |           egress-policy: audit
27 | 
28 |       - name: Set release version and target branch for main branch
29 |         if: github.event_name == 'push'
30 |         run: |
31 |           TAG="${GITHUB_REF#refs/tags/v}"
32 |           MAJOR_VERSION="$(echo "${TAG}" | cut -d '.' -f1)"
33 |           echo "MAJOR_VERSION=${MAJOR_VERSION}" >> ${GITHUB_ENV}
34 |           MINOR_VERSION="$(echo "${TAG}" | cut -d '.' -f2)"
35 |           echo "MINOR_VERSION=${MINOR_VERSION}" >> ${GITHUB_ENV}
36 | 
37 |           echo "NEWVERSION=v${MAJOR_VERSION}.${MINOR_VERSION}.0" >> ${GITHUB_ENV}
38 |           # push is always being merged to the main branch
39 |           echo "TARGET_BRANCH=main" >> ${GITHUB_ENV}
40 |           echo "TAG=${TAG}" >> ${GITHUB_ENV}
41 | 
42 |       - name: Set release version and target branch from input
43 |         if: github.event_name == 'workflow_dispatch'
44 |         run: |
45 |           NEWVERSION="${{ github.event.inputs.release_version }}"
46 |           echo "${NEWVERSION}" | grep -E '^v[0-9]+\.[0-9]+\.[0-9]+(-(beta|rc)\.[0-9]+)?$' || (echo "release_version should be in the format vX.Y.Z, vX.Y.Z-beta.A, or vX.Y.Z-rc.B" && exit 1)
47 | 
48 |           echo "NEWVERSION=${NEWVERSION}" >> ${GITHUB_ENV}
49 |           echo "TAG=${NEWVERSION}" >> ${GITHUB_ENV}
50 |           MAJOR_VERSION="$(echo "${NEWVERSION}" | cut -d '.' -f1 | tr -d 'v')"
51 |           MINOR_VERSION="$(echo "${NEWVERSION}" | cut -d '.'
-f2)" 52 | 53 | # non-beta releases should always be merged to release branches 54 | echo "TARGET_BRANCH=release-${MAJOR_VERSION}.${MINOR_VERSION}" >> ${GITHUB_ENV} 55 | 56 | # beta releases should always be merged to main 57 | if [[ "${NEWVERSION}" =~ "beta" ]]; then 58 | echo "TARGET_BRANCH=main" >> ${GITHUB_ENV} 59 | fi 60 | 61 | - uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 62 | with: 63 | fetch-depth: 0 64 | 65 | - name: Create release branch if needed 66 | run: | 67 | git checkout "${TARGET_BRANCH}" && exit 0 68 | 69 | # Create and push release branch if it doesn't exist 70 | git checkout -b "${TARGET_BRANCH}" 71 | git push --set-upstream origin "${TARGET_BRANCH}" 72 | 73 | - run: make release-manifest 74 | 75 | - name: Create release pull request 76 | uses: peter-evans/create-pull-request@271a8d0340265f705b14b6d32b9829c1cb33d45e # v7.0.8 77 | with: 78 | commit-message: "chore: Prepare ${{ env.NEWVERSION }} release" 79 | title: "chore: Prepare ${{ env.NEWVERSION }} release" 80 | branch: "release-${{ env.NEWVERSION }}" 81 | base: "${{ env.TARGET_BRANCH }}" 82 | signoff: true 83 | labels: | 84 | release-pr 85 | ${{ github.event.inputs.release_version }} 86 | -------------------------------------------------------------------------------- /.github/workflows/release.yaml: -------------------------------------------------------------------------------- 1 | name: release 2 | 3 | on: 4 | push: 5 | tags: 6 | - v* 7 | 8 | permissions: 9 | contents: read 10 | 11 | jobs: 12 | release: 13 | permissions: 14 | contents: write 15 | packages: write 16 | id-token: write 17 | runs-on: ubuntu-latest-16-cores 18 | timeout-minutes: 360 19 | steps: 20 | - name: Harden Runner 21 | uses: step-security/harden-runner@f4a75cfd619ee5ce8d5b864b0d183aff3c69b55a # v2.13.1 22 | with: 23 | egress-policy: audit 24 | allowed-endpoints: > 25 | api.github.com:443 26 | auth.docker.io:443 27 | fulcio.sigstore.dev:443 28 | ghcr.io:443 29 | github.com:443 30 | *.githubusercontent.com:443 31 | production.cloudflare.docker.com:443 32 | proxy.golang.org:443 33 | registry-1.docker.io:443 34 | rekor.sigstore.dev:443 35 | storage.googleapis.com:443 36 | tuf-repo-cdn.sigstore.dev:443 37 | get.helm.sh:443 38 | 39 | - uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0 40 | with: 41 | fetch-tags: true 42 | fetch-depth: 0 43 | 44 | - name: Install Cosign 45 | uses: sigstore/cosign-installer@d7543c93d881b35a8faa02e8e3605f69b7a1ce62 # v3.10.0 46 | 47 | - name: Set up Docker Buildx 48 | uses: docker/setup-buildx-action@e468171a9de216ec08956ac3ada2f0791b6bd435 # v3.11.1 49 | - uses: crazy-max/ghaction-github-runtime@3cb05d89e1f492524af3d41a1c98c83bc3025124 # v3.1.0 50 | 51 | - name: Login to GHCR 52 | uses: docker/login-action@184bdaa0721073962dff0199f1fb9940f07167d1 # v3.5.0 53 | with: 54 | registry: ghcr.io 55 | username: ${{ github.actor }} 56 | password: ${{ secrets.GITHUB_TOKEN }} 57 | 58 | - id: docker_meta 59 | uses: docker/metadata-action@c1e51972afc2121e065aed6d45c65596fe445f3f # v5.8.0 60 | with: 61 | images: ghcr.io/kaito-project/aikit/aikit 62 | tags: type=semver,pattern={{raw}} 63 | 64 | - name: Set LDFLAGS 65 | run: | 66 | set -x 67 | GIT_COMMIT=$(git rev-list --abbrev-commit --tags --max-count=1) 68 | GIT_TAG=$(git describe --abbrev=0 --tags ${GIT_COMMIT} 2>/dev/null) 69 | echo LDFLAGS="-X github.com/kaito-project/aikit/pkg/version.Version=${GIT_TAG}" >> $GITHUB_ENV 70 | 71 | - name: Build and push 72 | uses: docker/build-push-action@263435318d21b8e681c14492fe198d362a7d2c83 # 
v6.18.0 73 | id: build-and-push 74 | with: 75 | push: true 76 | tags: | 77 | ${{ steps.docker_meta.outputs.tags }} 78 | ghcr.io/kaito-project/aikit/aikit:latest 79 | cache-from: type=gha,scope=aikit 80 | cache-to: type=gha,scope=aikit,mode=max 81 | sbom: true 82 | provenance: true 83 | build-args: | 84 | LDFLAGS=${{ env.LDFLAGS }} 85 | platforms: linux/amd64,linux/arm64 86 | env: 87 | LDFLAGS: ${{ env.LDFLAGS }} 88 | 89 | - name: Sign the images with GitHub OIDC Token 90 | env: 91 | DIGEST: ${{ steps.build-and-push.outputs.digest }} 92 | run: cosign sign --yes "ghcr.io/kaito-project/aikit/aikit@${DIGEST}" 93 | 94 | - name: Verify image signature 95 | env: 96 | DIGEST: ${{ steps.build-and-push.outputs.digest }} 97 | run: | 98 | cosign verify ghcr.io/kaito-project/aikit/aikit@${DIGEST} \ 99 | --certificate-oidc-issuer https://token.actions.githubusercontent.com \ 100 | --certificate-identity https://github.com/kaito-project/aikit/.github/workflows/release.yaml@$GITHUB_REF 101 | 102 | - name: Create GitHub release 103 | uses: marvinpinto/action-automatic-releases@919008cf3f741b179569b7a6fb4d8860689ab7f0 # v1.2.1 104 | with: 105 | repo_token: "${{ secrets.GITHUB_TOKEN }}" 106 | prerelease: false 107 | 108 | - name: Publish Helm chart 109 | uses: stefanprodan/helm-gh-pages@0ad2bb377311d61ac04ad9eb6f252fb68e207260 # v1.7.0 110 | with: 111 | token: ${{ secrets.GITHUB_TOKEN }} 112 | charts_dir: charts 113 | target_dir: charts 114 | linting: off 115 | -------------------------------------------------------------------------------- /.github/workflows/scorecards.yml: -------------------------------------------------------------------------------- 1 | # This workflow uses actions that are not certified by GitHub. They are provided 2 | # by a third-party and are governed by separate terms of service, privacy 3 | # policy, and support documentation. 4 | 5 | name: Scorecard supply-chain security 6 | on: 7 | # For Branch-Protection check. Only the default branch is supported. See 8 | # https://github.com/ossf/scorecard/blob/main/docs/checks.md#branch-protection 9 | branch_protection_rule: 10 | # To guarantee Maintained check is occasionally updated. See 11 | # https://github.com/ossf/scorecard/blob/main/docs/checks.md#maintained 12 | schedule: 13 | - cron: '20 7 * * 2' 14 | push: 15 | branches: ["main"] 16 | 17 | # Declare default permissions as read only. 18 | permissions: read-all 19 | 20 | jobs: 21 | analysis: 22 | name: Scorecard analysis 23 | runs-on: ubuntu-latest 24 | permissions: 25 | # Needed to upload the results to code-scanning dashboard. 26 | security-events: write 27 | # Needed to publish results and get a badge (see publish_results below). 
28 | id-token: write 29 | contents: read 30 | actions: read 31 | 32 | steps: 33 | - name: Harden Runner 34 | uses: step-security/harden-runner@f4a75cfd619ee5ce8d5b864b0d183aff3c69b55a # v2.13.1 35 | with: 36 | disable-sudo: true 37 | egress-policy: audit 38 | allowed-endpoints: > 39 | api.github.com:443 40 | api.osv.dev:443 41 | api.securityscorecards.dev:443 42 | bestpractices.coreinfrastructure.org:443 43 | fulcio.sigstore.dev:443 44 | github.com:443 45 | oss-fuzz-build-logs.storage.googleapis.com:443 46 | rekor.sigstore.dev:443 47 | sigstore-tuf-root.storage.googleapis.com:443 48 | tuf-repo-cdn.sigstore.dev:443 49 | www.bestpractices.dev:443 50 | api.scorecard.dev:443 51 | api.deps.dev:443 52 | 53 | - name: "Checkout code" 54 | uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0 55 | with: 56 | persist-credentials: false 57 | 58 | - name: "Run analysis" 59 | uses: ossf/scorecard-action@05b42c624433fc40578a4040d5cf5e36ddca8cde # v2.4.2 60 | with: 61 | results_file: results.sarif 62 | results_format: sarif 63 | # (Optional) "write" PAT token. Uncomment the `repo_token` line below if: 64 | # - you want to enable the Branch-Protection check on a *public* repository, or 65 | # - you are installing Scorecards on a *private* repository 66 | # To create the PAT, follow the steps in https://github.com/ossf/scorecard-action#authentication-with-pat. 67 | # repo_token: ${{ secrets.SCORECARD_TOKEN }} 68 | 69 | # Public repositories: 70 | # - Publish results to OpenSSF REST API for easy access by consumers 71 | # - Allows the repository to include the Scorecard badge. 72 | # - See https://github.com/ossf/scorecard-action#publishing-results. 73 | # For private repositories: 74 | # - `publish_results` will always be set to `false`, regardless 75 | # of the value entered here. 76 | publish_results: true 77 | 78 | # Upload the results as artifacts (optional). Commenting out will disable uploads of run results in SARIF 79 | # format to the repository Actions tab. 80 | - name: "Upload artifact" 81 | uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # v4.6.2 82 | with: 83 | name: SARIF file 84 | path: results.sarif 85 | retention-days: 5 86 | 87 | # Upload the results to GitHub's code scanning dashboard. 
88 | - name: "Upload to code-scanning" 89 | uses: github/codeql-action/upload-sarif@192325c86100d080feab897ff886c34abd4c83a3 # v3.29.5 90 | with: 91 | sarif_file: results.sarif 92 | -------------------------------------------------------------------------------- /.github/workflows/test-docker-args.yaml: -------------------------------------------------------------------------------- 1 | name: docker-test-args 2 | 3 | on: 4 | push: 5 | paths-ignore: 6 | - '**.md' 7 | - 'website/**' 8 | pull_request: 9 | paths-ignore: 10 | - '**.md' 11 | - 'website/**' 12 | 13 | permissions: read-all 14 | 15 | concurrency: 16 | group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }} 17 | cancel-in-progress: true 18 | 19 | jobs: 20 | test: 21 | runs-on: ubuntu-latest-16-cores 22 | timeout-minutes: 240 23 | strategy: 24 | fail-fast: false 25 | matrix: 26 | protocol: 27 | - oci 28 | - huggingface 29 | - https 30 | steps: 31 | - name: Harden Runner 32 | uses: step-security/harden-runner@f4a75cfd619ee5ce8d5b864b0d183aff3c69b55a # v2.13.1 33 | with: 34 | egress-policy: audit 35 | allowed-endpoints: > 36 | auth.docker.io:443 37 | *.huggingface.co:443 38 | cdn.dl.k8s.io:443 39 | dl.k8s.io:443 40 | download.docker.com:443 41 | gcr.io:443 42 | github.com:443 43 | huggingface.co:443 44 | *.githubusercontent.com:443 45 | production.cloudflare.docker.com:443 46 | proxy.golang.org:443 47 | registry-1.docker.io:443 48 | storage.googleapis.com:443 49 | *.blob.core.windows.net:443 50 | *.azureedge.net:443 51 | *.ubuntu.com:80 52 | developer.download.nvidia.com:443 53 | ghcr.io:443 54 | 55 | - uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0 56 | 57 | # need containerd image store for testing local images 58 | - uses: crazy-max/ghaction-setup-docker@b60f85385d03ac8acfca6d9996982511d8620a19 # v4.3.0 59 | with: 60 | daemon-config: | 61 | { 62 | "debug": true, 63 | "features": { 64 | "containerd-snapshotter": true 65 | } 66 | } 67 | - uses: crazy-max/ghaction-github-runtime@3cb05d89e1f492524af3d41a1c98c83bc3025124 # v3.1.0 68 | 69 | - name: build aikit 70 | run: | 71 | docker buildx build . 
-t aikit:test \ 72 | --load --provenance=false --progress plain \ 73 | --cache-from=type=gha,scope=aikit-amd64 \ 74 | --cache-to=type=gha,scope=aikit-amd64,mode=max 75 | 76 | - name: set url 77 | run: | 78 | if [ "${{ matrix.protocol }}" = "oci" ]; then 79 | echo "MODEL_URL=oci://registry.ollama.ai/library/llama3.2:1b" >> $GITHUB_ENV 80 | echo "MODEL_NAME=llama3.2" >> $GITHUB_ENV 81 | elif [ "${{ matrix.protocol }}" = "huggingface" ]; then 82 | echo "MODEL_URL=huggingface://MaziyarPanahi/Llama-3.2-1B-Instruct-GGUF/Llama-3.2-1B-Instruct.Q4_K_M.gguf" >> $GITHUB_ENV 83 | echo "MODEL_NAME=Llama-3.2-1B-Instruct.Q4_K_M.gguf" >> $GITHUB_ENV 84 | elif [ "${{ matrix.protocol }}" = "https" ]; then 85 | echo "MODEL_URL=https://huggingface.co/MaziyarPanahi/Llama-3.2-1B-Instruct-GGUF/resolve/main/Llama-3.2-1B-Instruct.Q4_K_M.gguf" >> $GITHUB_ENV 86 | echo "MODEL_NAME=Llama-3.2-1B-Instruct.Q4_K_M.gguf" >> $GITHUB_ENV 87 | fi 88 | 89 | - name: build test model 90 | run: | 91 | docker buildx build -t testmodel:test \ 92 | --build-arg="model=$MODEL_URL" \ 93 | --load --provenance=false --progress plain \ 94 | --cache-from=type=gha,scope=testmodel-${{ matrix.protocol }} \ 95 | --cache-to=type=gha,scope=testmodel-${{ matrix.protocol }},mode=max \ 96 | "https://raw.githubusercontent.com/${REPO}/${SOURCE_BRANCH}/test/aikitfile-args.yaml" 97 | env: 98 | REPO: ${{ github.event.pull_request.head.repo.full_name || github.repository }} 99 | # github head ref is only set for pull_request targets 100 | # otherwise, get the github ref name to get the source branch 101 | SOURCE_BRANCH: ${{ github.head_ref || github.ref_name }} 102 | 103 | - name: list images 104 | run: docker images 105 | 106 | - name: run test model 107 | run: docker run --name testmodel -d -p 8080:8080 testmodel:test 108 | 109 | - name: run llama test 110 | run: | 111 | set -e 112 | result=$(curl --fail --retry 10 --retry-all-errors \ 113 | http://127.0.0.1:8080/v1/chat/completions \ 114 | -H "Content-Type: application/json" \ 115 | -d "{\"model\": \"${MODEL_NAME}\", \"messages\": [{\"role\": \"user\", \"content\": \"explain kubernetes in a sentence\"}]}") 116 | echo $result 117 | 118 | choices=$(echo "$result" | jq '.choices') 119 | if [ -z "$choices" ]; then 120 | exit 1 121 | fi 122 | 123 | - name: save logs 124 | if: always() 125 | run: docker logs testmodel > /tmp/docker-${{ matrix.protocol }}.log 126 | 127 | - name: publish test artifacts 128 | if: always() 129 | uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # v4.6.2 130 | with: 131 | name: test-${{ matrix.protocol }} 132 | path: | 133 | /tmp/*.log 134 | -------------------------------------------------------------------------------- /.github/workflows/test-docker-gpu.yaml: -------------------------------------------------------------------------------- 1 | name: docker-test-gpu 2 | 3 | on: 4 | workflow_dispatch: 5 | inputs: 6 | backend: 7 | description: 'Backend to test (leave empty to test all)' 8 | required: false 9 | type: choice 10 | default: 'all' 11 | options: 12 | - all 13 | - llama-cuda 14 | - exllama2-gptq 15 | - exllama2-exl2 16 | - diffusers 17 | 18 | permissions: read-all 19 | 20 | jobs: 21 | test: 22 | runs-on: self-hosted 23 | timeout-minutes: 240 24 | strategy: 25 | fail-fast: false 26 | max-parallel: 1 27 | matrix: 28 | backend: ${{ inputs.backend == 'all' && fromJson('["llama-cuda", "exllama2-gptq", "exllama2-exl2", "diffusers"]') || fromJson(format('["{0}"]', inputs.backend)) }} 29 | steps: 30 | - name: cleanup workspace 31 | run: | 32 | rm -rf ./* || 
true 33 | rm -rf ./.??* || true 34 | - uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0 35 | 36 | # use default docker driver builder with containerd image store for local aikit image 37 | # these must be setup before running this test 38 | - run: docker buildx use default 39 | 40 | - name: build aikit 41 | run: | 42 | docker buildx build . -t aikit:test \ 43 | --load --provenance=false --progress plain 44 | 45 | - name: build test model 46 | run: | 47 | docker buildx build . -t testmodel:test \ 48 | -f test/aikitfile-${{ matrix.backend }}.yaml \ 49 | --load --provenance=false --progress plain 50 | 51 | - name: list images 52 | run: docker images 53 | 54 | - name: run test model 55 | run: docker run --name testmodel -d --rm -p 8080:8080 --gpus all testmodel:test 56 | 57 | - name: run test (gguf) 58 | if: matrix.backend == 'llama-cuda' 59 | run: | 60 | result=$(curl --fail --retry 10 --retry-all-errors http://127.0.0.1:8080/v1/chat/completions -H "Content-Type: application/json" -d '{ 61 | "model": "llama-3.2-1b-instruct", 62 | "messages": [{"role": "user", "content": "explain kubernetes in a sentence"}] 63 | }') 64 | echo $result 65 | 66 | choices=$(echo "$result" | jq '.choices') 67 | if [ -z "$choices" ]; then 68 | exit 1 69 | fi 70 | 71 | - name: run test (exl2/gptq) 72 | if: matrix.backend == 'exllama2-gptq' || matrix.backend == 'exllama2-exl2' 73 | run: | 74 | result=$(curl --fail --retry 10 --retry-all-errors http://127.0.0.1:8080/v1/chat/completions -H "Content-Type: application/json" -d '{ 75 | "model": "llama-2-7b-chat", 76 | "messages": [{"role": "user", "content": "explain kubernetes in a sentence"}] 77 | }') 78 | echo $result 79 | 80 | choices=$(echo "$result" | jq '.choices') 81 | if [ -z "$choices" ]; then 82 | exit 1 83 | fi 84 | 85 | - name: run test (diffusers) 86 | if: matrix.backend == 'diffusers' 87 | run: | 88 | result=$(curl --fail --retry 10 --retry-all-errors http://127.0.0.1:8080/v1/images/generations -H "Content-Type: application/json" -d '{ 89 | "model": "dreamshaper", 90 | "prompt": "A cute baby llama", 91 | "size": "256x256" 92 | }') 93 | echo $result 94 | 95 | url=$(echo "$result" | jq '.data[0].url') 96 | if [ -z "$url" ]; then 97 | exit 1 98 | fi 99 | 100 | - name: save generated image 101 | if: matrix.backend == 'diffusers' 102 | run: docker cp testmodel:/tmp/generated/content/images /tmp 103 | 104 | - name: save logs 105 | if: always() 106 | run: docker logs testmodel > /tmp/docker-${{ matrix.backend }}.log 107 | 108 | - run: docker stop testmodel 109 | if: always() 110 | 111 | - run: docker system prune -a -f --volumes || true 112 | if: always() 113 | 114 | - name: publish test artifacts 115 | if: always() 116 | uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # v4.6.2 117 | with: 118 | name: test-${{ matrix.backend }} 119 | path: | 120 | /tmp/*.log 121 | /tmp/images/*.png 122 | -------------------------------------------------------------------------------- /.github/workflows/test-finetune.yaml: -------------------------------------------------------------------------------- 1 | name: docker-test-finetune 2 | 3 | on: 4 | workflow_dispatch: 5 | 6 | permissions: read-all 7 | 8 | jobs: 9 | test: 10 | runs-on: self-hosted 11 | timeout-minutes: 360 12 | strategy: 13 | fail-fast: false 14 | max-parallel: 1 15 | matrix: 16 | targets: 17 | - unsloth 18 | steps: 19 | - name: cleanup workspace 20 | run: | 21 | rm -rf ./* || true 22 | rm -rf ./.??* || true 23 | - uses: 
actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0 24 | 25 | - run: nvidia-smi 26 | 27 | # use default docker driver builder with containerd image store for local aikit image 28 | # must have insecure security entitlement for finetuning 29 | # these must be setup before running this test 30 | - run: docker buildx use default 31 | 32 | - name: build aikit 33 | run: | 34 | docker --debug build . -t aikit:test \ 35 | --load --provenance=false --progress plain 36 | 37 | - name: build finetuned model 38 | run: | 39 | docker --debug build --allow security.insecure \ 40 | --file test/aikitfile-${{ matrix.targets }}.yaml --output _output \ 41 | --target ${{ matrix.targets }} --progress plain . 42 | 43 | - name: check if finetuned model exists 44 | run: | 45 | ls -al _output 46 | test -f _output/model-q4_k_m.gguf 47 | 48 | - name: build custom model 49 | run: | 50 | docker --debug build _output --tag custommodel:test \ 51 | --file test/aikitfile-${{ matrix.targets }}-custom.yaml \ 52 | --load --provenance=false --progress plain 53 | 54 | - name: list images 55 | run: docker images 56 | 57 | - name: run test model 58 | run: docker run --name custommodel -d --rm -p 8080:8080 --gpus all custommodel:test 59 | 60 | - name: run test 61 | run: | 62 | result=$(curl --fail --retry 10 --retry-all-errors http://127.0.0.1:8080/v1/chat/completions -H "Content-Type: application/json" -d '{ 63 | "model": "custom", 64 | "messages": [{"role": "user", "content": "Compose a haiku about cats"}] 65 | }') 66 | echo $result 67 | 68 | choices=$(echo "$result" | jq '.choices') 69 | if [ -z "$choices" ]; then 70 | exit 1 71 | fi 72 | 73 | - name: save logs 74 | if: always() 75 | run: docker logs custommodel > /tmp/docker.log 76 | 77 | - run: docker stop custommodel 78 | if: always() 79 | 80 | - run: docker system prune -a -f --volumes || true 81 | if: always() 82 | 83 | - name: clean up output 84 | if: always() 85 | run: rm -rf _output 86 | 87 | - name: publish test artifacts 88 | if: always() 89 | uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # v4.6.2 90 | with: 91 | name: test 92 | path: | 93 | /tmp/*.log 94 | -------------------------------------------------------------------------------- /.github/workflows/test-helm.yaml: -------------------------------------------------------------------------------- 1 | name: helm-test 2 | 3 | on: 4 | workflow_dispatch: 5 | # push: 6 | # branches: 7 | # - main 8 | # paths-ignore: 9 | # - '**.md' 10 | # - 'website/**' 11 | # pull_request: 12 | # branches: 13 | # - main 14 | # paths-ignore: 15 | # - '**.md' 16 | # - 'website/**' 17 | 18 | permissions: read-all 19 | 20 | concurrency: 21 | group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }} 22 | cancel-in-progress: true 23 | 24 | jobs: 25 | test: 26 | runs-on: ubuntu-latest-16-cores 27 | timeout-minutes: 240 28 | steps: 29 | - name: Harden Runner 30 | uses: step-security/harden-runner@f4a75cfd619ee5ce8d5b864b0d183aff3c69b55a # v2.13.1 31 | with: 32 | egress-policy: audit 33 | allowed-endpoints: > 34 | auth.docker.io:443 35 | huggingface.co:443 36 | *.huggingface.co:443 37 | *.hf.co:443 38 | cdn.dl.k8s.io:443 39 | dl.k8s.io:443 40 | download.docker.com:443 41 | gcr.io:443 42 | github.com:443 43 | *.githubusercontent.com:443 44 | production.cloudflare.docker.com:443 45 | proxy.golang.org:443 46 | registry-1.docker.io:443 47 | storage.googleapis.com:443 48 | *.ubuntu.com:80 49 | developer.download.nvidia.com:443 50 | get.helm.sh:443 51 | *.blob.core.windows.net:443 52 | 
*.azureedge.net:443 53 | registry.k8s.io:443 54 | *.pkg.dev:443 55 | *.amazonaws.com:443 56 | dl-cdn.alpinelinux.org:443 57 | ghcr.io:443 58 | 59 | - uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0 60 | 61 | # need containerd image store for testing local images 62 | - uses: crazy-max/ghaction-setup-docker@b60f85385d03ac8acfca6d9996982511d8620a19 # v4.3.0 63 | with: 64 | daemon-config: | 65 | { 66 | "debug": true, 67 | "features": { 68 | "containerd-snapshotter": true 69 | } 70 | } 71 | - uses: crazy-max/ghaction-github-runtime@3cb05d89e1f492524af3d41a1c98c83bc3025124 # v3.1.0 72 | 73 | - name: build aikit 74 | run: | 75 | docker buildx build . -t aikit:test \ 76 | --load --provenance=false --progress plain \ 77 | --cache-from=type=gha,scope=aikit-amd64 \ 78 | --cache-to=type=gha,scope=aikit-amd64,mode=max 79 | 80 | - name: build test model 81 | run: | 82 | docker buildx build . -t testmodel:test \ 83 | -f test/aikitfile-llama.yaml \ 84 | --load --provenance=false --progress plain \ 85 | --cache-from=type=gha,scope=testmodel-amd64 \ 86 | --cache-to=type=gha,scope=testmodel-amd64,mode=max 87 | 88 | - name: list images 89 | run: docker images 90 | 91 | - name: install e2e dependencies 92 | run: make test-e2e-dependencies 93 | 94 | - name: create kind cluster 95 | run: kind create cluster --wait 5m 96 | 97 | - name: load test model image into kind cluster 98 | run: kind load docker-image testmodel:test 99 | 100 | - name: deploy test model 101 | run: | 102 | helm install charts/aikit --wait --debug \ 103 | --name-template aikit --namespace aikit --create-namespace \ 104 | --set image.repository=testmodel \ 105 | --set image.tag=test \ 106 | --set image.pullPolicy=Never 107 | kubectl port-forward -n aikit service/aikit 8080:8080 & 108 | 109 | - name: run test 110 | run: | 111 | result=$(curl --fail --retry 10 --retry-all-errors http://localhost:8080/v1/chat/completions -H "Content-Type: application/json" -d '{ 112 | "model": "llama-3.2-1b-instruct", 113 | "messages": [{"role": "user", "content": "explain kubernetes in a sentence"}] 114 | }') 115 | echo $result 116 | 117 | choices=$(echo "$result" | jq '.choices') 118 | if [ -z "$choices" ]; then 119 | exit 1 120 | fi 121 | 122 | - name: save logs 123 | if: always() 124 | run: | 125 | kind export logs /tmp/kind-logs 126 | 127 | - name: publish logs 128 | if: always() 129 | uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # v4.6.2 130 | with: 131 | name: kind-logs 132 | path: /tmp/kind-logs 133 | -------------------------------------------------------------------------------- /.github/workflows/test-kubernetes.yaml: -------------------------------------------------------------------------------- 1 | name: kubernetes-test 2 | 3 | on: 4 | push: 5 | paths-ignore: 6 | - '**.md' 7 | - 'website/**' 8 | pull_request: 9 | paths-ignore: 10 | - '**.md' 11 | - 'website/**' 12 | 13 | permissions: read-all 14 | 15 | concurrency: 16 | group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }} 17 | cancel-in-progress: true 18 | 19 | jobs: 20 | test: 21 | runs-on: ubuntu-latest-16-cores 22 | timeout-minutes: 240 23 | steps: 24 | - name: Harden Runner 25 | uses: step-security/harden-runner@f4a75cfd619ee5ce8d5b864b0d183aff3c69b55a # v2.13.1 26 | with: 27 | egress-policy: audit 28 | allowed-endpoints: > 29 | auth.docker.io:443 30 | huggingface.co:443 31 | *.huggingface.co:443 32 | *.hf.co:443 33 | cdn.dl.k8s.io:443 34 | dl.k8s.io:443 35 | download.docker.com:443 36 | gcr.io:443 37 | 
github.com:443 38 | *.githubusercontent.com:443 39 | production.cloudflare.docker.com:443 40 | proxy.golang.org:443 41 | registry-1.docker.io:443 42 | storage.googleapis.com:443 43 | *.ubuntu.com:80 44 | developer.download.nvidia.com:443 45 | get.helm.sh:443 46 | *.blob.core.windows.net:443 47 | *.azureedge.net:443 48 | ghcr.io:443 49 | 50 | - uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0 51 | 52 | # need containerd image store for testing local images 53 | - uses: crazy-max/ghaction-setup-docker@b60f85385d03ac8acfca6d9996982511d8620a19 # v4.3.0 54 | with: 55 | daemon-config: | 56 | { 57 | "debug": true, 58 | "features": { 59 | "containerd-snapshotter": true 60 | } 61 | } 62 | - uses: crazy-max/ghaction-github-runtime@3cb05d89e1f492524af3d41a1c98c83bc3025124 # v3.1.0 63 | 64 | - name: build aikit 65 | run: | 66 | docker buildx build . -t aikit:test \ 67 | --load --provenance=false --progress plain \ 68 | --cache-from=type=gha,scope=aikit-amd64 \ 69 | --cache-to=type=gha,scope=aikit-amd64,mode=max 70 | 71 | - name: build test model 72 | run: | 73 | docker buildx build . -t testmodel:test \ 74 | -f test/aikitfile-llama.yaml \ 75 | --load --provenance=false --progress plain \ 76 | --cache-from=type=gha,scope=testmodel-amd64 \ 77 | --cache-to=type=gha,scope=testmodel-amd64,mode=max 78 | 79 | - name: list images 80 | run: docker images 81 | 82 | - name: install e2e dependencies 83 | run: make test-e2e-dependencies 84 | 85 | - name: create kind cluster 86 | run: kind create cluster --wait 5m 87 | 88 | - name: load test model image into kind cluster 89 | run: kind load docker-image testmodel:test 90 | 91 | - name: deploy test model 92 | run: | 93 | kubectl create deployment test-model-deployment --image=testmodel:test --replicas 1 94 | kubectl rollout status deployment test-model-deployment 95 | kubectl expose deployment test-model-deployment --port=8080 --target-port=8080 --name=test-model-service 96 | kubectl port-forward service/test-model-service 8080:8080 & 97 | 98 | - name: run test 99 | run: | 100 | result=$(curl --fail --retry 10 --retry-all-errors http://127.0.0.1:8080/v1/chat/completions -H "Content-Type: application/json" -d '{ 101 | "model": "llama-3.2-1b-instruct", 102 | "messages": [{"role": "user", "content": "explain kubernetes in a sentence"}] 103 | }') 104 | echo $result 105 | 106 | choices=$(echo "$result" | jq '.choices') 107 | if [ -z "$choices" ]; then 108 | exit 1 109 | fi 110 | 111 | - name: save logs 112 | if: always() 113 | run: | 114 | echo "KIND_LOGS_PATH=$(kind export logs)" >> $GITHUB_ENV 115 | 116 | - name: publish logs 117 | if: always() 118 | uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # v4.6.2 119 | with: 120 | name: kind-logs 121 | path: ${{ env.KIND_LOGS_PATH }} 122 | -------------------------------------------------------------------------------- /.github/workflows/test-podman-applesilicon.yaml: -------------------------------------------------------------------------------- 1 | name: podman-test-gpu 2 | 3 | on: 4 | workflow_dispatch: 5 | 6 | permissions: read-all 7 | 8 | jobs: 9 | test: 10 | runs-on: self-hosted 11 | timeout-minutes: 240 12 | steps: 13 | - name: cleanup workspace 14 | run: | 15 | rm -rf ./* || true 16 | rm -rf ./.??* || true 17 | - uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0 18 | 19 | # use default docker driver builder with containerd image store for local aikit image 20 | # these must be set up before running this test 21 | - run: docker buildx use desktop-linux 22 | 23
| - name: build aikit 24 | run: | 25 | docker buildx build . -t aikit:test \ 26 | --load --provenance=false --progress plain 27 | 28 | - name: build and push test model 29 | run: | 30 | docker buildx build . -t sozercan/testmodel:test \ 31 | --push \ 32 | -f test/aikitfile-llama.yaml \ 33 | --provenance=false --progress plain \ 34 | --platform "linux/arm64" \ 35 | --build-arg="runtime=applesilicon" 36 | 37 | - name: list images 38 | run: docker images 39 | 40 | - name: run test model 41 | run: | 42 | podman run --name testmodel -d --rm -p 8080:8080 \ 43 | --device /dev/dri \ 44 | --pull always \ 45 | sozercan/testmodel:test 46 | 47 | - name: run test (gguf) 48 | run: | 49 | result=$(curl --fail --retry 10 --retry-all-errors http://localhost:8080/v1/chat/completions -H "Content-Type: application/json" -d '{ 50 | "model": "llama-3.2-1b-instruct", 51 | "messages": [{"role": "user", "content": "explain kubernetes in a sentence"}] 52 | }') 53 | echo $result 54 | 55 | choices=$(echo "$result" | jq '.choices') 56 | if [ -z "$choices" ]; then 57 | exit 1 58 | fi 59 | 60 | - name: save logs 61 | if: always() 62 | run: podman logs testmodel > /tmp/podman-gpu.log 63 | 64 | - run: podman stop testmodel 65 | if: always() 66 | 67 | # - name: prune 68 | # run: | 69 | # docker system prune -a -f --volumes || true 70 | # podman system prune -a -f --volumes || true 71 | # if: always() 72 | 73 | - name: publish test artifacts 74 | if: always() 75 | uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # v4.6.2 76 | with: 77 | name: test-podman-gpu 78 | path: | 79 | /tmp/*.log 80 | -------------------------------------------------------------------------------- /.github/workflows/unit-test.yaml: -------------------------------------------------------------------------------- 1 | name: unit-test 2 | 3 | on: 4 | push: 5 | paths-ignore: 6 | - '**.md' 7 | - 'website/**' 8 | pull_request: 9 | paths-ignore: 10 | - '**.md' 11 | - 'website/**' 12 | 13 | permissions: read-all 14 | 15 | concurrency: 16 | group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }} 17 | cancel-in-progress: true 18 | 19 | jobs: 20 | test: 21 | runs-on: ubuntu-latest-16-cores 22 | steps: 23 | - name: Harden Runner 24 | uses: step-security/harden-runner@f4a75cfd619ee5ce8d5b864b0d183aff3c69b55a # v2.13.1 25 | with: 26 | disable-sudo: true 27 | egress-policy: audit 28 | allowed-endpoints: > 29 | api.github.com:443 30 | github.com:443 31 | *.githubusercontent.com:443 32 | proxy.golang.org:443 33 | storage.googleapis.com:443 34 | 35 | - uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0 36 | 37 | - uses: actions/setup-go@44694675825211faa026b3c33043df3e48a5fa00 # v6.0.0 38 | with: 39 | go-version: "1.24" 40 | check-latest: true 41 | 42 | - name: go mod tidy 43 | run: | 44 | go mod tidy 45 | git diff --exit-code 46 | 47 | - name: test 48 | run: make test 49 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # If you prefer the allow list template instead of the deny list, see community template: 2 | # https://github.com/github/gitignore/blob/main/community/Golang/Go.AllowList.gitignore 3 | # 4 | # Binaries for programs and plugins 5 | *.exe 6 | *.exe~ 7 | *.dll 8 | *.so 9 | *.dylib 10 | 11 | # Test binary, built with `go test -c` 12 | *.test 13 | 14 | # Output of the go coverage tool, specifically when used with LiteIDE 15 | *.out 16 | 17 | # Dependency 
directories (remove the comment below to include it) 18 | # vendor/ 19 | 20 | # Go workspace file 21 | go.work 22 | 23 | bin 24 | coverage.txt 25 | 26 | _output 27 | .vscode 28 | -------------------------------------------------------------------------------- /.golangci.yaml: -------------------------------------------------------------------------------- 1 | version: "2" 2 | run: 3 | go: "1.24" 4 | linters: 5 | default: none 6 | enable: 7 | - copyloopvar 8 | - errcheck 9 | - errorlint 10 | - forcetypeassert 11 | - goconst 12 | - gocritic 13 | - godot 14 | - gosec 15 | - govet 16 | - ineffassign 17 | - misspell 18 | - revive 19 | - staticcheck 20 | - unconvert 21 | - unused 22 | - whitespace 23 | settings: 24 | lll: 25 | line-length: 200 26 | misspell: 27 | locale: US 28 | exclusions: 29 | generated: lax 30 | presets: 31 | - comments 32 | - common-false-positives 33 | - legacy 34 | - std-error-handling 35 | paths: 36 | - third_party$ 37 | - builtin$ 38 | - examples$ 39 | formatters: 40 | enable: 41 | - gci 42 | - gofmt 43 | - gofumpt 44 | - goimports 45 | exclusions: 46 | generated: lax 47 | paths: 48 | - third_party$ 49 | - builtin$ 50 | - examples$ 51 | -------------------------------------------------------------------------------- /.pre-commit-config.yaml: -------------------------------------------------------------------------------- 1 | repos: 2 | - repo: https://github.com/gitleaks/gitleaks 3 | rev: v8.21.2 4 | hooks: 5 | - id: gitleaks 6 | - repo: https://github.com/golangci/golangci-lint 7 | rev: v2.1.6 8 | hooks: 9 | - id: golangci-lint 10 | - repo: https://github.com/jumanjihouse/pre-commit-hooks 11 | rev: 3.0.0 12 | hooks: 13 | - id: shellcheck 14 | - repo: https://github.com/pre-commit/pre-commit-hooks 15 | rev: v5.0.0 16 | hooks: 17 | - id: end-of-file-fixer 18 | - id: trailing-whitespace 19 | - repo: https://github.com/crate-ci/typos 20 | rev: v1.27.3 21 | hooks: 22 | - id: typos 23 | -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | FROM --platform=$BUILDPLATFORM golang:1.25-bookworm@sha256:c4bc0741e3c79c0e2d47ca2505a06f5f2a44682ada94e1dba251a3854e60c2bd AS builder 2 | 3 | ARG TARGETPLATFORM 4 | ARG TARGETOS 5 | ARG TARGETARCH 6 | ARG TARGETVARIANT="" 7 | ARG LDFLAGS 8 | 9 | COPY . 
/go/src/github.com/kaito-project/aikit 10 | WORKDIR /go/src/github.com/kaito-project/aikit 11 | RUN CGO_ENABLED=0 \ 12 | GOOS=${TARGETOS} \ 13 | GOARCH=${TARGETARCH} \ 14 | GOARM=${TARGETVARIANT} \ 15 | go build -o /aikit -ldflags "${LDFLAGS} -w -s -extldflags '-static'" ./cmd/frontend 16 | 17 | FROM scratch 18 | COPY --from=builder /aikit /bin/aikit 19 | ENTRYPOINT ["/bin/aikit"] 20 | -------------------------------------------------------------------------------- /Dockerfile.base: -------------------------------------------------------------------------------- 1 | ARG UBUNTU_RELEASE=22.04 2 | 3 | FROM golang:1.25-bookworm@sha256:c4bc0741e3c79c0e2d47ca2505a06f5f2a44682ada94e1dba251a3854e60c2bd AS builder 4 | ARG UBUNTU_RELEASE 5 | ARG TARGETARCH 6 | 7 | RUN go install github.com/canonical/chisel/cmd/chisel@v1.0.0 8 | 9 | RUN mkdir -p /rootfs && chisel cut --release ubuntu-$UBUNTU_RELEASE --root /rootfs \ 10 | base-files_base \ 11 | base-files_chisel \ 12 | base-files_release-info \ 13 | ca-certificates_data \ 14 | libgcc-s1_libs \ 15 | libc6_libs \ 16 | bash_bins \ 17 | coreutils_bins \ 18 | grep_bins 19 | 20 | FROM scratch 21 | COPY --from=builder /rootfs / 22 | -------------------------------------------------------------------------------- /Dockerfile.base-applesilicon: -------------------------------------------------------------------------------- 1 | FROM fedora:41@sha256:3ec60eb34fa1a095c0c34dd37cead9fd38afb62612d43892fcf1d3425c32bc1e 2 | 3 | ARG MESA_VERSION="24.1.2-101" 4 | 5 | USER 0 6 | 7 | # Install the patched mesa-krunkit drivers 8 | RUN dnf -y install dnf-plugins-core && \ 9 | dnf -y copr enable slp/mesa-krunkit epel-9-aarch64 && \ 10 | dnf -y install \ 11 | mesa-vulkan-drivers-$MESA_VERSION.el9.aarch64 && \ 12 | dnf clean all 13 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2023 Sertaç Özercan 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | VERSION := v0.19.2 2 | 3 | REGISTRY ?= ghcr.io/kaito-project 4 | REPOSITORY ?= /aikit 5 | KIND_VERSION ?= 0.29.0 6 | KUBERNETES_VERSION ?= 1.33.2 7 | HELM_VERSION ?= 3.18.3 8 | TAG ?= test 9 | OUTPUT_TYPE ?= type=docker 10 | TEST_IMAGE_NAME ?= testmodel 11 | TEST_FILE ?= test/aikitfile-llama.yaml 12 | RUNTIME ?= "" 13 | PLATFORMS ?= linux/amd64,linux/arm64 14 | 15 | GIT_COMMIT := $(shell git rev-list --abbrev-commit --tags --max-count=1) 16 | GIT_TAG := $(shell git describe --abbrev=0 --tags ${GIT_COMMIT} 2>/dev/null || true) 17 | LDFLAGS := "-X github.com/kaito-project/aikit/pkg/version.Version=$(GIT_TAG:%=%)" 18 | 19 | .PHONY: lint 20 | lint: 21 | golangci-lint run -v ./... --timeout 5m 22 | 23 | .PHONY: build-aikit 24 | build-aikit: 25 | docker buildx build . -t ${REGISTRY}${REPOSITORY}/aikit:${TAG} \ 26 | --output=${OUTPUT_TYPE} \ 27 | --build-arg LDFLAGS=${LDFLAGS} \ 28 | --platform ${PLATFORMS} \ 29 | --progress=plain 30 | 31 | .PHONY: build-test-model 32 | build-test-model: 33 | docker buildx build . -t ${REGISTRY}${REPOSITORY}/${TEST_IMAGE_NAME}:${TAG} -f ${TEST_FILE} \ 34 | --progress=plain --provenance=false \ 35 | --output=${OUTPUT_TYPE} \ 36 | --build-arg runtime=${RUNTIME} \ 37 | --platform ${PLATFORMS} 38 | 39 | .PHONY: build-base 40 | build-base: 41 | docker buildx build . -t ${REGISTRY}${REPOSITORY}/base:latest -f Dockerfile.base \ 42 | --platform ${PLATFORMS} \ 43 | --output=${OUTPUT_TYPE} \ 44 | --sbom=true --push 45 | 46 | .PHONY: run-test-model 47 | run-test-model: 48 | docker run --rm -p 8080:8080 ${REGISTRY}${REPOSITORY}/${TEST_IMAGE_NAME}:${TAG} 49 | 50 | .PHONY: run-test-model-gpu 51 | run-test-model-gpu: 52 | docker run --rm -p 8080:8080 --gpus all ${REGISTRY}${REPOSITORY}/${TEST_IMAGE_NAME}:${TAG} 53 | 54 | .PHONY: run-test-model-applesilicon 55 | run-test-model-applesilicon: 56 | podman run --rm -p 8080:8080 --device /dev/dri ${REGISTRY}${REPOSITORY}/${TEST_IMAGE_NAME}:${TAG} 57 | 58 | .PHONY: test 59 | test: 60 | go test -v ./... 
-race -coverprofile=coverage.txt -covermode=atomic 61 | 62 | .PHONY: test-e2e-dependencies 63 | test-e2e-dependencies: 64 | mkdir -p ${GITHUB_WORKSPACE}/bin 65 | echo "${GITHUB_WORKSPACE}/bin" >> ${GITHUB_PATH} 66 | 67 | # used for kubernetes test 68 | curl -sSL https://dl.k8s.io/release/v${KUBERNETES_VERSION}/bin/linux/amd64/kubectl -o ${GITHUB_WORKSPACE}/bin/kubectl && chmod +x ${GITHUB_WORKSPACE}/bin/kubectl 69 | curl https://get.helm.sh/helm-v${HELM_VERSION}-linux-amd64.tar.gz | tar xz && mv linux-amd64/helm ${GITHUB_WORKSPACE}/bin/helm && chmod +x ${GITHUB_WORKSPACE}/bin/helm 70 | curl -sSL https://github.com/kubernetes-sigs/kind/releases/download/v${KIND_VERSION}/kind-linux-amd64 -o ${GITHUB_WORKSPACE}/bin/kind && chmod +x ${GITHUB_WORKSPACE}/bin/kind 71 | 72 | .PHONY: release-manifest 73 | release-manifest: 74 | @sed -i "s/appVersion: $(VERSION)/appVersion: ${NEWVERSION}/" ./charts/aikit/Chart.yaml 75 | @sed -i "s/version: $$(echo ${VERSION} | cut -c2-)/version: $$(echo ${NEWVERSION} | cut -c2-)/" ./charts/aikit/Chart.yaml 76 | @sed -i -e 's/^VERSION := $(VERSION)/VERSION := ${NEWVERSION}/' ./Makefile 77 | -------------------------------------------------------------------------------- /SECURITY.md: -------------------------------------------------------------------------------- 1 | # Security Policy 2 | 3 | ## Supported Versions 4 | 5 | Security updates are applied only to the most recent releases. 6 | 7 | ## Reporting a Vulnerability 8 | 9 | To securely report a vulnerability, please [open an advisory on GitHub](https://github.com/kaito-project/aikit/security/advisories/new). This form is also accessible when [submitting a new issue](https://github.com/kaito-project/aikit/issues/new/choose). 10 | -------------------------------------------------------------------------------- /charts/aikit/Chart.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v2 2 | name: aikit 3 | description: Kubernetes Helm chart to deploy AIKit LLM images 4 | 5 | # A chart can be either an 'application' or a 'library' chart. 6 | # 7 | # Application charts are a collection of templates that can be packaged into versioned archives 8 | # to be deployed. 9 | # 10 | # Library charts provide useful utilities or functions for the chart developer. They're included as 11 | # a dependency of application charts to inject those utilities and functions into the rendering 12 | # pipeline. Library charts do not define any templates and therefore cannot be deployed. 13 | type: application 14 | 15 | # This is the chart version. This version number should be incremented each time you make changes 16 | # to the chart and its templates, including the app version. 17 | # Versions are expected to follow Semantic Versioning (https://semver.org/) 18 | version: 0.19.2 19 | 20 | # This is the version number of the application being deployed. This version number should be 21 | # incremented each time you make changes to the application. Versions are not expected to 22 | # follow Semantic Versioning. They should reflect the version the application is using. 23 | # It is recommended to use it with quotes. 
24 | appVersion: v0.19.2 25 | -------------------------------------------------------------------------------- /charts/aikit/templates/NOTES.txt: -------------------------------------------------------------------------------- 1 | Access AIKit WebUI or API by running the following commands: 2 | 3 | - Port forward the service to your local machine: 4 | 5 | kubectl --namespace {{ .Release.Namespace }} port-forward service/{{ (include "aikit.fullname" .) }} 8080:{{ .Values.service.port }} & 6 | 7 | - Visit http://127.0.0.1:8080/chat to access the WebUI 8 | 9 | - Access the OpenAI API compatible endpoint with: 10 | 11 | # replace this with the model name you want to use 12 | export MODEL_NAME="llama-3-8b-instruct" 13 | curl http://127.0.0.1:8080/v1/chat/completions -H "Content-Type: application/json" -d "{\"model\": \"${MODEL_NAME}\", \"messages\": [{\"role\": \"user\", \"content\": \"what is the meaning of life?\"}]}" 14 | -------------------------------------------------------------------------------- /charts/aikit/templates/_helpers.tpl: -------------------------------------------------------------------------------- 1 | {{/* 2 | Expand the name of the chart. 3 | */}} 4 | {{- define "aikit.name" -}} 5 | {{- default .Chart.Name .Values.nameOverride | trunc 63 | trimSuffix "-" }} 6 | {{- end }} 7 | 8 | {{/* 9 | Create a default fully qualified app name. 10 | We truncate at 63 chars because some Kubernetes name fields are limited to this (by the DNS naming spec). 11 | If release name contains chart name it will be used as a full name. 12 | */}} 13 | {{- define "aikit.fullname" -}} 14 | {{- if .Values.fullnameOverride }} 15 | {{- .Values.fullnameOverride | trunc 63 | trimSuffix "-" }} 16 | {{- else }} 17 | {{- $name := default .Chart.Name .Values.nameOverride }} 18 | {{- if contains $name .Release.Name }} 19 | {{- .Release.Name | trunc 63 | trimSuffix "-" }} 20 | {{- else }} 21 | {{- printf "%s-%s" .Release.Name $name | trunc 63 | trimSuffix "-" }} 22 | {{- end }} 23 | {{- end }} 24 | {{- end }} 25 | 26 | {{/* 27 | Create chart name and version as used by the chart label. 28 | */}} 29 | {{- define "aikit.chart" -}} 30 | {{- printf "%s-%s" .Chart.Name .Chart.Version | replace "+" "_" | trunc 63 | trimSuffix "-" }} 31 | {{- end }} 32 | 33 | {{/* 34 | Common labels 35 | */}} 36 | {{- define "aikit.labels" -}} 37 | helm.sh/chart: {{ include "aikit.chart" . }} 38 | {{ include "aikit.selectorLabels" . }} 39 | {{- if .Chart.AppVersion }} 40 | app.kubernetes.io/version: {{ .Chart.AppVersion | quote }} 41 | {{- end }} 42 | app.kubernetes.io/managed-by: {{ .Release.Service }} 43 | {{- end }} 44 | 45 | {{/* 46 | Selector labels 47 | */}} 48 | {{- define "aikit.selectorLabels" -}} 49 | app.kubernetes.io/name: {{ include "aikit.name" . }} 50 | app.kubernetes.io/instance: {{ .Release.Name }} 51 | {{- end }} 52 | 53 | {{/* 54 | Create the name of the service account to use 55 | */}} 56 | {{- define "aikit.serviceAccountName" -}} 57 | {{- if .Values.serviceAccount.create }} 58 | {{- default (include "aikit.fullname" .) .Values.serviceAccount.name }} 59 | {{- else }} 60 | {{- default "default" .Values.serviceAccount.name }} 61 | {{- end }} 62 | {{- end }} 63 | -------------------------------------------------------------------------------- /charts/aikit/templates/deployment.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: apps/v1 2 | kind: Deployment 3 | metadata: 4 | name: {{ include "aikit.fullname" . }} 5 | labels: 6 | {{- include "aikit.labels" . 
| nindent 4 }} 7 | spec: 8 | {{- if not .Values.autoscaling.enabled }} 9 | replicas: {{ .Values.replicaCount }} 10 | {{- end }} 11 | selector: 12 | matchLabels: 13 | {{- include "aikit.selectorLabels" . | nindent 6 }} 14 | template: 15 | metadata: 16 | {{- with .Values.podAnnotations }} 17 | annotations: 18 | {{- toYaml . | nindent 8 }} 19 | {{- end }} 20 | labels: 21 | {{- include "aikit.labels" . | nindent 8 }} 22 | {{- with .Values.podLabels }} 23 | {{- toYaml . | nindent 8 }} 24 | {{- end }} 25 | spec: 26 | {{- with .Values.imagePullSecrets }} 27 | imagePullSecrets: 28 | {{- toYaml . | nindent 8 }} 29 | {{- end }} 30 | securityContext: 31 | {{- toYaml .Values.podSecurityContext | nindent 8 }} 32 | containers: 33 | - name: {{ .Chart.Name }} 34 | securityContext: 35 | {{- if .Values.enableRuntimeDefaultSeccompProfile }} 36 | seccompProfile: 37 | type: RuntimeDefault 38 | {{- end }} 39 | {{- toYaml .Values.securityContext | nindent 12 }} 40 | image: "{{ .Values.image.repository }}:{{ .Values.image.tag | default .Chart.AppVersion }}" 41 | imagePullPolicy: {{ .Values.image.pullPolicy }} 42 | ports: 43 | - name: http 44 | containerPort: 8080 45 | protocol: TCP 46 | livenessProbe: 47 | {{- toYaml .Values.livenessProbe | nindent 12 }} 48 | readinessProbe: 49 | {{- toYaml .Values.readinessProbe | nindent 12 }} 50 | resources: 51 | {{- toYaml .Values.resources | nindent 12 }} 52 | {{- with .Values.nodeSelector }} 53 | nodeSelector: 54 | {{- toYaml . | nindent 8 }} 55 | {{- end }} 56 | {{- with .Values.affinity }} 57 | affinity: 58 | {{- toYaml . | nindent 8 }} 59 | {{- end }} 60 | {{- with .Values.tolerations }} 61 | tolerations: 62 | {{- toYaml . | nindent 8 }} 63 | {{- end }} 64 | -------------------------------------------------------------------------------- /charts/aikit/templates/hpa.yaml: -------------------------------------------------------------------------------- 1 | {{- if .Values.autoscaling.enabled }} 2 | apiVersion: autoscaling/v2 3 | kind: HorizontalPodAutoscaler 4 | metadata: 5 | name: {{ include "aikit.fullname" . }} 6 | labels: 7 | {{- include "aikit.labels" . | nindent 4 }} 8 | spec: 9 | scaleTargetRef: 10 | apiVersion: apps/v1 11 | kind: Deployment 12 | name: {{ include "aikit.fullname" . }} 13 | minReplicas: {{ .Values.autoscaling.minReplicas }} 14 | maxReplicas: {{ .Values.autoscaling.maxReplicas }} 15 | metrics: 16 | {{- if .Values.autoscaling.targetCPUUtilizationPercentage }} 17 | - type: Resource 18 | resource: 19 | name: cpu 20 | target: 21 | type: Utilization 22 | averageUtilization: {{ .Values.autoscaling.targetCPUUtilizationPercentage }} 23 | {{- end }} 24 | {{- if .Values.autoscaling.targetMemoryUtilizationPercentage }} 25 | - type: Resource 26 | resource: 27 | name: memory 28 | target: 29 | type: Utilization 30 | averageUtilization: {{ .Values.autoscaling.targetMemoryUtilizationPercentage }} 31 | {{- end }} 32 | {{- end }} 33 | -------------------------------------------------------------------------------- /charts/aikit/templates/namespace-post-install.yaml: -------------------------------------------------------------------------------- 1 | {{- if .Values.postInstall.labelNamespace.enabled }} 2 | apiVersion: batch/v1 3 | kind: Job 4 | metadata: 5 | name: aikit-update-namespace-label 6 | namespace: {{ .Release.Namespace | quote }} 7 | labels: 8 | {{- include "aikit.labels" . 
| nindent 4 }} 9 | annotations: 10 | "helm.sh/hook": post-install 11 | "helm.sh/hook-weight": "-5" 12 | "helm.sh/hook-delete-policy": hook-succeeded,before-hook-creation 13 | {{- if .Values.postInstall.labelNamespace.extraAnnotations }} 14 | {{- toYaml .Values.postInstall.labelNamespace.extraAnnotations | trim | nindent 4 }} 15 | {{- end }} 16 | spec: 17 | template: 18 | metadata: 19 | annotations: 20 | {{- toYaml .Values.podAnnotations | trim | nindent 8 }} 21 | spec: 22 | restartPolicy: OnFailure 23 | {{- if .Values.postInstall.labelNamespace.priorityClassName }} 24 | priorityClassName: {{ .Values.postInstall.labelNamespace.priorityClassName }} 25 | {{- end }} 26 | {{- if .Values.postInstall.labelNamespace.image.pullSecrets }} 27 | imagePullSecrets: 28 | {{- .Values.postInstall.labelNamespace.image.pullSecrets | toYaml | nindent 12 }} 29 | {{- end }} 30 | serviceAccount: aikit-update-namespace-label 31 | containers: 32 | - name: kubectl-label 33 | image: "{{ .Values.postInstall.labelNamespace.image.repository }}:{{ .Values.postInstall.labelNamespace.image.tag }}" 34 | imagePullPolicy: {{ .Values.postInstall.labelNamespace.image.pullPolicy }} 35 | args: 36 | - label 37 | - ns 38 | - {{ .Release.Namespace }} 39 | {{- range .Values.postInstall.labelNamespace.podSecurity }} 40 | - {{ . }} 41 | {{- end }} 42 | - --overwrite 43 | resources: 44 | {{- toYaml .Values.postInstall.resources | nindent 12 }} 45 | securityContext: 46 | {{- if .Values.enableRuntimeDefaultSeccompProfile }} 47 | seccompProfile: 48 | type: RuntimeDefault 49 | {{- end }} 50 | {{- toYaml .Values.postInstall.securityContext | nindent 12 }} 51 | {{- with .Values.postInstall }} 52 | nodeSelector: 53 | {{- toYaml .nodeSelector | nindent 8 }} 54 | affinity: 55 | {{- toYaml .affinity | nindent 8 }} 56 | tolerations: 57 | {{- toYaml .tolerations | nindent 8 }} 58 | {{- end }} 59 | --- 60 | apiVersion: v1 61 | kind: ServiceAccount 62 | metadata: 63 | name: aikit-update-namespace-label 64 | namespace: {{ .Release.Namespace | quote }} 65 | labels: 66 | {{- include "aikit.labels" . | nindent 4 }} 67 | annotations: 68 | "helm.sh/hook": post-install 69 | "helm.sh/hook-weight": "-5" 70 | "helm.sh/hook-delete-policy": hook-succeeded,before-hook-creation 71 | --- 72 | {{- if .Values.rbac.create }} 73 | apiVersion: rbac.authorization.k8s.io/v1 74 | kind: ClusterRole 75 | metadata: 76 | name: aikit-update-namespace-label 77 | labels: 78 | {{- include "aikit.labels" . | nindent 4 }} 79 | annotations: 80 | "helm.sh/hook": post-install 81 | "helm.sh/hook-weight": "-5" 82 | "helm.sh/hook-delete-policy": hook-succeeded,before-hook-creation 83 | rules: 84 | - apiGroups: 85 | - "" 86 | resources: 87 | - namespaces 88 | verbs: 89 | - get 90 | - update 91 | - patch 92 | resourceNames: 93 | - {{ .Release.Namespace }} 94 | {{- range .Values.postInstall.labelNamespace.extraNamespaces }} 95 | - {{ . }} 96 | {{- end }} 97 | {{- with .Values.postInstall.labelNamespace.extraRules }} 98 | {{- toYaml . | nindent 2 }} 99 | {{- end }} 100 | {{- end }} 101 | --- 102 | {{- if .Values.rbac.create }} 103 | apiVersion: rbac.authorization.k8s.io/v1 104 | kind: ClusterRoleBinding 105 | metadata: 106 | name: aikit-update-namespace-label 107 | labels: 108 | {{- include "aikit.labels" . 
| nindent 4 }} 109 | annotations: 110 | "helm.sh/hook": post-install 111 | "helm.sh/hook-weight": "-5" 112 | "helm.sh/hook-delete-policy": hook-succeeded,before-hook-creation 113 | roleRef: 114 | apiGroup: rbac.authorization.k8s.io 115 | kind: ClusterRole 116 | name: aikit-update-namespace-label 117 | subjects: 118 | - kind: ServiceAccount 119 | name: aikit-update-namespace-label 120 | namespace: {{ .Release.Namespace | quote }} 121 | {{- end }} 122 | {{- end }} 123 | -------------------------------------------------------------------------------- /charts/aikit/templates/service.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | kind: Service 3 | metadata: 4 | name: {{ include "aikit.fullname" . }} 5 | labels: 6 | {{- include "aikit.labels" . | nindent 4 }} 7 | spec: 8 | type: {{ .Values.service.type }} 9 | ports: 10 | - port: {{ .Values.service.port }} 11 | targetPort: 8080 12 | protocol: TCP 13 | name: http 14 | selector: 15 | {{- include "aikit.selectorLabels" . | nindent 4 }} 16 | -------------------------------------------------------------------------------- /charts/aikit/values.yaml: -------------------------------------------------------------------------------- 1 | image: 2 | repository: ghcr.io/kaito-project/aikit/llama3.1 3 | tag: "8b" 4 | pullPolicy: IfNotPresent 5 | 6 | replicaCount: 1 7 | 8 | imagePullSecrets: [] 9 | nameOverride: "" 10 | fullnameOverride: "" 11 | podAnnotations: {} 12 | podLabels: {} 13 | 14 | podSecurityContext: 15 | fsGroup: 999 16 | supplementalGroups: 17 | - 999 18 | 19 | securityContext: 20 | allowPrivilegeEscalation: false 21 | capabilities: 22 | drop: 23 | - ALL 24 | readOnlyRootFilesystem: false # aikit extracts backends during runtime 25 | runAsGroup: 999 26 | runAsNonRoot: true 27 | runAsUser: 1000 28 | 29 | service: 30 | type: ClusterIP 31 | port: 8080 32 | 33 | resources: 34 | limits: 35 | memory: 8Gi 36 | # nvidia.com/gpu: "1" 37 | requests: 38 | cpu: 100m 39 | memory: 128Mi 40 | # nvidia.com/gpu: "1" 41 | 42 | livenessProbe: 43 | httpGet: 44 | path: / 45 | port: http 46 | readinessProbe: 47 | httpGet: 48 | path: / 49 | port: http 50 | 51 | autoscaling: 52 | enabled: false 53 | minReplicas: 1 54 | maxReplicas: 100 55 | targetCPUUtilizationPercentage: 80 56 | # targetMemoryUtilizationPercentage: 80 57 | 58 | nodeSelector: {} 59 | 60 | affinity: {} 61 | 62 | rbac: 63 | create: true 64 | 65 | enableRuntimeDefaultSeccompProfile: true 66 | postInstall: 67 | resources: {} 68 | affinity: {} 69 | tolerations: [] 70 | nodeSelector: {kubernetes.io/os: linux} 71 | securityContext: 72 | allowPrivilegeEscalation: false 73 | capabilities: 74 | drop: 75 | - ALL 76 | readOnlyRootFilesystem: true 77 | runAsGroup: 999 78 | runAsNonRoot: true 79 | runAsUser: 1000 80 | labelNamespace: 81 | enabled: true 82 | image: 83 | repository: registry.k8s.io/kubectl 84 | tag: v1.34.1 85 | pullPolicy: IfNotPresent 86 | pullSecrets: [] 87 | podSecurity: ["pod-security.kubernetes.io/audit=restricted", 88 | "pod-security.kubernetes.io/audit-version=latest", 89 | "pod-security.kubernetes.io/warn=restricted", 90 | "pod-security.kubernetes.io/warn-version=latest", 91 | "pod-security.kubernetes.io/enforce=restricted", 92 | "pod-security.kubernetes.io/enforce-version=v1.30"] 93 | extraAnnotations: {} 94 | extraRules: [] 95 | priorityClassName: "" 96 | -------------------------------------------------------------------------------- /cmd/frontend/main.go: 
-------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "os" 5 | 6 | "github.com/kaito-project/aikit/pkg/build" 7 | "github.com/moby/buildkit/frontend/gateway/grpcclient" 8 | "github.com/moby/buildkit/util/appcontext" 9 | "github.com/moby/buildkit/util/bklog" 10 | "github.com/sirupsen/logrus" 11 | "google.golang.org/grpc/grpclog" 12 | ) 13 | 14 | func main() { 15 | bklog.L.Logger.SetOutput(os.Stderr) 16 | grpclog.SetLoggerV2(grpclog.NewLoggerV2WithVerbosity(bklog.L.WriterLevel(logrus.InfoLevel), bklog.L.WriterLevel(logrus.WarnLevel), bklog.L.WriterLevel(logrus.ErrorLevel), 1)) 17 | 18 | ctx := appcontext.Context() 19 | 20 | if err := grpcclient.RunFromEnvironment(ctx, build.Build); err != nil { 21 | bklog.L.WithError(err).Fatal("error running frontend") 22 | os.Exit(1) 23 | } 24 | } 25 | -------------------------------------------------------------------------------- /demo/demo.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | . third_party/demo-magic/demo-magic.sh 4 | 5 | clear 6 | export DEMO_PROMPT="${GREEN}➜ ${COLOR_RESET}" 7 | 8 | echo "✨ In this demo, we are going to start by fine-tuning a model and then deploying the model as a minimal container!" 9 | 10 | echo "" 11 | 12 | echo "👷‍ First, we are going to create a new builder" 13 | 14 | echo "" 15 | 16 | pei "docker buildx create --name aikit-builder --use --buildkitd-flags '--allow-insecure-entitlement security.insecure'" 17 | 18 | echo "" 19 | 20 | echo "🗃️ Create a configuration for the fine-tuning. We are going to use a Mistral model and fine-tune it using the OpenHermes dataset." 21 | 22 | cat > aikit-finetune.yaml << EOF 23 | #syntax=ghcr.io/kaito-project/aikit/aikit:latest 24 | apiVersion: v1alpha1 25 | baseModel: unsloth/mistral-7b-instruct-v0.2-bnb-4bit 26 | datasets: 27 | - source: "teknium/openhermes" 28 | type: "alpaca" 29 | config: 30 | unsloth: 31 | EOF 32 | 33 | echo "" 34 | 35 | pei "bat aikit-finetune.yaml" 36 | 37 | echo "" 38 | 39 | echo "🎵 Starting the fine-tuning process using the above configuration file; the fine-tuned model will be saved in the _output folder." 40 | 41 | echo "" 42 | 43 | pei "docker buildx build --allow security.insecure --file 'aikit-finetune.yaml' --output '_output' --target unsloth --progress plain ." 44 | 45 | echo "" 46 | 47 | echo "✅ We have finished fine-tuning the model. Let's look at the output..." 48 | 49 | echo "" 50 | 51 | pei "ls -al _output" 52 | 53 | echo "" 54 | 55 | echo "📦 Now that we have a fine-tuned model, we can deploy it as a minimal container." 56 | 57 | echo "" 58 | 59 | echo "📃 We'll start by creating a basic inference configuration file for the deployment." 60 | 61 | cat > aikit-inference.yaml << EOF 62 | #syntax=ghcr.io/kaito-project/aikit/aikit:latest 63 | debug: true 64 | apiVersion: v1alpha1 65 | runtime: cuda 66 | models: 67 | - name: mistral-finetuned 68 | source: aikit-model-q4_k_m.gguf 69 | promptTemplates: 70 | - name: instruct 71 | template: | 72 | Below is an instruction that describes a task. Write a response that appropriately completes the request. Keep your responses concise.
73 | 74 | ### Instruction: 75 | {{.Input}} 76 | 77 | ### Response: 78 | config: | 79 | - name: mistral-finetuned 80 | parameters: 81 | model: aikit-model-q4_k_m.gguf 82 | context_size: 4096 83 | gpu_layers: 35 84 | f16: true 85 | mmap: true 86 | template: 87 | chat: instruct 88 | EOF 89 | 90 | pei "bat aikit-inference.yaml" 91 | 92 | echo "" 93 | 94 | echo "🏗️ We can now build a minimal container for the model using the configuration file." 95 | 96 | echo "" 97 | 98 | pei "docker buildx build -t mistral-finetuned -f aikit-inference.yaml --load --progress plain _output" 99 | 100 | echo "" 101 | 102 | echo "🏃 We have finished building the minimal container. Let's start the container and test it." 103 | 104 | echo "" 105 | 106 | pei "docker run --name mistral-finetuned -d --rm -p 8080:8080 --gpus all mistral-finetuned" 107 | 108 | echo "" 109 | 110 | echo "🧪 We can now test the container using a sample query. Since this is OpenAI API compatible, you can use it as a drop-in replacement for any application that uses the OpenAI API." 111 | 112 | echo "" 113 | 114 | pei "curl http://localhost:8080/v1/chat/completions -H \"Content-Type: application/json\" -d '{\"model\": \"mistral-finetuned\", \"messages\": [{\"role\": \"user\", \"content\": \"Generate a list of 10 words that start with ab\"}]}'" 115 | 116 | echo "" 117 | 118 | echo "🙌 We have successfully deployed the fine-tuned model as a minimal container and verified it! We can now stop the container if we wish." 119 | 120 | echo "" 121 | 122 | pei "docker stop mistral-finetuned" 123 | 124 | echo "" 125 | 126 | echo "❤️ In this demo, we have shown how to fine-tune a model and deploy it as a minimal container using AIKit. Thank you for watching!" 127 | 128 | echo "" 129 | 130 | # pei "docker buildx rm aikit-builder" 131 | -------------------------------------------------------------------------------- /demo/third_party/demo-magic/README.md: -------------------------------------------------------------------------------- 1 | # Demo Magic 2 | 3 | demo-magic.sh is a handy shell script that enables you to script repeatable demos in a bash environment so you don't have to type as you present. Rather than trying to type commands when presenting you simply script them and let demo-magic.sh run them for you. 4 | 5 | ## Features 6 | - Simulates typing. It looks like you are actually typing out commands 7 | - Allows you to actually run commands or pretend to do so. 8 | - Can hide commands from presentation. Useful for behind the scenes stuff that doesn't need to be shown. 9 | 10 | ## Functions 11 | 12 | ### pe 13 | Print and Execute. This function will simulate typing whatever you give it. It will then pause until you press <kbd>ENTER</kbd>. After your keypress it will run the command. 14 | 15 | ```bash 16 | #!/bin/bash 17 | 18 | pe "ls -l" 19 | ``` 20 | 21 | ### p 22 | Print only. This function will simulate typing whatever you give it. It will not run the command. After typing it will pause until you press <kbd>ENTER</kbd>. After your keypress it will move on to the next instruction in your script. 23 | 24 | ```bash 25 | #!/bin/bash 26 | 27 | p "ls -l" 28 | ``` 29 | 30 | ### wait 31 | Waits for the user to press <kbd>ENTER</kbd>. 32 | 33 | If `PROMPT_TIMEOUT` is defined and > 0 the demo will automatically proceed after that number of seconds has passed.
34 | 35 | ```bash 36 | #!/bin/bash 37 | 38 | # Will wait until user presses enter 39 | PROMPT_TIMEOUT=0 40 | wait 41 | 42 | # Will wait max 5 seconds until user presses 43 | PROMPT_TIMEOUT=5 44 | wait 45 | 46 | ``` 47 | 48 | ### cmd 49 | Enters script into interactive mode and allows newly typed commands to be executed within the script 50 | ``` 51 | #!/bin/bash 52 | 53 | cmd 54 | ``` 55 | 56 | ## Getting Started 57 | Create a shell script and include demo-magic.sh 58 | 59 | ```bash 60 | #!/bin/bash 61 | 62 | ######################## 63 | # include the magic 64 | ######################## 65 | . demo-magic.sh 66 | 67 | # hide the evidence 68 | clear 69 | 70 | # Put your stuff here 71 | ``` 72 | 73 | Then use the handy functions to run through your demo. 74 | 75 | ## Command line usage 76 | demo-magic.sh exposes 4 options out of the box to your script. 77 | - `-d` - disable simulated typing. Useful for debugging 78 | - `-h` - prints the usage text 79 | - `-n` - set no default waiting after `p` and `pe` functions 80 | - `-w` - set a maximum wait timeout (in seconds) after `p` and `pe` functions 81 | 82 | ```bash 83 | $ ./my-demo.sh -h 84 | 85 | Usage: ./my-demo.sh [options] 86 | 87 | Where options is one or more of: 88 | -h Prints Help text 89 | -d Debug mode. Disables simulated typing 90 | -n No wait 91 | -w Waits max the given amount of seconds before proceeding with demo (e.g. `-w5`) 92 | ``` 93 | 94 | ## Useful Tricks 95 | 96 | ### Faking network connections 97 | Network connections during demos are often unreliable. Try and fake whatever commands would rely on a network connection. For example: Instead of trying to install node modules in a node.js application you can fake it. You can install the node_modules at home on your decent network. Then rename the directory and pretend to install it later by symlinking. If you want to be thorough you can capture the output of npm install into a log file then cat it out later to simulate the install. 98 | 99 | ```bash 100 | #!/bin/bash 101 | 102 | ######################## 103 | # include the magic 104 | ######################## 105 | . demo-magic.sh 106 | 107 | # hide the evidence 108 | clear 109 | 110 | # this command is typed and executed 111 | pe "cd my-app" 112 | 113 | # this command is merely typed. Not executed 114 | p "npm install" 115 | 116 | # this command runs behind the scenes 117 | ln -s cached_node_modules node_modules 118 | 119 | # cat out a log file that captures a previous successful node modules install 120 | cat node-modules-install.log 121 | 122 | # now type and run the command to start your app 123 | pe "node index.js" 124 | ``` 125 | 126 | ### No waiting 127 | The -n _no wait_ option can be useful if you want to print and execute multiple commands. 128 | 129 | ```bash 130 | # include demo-magic 131 | . demo-magic.sh -n 132 | 133 | # add multiple commands 134 | pe 'git status' 135 | pe 'git log --oneline --decorate -n 20' 136 | ``` 137 | 138 | However, this will oblige you to define your waiting points manually, e.g. 139 | ```bash 140 | ...
141 | # define waiting points 142 | pe 'git status' 143 | pe 'git log --oneline --decorate -n 20' 144 | wait 145 | pe 'git pull' 146 | pe 'git log --oneline --decorate -n 20' 147 | wait 148 | ``` 149 | -------------------------------------------------------------------------------- /demo/third_party/demo-magic/license.txt: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | Copyright (c) 2015 Paxton Hare 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /go.mod: -------------------------------------------------------------------------------- 1 | module github.com/kaito-project/aikit 2 | 3 | go 1.24.0 4 | 5 | toolchain go1.24.4 6 | 7 | require ( 8 | github.com/containerd/platforms v1.0.0-rc.1 9 | github.com/moby/buildkit v0.23.2 10 | github.com/opencontainers/go-digest v1.0.0 11 | github.com/opencontainers/image-spec v1.1.1 12 | github.com/pkg/errors v0.9.1 13 | github.com/sirupsen/logrus v1.9.3 14 | golang.org/x/sync v0.17.0 15 | google.golang.org/grpc v1.75.1 16 | gopkg.in/yaml.v2 v2.4.0 17 | ) 18 | 19 | require ( 20 | github.com/agext/levenshtein v1.2.3 // indirect 21 | github.com/containerd/containerd/v2 v2.1.3 // indirect 22 | github.com/containerd/errdefs v1.0.0 // indirect 23 | github.com/containerd/log v0.1.0 // indirect 24 | github.com/containerd/ttrpc v1.2.7 // indirect 25 | github.com/containerd/typeurl/v2 v2.2.3 // indirect 26 | github.com/distribution/reference v0.6.0 // indirect 27 | github.com/docker/go-connections v0.5.0 // indirect 28 | github.com/docker/go-units v0.5.0 // indirect 29 | github.com/felixge/httpsnoop v1.0.4 // indirect 30 | github.com/go-logr/logr v1.4.3 // indirect 31 | github.com/go-logr/stdr v1.2.2 // indirect 32 | github.com/gogo/protobuf v1.3.2 // indirect 33 | github.com/golang/protobuf v1.5.4 // indirect 34 | github.com/google/shlex v0.0.0-20191202100458-e7afc7fbc510 // indirect 35 | github.com/google/uuid v1.6.0 // indirect 36 | github.com/hashicorp/errwrap v1.1.0 // indirect 37 | github.com/hashicorp/go-multierror v1.1.1 // indirect 38 | github.com/in-toto/in-toto-golang v0.9.0 // indirect 39 | github.com/klauspost/compress v1.18.0 // indirect 40 | github.com/moby/docker-image-spec v1.3.1 // indirect 41 | github.com/moby/locker v1.0.1 // indirect 42 | github.com/moby/patternmatcher v0.6.0 // indirect 43 | github.com/moby/sys/signal v0.7.1 // indirect 44 | 
github.com/planetscale/vtprotobuf v0.6.1-0.20240319094008-0393e58bdf10 // indirect 45 | github.com/secure-systems-lab/go-securesystemslib v0.8.0 // indirect 46 | github.com/shibumi/go-pathspec v1.3.0 // indirect 47 | github.com/tonistiigi/dchapes-mode v0.0.0-20250318174251-73d941a28323 // indirect 48 | github.com/tonistiigi/fsutil v0.0.0-20250605211040-586307ad452f // indirect 49 | github.com/tonistiigi/go-csvvalue v0.0.0-20240814133006-030d3b2625d0 // indirect 50 | go.opentelemetry.io/auto/sdk v1.1.0 // indirect 51 | go.opentelemetry.io/contrib/instrumentation/google.golang.org/grpc/otelgrpc v0.60.0 // indirect 52 | go.opentelemetry.io/contrib/instrumentation/net/http/httptrace/otelhttptrace v0.56.0 // indirect 53 | go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.60.0 // indirect 54 | go.opentelemetry.io/otel v1.37.0 // indirect 55 | go.opentelemetry.io/otel/metric v1.37.0 // indirect 56 | go.opentelemetry.io/otel/sdk v1.37.0 // indirect 57 | go.opentelemetry.io/otel/trace v1.37.0 // indirect 58 | golang.org/x/crypto v0.39.0 // indirect 59 | golang.org/x/net v0.41.0 // indirect 60 | golang.org/x/sys v0.33.0 // indirect 61 | golang.org/x/text v0.26.0 // indirect 62 | google.golang.org/genproto/googleapis/rpc v0.0.0-20250707201910-8d1bb00bc6a7 // indirect 63 | google.golang.org/protobuf v1.36.6 // indirect 64 | ) 65 | -------------------------------------------------------------------------------- /models/aikitfile.yaml: -------------------------------------------------------------------------------- 1 | #syntax=ghcr.io/kaito-project/aikit/aikit:latest 2 | apiVersion: v1alpha1 3 | debug: true 4 | -------------------------------------------------------------------------------- /models/codestral-22b.yaml: -------------------------------------------------------------------------------- 1 | #syntax=ghcr.io/kaito-project/aikit/aikit:latest 2 | apiVersion: v1alpha1 3 | debug: true 4 | runtime: cuda 5 | models: 6 | - name: codestral-22b 7 | source: https://huggingface.co/lmstudio-community/Codestral-22B-v0.1-GGUF/resolve/main/Codestral-22B-v0.1-Q4_K_M.gguf 8 | sha256: 003e48ed892850b80994fcddca2bd6b833b092a4ef2db2853c33a3144245e06c 9 | promptTemplates: 10 | - name: instruct 11 | template: | 12 | [INST]{{ if .SystemPrompt }}{{ .SystemPrompt }}{{ end }} {{ .Input }}[/INST] 13 | config: | 14 | - name: codestral-22b 15 | backend: llama 16 | parameters: 17 | model: Codestral-22B-v0.1-Q4_K_M.gguf 18 | context_size: 8192 19 | template: 20 | chat: instruct 21 | completion: instruct 22 | stopwords: 23 | - \"[INST]\" 24 | - \"[/INST]\" 25 | - \"[PREFIX]\" 26 | - \"[MIDDLE]\" 27 | - \"[SUFFIX]\" 28 | f16: true 29 | mmap: true 30 | -------------------------------------------------------------------------------- /models/flux-1-dev.yaml: -------------------------------------------------------------------------------- 1 | #syntax=ghcr.io/kaito-project/aikit/aikit:latest 2 | apiVersion: v1alpha1 3 | debug: true 4 | runtime: cuda 5 | backends: 6 | - diffusers 7 | config: | 8 | - name: flux-1-dev 9 | backend: diffusers 10 | parameters: 11 | model: ChuckMcSneed/FLUX.1-dev 12 | diffusers: 13 | cfg_scale: 0 14 | cuda: true 15 | enable_parameters: num_inference_steps 16 | pipeline_type: FluxPipeline 17 | f16: true 18 | step: 25 19 | -------------------------------------------------------------------------------- /models/gemma-2-2b-instruct.yaml: -------------------------------------------------------------------------------- 1 | #syntax=ghcr.io/kaito-project/aikit/aikit:latest 2 | apiVersion: v1alpha1 
3 | debug: true 4 | runtime: cuda 5 | models: 6 | - name: gemma-2-2b-instruct 7 | source: https://huggingface.co/lmstudio-community/gemma-2-2b-it-GGUF/resolve/main/gemma-2-2b-it-Q4_K_M.gguf 8 | sha256: e0aee85060f168f0f2d8473d7ea41ce2f3230c1bc1374847505ea599288a7787 9 | promptTemplates: 10 | - name: chatMsg 11 | template: | 12 | <start_of_turn>{{if eq .RoleName \"assistant\" }}model{{else}}{{ .RoleName }}{{end}} 13 | {{ if .Content -}} 14 | {{.Content -}} 15 | {{ end -}}<end_of_turn> 16 | - name: chat 17 | template: | 18 | {{ .Input }} 19 | <start_of_turn>model 20 | - name: completion 21 | template: | 22 | {{ .Input }} 23 | config: | 24 | - name: gemma-2-2b-instruct 25 | backend: llama 26 | parameters: 27 | model: gemma-2-2b-it-Q4_K_M.gguf 28 | context_size: 8192 29 | template: 30 | chat_message: chatMsg 31 | chat: chat 32 | completion: completion 33 | repeat_penalty: 1 34 | stopwords: 35 | - \"<start_of_turn>\" 36 | - \"<end_of_turn>\" 37 | - \"<|im_end|>\" 38 | f16: true 39 | mmap: true 40 | -------------------------------------------------------------------------------- /models/gpt-oss-120b.yaml: -------------------------------------------------------------------------------- 1 | #syntax=ghcr.io/kaito-project/aikit/aikit:latest 2 | apiVersion: v1alpha1 3 | debug: true 4 | runtime: cuda 5 | models: 6 | - name: gpt-oss-120b-1 7 | source: https://huggingface.co/ggml-org/gpt-oss-120b-GGUF/resolve/main/gpt-oss-120b-mxfp4-00001-of-00003.gguf 8 | sha256: e2865eb6c1df7b2ffbebf305cd5d9074d5ccc0fe3b862f98d343a46dad1606f9 9 | - name: gpt-oss-120b-2 10 | source: https://huggingface.co/ggml-org/gpt-oss-120b-GGUF/resolve/main/gpt-oss-120b-mxfp4-00002-of-00003.gguf 11 | sha256: "346492f65891fb27cac5c74a8c07626cbfeb4211cd391ec4de37dbbe3109a93b" 12 | - name: gpt-oss-120b-3 13 | source: https://huggingface.co/ggml-org/gpt-oss-120b-GGUF/resolve/main/gpt-oss-120b-mxfp4-00003-of-00003.gguf 14 | sha256: "66dca81040933f5a49177e82c479c51319cefb83bd22dad9f06dad45e25f1463" 15 | config: | 16 | - name: gpt-oss-120b 17 | backend: llama-cpp 18 | parameters: 19 | model: gpt-oss-120b-mxfp4-00001-of-00003.gguf 20 | context_size: 8192 21 | f16: true 22 | mmap: true 23 | template: 24 | chat_message: |- 25 | <|start|>{{ if .FunctionCall -}}functions.{{ .FunctionCall.Name }} to=assistant{{ else if eq .RoleName \"assistant\"}}assistant<|channel|>final<|message|>{{else}}{{ .RoleName }}{{end}}<|message|> 26 | {{- if .Content -}} 27 | {{- .Content -}} 28 | {{- end -}} 29 | {{- if .FunctionCall -}} 30 | {{- toJson .FunctionCall -}} 31 | {{- end -}}<|end|> 32 | 33 | function: |- 34 | <|start|>system<|message|>You are ChatGPT, a large language model trained by OpenAI. 
35 | Knowledge cutoff: 2024-06 36 | Current date: {{ now | date \"Mon Jan 2 15:04:05 MST 2006\" }} 37 | 38 | Reasoning: {{if eq .ReasoningEffort \"\"}}medium{{else}}{{.ReasoningEffort}}{{end}} 39 | 40 | # {{with .Metadata}}{{ if ne .system_prompt \"\" }}{{ .system_prompt }}{{ end }}{{else}}You are a friendly and helpful assistant.{{ end }}<|end|>{{- .Input -}}<|start|>assistant 41 | 42 | # Tools 43 | 44 | ## functions 45 | 46 | namespace functions { 47 | {{- range .Functions}} 48 | {{if .Description }} 49 | // {{ .Description }} 50 | {{- end }} 51 | {{- if and .Parameters.Properties (gt (len .Parameters.Properties) 0) }} 52 | type {{ .Name }} = (_: { 53 | {{- range $name, $prop := .Parameters.Properties }} 54 | {{- if $prop.Description }} 55 | // {{ $prop.Description }} 56 | {{- end }} 57 | {{ $name }}: {{ if gt (len $prop.Type) 1 }}{{ range $i, $t := $prop.Type }}{{ if $i }} | {{ end }}{{ $t }}{{ end }}{{ else }}{{ index $prop.Type 0 }}{{ end }}, 58 | {{- end }} 59 | }) => any; 60 | {{- else }} 61 | type {{ .Function.Name }} = () => any; 62 | {{- end }} 63 | {{- end }}{{/* end of range .Functions */}} 64 | } // namespace functions 65 | 66 | # Instructions 67 | 68 | <|end|>{{.Input -}}<|start|>assistant 69 | 70 | chat: |- 71 | <|start|>system<|message|>You are ChatGPT, a large language model trained by OpenAI. 72 | Knowledge cutoff: 2024-06 73 | Current date: {{ now | date \"Mon Jan 2 15:04:05 MST 2006\" }} 74 | 75 | Reasoning: {{if eq .ReasoningEffort \"\"}}medium{{else}}{{.ReasoningEffort}}{{end}} 76 | 77 | # {{with .Metadata}}{{ if ne .system_prompt \"\" }}{{ .system_prompt }}{{ end }}{{else}}You are a friendly and helpful assistant.{{ end }}<|end|>{{- .Input -}}<|start|>assistant 78 | 79 | completion: |- 80 | {{.Input}} 81 | stopwords: 82 | - '<|im_end|>' 83 | - '<dummy32000>' 84 | - '</s>' 85 | - '<|endoftext|>' 86 | - '<|end|>s' 87 | -------------------------------------------------------------------------------- /models/gpt-oss-20b.yaml: -------------------------------------------------------------------------------- 1 | #syntax=ghcr.io/kaito-project/aikit/aikit:latest 2 | apiVersion: v1alpha1 3 | debug: true 4 | runtime: cuda 5 | models: 6 | - name: gpt-oss-20b 7 | source: https://huggingface.co/ggml-org/gpt-oss-20b-GGUF/resolve/main/gpt-oss-20b-mxfp4.gguf 8 | sha256: "be37a636aca0fc1aae0d32325f82f6b4d21495f06823b5fbc1898ae0303e9935" 9 | config: | 10 | - name: gpt-oss-20b 11 | backend: llama-cpp 12 | parameters: 13 | model: gpt-oss-20b-mxfp4.gguf 14 | context_size: 8192 15 | f16: true 16 | mmap: true 17 | template: 18 | chat_message: |- 19 | <|start|>{{ if .FunctionCall -}}functions.{{ .FunctionCall.Name }} to=assistant{{ else if eq .RoleName \"assistant\"}}assistant<|channel|>final<|message|>{{else}}{{ .RoleName }}{{end}}<|message|> 20 | {{- if .Content -}} 21 | {{- .Content -}} 22 | {{- end -}} 23 | {{- if .FunctionCall -}} 24 | {{- toJson .FunctionCall -}} 25 | {{- end -}}<|end|> 26 | 27 | function: |- 28 | <|start|>system<|message|>You are ChatGPT, a large language model trained by OpenAI.
29 | Knowledge cutoff: 2024-06 30 | Current date: {{ now | date \"Mon Jan 2 15:04:05 MST 2006\" }} 31 | 32 | Reasoning: {{if eq .ReasoningEffort \"\"}}medium{{else}}{{.ReasoningEffort}}{{end}} 33 | 34 | # {{with .Metadata}}{{ if ne .system_prompt \"\" }}{{ .system_prompt }}{{ end }}{{else}}You are a friendly and helpful assistant.{{ end }}<|end|>{{- .Input -}}<|start|>assistant 35 | 36 | # Tools 37 | 38 | ## functions 39 | 40 | namespace functions { 41 | {{- range .Functions}} 42 | {{if .Description }} 43 | // {{ .Description }} 44 | {{- end }} 45 | {{- if and .Parameters.Properties (gt (len .Parameters.Properties) 0) }} 46 | type {{ .Name }} = (_: { 47 | {{- range $name, $prop := .Parameters.Properties }} 48 | {{- if $prop.Description }} 49 | // {{ $prop.Description }} 50 | {{- end }} 51 | {{ $name }}: {{ if gt (len $prop.Type) 1 }}{{ range $i, $t := $prop.Type }}{{ if $i }} | {{ end }}{{ $t }}{{ end }}{{ else }}{{ index $prop.Type 0 }}{{ end }}, 52 | {{- end }} 53 | }) => any; 54 | {{- else }} 55 | type {{ .Function.Name }} = () => any; 56 | {{- end }} 57 | {{- end }}{{/* end of range .Functions */}} 58 | } // namespace functions 59 | 60 | # Instructions 61 | 62 | <|end|>{{.Input -}}<|start|>assistant 63 | 64 | chat: |- 65 | <|start|>system<|message|>You are ChatGPT, a large language model trained by OpenAI. 66 | Knowledge cutoff: 2024-06 67 | Current date: {{ now | date \"Mon Jan 2 15:04:05 MST 2006\" }} 68 | 69 | Reasoning: {{if eq .ReasoningEffort \"\"}}medium{{else}}{{.ReasoningEffort}}{{end}} 70 | 71 | # {{with .Metadata}}{{ if ne .system_prompt \"\" }}{{ .system_prompt }}{{ end }}{{else}}You are a friendly and helpful assistant.{{ end }}<|end|>{{- .Input -}}<|start|>assistant 72 | 73 | completion: |- 74 | {{.Input}} 75 | stopwords: 76 | - '<|im_end|>' 77 | - '<dummy32000>' 78 | - '</s>' 79 | - '<|endoftext|>' 80 | - '<|end|>s' 81 | -------------------------------------------------------------------------------- /models/llama-3.1-8b-instruct.yaml: -------------------------------------------------------------------------------- 1 | #syntax=ghcr.io/kaito-project/aikit/aikit:latest 2 | apiVersion: v1alpha1 3 | debug: true 4 | runtime: cuda 5 | models: 6 | - name: llama-3.1-8b-instruct 7 | source: https://huggingface.co/QuantFactory/Meta-Llama-3.1-8B-Instruct-GGUF/resolve/main/Meta-Llama-3.1-8B-Instruct.Q4_K_M.gguf 8 | sha256: "6d86fb9d2910178f5c744234fdf91910e033ef7b03c5e23dcc6d25e98687e5fa" 9 | promptTemplates: 10 | - name: chatMsg 11 | template: | 12 | <|start_header_id|>{{if eq .RoleName \"assistant\"}}assistant{{else if eq .RoleName \"system\"}}system{{else if eq .RoleName \"tool\"}}tool{{else if eq .RoleName \"user\"}}user{{end}}<|end_header_id|> 13 | 14 | {{ if .FunctionCall -}} 15 | Function call: 16 | {{ else if eq .RoleName \"tool\" -}} 17 | Function response: 18 | {{ end -}} 19 | {{ if .Content -}} 20 | {{.Content -}} 21 | {{ else if .FunctionCall -}} 22 | {{ toJson .FunctionCall -}} 23 | {{ end -}} 24 | <|eot_id|> 25 | - name: function 26 | template: | 27 | <|start_header_id|>system<|end_header_id|> 28 | 29 | You have access to the following functions: 30 | 31 | {{range .Functions}} 32 | Use the function '{{.Name}}' to '{{.Description}}' 33 | {{toJson .Parameters}} 34 | {{end}} 35 | 36 | Think very carefully before calling functions.
37 | If you choose to call a function, ONLY reply in the following format with no prefix or suffix:
38 | 
39 | <function=example_function_name>{{`{{\"example_name\": \"example_value\"}}`}}</function>
40 | 
41 | Reminder:
42 | - If looking for real time information use relevant functions before falling back to searching on internet
43 | - Function calls MUST follow the specified format, start with <function= and end with </function>
44 | - Required parameters MUST be specified
45 | - Only call one function at a time
46 | - Put the entire function call reply on one line
47 | <|eot_id|>
48 | {{.Input }}
49 | <|start_header_id|>assistant<|end_header_id|>
50 | - name: chat
51 | template: |
52 | {{.Input }}
53 | <|start_header_id|>assistant<|end_header_id|>
54 | - name: completion
55 | template: |
56 | {{.Input}}
57 | config: |
58 | - name: llama-3.1-8b-instruct
59 | backend: llama
60 | function:
61 | disable_no_action: true
62 | grammar:
63 | disable: true
64 | response_regex:
65 | - <function=(?P<name>\w+)>(?P<arguments>.*)</function>
66 | parameters:
67 | model: Meta-Llama-3.1-8B-Instruct.Q4_K_M.gguf
68 | context_size: 8192
69 | f16: true
70 | template:
71 | chat_message: \"chatMsg\"
72 | function: \"function\"
73 | chat: \"chat\"
74 | completion: \"completion\"
75 | stopwords:
76 | - <|im_end|>
77 | - <dummy32000>
78 | - \"<|eot_id|>\"
79 | - <|end_of_text|>
80 | 
--------------------------------------------------------------------------------
/models/llama-3.2-1b-instruct.yaml:
--------------------------------------------------------------------------------
1 | #syntax=ghcr.io/kaito-project/aikit/aikit:latest
2 | apiVersion: v1alpha1
3 | debug: true
4 | runtime: cuda
5 | models:
6 | - name: llama-3.2-1b-instruct
7 | source: https://huggingface.co/MaziyarPanahi/Llama-3.2-1B-Instruct-GGUF/resolve/main/Llama-3.2-1B-Instruct.Q4_K_M.gguf
8 | sha256: "e4650dd6b45ef456066b11e4927f775eef4dd1e0e8473c3c0f27dd19ee13cc4e"
9 | promptTemplates:
10 | - name: chatMsg
11 | template: |
12 | <|start_header_id|>{{if eq .RoleName \"assistant\"}}assistant{{else if eq .RoleName \"system\"}}system{{else if eq .RoleName \"tool\"}}tool{{else if eq .RoleName \"user\"}}user{{end}}<|end_header_id|>
13 | 
14 | {{ if .FunctionCall -}}
15 | Function call:
16 | {{ else if eq .RoleName \"tool\" -}}
17 | Function response:
18 | {{ end -}}
19 | {{ if .Content -}}
20 | {{.Content -}}
21 | {{ else if .FunctionCall -}}
22 | {{ toJson .FunctionCall -}}
23 | {{ end -}}
24 | <|eot_id|>
25 | - name: function
26 | template: |
27 | <|start_header_id|>system<|end_header_id|>
28 | 
29 | You have access to the following functions:
30 | 
31 | {{range .Functions}}
32 | Use the function '{{.Name}}' to '{{.Description}}'
33 | {{toJson .Parameters}}
34 | {{end}}
35 | 
36 | Think very carefully before calling functions.
37 | If you choose to call a function, ONLY reply in the following format with no prefix or suffix:
38 | 
39 | <function=example_function_name>{{`{{\"example_name\": \"example_value\"}}`}}</function>
40 | 
41 | Reminder:
42 | - If looking for real time information use relevant functions before falling back to searching on internet
43 | - Function calls MUST follow the specified format, start with <function= and end with </function>
44 | - Required parameters MUST be specified
45 | - Only call one function at a time
46 | - Put the entire function call reply on one line
47 | <|eot_id|>
48 | {{.Input }}
49 | <|start_header_id|>assistant<|end_header_id|>
50 | - name: chat
51 | template: |
52 | {{.Input }}
53 | <|start_header_id|>assistant<|end_header_id|>
54 | - name: completion
55 | template: |
56 | {{.Input}}
57 | config: |
58 | - name: llama-3.2-1b-instruct
59 | backend: llama
60 | function:
61 | disable_no_action: true
62 | grammar:
63 | disable: true
64 | response_regex:
65 | - <function=(?P<name>\w+)>(?P<arguments>.*)</function>
66 | parameters:
67 | model: Llama-3.2-1B-Instruct.Q4_K_M.gguf
68 | context_size: 8192
69 | f16: true
70 | template:
71 | chat_message: \"chatMsg\"
72 | function: \"function\"
73 | chat: \"chat\"
74 | completion: \"completion\"
75 | stopwords:
76 | - <|im_end|>
77 | - <dummy32000>
78 | - <|eot_id|>
79 | - <|end_of_text|>
80 | 
--------------------------------------------------------------------------------
/models/llama-3.2-3b-instruct.yaml:
--------------------------------------------------------------------------------
1 | #syntax=ghcr.io/kaito-project/aikit/aikit:latest
2 | apiVersion: v1alpha1
3 | debug: true
4 | runtime: cuda
5 | models:
6 | - name: llama-3.2-3b-instruct
7 | source: https://huggingface.co/MaziyarPanahi/Llama-3.2-3B-Instruct-GGUF/resolve/main/Llama-3.2-3B-Instruct.Q4_K_M.gguf
8 | sha256: "1c323c8ef8b7dd877d40a4138de8bf915884a383b08097b5c20abcb2ae2896d6"
9 | promptTemplates:
10 | - name: chatMsg
11 | template: |
12 | <|start_header_id|>{{if eq .RoleName \"assistant\"}}assistant{{else if eq .RoleName \"system\"}}system{{else if eq .RoleName \"tool\"}}tool{{else if eq .RoleName \"user\"}}user{{end}}<|end_header_id|>
13 | 
14 | {{ if .FunctionCall -}}
15 | Function call:
16 | {{ else if eq .RoleName \"tool\" -}}
17 | Function response:
18 | {{ end -}}
19 | {{ if .Content -}}
20 | {{.Content -}}
21 | {{ else if .FunctionCall -}}
22 | {{ toJson .FunctionCall -}}
23 | {{ end -}}
24 | <|eot_id|>
25 | - name: function
26 | template: |
27 | <|start_header_id|>system<|end_header_id|>
28 | 
29 | You have access to the following functions:
30 | 
31 | {{range .Functions}}
32 | Use the function '{{.Name}}' to '{{.Description}}'
33 | {{toJson .Parameters}}
34 | {{end}}
35 | 
36 | Think very carefully before calling functions.
37 | If you choose to call a function, ONLY reply in the following format with no prefix or suffix:
38 | 
39 | <function=example_function_name>{{`{{\"example_name\": \"example_value\"}}`}}</function>
40 | 
41 | Reminder:
42 | - If looking for real time information use relevant functions before falling back to searching on internet
43 | - Function calls MUST follow the specified format, start with <function= and end with </function>
44 | - Required parameters MUST be specified
45 | - Only call one function at a time
46 | - Put the entire function call reply on one line
47 | <|eot_id|>
48 | {{.Input }}
49 | <|start_header_id|>assistant<|end_header_id|>
50 | - name: chat
51 | template: |
52 | {{.Input }}
53 | <|start_header_id|>assistant<|end_header_id|>
54 | - name: completion
55 | template: |
56 | {{.Input}}
57 | config: |
58 | - name: llama-3.2-3b-instruct
59 | backend: llama
60 | function:
61 | disable_no_action: true
62 | grammar:
63 | disable: true
64 | response_regex:
65 | - <function=(?P<name>\w+)>(?P<arguments>.*)</function>
66 | parameters:
67 | model: Llama-3.2-3B-Instruct.Q4_K_M.gguf
68 | context_size: 8192
69 | f16: true
70 | template:
71 | chat_message: \"chatMsg\"
72 | function: \"function\"
73 | chat: \"chat\"
74 | completion: \"completion\"
75 | stopwords:
76 | - <|im_end|>
77 | - <dummy32000>
78 | - <|eot_id|>
79 | - <|end_of_text|>
80 | 
--------------------------------------------------------------------------------
/models/llama-3.3-70b-instruct.yaml:
--------------------------------------------------------------------------------
1 | #syntax=ghcr.io/kaito-project/aikit/aikit:latest
2 | apiVersion: v1alpha1
3 | debug: true
4 | runtime: cuda
5 | models:
6 | - name: llama-3.3-70b-instruct
7 | source: https://huggingface.co/MaziyarPanahi/Llama-3.3-70B-Instruct-GGUF/resolve/main/Llama-3.3-70B-Instruct.Q4_K_M.gguf
8 | sha256: "4f3b04ecae278bdb0fd545b47c210bc5edf823e5ebf7d41e0b526c81d54b1ff3"
9 | promptTemplates:
10 | - name: chatMsg
11 | template: |
12 | <|start_header_id|>{{if eq .RoleName \"assistant\"}}assistant{{else if eq .RoleName \"system\"}}system{{else if eq .RoleName \"tool\"}}tool{{else if eq .RoleName \"user\"}}user{{end}}<|end_header_id|>
13 | 
14 | {{ if .FunctionCall -}}
15 | Function call:
16 | {{ else if eq .RoleName \"tool\" -}}
17 | Function response:
18 | {{ end -}}
19 | {{ if .Content -}}
20 | {{.Content -}}
21 | {{ else if .FunctionCall -}}
22 | {{ toJson .FunctionCall -}}
23 | {{ end -}}
24 | <|eot_id|>
25 | - name: function
26 | template: |
27 | <|start_header_id|>system<|end_header_id|>
28 | 
29 | You have access to the following functions:
30 | 
31 | {{range .Functions}}
32 | Use the function '{{.Name}}' to '{{.Description}}'
33 | {{toJson .Parameters}}
34 | {{end}}
35 | 
36 | Think very carefully before calling functions.
37 | If you choose to call a function, ONLY reply in the following format with no prefix or suffix:
38 | 
39 | <function=example_function_name>{{`{{\"example_name\": \"example_value\"}}`}}</function>
40 | 
41 | Reminder:
42 | - If looking for real time information use relevant functions before falling back to searching on internet
43 | - Function calls MUST follow the specified format, start with <function= and end with </function>
44 | - Required parameters MUST be specified
45 | - Only call one function at a time
46 | - Put the entire function call reply on one line
47 | <|eot_id|>
48 | {{.Input }}
49 | <|start_header_id|>assistant<|end_header_id|>
50 | - name: chat
51 | template: |
52 | {{.Input }}
53 | <|start_header_id|>assistant<|end_header_id|>
54 | - name: completion
55 | template: |
56 | {{.Input}}
57 | config: |
58 | - name: llama-3.3-70b-instruct
59 | backend: llama
60 | function:
61 | disable_no_action: true
62 | grammar:
63 | disable: true
64 | response_regex:
65 | - <function=(?P<name>\w+)>(?P<arguments>.*)</function>
66 | parameters:
67 | model: Llama-3.3-70B-Instruct.Q4_K_M.gguf
68 | context_size: 8192
69 | f16: true
70 | template:
71 | chat_message: \"chatMsg\"
72 | function: \"function\"
73 | chat: \"chat\"
74 | completion: \"completion\"
75 | stopwords:
76 | - <|im_end|>
77 | - <dummy32000>
78 | - <|eot_id|>
79 | - <|end_of_text|>
80 | 
--------------------------------------------------------------------------------
/models/mixtral-8x7b-instruct.yaml:
--------------------------------------------------------------------------------
1 | #syntax=ghcr.io/kaito-project/aikit/aikit:latest
2 | apiVersion: v1alpha1
3 | debug: true
4 | runtime: cuda
5 | models:
6 | - name: mixtral-8x7b-instruct
7 | source: https://huggingface.co/TheBloke/Mixtral-8x7B-Instruct-v0.1-GGUF/resolve/main/mixtral-8x7b-instruct-v0.1.Q4_K_M.gguf
8 | sha256: 9193684683657e90707087bd1ed19fd0b277ab66358d19edeadc26d6fdec4f53
9 | config: |
10 | - name: mixtral-8x7b-instruct
11 | backend: llama
12 | parameters:
13 | model: mixtral-8x7b-instruct-v0.1.Q4_K_M.gguf
14 | context_size: 4096
15 | f16: true
16 | mmap: true
17 | 
--------------------------------------------------------------------------------
/models/phi-4-14b-instruct.yaml:
--------------------------------------------------------------------------------
1 | #syntax=ghcr.io/kaito-project/aikit/aikit:latest
2 | apiVersion: v1alpha1
3 | debug: true
4 | runtime: cuda
5 | models:
6 | - name: phi-4-14b-instruct
7 | source: https://huggingface.co/unsloth/phi-4-GGUF/resolve/main/phi-4-Q4_K_M.gguf
8 | sha256: 01e1f25b3e6931054c6c2227b06f4969828434eebc299e8e171f55dab6814485
9 | config: |
10 | - name: phi-4-14b-instruct
11 | backend: llama
12 | parameters:
13 | model: phi-4-Q4_K_M.gguf
14 | context_size: 4096
15 | template:
16 | chat_message: |
17 | <|im_start|>{{ .RoleName }}<|im_sep|>
18 | {{.Content}}<|im_end|>
19 | chat: |
20 | {{.Input}}
21 | <|im_start|>assistant<|im_sep|>
22 | completion: |
23 | {{.Input}}
24 | stopwords:
25 | - <|end|>
26 | - <|endoftext|>
27 | - <|im_end|>
28 | f16: true
29 | mmap: true
30 | 
--------------------------------------------------------------------------------
/models/qwq-32b.yaml:
--------------------------------------------------------------------------------
1 | #syntax=ghcr.io/kaito-project/aikit/aikit:latest
2 | apiVersion: v1alpha1
3 | debug: true
4 | runtime: cuda
5 | models:
6 | - name: qwq-32b
7 | source: https://huggingface.co/unsloth/QwQ-32B-GGUF/resolve/main/QwQ-32B-Q4_K_M.gguf
8 | sha256: 
39dc40d34d44d66406e9ad94416585839daa5edc35425f6f6925c35e149aa5f5 9 | config: | 10 | - name: qwq-32b 11 | backend: llama-cpp 12 | parameters: 13 | model: QwQ-32B-Q4_K_M.gguf 14 | top_k: 40 15 | temperature: 0.6 16 | top_p: 0.95 17 | context_size: 8192 18 | repeat_penalty: 1 19 | template: 20 | chat_message: | 21 | <|im_start|>{{ .RoleName }} 22 | {{ if .FunctionCall -}} 23 | Function call: 24 | {{ else if eq .RoleName \"tool\" -}} 25 | Function response: 26 | {{ end -}} 27 | {{ if .Content -}} 28 | {{.Content }} 29 | {{ end -}} 30 | {{ if .FunctionCall -}} 31 | {{toJson .FunctionCall}} 32 | {{ end -}}<|im_end|> 33 | function: | 34 | <|im_start|>system 35 | # Tools 36 | 37 | You may call one or more functions to assist with the user query. 38 | 39 | You are provided with function signatures within <tools></tools> XML tags: 40 | <tools> 41 | {{range .Functions}} 42 | {'type': 'function', 'function': {'name': '{{.Name}}', 'description': '{{.Description}}', 'parameters': {{toJson .Parameters}} }} 43 | {{end}} 44 | </tools> 45 | For each function call, return a json object with function name and arguments within <tool_call></tool_call> XML tags: 46 | <tool_call> 47 | {\"name\": <function-name>, \"arguments\": <args-json-object>} 48 | </tool_call> 49 | <|im_end|> 50 | {{.Input -}} 51 | <|im_start|>assistant 52 | chat: | 53 | {{.Input -}} 54 | <|im_start|>assistant 55 | completion: | 56 | {{.Input}} 57 | f16: true 58 | mmap: true 59 | stopwords: 60 | - \"<|im_start|>\" 61 | - \"<|im_end|>\" 62 | -------------------------------------------------------------------------------- /pkg/aikit/config/finetune_specs.go: -------------------------------------------------------------------------------- 1 | package config 2 | 3 | type FineTuneConfig struct { 4 | APIVersion string `yaml:"apiVersion"` 5 | Target string `yaml:"target"` 6 | BaseModel string `yaml:"baseModel"` 7 | Datasets []Dataset `yaml:"datasets"` 8 | Config FineTuneConfigSpec `yaml:"config"` 9 | Output FineTuneOutputSpec `yaml:"output"` 10 | } 11 | 12 | type FineTuneConfigSpec struct { 13 | Unsloth FineTuneConfigUnslothSpec `yaml:"unsloth"` 14 | } 15 | 16 | type Dataset struct { 17 | Source string `yaml:"source"` 18 | Type string `yaml:"type"` 19 | } 20 | 21 | type FineTuneConfigUnslothSpec struct { 22 | Packing bool `yaml:"packing"` 23 | MaxSeqLength int `yaml:"maxSeqLength"` 24 | LoadIn4bit bool `yaml:"loadIn4bit"` 25 | BatchSize int `yaml:"batchSize"` 26 | GradientAccumulationSteps int `yaml:"gradientAccumulationSteps"` 27 | WarmupSteps int `yaml:"warmupSteps"` 28 | MaxSteps int `yaml:"maxSteps"` 29 | LearningRate float64 `yaml:"learningRate"` 30 | LoggingSteps int `yaml:"loggingSteps"` 31 | Optimizer string `yaml:"optimizer"` 32 | WeightDecay float64 `yaml:"weightDecay"` 33 | LrSchedulerType string `yaml:"lrSchedulerType"` 34 | Seed int `yaml:"seed"` 35 | } 36 | 37 | type FineTuneOutputSpec struct { 38 | Quantize string `yaml:"quantize"` 39 | Name string `yaml:"name"` 40 | } 41 | -------------------------------------------------------------------------------- /pkg/aikit/config/inference_specs.go: -------------------------------------------------------------------------------- 1 | package config 2 | 3 | type InferenceConfig struct { 4 | APIVersion string `yaml:"apiVersion"` 5 | Debug bool `yaml:"debug"` 6 | Runtime string `yaml:"runtime"` 7 | Backends []string `yaml:"backends"` 8 | Models []Model `yaml:"models"` 9 | Config string `yaml:"config"` 10 | } 11 | 12 | type Model struct { 13 | Name string `yaml:"name"` 14 | Source string 
`yaml:"source"` 15 | SHA256 string `yaml:"sha256"` 16 | PromptTemplates []PromptTemplate `yaml:"promptTemplates"` 17 | } 18 | 19 | type PromptTemplate struct { 20 | Name string `yaml:"name"` 21 | Template string `yaml:"template"` 22 | } 23 | -------------------------------------------------------------------------------- /pkg/aikit/config/specs.go: -------------------------------------------------------------------------------- 1 | package config 2 | 3 | import ( 4 | "github.com/pkg/errors" 5 | yaml "gopkg.in/yaml.v2" 6 | ) 7 | 8 | func NewFromBytes(b []byte) (*InferenceConfig, *FineTuneConfig, error) { 9 | inferenceConfig := &InferenceConfig{} 10 | fineTuneConfig := &FineTuneConfig{} 11 | var err error 12 | err = yaml.Unmarshal(b, inferenceConfig) 13 | if err == nil { 14 | return inferenceConfig, nil, nil 15 | } 16 | 17 | err = yaml.Unmarshal(b, fineTuneConfig) 18 | if err == nil { 19 | return nil, fineTuneConfig, nil 20 | } 21 | 22 | return nil, nil, errors.Wrap(err, "unmarshal config") 23 | } 24 | -------------------------------------------------------------------------------- /pkg/aikit/config/specs_test.go: -------------------------------------------------------------------------------- 1 | package config 2 | 3 | import ( 4 | "reflect" 5 | "testing" 6 | 7 | "github.com/kaito-project/aikit/pkg/utils" 8 | ) 9 | 10 | func TestNewFromBytes(t *testing.T) { 11 | type args struct { 12 | b []byte 13 | } 14 | tests := []struct { 15 | name string 16 | args args 17 | want *InferenceConfig 18 | wantErr bool 19 | }{ 20 | { 21 | name: "valid yaml", 22 | args: args{b: []byte(` 23 | apiVersion: v1alpha1 24 | runtime: cuda 25 | backends: 26 | - exllama2 27 | models: 28 | - name: test 29 | source: foo 30 | `)}, 31 | want: &InferenceConfig{ 32 | APIVersion: utils.APIv1alpha1, 33 | Runtime: utils.RuntimeNVIDIA, 34 | Backends: []string{ 35 | utils.BackendExllamaV2, 36 | }, 37 | Models: []Model{ 38 | { 39 | Name: "test", 40 | Source: "foo", 41 | }, 42 | }, 43 | }, 44 | wantErr: false, 45 | }, 46 | { 47 | name: "invalid yaml", 48 | args: args{b: []byte(` 49 | foo 50 | `)}, 51 | want: nil, 52 | wantErr: true, 53 | }, 54 | } 55 | for _, tt := range tests { 56 | t.Run(tt.name, func(t *testing.T) { 57 | infCfg, _, err := NewFromBytes(tt.args.b) 58 | if (err != nil) != tt.wantErr { 59 | t.Errorf("NewFromBytes() error = %v, wantErr %v", err, tt.wantErr) 60 | return 61 | } 62 | if !reflect.DeepEqual(infCfg, tt.want) { 63 | t.Errorf("NewFromBytes() = %v, want %v", infCfg, tt.want) 64 | } 65 | }) 66 | } 67 | } 68 | -------------------------------------------------------------------------------- /pkg/aikit2llb/finetune/convert.go: -------------------------------------------------------------------------------- 1 | package finetune 2 | 3 | import ( 4 | "fmt" 5 | 6 | "github.com/kaito-project/aikit/pkg/aikit/config" 7 | "github.com/kaito-project/aikit/pkg/utils" 8 | "github.com/kaito-project/aikit/pkg/version" 9 | "github.com/moby/buildkit/client/llb" 10 | "github.com/moby/buildkit/util/system" 11 | "gopkg.in/yaml.v2" 12 | ) 13 | 14 | const ( 15 | unslothCommitOrTag = "fb77505f8429566f5d21d6ea5318c342e8a67991" // September-2024 16 | nvidiaMknod = "mknod --mode 666 /dev/nvidiactl c 195 255 && mknod --mode 666 /dev/nvidia-uvm c 235 0 && mknod --mode 666 /dev/nvidia-uvm-tools c 235 1 && mknod --mode 666 /dev/nvidia0 c 195 0 && nvidia-smi" 17 | sourceVenv = ". 
.venv/bin/activate" 18 | ) 19 | 20 | func Aikit2LLB(c *config.FineTuneConfig) llb.State { 21 | env := map[string]string{ 22 | "PATH": system.DefaultPathEnv("linux") + ":/usr/local/cuda/bin", 23 | "NVIDIA_REQUIRE_CUDA": "cuda>=12.0", 24 | "NVIDIA_DRIVER_CAPABILITIES": "compute,utility", 25 | "NVIDIA_VISIBLE_DEVICES": "all", 26 | "LD_LIBRARY_PATH": "/usr/local/cuda/lib64", 27 | } 28 | 29 | state := llb.Image(utils.CudaDevel) 30 | for k, v := range env { 31 | state = state.AddEnv(k, v) 32 | } 33 | 34 | // installing dependencies 35 | // due to buildkit run limitations, we need to install nvidia drivers and driver version must match the host 36 | state = state.Run(utils.Sh("apt-get update && apt-get install -y --no-install-recommends python3-dev python3 python3-pip python-is-python3 git wget kmod && cd /root && VERSION=$(cat /proc/driver/nvidia/version | sed -n 's/.*NVIDIA UNIX x86_64 Kernel Module \\([0-9]\\+\\.[0-9]\\+\\.[0-9]\\+\\).*/\\1/p') && wget --no-verbose https://download.nvidia.com/XFree86/Linux-x86_64/$VERSION/NVIDIA-Linux-x86_64-$VERSION.run && chmod +x NVIDIA-Linux-x86_64-$VERSION.run && ./NVIDIA-Linux-x86_64-$VERSION.run -x && rm NVIDIA-Linux-x86_64-$VERSION.run && /root/NVIDIA-Linux-x86_64-$VERSION/nvidia-installer -a -s --skip-depmod --no-dkms --no-nvidia-modprobe --no-questions --no-systemd --no-x-check --no-kernel-modules --no-kernel-module-source && rm -rf /root/NVIDIA-Linux-x86_64-$VERSION")).Root() 37 | 38 | // write config to /config.yaml 39 | cfg, err := yaml.Marshal(c) 40 | if err != nil { 41 | panic(err) 42 | } 43 | state = state.Run(utils.Shf("echo -n \"%s\" > /config.yaml", string(cfg))).Root() 44 | 45 | var scratch llb.State 46 | if c.Target == utils.TargetUnsloth { 47 | // installing unsloth and its dependencies 48 | // uv does not support installing xformers via unsloth pyproject 49 | state = state.Run(utils.Shf("pip install --upgrade pip uv && uv venv --system-site-packages && %[1]s && uv pip install --upgrade --force-reinstall packaging torch==2.4.0 ipython ninja packaging bitsandbytes setuptools==69.5.1 wheel psutil transformers==4.44.2 numpy==2.0.2 && uv pip install flash-attn --no-build-isolation && python -m pip install 'unsloth[cu121_ampere_torch240] @ git+https://github.com/unslothai/unsloth.git@%[2]s'", sourceVenv, unslothCommitOrTag)).Root() 50 | 51 | version := version.Version 52 | if version == "" { 53 | version = "main" 54 | } 55 | unslothScriptURL := fmt.Sprintf("https://raw.githubusercontent.com/kaito-project/aikit/%s/pkg/finetune/target_unsloth.py", version) 56 | var opts []llb.HTTPOption 57 | opts = append(opts, llb.Chmod(0o755)) 58 | unslothScript := llb.HTTP(unslothScriptURL, opts...) 
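// Copy the downloaded fine-tuning script to the image root so it can be invoked
// below with "python -m target_unsloth".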
59 | state = state.File( 60 | llb.Copy(unslothScript, utils.FileNameFromURL(unslothScriptURL), "/"), 61 | llb.WithCustomName("Copying "+utils.FileNameFromURL(unslothScriptURL)), 62 | ) 63 | 64 | // setup nvidia devices and run unsloth 65 | // due to buildkit run limitations, we need to create the devices manually and run unsloth in the same command 66 | state = state.Run(utils.Shf("%[1]s && %[2]s && python -m target_unsloth", nvidiaMknod, sourceVenv), llb.Security(llb.SecurityModeInsecure)).Root() 67 | 68 | // copy gguf to scratch which will be the output 69 | const inputFile = "model/*.gguf" 70 | copyOpts := []llb.CopyOption{} 71 | copyOpts = append(copyOpts, &llb.CopyInfo{AllowWildcard: true}) 72 | outputFile := fmt.Sprintf("%s-%s.gguf", c.Output.Name, c.Output.Quantize) 73 | scratch = llb.Scratch().File(llb.Copy(state, inputFile, outputFile, copyOpts...)) 74 | } 75 | 76 | return scratch 77 | } 78 | -------------------------------------------------------------------------------- /pkg/aikit2llb/inference/diffusers.go: -------------------------------------------------------------------------------- 1 | package inference 2 | 3 | import ( 4 | "github.com/moby/buildkit/client/llb" 5 | ) 6 | 7 | // installDiffusersDependencies installs minimal Python dependencies required for diffusers backend. 8 | // Diffusers only needs basic Python tools, no build dependencies. 9 | func installDiffusersDependencies(s llb.State, merge llb.State) llb.State { 10 | return installPythonBaseDependencies(s, merge) 11 | } 12 | -------------------------------------------------------------------------------- /pkg/aikit2llb/inference/diffusers_test.go: -------------------------------------------------------------------------------- 1 | package inference 2 | 3 | import ( 4 | "testing" 5 | 6 | "github.com/moby/buildkit/client/llb" 7 | ) 8 | 9 | func TestInstallDiffusersDependencies(t *testing.T) { 10 | // Create a simple base state for testing 11 | baseState := llb.Image("ubuntu:22.04") 12 | mergeState := baseState 13 | 14 | // Call the function to install dependencies 15 | // This should execute without panicking 16 | defer func() { 17 | if r := recover(); r != nil { 18 | t.Errorf("installDiffusersDependencies panicked: %v", r) 19 | } 20 | }() 21 | 22 | result := installDiffusersDependencies(baseState, mergeState) 23 | 24 | // The function should return a valid LLB state 25 | // We can't easily test the actual installation without running BuildKit, 26 | // but we can verify the function executes without panicking 27 | _ = result // Use the result to avoid unused variable warning 28 | } 29 | -------------------------------------------------------------------------------- /pkg/aikit2llb/inference/exllama.go: -------------------------------------------------------------------------------- 1 | package inference 2 | 3 | import ( 4 | "github.com/kaito-project/aikit/pkg/utils" 5 | "github.com/moby/buildkit/client/llb" 6 | ) 7 | 8 | // installPythonBaseDependencies installs minimal Python dependencies common to all Python backends. 
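// It installs packages on top of the input state, then diffs against the saved
// state and merges only that layer, so base-image content is not duplicated.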
9 | func installPythonBaseDependencies(s llb.State, merge llb.State) llb.State { 10 | savedState := s 11 | 12 | // Install minimal Python dependencies common to all Python backends 13 | s = s.Run(utils.Sh("apt-get update && apt-get install --no-install-recommends -y git python3 python3-pip python3-venv python-is-python3 && pip install uv && pip install grpcio-tools==1.71.0 --no-dependencies && apt-get clean"), llb.IgnoreCache).Root() 14 | 15 | diff := llb.Diff(savedState, s) 16 | return llb.Merge([]llb.State{merge, diff}) 17 | } 18 | 19 | // installExllamaDependencies installs Python and other dependencies required for exllama2 backend. 20 | // ExLLama2 needs additional build tools for compilation. 21 | func installExllamaDependencies(s llb.State, merge llb.State) llb.State { 22 | savedState := s 23 | 24 | // Install Python and build dependencies needed for exllama2 25 | s = s.Run(utils.Sh("apt-get update && apt-get install --no-install-recommends -y bash git ca-certificates python3-pip python3-dev python3-venv python-is-python3 make g++ curl && pip install uv ninja && pip install grpcio-tools==1.71.0 --no-dependencies && apt-get clean"), llb.IgnoreCache).Root() 26 | 27 | diff := llb.Diff(savedState, s) 28 | return llb.Merge([]llb.State{merge, diff}) 29 | } 30 | -------------------------------------------------------------------------------- /pkg/aikit2llb/inference/exllama_test.go: -------------------------------------------------------------------------------- 1 | package inference 2 | 3 | import ( 4 | "testing" 5 | 6 | "github.com/moby/buildkit/client/llb" 7 | ) 8 | 9 | func TestInstallExllamaDependencies(t *testing.T) { 10 | // Create a simple base state for testing 11 | baseState := llb.Image("ubuntu:22.04") 12 | mergeState := baseState 13 | 14 | // Call the function to install dependencies 15 | // This should execute without panicking 16 | defer func() { 17 | if r := recover(); r != nil { 18 | t.Errorf("installExllamaDependencies panicked: %v", r) 19 | } 20 | }() 21 | 22 | result := installExllamaDependencies(baseState, mergeState) 23 | 24 | // The function should return a valid LLB state 25 | // We can't easily test the actual installation without running BuildKit, 26 | // but we can verify the function executes without panicking 27 | _ = result // Use the result to avoid unused variable warning 28 | } 29 | 30 | func TestInstallPythonBaseDependencies(t *testing.T) { 31 | // Create a simple base state for testing 32 | baseState := llb.Image("ubuntu:22.04") 33 | mergeState := baseState 34 | 35 | // Call the function to install dependencies 36 | // This should execute without panicking 37 | defer func() { 38 | if r := recover(); r != nil { 39 | t.Errorf("installPythonBaseDependencies panicked: %v", r) 40 | } 41 | }() 42 | 43 | result := installPythonBaseDependencies(baseState, mergeState) 44 | 45 | // The function should return a valid LLB state 46 | // We can't easily test the actual installation without running BuildKit, 47 | // but we can verify the function executes without panicking 48 | _ = result // Use the result to avoid unused variable warning 49 | } 50 | -------------------------------------------------------------------------------- /pkg/aikit2llb/inference/image.go: -------------------------------------------------------------------------------- 1 | package inference 2 | 3 | import ( 4 | "github.com/kaito-project/aikit/pkg/aikit/config" 5 | "github.com/kaito-project/aikit/pkg/utils" 6 | "github.com/moby/buildkit/util/system" 7 | specs 
"github.com/opencontainers/image-spec/specs-go/v1" 8 | ) 9 | 10 | func NewImageConfig(c *config.InferenceConfig, platform *specs.Platform) *specs.Image { 11 | img := emptyImage(c, platform) 12 | cmd := []string{} 13 | if c.Debug { 14 | cmd = append(cmd, "--debug") 15 | } 16 | if c.Config != "" { 17 | cmd = append(cmd, "--config-file=/config.yaml") 18 | } 19 | 20 | img.Config.Entrypoint = []string{"local-ai"} 21 | img.Config.Cmd = cmd 22 | return img 23 | } 24 | 25 | func emptyImage(c *config.InferenceConfig, platform *specs.Platform) *specs.Image { 26 | img := &specs.Image{ 27 | Platform: specs.Platform{ 28 | Architecture: platform.Architecture, 29 | OS: utils.PlatformLinux, 30 | }, 31 | } 32 | img.RootFS.Type = "layers" 33 | img.Config.WorkingDir = "/" 34 | 35 | img.Config.Env = []string{ 36 | "PATH=" + system.DefaultPathEnv(utils.PlatformLinux), 37 | "CONFIG_FILE=/config.yaml", 38 | } 39 | 40 | cudaEnv := []string{ 41 | "PATH=" + system.DefaultPathEnv(utils.PlatformLinux) + ":/usr/local/cuda/bin", 42 | "NVIDIA_REQUIRE_CUDA=cuda>=12.0", 43 | "NVIDIA_DRIVER_CAPABILITIES=compute,utility", 44 | "NVIDIA_VISIBLE_DEVICES=all", 45 | "LD_LIBRARY_PATH=/usr/local/cuda/lib64", 46 | "BUILD_TYPE=cublas", 47 | "CUDA_HOME=/usr/local/cuda", 48 | } 49 | if c.Runtime == utils.RuntimeNVIDIA { 50 | img.Config.Env = append(img.Config.Env, cudaEnv...) 51 | } 52 | 53 | return img 54 | } 55 | -------------------------------------------------------------------------------- /pkg/build/args.go: -------------------------------------------------------------------------------- 1 | package build 2 | 3 | import ( 4 | "fmt" 5 | "path" 6 | "strings" 7 | 8 | "github.com/kaito-project/aikit/pkg/aikit/config" 9 | "github.com/kaito-project/aikit/pkg/aikit2llb/inference" 10 | "github.com/kaito-project/aikit/pkg/utils" 11 | ) 12 | 13 | // parseBuildArgs parses the build arguments and configures inference settings. 
14 | func parseBuildArgs(opts map[string]string, inferenceCfg *config.InferenceConfig) error { 15 | if inferenceCfg == nil { 16 | return nil 17 | } 18 | 19 | // Get model and runtime arguments 20 | modelArg := getBuildArg(opts, "model") 21 | runtimeArg := getBuildArg(opts, "runtime") 22 | 23 | // Set the runtime if provided 24 | if runtimeArg != "" { 25 | inferenceCfg.Runtime = runtimeArg 26 | } 27 | 28 | // Set the model if provided 29 | if modelArg != "" { 30 | var modelName, modelSource string 31 | var err error 32 | 33 | // Handle based on the URL prefix 34 | switch { 35 | case strings.HasPrefix(modelArg, "huggingface://"): 36 | // Handle Hugging Face URLs with optional branch 37 | modelSource, modelName, err = inference.ParseHuggingFaceURL(modelArg) 38 | if err != nil { 39 | return err 40 | } 41 | 42 | case strings.HasPrefix(modelArg, "http://"), strings.HasPrefix(modelArg, "https://"): 43 | // Handle HTTP(S) URLs directly 44 | modelName = utils.FileNameFromURL(modelArg) 45 | modelSource = modelArg 46 | 47 | case strings.HasPrefix(modelArg, "oci://"): 48 | // Handle OCI URLs 49 | modelName = parseOCIURL(modelArg) 50 | modelSource = modelArg 51 | 52 | default: 53 | // Assume it's a local file path 54 | modelName = path.Base(modelArg) 55 | modelSource = modelArg 56 | } 57 | 58 | // Set the inference configuration 59 | inferenceCfg.Models = []config.Model{ 60 | { 61 | Name: modelName, 62 | Source: modelSource, 63 | }, 64 | } 65 | inferenceCfg.Config = generateInferenceConfig(modelName) 66 | } 67 | 68 | return nil 69 | } 70 | 71 | // generateInferenceConfig generates the inference configuration for the given model name. 72 | func generateInferenceConfig(modelName string) string { 73 | return fmt.Sprintf(` 74 | - name: %[1]s 75 | backend: llama 76 | parameters: 77 | model: %[1]s`, modelName) 78 | } 79 | 80 | // parseOCIURL extracts model name for OCI-based models. 
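// For example, "oci://registry.ollama.ai/library/llama3.2:1b" yields "llama3.2";
// generic OCI artifacts fall back to the path base with any tag or digest stripped.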
81 | func parseOCIURL(source string) string { 82 | const ollamaRegistryURL = "registry.ollama.ai" 83 | artifactURL := strings.TrimPrefix(source, "oci://") 84 | var modelName string 85 | 86 | if strings.HasPrefix(artifactURL, ollamaRegistryURL) { 87 | // Special handling for Ollama registry 88 | artifactURLWithoutTag := strings.Split(artifactURL, ":")[0] 89 | modelName = strings.Split(artifactURLWithoutTag, "/")[2] 90 | } else { 91 | // Generic OCI artifact 92 | modelName = path.Base(artifactURL) 93 | modelName = strings.Split(modelName, ":")[0] 94 | modelName = strings.Split(modelName, "@")[0] 95 | } 96 | 97 | return modelName 98 | } 99 | -------------------------------------------------------------------------------- /pkg/finetune/target_unsloth.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | from unsloth import is_bfloat16_supported 4 | from transformers import TrainingArguments, DataCollatorForSeq2Seq 5 | from unsloth import FastLanguageModel 6 | import torch 7 | from trl import SFTTrainer 8 | from transformers import TrainingArguments 9 | from datasets import load_dataset 10 | import yaml 11 | 12 | with open('config.yaml', 'r') as config_file: 13 | try: 14 | data = yaml.safe_load(config_file) 15 | print(data) 16 | except yaml.YAMLError as exc: 17 | print(exc) 18 | 19 | cfg = data.get('config').get('unsloth') 20 | max_seq_length = cfg.get('maxSeqLength') 21 | 22 | model, tokenizer = FastLanguageModel.from_pretrained( 23 | model_name=data.get('baseModel'), 24 | max_seq_length=max_seq_length, 25 | dtype=None, 26 | load_in_4bit=True, 27 | ) 28 | 29 | model = FastLanguageModel.get_peft_model( 30 | model, 31 | r = 16, # Choose any number > 0 ! Suggested 8, 16, 32, 64, 128 32 | target_modules = ["q_proj", "k_proj", "v_proj", "o_proj", 33 | "gate_proj", "up_proj", "down_proj",], 34 | lora_alpha = 16, 35 | lora_dropout = 0, # Supports any, but = 0 is optimized 36 | bias = "none", # Supports any, but = "none" is optimized 37 | use_gradient_checkpointing="unsloth", 38 | random_state = 3407, 39 | use_rslora = False, # We support rank stabilized LoRA 40 | loftq_config = None, # And LoftQ 41 | ) 42 | 43 | # TODO: right now, this is hardcoded for alpaca. use the dataset type here in the future to make this customizable 44 | alpaca_prompt = """Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request. 45 | 46 | ### Instruction: 47 | {} 48 | 49 | ### Input: 50 | {} 51 | 52 | ### Response: 53 | {}""" 54 | 55 | EOS_TOKEN = tokenizer.eos_token 56 | def formatting_prompts_func(examples): 57 | instructions = examples["instruction"] 58 | inputs = examples["input"] 59 | outputs = examples["output"] 60 | texts = [] 61 | for instruction, input, output in zip(instructions, inputs, outputs): 62 | # Must add EOS_TOKEN, otherwise your generation will go on forever! 
63 | text = alpaca_prompt.format(instruction, input, output) + EOS_TOKEN 64 | texts.append(text) 65 | return { "text" : texts, } 66 | pass 67 | 68 | from datasets import load_dataset 69 | source = data.get('datasets')[0]['source'] 70 | 71 | if source.startswith('http'): 72 | dataset = load_dataset("json", data_files={"train": source}, split="train") 73 | else: 74 | dataset = load_dataset(source, split = "train") 75 | 76 | dataset = dataset.map(formatting_prompts_func, batched=True) 77 | 78 | trainer = SFTTrainer( 79 | model=model, 80 | train_dataset=dataset, 81 | dataset_text_field="text", 82 | max_seq_length=max_seq_length, 83 | data_collator=DataCollatorForSeq2Seq(tokenizer=tokenizer), 84 | tokenizer=tokenizer, 85 | dataset_num_proc = 2, 86 | packing = cfg.get('packing'), # Can make training 5x faster for short sequences. 87 | args=TrainingArguments( 88 | per_device_train_batch_size=cfg.get('batchSize'), 89 | gradient_accumulation_steps=cfg.get('gradientAccumulationSteps'), 90 | warmup_steps=cfg.get('warmupSteps'), 91 | max_steps=cfg.get('maxSteps'), 92 | learning_rate = cfg.get('learningRate'), 93 | fp16=not is_bfloat16_supported(), 94 | bf16=is_bfloat16_supported(), 95 | logging_steps=cfg.get('loggingSteps'), 96 | optim=cfg.get('optimizer'), 97 | weight_decay = cfg.get('weightDecay'), 98 | lr_scheduler_type = cfg.get('lrSchedulerType'), 99 | seed=cfg.get('seed'), 100 | output_dir="outputs", 101 | ), 102 | ) 103 | trainer.train() 104 | 105 | output = data.get('output') 106 | model.save_pretrained_gguf(output.get('name'), tokenizer, 107 | quantization_method=output.get('quantize')) 108 | -------------------------------------------------------------------------------- /pkg/utils/const.go: -------------------------------------------------------------------------------- 1 | package utils // nolint:revive 2 | 3 | const ( 4 | RuntimeNVIDIA = "cuda" 5 | RuntimeAppleSilicon = "applesilicon" // experimental apple silicon runtime with vulkan arm64 support 6 | 7 | BackendExllamaV2 = "exllama2" 8 | BackendDiffusers = "diffusers" 9 | BackendLlamaCpp = "llama-cpp" 10 | 11 | BackendOCIRegistry = "quay.io/go-skynet/local-ai-backends" 12 | 13 | TargetUnsloth = "unsloth" 14 | 15 | DatasetAlpaca = "alpaca" 16 | 17 | APIv1alpha1 = "v1alpha1" 18 | 19 | UbuntuBase = "docker.io/library/ubuntu:22.04" 20 | AppleSiliconBase = "ghcr.io/kaito-project/aikit/applesilicon/base:latest" 21 | CudaDevel = "nvcr.io/nvidia/cuda:12.3.2-devel-ubuntu22.04" 22 | 23 | PlatformLinux = "linux" 24 | PlatformAMD64 = "amd64" 25 | PlatformARM64 = "arm64" 26 | ) 27 | -------------------------------------------------------------------------------- /pkg/utils/util.go: -------------------------------------------------------------------------------- 1 | package utils 2 | 3 | import ( 4 | "fmt" 5 | "net/url" 6 | "path" 7 | 8 | "github.com/moby/buildkit/client/llb" 9 | ) 10 | 11 | func FileNameFromURL(urlString string) string { 12 | parsedURL, err := url.Parse(urlString) 13 | if err != nil { 14 | panic(err) 15 | } 16 | return path.Base(parsedURL.Path) 17 | } 18 | 19 | func Sh(cmd string) llb.RunOption { 20 | return llb.Args([]string{"/bin/sh", "-c", cmd}) 21 | } 22 | 23 | func Shf(cmd string, v ...interface{}) llb.RunOption { 24 | return llb.Args([]string{"/bin/sh", "-c", fmt.Sprintf(cmd, v...)}) 25 | } 26 | 27 | func Bashf(cmd string, v ...interface{}) llb.RunOption { 28 | return llb.Args([]string{"/bin/bash", "-c", fmt.Sprintf(cmd, v...)}) 29 | } 30 | -------------------------------------------------------------------------------- 
/pkg/utils/util_test.go: -------------------------------------------------------------------------------- 1 | package utils // nolint:revive 2 | 3 | import ( 4 | "testing" 5 | ) 6 | 7 | func Test_FileNameFromURL(t *testing.T) { 8 | type args struct { 9 | urlString string 10 | } 11 | tests := []struct { 12 | name string 13 | args args 14 | want string 15 | }{ 16 | { 17 | name: "simple", 18 | args: args{urlString: "http://foo.bar/baz"}, 19 | want: "baz", 20 | }, 21 | { 22 | name: "complex", 23 | args: args{urlString: "http://foo.bar/baz.tar.gz"}, 24 | want: "baz.tar.gz", 25 | }, 26 | { 27 | name: "complex with path", 28 | args: args{urlString: "http://foo.bar/baz.tar.gz?foo=bar"}, 29 | want: "baz.tar.gz", 30 | }, 31 | } 32 | for _, tt := range tests { 33 | t.Run(tt.name, func(t *testing.T) { 34 | if got := FileNameFromURL(tt.args.urlString); got != tt.want { 35 | t.Errorf("FileNameFromURL() = %v, want %v", got, tt.want) 36 | } 37 | }) 38 | } 39 | } 40 | -------------------------------------------------------------------------------- /pkg/version/version.go: -------------------------------------------------------------------------------- 1 | package version 2 | 3 | // Version is the aikit version. 4 | var Version string 5 | -------------------------------------------------------------------------------- /scripts/parse-models.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | extract_model_name() { 4 | echo "$1" | sed -E ' 5 | s/^llama-(3\.[1-9])-([0-9]+\.?[0-9]*b)-.*/llama\1/;t; 6 | s/^flux-([0-9]+)-dev$/flux\1/;t; 7 | s/^phi-(3\.5)-([0-9]+\.?[0-9]*b)-.*/phi\1/;t; 8 | s/^([a-z]+-[a-z]+)-([0-9]+\.?[0-9]*b)$/\1/;t; 9 | s/^([a-z]+)-([0-9]+x[0-9]+b|[0-9]+\.?[0-9]*b)-.*/\1/; 10 | s/^([a-z]+)-([0-9]+)-.*/\1\2/; 11 | s/^([a-z]+)-([0-9]+\.?[0-9]*b)$/\1/' 12 | } 13 | 14 | extract_model_size() { 15 | echo "$1" | sed -E ' 16 | s/^llama-(3\.[1-9])-([0-9]+\.?[0-9]*b)-.*/\2/;t; 17 | s/^flux-[0-9]+-dev$/dev/;t; 18 | s/^[a-z]+-[a-z]+-([0-9]+\.?[0-9]*b)$/\1/;t; 19 | s/^[a-z]+-([0-9]+x[0-9]+b|[0-9]+\.?[0-9]*b)-.*/\1/; 20 | s/^[a-z]+-[0-9]+(\.[0-9]+)?-([0-9]+\.?[0-9]*b).*/\2/; 21 | s/^[a-z]+-([0-9]+\.?[0-9]*b)$/\1/' 22 | } 23 | 24 | extract_model_type() { 25 | echo "$1" | sed -n -e 's/^flux-[0-9]+-\(dev\)$/\1/p' -e 's/.*\(chat\).*/\1/p' -e 's/.*\(instruct\).*/\1/p' 26 | } 27 | 28 | for MODEL in "llama-2-7b-chat" "llama-2-13b-chat" "llama-3-8b-instruct" "llama-3.1-8b-instruct" "llama-3.2-1b-instruct" "llama-3.2-3b-instruct" "phi-3-3.8b" "phi-3.5-3.8b-instruct" "gemma-2b-instruct" "gemma-2-2b-instruct" "codestral-22b" "llama-3-70b-instruct" "llama-3.3-70b-instruct" "mixtral-8x7b-instruct" "flux-1-dev" "qwq-32b" "phi-4-14b-instruct" "gpt-oss-20b" "gpt-oss-120b"; do 29 | echo "Model: $MODEL" 30 | echo " Name: $(extract_model_name "$MODEL")" 31 | echo " Size: $(extract_model_size "$MODEL")" 32 | echo " Type: $(extract_model_type "$MODEL")" 33 | echo 34 | done 35 | -------------------------------------------------------------------------------- /test/aikitfile-args.yaml: -------------------------------------------------------------------------------- 1 | #syntax=aikit:test 2 | apiVersion: v1alpha1 3 | debug: true 4 | -------------------------------------------------------------------------------- /test/aikitfile-cpu-exllama2.yaml: -------------------------------------------------------------------------------- 1 | #syntax=aikit:test 2 | apiVersion: v1alpha1 3 | debug: true 4 | backends: 5 | - exllama2 6 | models: 7 | - name: 
Llama2-7B-chat-exl2/output.safetensors 8 | source: https://huggingface.co/turboderp/Llama2-7B-chat-exl2/resolve/2.5bpw/output.safetensors 9 | - name: Llama2-7B-chat-exl2/tokenizer.model 10 | source: https://huggingface.co/turboderp/Llama2-7B-chat-exl2/resolve/2.5bpw/tokenizer.model 11 | - name: Llama2-7B-chat-exl2/config.json 12 | source: https://huggingface.co/turboderp/Llama2-7B-chat-exl2/raw/2.5bpw/config.json 13 | config: | 14 | - name: llama-2-7b-chat 15 | backend: exllama2 16 | context_size: 4096 17 | parameters: 18 | model: "Llama2-7B-chat-exl2" 19 | temperature: 0.2 20 | -------------------------------------------------------------------------------- /test/aikitfile-dev.yaml: -------------------------------------------------------------------------------- 1 | #syntax=ghcr.io/kaito-project/aikit/aikit:dev 2 | apiVersion: v1alpha1 3 | debug: true 4 | models: 5 | - name: llama-2-7b-chat 6 | source: https://huggingface.co/TheBloke/Llama-2-7B-Chat-GGUF/resolve/main/llama-2-7b-chat.Q4_K_M.gguf 7 | config: | 8 | - name: llama-2-7b-chat 9 | backend: llama 10 | parameters: 11 | top_k: 80 12 | temperature: 0.2 13 | top_p: 0.7 14 | model: llama-2-7b-chat.Q4_K_M.gguf 15 | context_size: 4096 16 | -------------------------------------------------------------------------------- /test/aikitfile-diffusers.yaml: -------------------------------------------------------------------------------- 1 | #syntax=aikit:test 2 | apiVersion: v1alpha1 3 | debug: true 4 | runtime: cuda 5 | backends: 6 | - diffusers 7 | models: 8 | - name: "dreamshaper_assets/DreamShaper_8_pruned.safetensors" 9 | sha256: 879db523c30d3b9017143d56705015e15a2cb5628762c11d086fed9538abd7fd 10 | source: "https://huggingface.co/Lykon/DreamShaper/resolve/main/DreamShaper_8_pruned.safetensors" 11 | config: | 12 | - name: dreamshaper 13 | backend: diffusers 14 | parameters: 15 | model: dreamshaper_assets/DreamShaper_8_pruned.safetensors 16 | diffusers: 17 | pipeline_type: StableDiffusionPipeline 18 | cuda: true 19 | enable_parameters: "negative_prompt,num_inference_steps" 20 | scheduler_type: "k_dpmpp_2m" 21 | step: 25 22 | f16: true 23 | -------------------------------------------------------------------------------- /test/aikitfile-exllama2-exl2.yaml: -------------------------------------------------------------------------------- 1 | #syntax=aikit:test 2 | apiVersion: v1alpha1 3 | debug: true 4 | runtime: cuda 5 | backends: 6 | - exllama2 7 | models: 8 | - name: Llama2-7B-chat-exl2/output.safetensors 9 | source: https://huggingface.co/turboderp/Llama2-7B-chat-exl2/resolve/2.5bpw/output.safetensors 10 | - name: Llama2-7B-chat-exl2/tokenizer.model 11 | source: https://huggingface.co/turboderp/Llama2-7B-chat-exl2/resolve/2.5bpw/tokenizer.model 12 | - name: Llama2-7B-chat-exl2/config.json 13 | source: https://huggingface.co/turboderp/Llama2-7B-chat-exl2/raw/2.5bpw/config.json 14 | config: | 15 | - name: llama-2-7b-chat 16 | backend: exllama2 17 | context_size: 4096 18 | parameters: 19 | model: "Llama2-7B-chat-exl2" 20 | temperature: 0.2 21 | -------------------------------------------------------------------------------- /test/aikitfile-exllama2-gptq.yaml: -------------------------------------------------------------------------------- 1 | #syntax=aikit:test 2 | apiVersion: v1alpha1 3 | debug: true 4 | runtime: cuda 5 | backends: 6 | - exllama2 7 | models: 8 | - name: Llama-2-7B-Chat-GPTQ/model.safetensors 9 | source: https://huggingface.co/TheBloke/Llama-2-7B-Chat-GPTQ/resolve/main/model.safetensors 10 | - name: 
Llama-2-7B-Chat-GPTQ/tokenizer.model 11 | source: https://huggingface.co/TheBloke/Llama-2-7B-Chat-GPTQ/resolve/main/tokenizer.model 12 | - name: Llama-2-7B-Chat-GPTQ/config.json 13 | source: https://huggingface.co/TheBloke/Llama-2-7B-Chat-GPTQ/resolve/main/config.json 14 | config: | 15 | - name: llama-2-7b-chat 16 | backend: exllama2 17 | context_size: 4096 18 | parameters: 19 | model: "Llama-2-7B-Chat-GPTQ" 20 | temperature: 0.2 21 | -------------------------------------------------------------------------------- /test/aikitfile-flux-schnell.yaml: -------------------------------------------------------------------------------- 1 | #syntax=aikit:test 2 | apiVersion: v1alpha1 3 | debug: true 4 | runtime: cuda 5 | backends: 6 | - diffusers 7 | config: | 8 | - name: flux-1-schnell 9 | backend: diffusers 10 | parameters: 11 | model: black-forest-labs/FLUX.1-schnell 12 | diffusers: 13 | cfg_scale: 0 14 | cuda: true 15 | enable_parameters: num_inference_steps 16 | pipeline_type: FluxPipeline 17 | f16: true 18 | step: 25 19 | low_vram: true 20 | -------------------------------------------------------------------------------- /test/aikitfile-hf.yaml: -------------------------------------------------------------------------------- 1 | #syntax=aikit:test 2 | apiVersion: v1alpha1 3 | debug: true 4 | models: 5 | - name: llama-3.2-1b-instruct 6 | source: huggingface://MaziyarPanahi/Llama-3.2-1B-Instruct-GGUF/Llama-3.2-1B-Instruct.Q4_K_M.gguf 7 | promptTemplates: 8 | - name: chatMsg 9 | template: | 10 | <|start_header_id|>{{if eq .RoleName \"assistant\"}}assistant{{else if eq .RoleName \"system\"}}system{{else if eq .RoleName \"tool\"}}tool{{else if eq .RoleName \"user\"}}user{{end}}<|end_header_id|> 11 | 12 | {{ if .FunctionCall -}} 13 | Function call: 14 | {{ else if eq .RoleName \"tool\" -}} 15 | Function response: 16 | {{ end -}} 17 | {{ if .Content -}} 18 | {{.Content -}} 19 | {{ else if .FunctionCall -}} 20 | {{ toJson .FunctionCall -}} 21 | {{ end -}} 22 | <|eot_id|> 23 | - name: function 24 | template: | 25 | <|start_header_id|>system<|end_header_id|> 26 | 27 | You are a function calling AI model. You are provided with function signatures within <tools></tools> XML tags. You may call one or more functions to assist with the user query. Don't make assumptions about what values to plug into functions. 
Here are the available tools: 28 | <tools> 29 | {{range .Functions}} 30 | {'type': 'function', 'function': {'name': '{{.Name}}', 'description': '{{.Description}}', 'parameters': {{toJson .Parameters}} }} 31 | {{end}} 32 | </tools> 33 | Use the following pydantic model json schema for each tool call you will make: 34 | {'title': 'FunctionCall', 'type': 'object', 'properties': {'arguments': {'title': 'Arguments', 'type': 'object'}, 'name': {'title': 'Name', 'type': 'string'}}, 'required': ['arguments', 'name']}<|eot_id|><|start_header_id|>assistant<|end_header_id|> 35 | Function call: 36 | - name: chat 37 | template: | 38 | <|begin_of_text|>{{.Input }} 39 | <|start_header_id|>assistant<|end_header_id|> 40 | - name: completion 41 | template: | 42 | {{.Input}} 43 | config: | 44 | - name: llama-3.2-1b-instruct 45 | backend: llama 46 | parameters: 47 | model: Llama-3.2-1B-Instruct.Q4_K_M.gguf 48 | context_size: 8192 49 | template: 50 | chat_message: \"chatMsg\" 51 | function: \"function\" 52 | chat: \"chat\" 53 | completion: \"completion\" 54 | stopwords: 55 | - <|im_end|> 56 | - <dummy32000> 57 | - <|eot_id|> 58 | - <|end_of_text|> 59 | -------------------------------------------------------------------------------- /test/aikitfile-llama-cpp.yaml: -------------------------------------------------------------------------------- 1 | #syntax=aikit:test 2 | apiVersion: v1alpha1 3 | debug: true 4 | backends: 5 | - llama-cpp 6 | models: 7 | - name: llama-3.2-1b-instruct 8 | source: https://huggingface.co/MaziyarPanahi/Llama-3.2-1B-Instruct-GGUF/resolve/main/Llama-3.2-1B-Instruct.Q4_K_M.gguf 9 | sha256: "e4650dd6b45ef456066b11e4927f775eef4dd1e0e8473c3c0f27dd19ee13cc4e" 10 | promptTemplates: 11 | - name: chatMsg 12 | template: | 13 | <|start_header_id|>{{if eq .RoleName \"assistant\"}}assistant{{else if eq .RoleName \"system\"}}system{{else if eq .RoleName \"tool\"}}tool{{else if eq .RoleName \"user\"}}user{{end}}<|end_header_id|> 14 | 15 | {{ if .FunctionCall -}} 16 | Function call: 17 | {{ else if eq .RoleName \"tool\" -}} 18 | Function response: 19 | {{ end -}} 20 | {{ if .Content -}} 21 | {{.Content -}} 22 | {{ else if .FunctionCall -}} 23 | {{ toJson .FunctionCall -}} 24 | {{ end -}} 25 | <|eot_id|> 26 | - name: function 27 | template: | 28 | <|start_header_id|>system<|end_header_id|> 29 | 30 | You have access to the following functions: 31 | 32 | {{range .Functions}} 33 | Use the function '{{.Name}}' to '{{.Description}}' 34 | {{toJson .Parameters}} 35 | {{end}} 36 | 37 | Think very carefully before calling functions. 
38 | If you choose to call a function, ONLY reply in the following format with no prefix or suffix:
39 | 
40 | <function=example_function_name>{{`{{\"example_name\": \"example_value\"}}`}}</function>
41 | 
42 | Reminder:
43 | - If looking for real time information use relevant functions before falling back to searching on internet
44 | - Function calls MUST follow the specified format, start with <function= and end with </function>
45 | - Required parameters MUST be specified
46 | - Only call one function at a time
47 | - Put the entire function call reply on one line
48 | <|eot_id|>
49 | {{.Input }}
50 | <|start_header_id|>assistant<|end_header_id|>
51 | - name: chat
52 | template: |
53 | {{.Input }}
54 | <|start_header_id|>assistant<|end_header_id|>
55 | - name: completion
56 | template: |
57 | {{.Input}}
58 | config: |
59 | - name: llama-3.2-1b-instruct
60 | backend: llama-cpp
61 | function:
62 | disable_no_action: true
63 | grammar:
64 | disable: true
65 | response_regex:
66 | - <function=(?P<name>\w+)>(?P<arguments>.*)</function>
67 | parameters:
68 | model: Llama-3.2-1B-Instruct.Q4_K_M.gguf
69 | context_size: 8192
70 | template:
71 | chat_message: \"chatMsg\"
72 | function: \"function\"
73 | chat: \"chat\"
74 | completion: \"completion\"
75 | stopwords:
76 | - <|im_end|>
77 | - <dummy32000>
78 | - <|eot_id|>
79 | - <|end_of_text|>
80 | 
--------------------------------------------------------------------------------
/test/aikitfile-llama-cuda.yaml:
--------------------------------------------------------------------------------
1 | #syntax=aikit:test
2 | apiVersion: v1alpha1
3 | debug: true
4 | runtime: cuda
5 | models:
6 | - name: llama-3.2-1b-instruct
7 | source: https://huggingface.co/MaziyarPanahi/Llama-3.2-1B-Instruct-GGUF/resolve/main/Llama-3.2-1B-Instruct.Q4_K_M.gguf
8 | sha256: "e4650dd6b45ef456066b11e4927f775eef4dd1e0e8473c3c0f27dd19ee13cc4e"
9 | promptTemplates:
10 | - name: chatMsg
11 | template: |
12 | <|start_header_id|>{{if eq .RoleName \"assistant\"}}assistant{{else if eq .RoleName \"system\"}}system{{else if eq .RoleName \"tool\"}}tool{{else if eq .RoleName \"user\"}}user{{end}}<|end_header_id|>
13 | 
14 | {{ if .FunctionCall -}}
15 | Function call:
16 | {{ else if eq .RoleName \"tool\" -}}
17 | Function response:
18 | {{ end -}}
19 | {{ if .Content -}}
20 | {{.Content -}}
21 | {{ else if .FunctionCall -}}
22 | {{ toJson .FunctionCall -}}
23 | {{ end -}}
24 | <|eot_id|>
25 | - name: function
26 | template: |
27 | <|start_header_id|>system<|end_header_id|>
28 | 
29 | You have access to the following functions:
30 | 
31 | {{range .Functions}}
32 | Use the function '{{.Name}}' to '{{.Description}}'
33 | {{toJson .Parameters}}
34 | {{end}}
35 | 
36 | Think very carefully before calling functions.
37 | If you choose to call a function, ONLY reply in the following format with no prefix or suffix:
38 | 
39 | <function=example_function_name>{{`{{\"example_name\": \"example_value\"}}`}}</function>
40 | 
41 | Reminder:
42 | - If looking for real time information use relevant functions before falling back to searching on internet
43 | - Function calls MUST follow the specified format, start with <function= and end with </function>
44 | - Required parameters MUST be specified
45 | - Only call one function at a time
46 | - Put the entire function call reply on one line
47 | <|eot_id|>
48 | {{.Input }}
49 | <|start_header_id|>assistant<|end_header_id|>
50 | - name: chat
51 | template: |
52 | {{.Input }}
53 | <|start_header_id|>assistant<|end_header_id|>
54 | - name: completion
55 | template: |
56 | {{.Input}}
57 | config: |
58 | - name: llama-3.2-1b-instruct
59 | backend: llama
60 | function:
61 | disable_no_action: true
62 | grammar:
63 | disable: true
64 | response_regex:
65 | - <function=(?P<name>\w+)>(?P<arguments>.*)</function>
66 | parameters:
67 | model: Llama-3.2-1B-Instruct.Q4_K_M.gguf
68 | context_size: 8192
69 | f16: true
70 | template:
71 | chat_message: \"chatMsg\"
72 | function: \"function\"
73 | chat: \"chat\"
74 | completion: \"completion\"
75 | stopwords:
76 | - <|im_end|>
77 | - <dummy32000>
78 | - <|eot_id|>
79 | - <|end_of_text|>
80 | 
--------------------------------------------------------------------------------
/test/aikitfile-llama.yaml:
--------------------------------------------------------------------------------
1 | #syntax=aikit:test
2 | apiVersion: v1alpha1
3 | debug: true
4 | backends:
5 | - llama-cpp
6 | models:
7 | - name: llama-3.2-1b-instruct
8 | source: https://huggingface.co/MaziyarPanahi/Llama-3.2-1B-Instruct-GGUF/resolve/main/Llama-3.2-1B-Instruct.Q4_K_M.gguf
9 | sha256: "e4650dd6b45ef456066b11e4927f775eef4dd1e0e8473c3c0f27dd19ee13cc4e"
10 | promptTemplates:
11 | - name: chatMsg
12 | template: |
13 | <|start_header_id|>{{if eq .RoleName \"assistant\"}}assistant{{else if eq .RoleName \"system\"}}system{{else if eq .RoleName \"tool\"}}tool{{else if eq .RoleName \"user\"}}user{{end}}<|end_header_id|>
14 | 
15 | {{ if .FunctionCall -}}
16 | Function call:
17 | {{ else if eq .RoleName \"tool\" -}}
18 | Function response:
19 | {{ end -}}
20 | {{ if .Content -}}
21 | {{.Content -}}
22 | {{ else if .FunctionCall -}}
23 | {{ toJson .FunctionCall -}}
24 | {{ end -}}
25 | <|eot_id|>
26 | - name: function
27 | template: |
28 | <|start_header_id|>system<|end_header_id|>
29 | 
30 | You have access to the following functions:
31 | 
32 | {{range .Functions}}
33 | Use the function '{{.Name}}' to '{{.Description}}'
34 | {{toJson .Parameters}}
35 | {{end}}
36 | 
37 | Think very carefully before calling functions.
38 | If you choose to call a function, ONLY reply in the following format with no prefix or suffix:
39 | 
40 | <function=example_function_name>{{`{{\"example_name\": \"example_value\"}}`}}</function>
41 | 
42 | Reminder:
43 | - If looking for real time information use relevant functions before falling back to searching on internet
44 | - Function calls MUST follow the specified format, start with <function= and end with </function>
45 | - Required parameters MUST be specified
46 | - Only call one function at a time
47 | - Put the entire function call reply on one line
48 | <|eot_id|>
49 | {{.Input }}
50 | <|start_header_id|>assistant<|end_header_id|>
51 | - name: chat
52 | template: |
53 | {{.Input }}
54 | <|start_header_id|>assistant<|end_header_id|>
55 | - name: completion
56 | template: |
57 | {{.Input}}
58 | config: |
59 | - name: llama-3.2-1b-instruct
60 | backend: llama
61 | function:
62 | disable_no_action: true
63 | grammar:
64 | disable: true
65 | response_regex:
66 | - <function=(?P<name>\w+)>(?P<arguments>.*)</function>
67 | parameters:
68 | model: Llama-3.2-1B-Instruct.Q4_K_M.gguf
69 | context_size: 8192
70 | template:
71 | chat_message: \"chatMsg\"
72 | function: \"function\"
73 | chat: \"chat\"
74 | completion: \"completion\"
75 | stopwords:
76 | - <|im_end|>
77 | - <dummy32000>
78 | - <|eot_id|>
79 | - <|end_of_text|>
80 | 
--------------------------------------------------------------------------------
/test/aikitfile-llava.yaml:
--------------------------------------------------------------------------------
1 | #syntax=aikit:test
2 | apiVersion: v1alpha1
3 | debug: true
4 | models:
5 | - name: ggml-model-q4_k.gguf
6 | source: https://huggingface.co/mys/ggml_bakllava-1/resolve/main/ggml-model-q4_k.gguf
7 | sha256: 5be58c339d8762e72d482a3e071a58d2df07df4a7aaabf8869415ae2b0f088d6
8 | promptTemplates:
9 | - name: chat-simple
10 | template: |
11 | A chat between a curious human and an artificial intelligence assistant. The assistant gives helpful, detailed, and polite answers to the human's questions.
12 | {{.Input}} 13 | ASSISTANT: 14 | - name: mmproj-model-f16.gguf 15 | source: https://huggingface.co/mys/ggml_bakllava-1/resolve/main/mmproj-model-f16.gguf 16 | sha256: 2e467eba710002839e0966d5e329942bb836eabd4e787bc713b07eff1d8ea13b 17 | config: | 18 | - name: llava 19 | backend: llama-cpp 20 | context_size: 4096 21 | threads: 4 22 | mmap: true 23 | mmproj: mmproj-model-f16.gguf 24 | roles: 25 | user: \"USER:\" 26 | assistant: \"ASSISTANT:\" 27 | system: \"SYSTEM:\" 28 | parameters: 29 | model: ggml-model-q4_k.gguf 30 | temperature: 0.2 31 | top_k: 40 32 | top_p: 0.95 33 | template: 34 | chat: chat-simple 35 | -------------------------------------------------------------------------------- /test/aikitfile-oci.yaml: -------------------------------------------------------------------------------- 1 | #syntax=aikit:test 2 | apiVersion: v1alpha1 3 | debug: true 4 | models: 5 | - name: llama-3.2-1b-instruct 6 | source: oci://registry.ollama.ai/library/llama3.2:1b 7 | promptTemplates: 8 | - name: chatMsg 9 | template: | 10 | <|start_header_id|>{{if eq .RoleName \"assistant\"}}assistant{{else if eq .RoleName \"system\"}}system{{else if eq .RoleName \"tool\"}}tool{{else if eq .RoleName \"user\"}}user{{end}}<|end_header_id|> 11 | 12 | {{ if .FunctionCall -}} 13 | Function call: 14 | {{ else if eq .RoleName \"tool\" -}} 15 | Function response: 16 | {{ end -}} 17 | {{ if .Content -}} 18 | {{.Content -}} 19 | {{ else if .FunctionCall -}} 20 | {{ toJson .FunctionCall -}} 21 | {{ end -}} 22 | <|eot_id|> 23 | - name: function 24 | template: | 25 | <|start_header_id|>system<|end_header_id|> 26 | 27 | You are a function calling AI model. You are provided with function signatures within <tools></tools> XML tags. You may call one or more functions to assist with the user query. Don't make assumptions about what values to plug into functions. Here are the available tools: 28 | <tools> 29 | {{range .Functions}} 30 | {'type': 'function', 'function': {'name': '{{.Name}}', 'description': '{{.Description}}', 'parameters': {{toJson .Parameters}} }} 31 | {{end}} 32 | </tools> 33 | Use the following pydantic model json schema for each tool call you will make: 34 | {'title': 'FunctionCall', 'type': 'object', 'properties': {'arguments': {'title': 'Arguments', 'type': 'object'}, 'name': {'title': 'Name', 'type': 'string'}}, 'required': ['arguments', 'name']}<|eot_id|><|start_header_id|>assistant<|end_header_id|> 35 | Function call: 36 | - name: chat 37 | template: | 38 | <|begin_of_text|>{{.Input }} 39 | <|start_header_id|>assistant<|end_header_id|> 40 | - name: completion 41 | template: | 42 | {{.Input}} 43 | config: | 44 | - name: llama-3.2-1b-instruct 45 | backend: llama 46 | parameters: 47 | model: llama3.2 48 | context_size: 8192 49 | template: 50 | chat_message: \"chatMsg\" 51 | function: \"function\" 52 | chat: \"chat\" 53 | completion: \"completion\" 54 | stopwords: 55 | - <|im_end|> 56 | - <dummy32000> 57 | - <|eot_id|> 58 | - <|end_of_text|> 59 | -------------------------------------------------------------------------------- /test/aikitfile-unsloth-custom.yaml: -------------------------------------------------------------------------------- 1 | #syntax=aikit:test 2 | apiVersion: v1alpha1 3 | debug: true 4 | runtime: cuda 5 | models: 6 | - name: custom 7 | source: model-q4_k_m.gguf 8 | promptTemplates: 9 | - name: instruct 10 | template: | 11 | Below is an instruction that describes a task. Write a response that appropriately completes the request. 
12 | 13 | ### Instruction: 14 | {{.Input}} 15 | 16 | ### Response: 17 | config: | 18 | - name: custom 19 | backend: llama 20 | parameters: 21 | model: model-q4_k_m.gguf 22 | context_size: 4096 23 | f16: true 24 | mmap: true 25 | template: 26 | chat: instruct 27 | -------------------------------------------------------------------------------- /test/aikitfile-unsloth.yaml: -------------------------------------------------------------------------------- 1 | #syntax=aikit:test 2 | apiVersion: v1alpha1 3 | baseModel: unsloth/Meta-Llama-3.1-8B 4 | datasets: 5 | - source: "yahma/alpaca-cleaned" 6 | type: alpaca 7 | config: 8 | unsloth: 9 | packing: false 10 | maxSeqLength: 2048 11 | loadIn4bit: true 12 | batchSize: 2 13 | gradientAccumulationSteps: 4 14 | warmupSteps: 10 15 | maxSteps: 60 16 | learningRate: 0.0002 17 | loggingSteps: 1 18 | optimizer: adamw_8bit 19 | weightDecay: 0.01 20 | lrSchedulerType: linear 21 | seed: 42 22 | output: 23 | quantize: q4_k_m 24 | name: model 25 | -------------------------------------------------------------------------------- /website/.gitignore: -------------------------------------------------------------------------------- 1 | # Dependencies 2 | /node_modules 3 | 4 | # Production 5 | /build 6 | 7 | # Generated files 8 | .docusaurus 9 | .cache-loader 10 | 11 | # Misc 12 | .DS_Store 13 | .env.local 14 | .env.development.local 15 | .env.test.local 16 | .env.production.local 17 | 18 | npm-debug.log* 19 | yarn-debug.log* 20 | yarn-error.log* 21 | -------------------------------------------------------------------------------- /website/README.md: -------------------------------------------------------------------------------- 1 | # Website 2 | 3 | This website is built using [Docusaurus](https://docusaurus.io/), a modern static website generator. 4 | 5 | ### Installation 6 | 7 | ``` 8 | $ yarn 9 | ``` 10 | 11 | ### Local Development 12 | 13 | ``` 14 | $ yarn start 15 | ``` 16 | 17 | This command starts a local development server and opens up a browser window. Most changes are reflected live without having to restart the server. 18 | 19 | ### Build 20 | 21 | ``` 22 | $ yarn build 23 | ``` 24 | 25 | This command generates static content into the `build` directory and can be served using any static contents hosting service. 26 | 27 | ### Deployment 28 | 29 | Using SSH: 30 | 31 | ``` 32 | $ USE_SSH=true yarn deploy 33 | ``` 34 | 35 | Not using SSH: 36 | 37 | ``` 38 | $ GIT_USER=<Your GitHub username> yarn deploy 39 | ``` 40 | 41 | If you are using GitHub pages for hosting, this command is a convenient way to build the website and push to the `gh-pages` branch. 
42 | -------------------------------------------------------------------------------- /website/babel.config.js: -------------------------------------------------------------------------------- 1 | module.exports = { 2 | presets: [require.resolve('@docusaurus/core/lib/babel/preset')], 3 | }; 4 | -------------------------------------------------------------------------------- /website/docs/architecture.md: -------------------------------------------------------------------------------- 1 | --- 2 | title: Architecture 3 | --- 4 | 5 | Below is the architecture diagram for AIKit: 6 | 7 | [![AIKit Architecture](../static/img/architecture.png)](../static/img/architecture.png) 8 | -------------------------------------------------------------------------------- /website/docs/demo.md: -------------------------------------------------------------------------------- 1 | --- 2 | title: Demos 3 | --- 4 | 5 | Below are various demos to help you get started with AIKit. 6 | 7 | ## Inference 8 | 9 | ### Running inference with CUDA 10 | 11 | https://www.youtube.com/watch?v=yFh_Zfk34PE 12 | 13 | ### Running inference (on ARM64) 14 | 15 | https://www.youtube.com/watch?v=O0AOnxXp-o4 16 | 17 | ### Using Flux-1 Dev to generate images 18 | 19 | https://www.youtube.com/watch?v=m38h4afJlBE 20 | 21 | ## Building a model image 22 | 23 | ### Building a model image from a model on Hugging Face and running inference 24 | 25 | https://www.youtube.com/watch?v=DI5NbdEFLC8 26 | 27 | ### Building a model image from a model in ollama (OCI artifacts) and running inference 28 | 29 | https://www.youtube.com/watch?v=G6PrzhEe_p8 30 | 31 | ### Building a model image from configuration and running inference 32 | 33 | https://www.youtube.com/watch?v=5AQfG5VwN2c 34 | 35 | ## Fine tuning 36 | 37 | ### Fine Tuning Mistral 7b with the OpenHermes dataset and building a model image for inference 38 | 39 | https://www.youtube.com/watch?v=FZuVb-9i-94 40 | -------------------------------------------------------------------------------- /website/docs/diffusion.md: -------------------------------------------------------------------------------- 1 | --- 2 | title: Diffusion 3 | --- 4 | 5 | AIKit supports the [`diffusers`](#diffusers) backend. 6 | 7 | ## diffusers 8 | 9 | The `diffusers` backend uses the Hugging Face [`diffusers`](https://huggingface.co/docs/diffusers/en/index) library to generate images. This backend only supports the CUDA runtime. 10 | 11 | ### Example 12 | 13 | :::warning 14 | Please make sure to change syntax to `#syntax=ghcr.io/kaito-project/aikit/aikit:latest` in the examples below. 15 | ::: 16 | 17 | https://github.com/kaito-project/aikit/blob/main/test/aikitfile-diffusers.yaml 18 | 19 | ## stablediffusion NCNN 20 | 21 | https://github.com/EdVince/Stable-Diffusion-NCNN 22 | 23 | This backend: 24 | - provides support for Stable Diffusion models 25 | - does not support CUDA runtime yet 26 | 27 | :::note 28 | This has been deprecated as of the `v0.18.0` release. 29 | ::: 30 | 31 | ### Example 32 | 33 | :::warning 34 | Please make sure to change syntax to `#syntax=ghcr.io/kaito-project/aikit/aikit:latest` in the examples below.
35 | ::: 36 | 37 | https://github.com/kaito-project/aikit/blob/main/test/aikitfile-stablediffusion.yaml 38 | 39 | ### Demo 40 | 41 | https://www.youtube.com/watch?v=gh7b-rt70Ug 42 | -------------------------------------------------------------------------------- /website/docs/exllama2.md: -------------------------------------------------------------------------------- 1 | --- 2 | title: Exllama v2 (GPTQ and EXL2) 3 | --- 4 | 5 | [ExLlamaV2](https://github.com/turboderp/exllamav2) is an inference library for running local LLMs on modern consumer GPUs. 6 | 7 | This backend: 8 | - provides support for GPTQ and EXL2 models 9 | - requires CUDA runtime 10 | 11 | :::note 12 | This is an experimental backend and it may change in the future. 13 | ::: 14 | 15 | ## Example 16 | 17 | :::warning 18 | Please make sure to change syntax to `#syntax=ghcr.io/kaito-project/aikit/aikit:latest` in the examples below. 19 | ::: 20 | 21 | ### EXL2 22 | https://github.com/kaito-project/aikit/blob/main/test/aikitfile-exllama2-exl2.yaml 23 | 24 | ### GPTQ 25 | https://github.com/kaito-project/aikit/blob/main/test/aikitfile-exllama2-gptq.yaml 26 | -------------------------------------------------------------------------------- /website/docs/fine-tune.md: -------------------------------------------------------------------------------- 1 | --- 2 | title: Fine Tuning 3 | --- 4 | 5 | The fine tuning process allows the adaptation of pre-trained models to domain-specific data. At this time, the AIKit fine tuning process is only supported with NVIDIA GPUs. 6 | 7 | :::note 8 | Due to limitations with BuildKit and NVIDIA, it is essential that the GPU driver version on your host matches the version AIKit will install in the container during the build process. 9 | 10 | To determine your host GPU driver version, you can execute `nvidia-smi` or `cat /proc/driver/nvidia/version`. 11 | 12 | For information on the GPU driver versions supported by AIKit, please visit https://download.nvidia.com/XFree86/Linux-x86_64/. 13 | 14 | Should your host GPU driver version not be listed, you will need to update to a compatible version available in the NVIDIA downloads mentioned above. It's important to note that there's no need to directly install drivers from the NVIDIA downloads; the versions simply need to be consistent. 15 | 16 | We hope to optimize this process in the future to eliminate this requirement. 17 | ::: 18 | 19 | ## Getting Started 20 | 21 | To get started, you need to create a builder to be able to access host GPU devices. 22 | 23 | Create a builder with the following configuration: 24 | 25 | ```bash 26 | docker buildx create --name aikit-builder --use --buildkitd-flags '--allow-insecure-entitlement security.insecure' 27 | ``` 28 | 29 | :::tip 30 | Additionally, you can build using other BuildKit drivers, such as the [Kubernetes driver](https://docs.docker.com/build/drivers/kubernetes/) by setting `--driver=kubernetes`, if you are interested in building using a Kubernetes cluster. Please see [BuildKit Drivers](https://docs.docker.com/build/drivers/) for more information. 31 | ::: 32 | 33 | ## Targets and Configuration 34 | 35 | AIKit is capable of supporting multiple fine tuning implementation targets. At this time, [Unsloth](https://github.com/unslothai/unsloth) is the only supported target, but this can be extended to other fine tuning implementations in the future. 36 | 37 | ### Unsloth 38 | 39 | Create a YAML file with your configuration.
For example, a minimal config looks like: 40 | 41 | ```yaml 42 | #syntax=ghcr.io/kaito-project/aikit/aikit:latest 43 | apiVersion: v1alpha1 44 | baseModel: "unsloth/llama-2-7b-bnb-4bit" # base model to be fine tuned. this can be any model from Huggingface. For unsloth optimized base models, see https://huggingface.co/unsloth 45 | datasets: 46 | - source: "yahma/alpaca-cleaned" # data set to be used for fine tuning. This can be a Huggingface dataset or a URL pointing to a JSON file 47 | type: "alpaca" # type of dataset. only alpaca is supported at this time. 48 | config: 49 | unsloth: 50 | ``` 51 | 52 | For full configuration, please refer to [Fine Tune API Specifications](./specs-finetune.md). 53 | 54 | :::note 55 | Please refer to [Unsloth documentation](https://github.com/unslothai/unsloth) for more information about Unsloth configuration. 56 | ::: 57 | 58 | #### Example Configuration 59 | 60 | :::warning 61 | Please make sure to change syntax to `#syntax=ghcr.io/kaito-project/aikit/aikit:latest` in the example below. 62 | ::: 63 | 64 | https://github.com/kaito-project/aikit/blob/main/test/aikitfile-unsloth.yaml 65 | 66 | 67 | ## Build 68 | 69 | Build using the following command, and make sure to replace `--target` with the fine-tuning implementation of your choice (`unsloth` is the only option supported at this time), `--file` with the path to your configuration YAML, and `--output` with the output directory for the fine-tuned model. 70 | 71 | ```bash 72 | docker buildx build --builder aikit-builder --allow security.insecure --file "/path/to/config.yaml" --output "/path/to/output" --target unsloth --progress plain . 73 | ``` 74 | 75 | Depending on your setup and configuration, the build process may take some time. At the end of the build, the fine-tuned model will automatically be quantized with the specified format and output to the path specified in `--output`. 76 | 77 | The output will be a `GGUF` model file with the name and quantization format from the configuration. For example: 78 | 79 | ```bash 80 | $ ls -al _output 81 | -rw-r--r-- 1 kaito-project kaito-project 7161089856 Mar 3 00:19 aikit-model-q4_k_m.gguf 82 | ``` 83 | 84 | ## Demo 85 | 86 | https://www.youtube.com/watch?v=FZuVb-9i-94 87 | 88 | ## What's next? 89 | 90 | 👉 Now that you have a fine-tuned model output as a GGUF file, you can refer to [Creating Model Images](./create-images.md) on how to create an image with AIKit to serve your fine-tuned model! 91 | 92 | ## Troubleshooting 93 | 94 | ### Build fails with `failed to solve: DeadlineExceeded: context deadline exceeded` 95 | 96 | This is a known issue with BuildKit and might be related to disk speed. For more information, please see https://github.com/moby/buildkit/issues/4327 97 | 98 | ### Build fails with `ERROR 404: Not Found.` 99 | 100 | This issue arises from a discrepancy between the GPU driver versions on your host and the container. Unfortunately, a matching version for your host driver is not available in the NVIDIA downloads at this time. For further details, please consult the note provided at the beginning of this page. 101 | 102 | If you are on Windows Subsystem for Linux (WSL), WSL doesn't expose the host driver version information on `/proc/driver/nvidia/version`. Due to this limitation, WSL is not supported at this time.
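On supported (non-WSL) hosts, a quick sanity check is to print the host driver version and confirm it is listed in the NVIDIA downloads referenced in the note at the top of this page. The commands below are illustrative sketches of that check; the exact output format varies by driver version:

```bash
# Print the host GPU driver version (this must match the driver version
# AIKit installs in the container during the build).
cat /proc/driver/nvidia/version

# Alternatively, query only the driver version via nvidia-smi.
nvidia-smi --query-gpu=driver_version --format=csv,noheader
```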
103 | -------------------------------------------------------------------------------- /website/docs/gpu.md: -------------------------------------------------------------------------------- 1 | --- 2 | title: GPU Acceleration 3 | --- 4 | 5 | :::note 6 | At this time, only NVIDIA GPU acceleration is supported, with experimental support for Apple Silicon. Please open an issue if you'd like to see support for other GPU vendors. 7 | ::: 8 | 9 | ## NVIDIA 10 | 11 | AIKit supports GPU-accelerated inferencing with the [NVIDIA Container Toolkit](https://github.com/NVIDIA/nvidia-container-toolkit). You must also have [NVIDIA Drivers](https://www.nvidia.com/en-us/drivers/unix/) installed on your host machine. 12 | 13 | For Kubernetes, the [NVIDIA GPU Operator](https://github.com/NVIDIA/gpu-operator) provides a streamlined way to install the NVIDIA drivers and container toolkit to configure your cluster to use GPUs. 14 | 15 | To get started with GPU-accelerated inferencing, make sure to set the following in your `aikitfile` and build your model. 16 | 17 | ```yaml 18 | runtime: cuda # use NVIDIA CUDA runtime 19 | ``` 20 | 21 | For the `llama` backend, set the following in your `config`: 22 | 23 | ```yaml 24 | f16: true # use float16 precision 25 | gpu_layers: 35 # number of layers to offload to GPU 26 | low_vram: true # for devices with low VRAM 27 | ``` 28 | 29 | :::tip 30 | Make sure to customize these values based on your model and GPU specs. 31 | ::: 32 | 33 | :::note 34 | For the `exllama2` backend, GPU acceleration is enabled by default and cannot be disabled. 35 | ::: 36 | 37 | After building the model, you can run it with the [`--gpus all`](https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/latest/docker-specialized.html#gpu-enumeration) flag to enable GPU support: 38 | 39 | ```bash 40 | # for pre-made models, replace "my-model" with the image name 41 | docker run --rm --gpus all -p 8080:8080 my-model 42 | ``` 43 | 44 | If GPU acceleration is working, you'll see output similar to the following in the debug logs: 45 | 46 | ```bash 47 | 5:32AM DBG GRPC(llama-2-7b-chat.Q4_K_M.gguf-127.0.0.1:43735): stderr ggml_init_cublas: found 1 CUDA devices: 48 | 5:32AM DBG GRPC(llama-2-7b-chat.Q4_K_M.gguf-127.0.0.1:43735): stderr Device 0: Tesla T4, compute capability 7.5 49 | ... 50 | 5:32AM DBG GRPC(llama-2-7b-chat.Q4_K_M.gguf-127.0.0.1:43735): stderr llm_load_tensors: using CUDA for GPU acceleration 51 | 5:32AM DBG GRPC(llama-2-7b-chat.Q4_K_M.gguf-127.0.0.1:43735): stderr llm_load_tensors: mem required = 70.41 MB (+ 2048.00 MB per state) 52 | 5:32AM DBG GRPC(llama-2-7b-chat.Q4_K_M.gguf-127.0.0.1:43735): stderr llm_load_tensors: offloading 32 repeating layers to GPU 53 | 5:32AM DBG GRPC(llama-2-7b-chat.Q4_K_M.gguf-127.0.0.1:43735): stderr llm_load_tensors: offloading non-repeating layers to GPU 54 | 5:32AM DBG GRPC(llama-2-7b-chat.Q4_K_M.gguf-127.0.0.1:43735): stderr llm_load_tensors: offloading v cache to GPU 55 | 5:32AM DBG GRPC(llama-2-7b-chat.Q4_K_M.gguf-127.0.0.1:43735): stderr llm_load_tensors: offloading k cache to GPU 56 | 5:32AM DBG GRPC(llama-2-7b-chat.Q4_K_M.gguf-127.0.0.1:43735): stderr llm_load_tensors: offloaded 35/35 layers to GPU 57 | 5:32AM DBG GRPC(llama-2-7b-chat.Q4_K_M.gguf-127.0.0.1:43735): stderr llm_load_tensors: VRAM used: 5869 MB 58 | ``` 59 | 60 | ### Demo 61 | 62 | https://www.youtube.com/watch?v=yFh_Zfk34PE 63 | 64 | ## Apple Silicon (experimental) 65 | 66 | :::note 67 | Apple Silicon is an experimental runtime and it may change in the future.
This runtime is specific to Apple Silicon only, and it will not work as expected on other architectures, including Intel Macs. 68 | ::: 69 | 70 | AIKit supports Apple Silicon GPU acceleration with Podman Desktop for Mac via [`libkrun`](https://github.com/containers/libkrun). Please see the [Podman Desktop documentation](https://podman-desktop.io/docs/podman/gpu) on how to enable GPU support. 71 | 72 | To get started with Apple Silicon GPU-accelerated inferencing, make sure to set the following in your `aikitfile` and build your model. 73 | 74 | ```yaml 75 | runtime: applesilicon # use Apple Silicon runtime 76 | ``` 77 | 78 | Please note that only the default `llama.cpp` backend with `gguf` models is supported for Apple Silicon. 79 | 80 | After building the model, you can run it with: 81 | 82 | ```bash 83 | # for pre-made models, replace "my-model" with the image name 84 | podman run --rm --device /dev/dri -p 8080:8080 my-model 85 | ``` 86 | 87 | If GPU acceleration is working, you'll see output similar to the following in the debug logs: 88 | 89 | ```bash 90 | 6:16AM DBG GRPC(phi-3.5-3.8b-instruct-127.0.0.1:39883): stderr ggml_vulkan: Found 1 Vulkan devices: 91 | 6:16AM DBG GRPC(phi-3.5-3.8b-instruct-127.0.0.1:39883): stderr Vulkan0: Virtio-GPU Venus (Apple M1 Max) (venus) | uma: 1 | fp16: 1 | warp size: 32 92 | 6:16AM DBG GRPC(phi-3.5-3.8b-instruct-127.0.0.1:39883): stderr llama_load_model_from_file: using device Vulkan0 (Virtio-GPU Venus (Apple M1 Max)) - 65536 MiB free 93 | ... 94 | 6:16AM DBG GRPC(phi-3.5-3.8b-instruct-127.0.0.1:39883): stderr llm_load_tensors: offloading 32 repeating layers to GPU 95 | 6:16AM DBG GRPC(phi-3.5-3.8b-instruct-127.0.0.1:39883): stderr llm_load_tensors: offloading output layer to GPU 96 | 6:16AM DBG GRPC(phi-3.5-3.8b-instruct-127.0.0.1:39883): stderr llm_load_tensors: offloaded 33/33 layers to GPU 97 | 6:16AM DBG GRPC(phi-3.5-3.8b-instruct-127.0.0.1:39883): stderr llm_load_tensors: CPU_Mapped model buffer size = 52.84 MiB 98 | 6:16AM DBG GRPC(phi-3.5-3.8b-instruct-127.0.0.1:39883): stderr llm_load_tensors: Vulkan0 model buffer size = 2228.82 MiB 99 | ``` 100 | -------------------------------------------------------------------------------- /website/docs/intro.md: -------------------------------------------------------------------------------- 1 | --- 2 | title: Introduction 3 | slug: / 4 | --- 5 | 6 | AIKit is a comprehensive platform for quickly getting started with hosting, deploying, building, and fine-tuning large language models (LLMs). 7 | 8 | AIKit offers two main capabilities: 9 | 10 | - **Inference**: AIKit uses [LocalAI](https://localai.io/), which supports a wide range of inference capabilities and formats. LocalAI provides a drop-in replacement REST API that is OpenAI API compatible, so you can use any OpenAI API compatible client, such as [Kubectl AI](https://github.com/sozercan/kubectl-ai), [Chatbot-UI](https://github.com/sozercan/chatbot-ui) and many more, to send requests to open LLMs! 11 | 12 | - **[Fine Tuning](fine-tune.md)**: AIKit offers an extensible fine tuning interface. It supports [Unsloth](https://github.com/unslothai/unsloth) for a fast, memory-efficient, and easy fine-tuning experience. 13 | 14 | 👉 To get started, please see [Quick Start](quick-start.md)! 15 | 16 | ## Features 17 | 18 | - 💡 No GPU or internet access is required for inference! 19 | - 🐳 No additional tools are needed except for [Docker](https://docs.docker.com/desktop/install/linux-install/) or [Podman](https://podman.io)!
20 | - 🤏 Minimal image size, resulting in fewer vulnerabilities and a smaller attack surface with a custom [chiseled](https://ubuntu.com/containers/chiseled) image 21 | - 🎵 [Fine tune support](fine-tune.md) 22 | - 🚀 Easy to use declarative configuration for [inference](specs-inference.md) and [fine tuning](specs-finetune.md) 23 | - ✨ OpenAI API compatible to use with any OpenAI API compatible client 24 | - 📸 [Multi-modal model support](vision.md) 25 | - 🖼️ [Image generation support](diffusion.md) 26 | - 🦙 Support for GGUF ([`llama`](https://github.com/ggerganov/llama.cpp)), GPTQ or EXL2 ([`exllama2`](https://github.com/turboderp/exllamav2)), and GGML ([`llama-ggml`](https://github.com/ggerganov/llama.cpp)) models 27 | - 🚢 [Kubernetes deployment ready](#kubernetes-deployment) 28 | - 📦 Supports multiple models with a single image 29 | - 🖥️ Supports [AMD64 and ARM64](create-images.md#multi-platform-support) CPUs and [GPU-accelerated inferencing with NVIDIA GPUs](gpu.md) 30 | - 🔐 Ensure [supply chain security](security.md) with SBOMs, Provenance attestations, and signed images 31 | - 🌈 Support for non-proprietary and self-hosted container registries to store model images 32 | -------------------------------------------------------------------------------- /website/docs/llama-cpp.md: -------------------------------------------------------------------------------- 1 | --- 2 | title: llama.cpp (GGUF and GGML) 3 | --- 4 | 5 | For the `llama` backend, AIKit utilizes and depends on [llama.cpp](https://github.com/ggerganov/llama.cpp), which provides inference of Meta's LLaMA model (and others) in pure C/C++. 6 | 7 | This is the default backend for `aikit`. No additional configuration is required. 8 | 9 | This backend: 10 | - provides support for GGUF (recommended) and GGML models 11 | - supports both CPU (`avx2`, `avx` or `fallback`) and CUDA runtimes 12 | 13 | ## Example 14 | 15 | :::warning 16 | Please make sure to change syntax to `#syntax=ghcr.io/kaito-project/aikit/aikit:latest` in the examples below. 17 | ::: 18 | 19 | ### CPU 20 | https://github.com/kaito-project/aikit/blob/main/test/aikitfile-llama.yaml 21 | 22 | ### GPU (CUDA) 23 | https://github.com/kaito-project/aikit/blob/main/test/aikitfile-llama-cuda.yaml 24 | -------------------------------------------------------------------------------- /website/docs/quick-start.md: -------------------------------------------------------------------------------- 1 | --- 2 | title: Quick Start 3 | --- 4 | 5 | You can get started with AIKit quickly on your local machine without a GPU! 6 | 7 | ```bash 8 | docker run -d --rm -p 8080:8080 ghcr.io/kaito-project/aikit/llama3.1:8b 9 | ``` 10 | 11 | After running this, navigate to [http://localhost:8080/chat](http://localhost:8080/chat) to access the WebUI. 12 | 13 | ## API 14 | 15 | AIKit provides an OpenAI API compatible endpoint, so you can use any OpenAI API compatible client to send requests to open LLMs! 16 | 17 | For example: 18 | 19 | ```bash 20 | curl http://localhost:8080/v1/chat/completions -H "Content-Type: application/json" -d '{ 21 | "model": "llama-3.1-8b-instruct", 22 | "messages": [{"role": "user", "content": "explain kubernetes in a sentence"}] 23 | }' 24 | ``` 25 | 26 | Output should be similar to: 27 | 28 | ```jsonc 29 | { 30 | // ...
31 | "model": "llama-3.1-8b-instruct", 32 | "choices": [ 33 | { 34 | "index": 0, 35 | "finish_reason": "stop", 36 | "message": { 37 | "role": "assistant", 38 | "content": "Kubernetes is an open-source container orchestration system that automates the deployment, scaling, and management of applications and services, allowing developers to focus on writing code rather than managing infrastructure." 39 | } 40 | } 41 | ], 42 | // ... 43 | } 44 | ``` 45 | 46 | That's it! 🎉 API is OpenAI compatible so this is a drop-in replacement for any OpenAI API compatible client. 47 | 48 | ## Demo 49 | 50 | https://www.youtube.com/watch?v=O0AOnxXp-o4 51 | 52 | ## What's next? 53 | 54 | 👉 If you are interested in other pre-made models (such as Gemma, Mixtral or Phi), please refer to [Pre-made models](./premade-models.md). 55 | 56 | 👉 If you are interested in learning more about how to create your own custom model images, please refer to [Creating Model Images](./create-images.md). 57 | 58 | 👉 If you are interested in fine tuning a model with domain-specific knowledge, please refer to [Fine Tuning](./fine-tune.md). 59 | -------------------------------------------------------------------------------- /website/docs/release.md: -------------------------------------------------------------------------------- 1 | --- 2 | title: Release Process 3 | --- 4 | 5 | The release process is as follows: 6 | 7 | - Trigger the [release-pr action](https://github.com/kaito-project/aikit/actions/workflows/release-pr.yaml) with the version to release to create a release PR. Merge the PR to the applicable `release-X.Y` branch. 8 | 9 | - Tag the `release-X.Y` branch with a version number that's semver compliant (vMAJOR.MINOR.PATCH), and push the tag to GitHub. 10 | 11 | ```bash 12 | git tag v0.1.0 13 | git push origin v0.1.0 14 | ``` 15 | 16 | - GitHub Actions will automatically build the AIKit image and push the versioned and `latest` tag to GitHub Container Registry (GHCR) using [release action](https://github.com/kaito-project/aikit/actions/workflows/release.yaml). 17 | 18 | - Once release is done, trigger [update models](https://github.com/kaito-project/aikit/actions/workflows/update-models.yaml) action to update the pre-built models. 19 | 20 | - Mixtral 8x7b and Llama 3 70b models does not fit into GitHub runners due to their size. Trigger self-hosted [update models](https://github.com/kaito-project/aikit/actions/workflows/update-models-self.yaml) action to update these pre-built models. 21 | -------------------------------------------------------------------------------- /website/docs/security.md: -------------------------------------------------------------------------------- 1 | --- 2 | title: Supply Chain Security 3 | --- 4 | 5 | AIKit is designed with security in mind. Our approach to supply chain security includes detailed tracking of software components, transparent build processes, and proactive vulnerability management. This ensures that every part of our software ecosystem remains secure and trustworthy. 6 | 7 | ## SBOM (Software Bill of Materials) 8 | 9 | AIKit publishes [Software Bill of Materials (SBOM)](https://www.cisa.gov/sbom) for each release and for all [pre-made models](premade-models.md). The SBOM is a comprehensive list of all the components and dependencies used in the project, detailing their versions, licenses, and sources. This transparency helps users and stakeholders understand what software is included, facilitating better risk management and compliance with security and licensing requirements. 
10 | 11 | To access the SBOM for a specific AIKit image, use the following command: 12 | 13 | ```bash 14 | # update this with the image you want to inspect 15 | IMAGE=ghcr.io/kaito-project/aikit/llama3.1:8b 16 | docker buildx imagetools inspect $IMAGE --format "{{ json .SBOM.SPDX }}" 17 | ``` 18 | 19 | The output will provide a detailed JSON document listing all the software components in the image, including direct and transitive dependencies. For more information, please visit the [Docker SBOM documentation](https://docs.docker.com/build/attestations/sbom/). 20 | 21 | ## Provenance attestation 22 | 23 | Provenance attestation provides a detailed record of how and where an image was built, offering transparency and trust in the build process. AIKit uses BuildKit to generate and publish provenance data for each of its images. This data includes information about the build environment, the build process, and the source control context, ensuring that the images are traceable and verifiable from their origins to their final state. 24 | 25 | To inspect the provenance attestation for an AIKit image, you can use the following command: 26 | 27 | ```bash 28 | # update this with the image you want to inspect 29 | IMAGE=ghcr.io/kaito-project/aikit/llama3.1:8b 30 | docker buildx imagetools inspect $IMAGE --format "{{ json .Provenance.SLSA }}" 31 | ``` 32 | 33 | This command will output a JSON file containing the build provenance details, including the source repository, commit hash, build configuration, and more. This helps verify that the image was built from trusted sources and has not been tampered with. For more information, please visit the [Docker Provenance documentation](https://docs.docker.com/build/attestations/slsa-provenance/). 34 | 35 | ## Vulnerability Patching 36 | 37 | Ensuring that our images are free from known vulnerabilities is crucial. Not only does AIKit use a custom distroless-based base image to reduce the number of vulnerabilities, the attack surface, and image size, it also uses [Copacetic](https://github.com/project-copacetic/copacetic) to scan and patch OS-based vulnerabilities for all [pre-made models](premade-models.md) on a weekly basis. Copacetic automates the process of identifying and remediating security issues, helping us maintain a robust and secure software supply chain. 38 | 39 | Every week, this automated pipeline performs the following actions: 40 | 41 | - Scan: It analyzes the images for vulnerabilities using [Trivy](https://github.com/aquasecurity/trivy) against a comprehensive database of known security issues. 42 | - Patch: It automatically applies patches or updates to mitigate any identified vulnerabilities using [Copacetic](https://github.com/project-copacetic/copacetic). 43 | - Publish: It updates the images with the latest security fixes and publishes them to our container registry. 44 | 45 | This automated and regular process ensures that our users always have access to the most secure and up-to-date images. You can monitor the status and results of these scans on our security dashboard. 46 | 47 | ## Image Signature Verification 48 | 49 | AIKit and pre-made models are signed keylessly with OIDC in GitHub Actions using [cosign](https://github.com/sigstore/cosign).
You can verify the images with the following commands: 50 | 51 | ```bash 52 | IMAGE=ghcr.io/kaito-project/aikit/llama2:7b # update this with the image you want to verify 53 | DIGEST=$(cosign triangulate ${IMAGE} --type digest) 54 | cosign verify ${DIGEST} \ 55 | --certificate-oidc-issuer https://token.actions.githubusercontent.com \ 56 | --certificate-identity-regexp 'https://github\.com/kaito-project/aikit/\.github/workflows/.+' 57 | ``` 58 | 59 | You should see output similar to the following: 60 | 61 | ```bash 62 | Verification for ghcr.io/kaito-project/aikit/llama2@sha256:d47fdba491a9a47ce4911539a77e0c0a12b2e14f5beed88cb8072924b02130b4 -- 63 | The following checks were performed on each of these signatures: 64 | - The cosign claims were validated 65 | - Existence of the claims in the transparency log was verified offline 66 | - The code-signing certificate was verified using trusted certificate authority certificates 67 | ... 68 | ``` 69 | -------------------------------------------------------------------------------- /website/docs/specs-finetune.md: -------------------------------------------------------------------------------- 1 | --- 2 | title: Fine Tuning API Specifications 3 | --- 4 | 5 | ## v1alpha1 6 | 7 | ```yaml 8 | #syntax=ghcr.io/kaito-project/aikit/aikit:latest 9 | apiVersion: # required. only v1alpha1 is supported at the moment 10 | baseModel: # required. any base model from Huggingface. for unsloth, see https://huggingface.co/unsloth for 4-bit pre-quantized models 11 | datasets: 12 | - source: # required. this can be a Huggingface dataset repo or a URL pointing to a JSON file 13 | type: # required. can be "alpaca". only alpaca is supported at the moment 14 | config: 15 | unsloth: 16 | packing: # optional. defaults to false. can make training 5x faster for short sequences. 17 | maxSeqLength: # optional. defaults to 2048 18 | loadIn4bit: # optional. defaults to true 19 | batchSize: # optional. defaults to 2 20 | gradientAccumulationSteps: # optional. defaults to 4 21 | warmupSteps: # optional. defaults to 10 22 | maxSteps: # optional. defaults to 60 23 | learningRate: # optional. defaults to 0.0002 24 | loggingSteps: # optional. defaults to 1 25 | optimizer: # optional. defaults to adamw_8bit 26 | weightDecay: # optional. defaults to 0.01 27 | lrSchedulerType: # optional. defaults to linear 28 | seed: # optional. defaults to 42 29 | output: 30 | quantize: # optional. defaults to q4_k_m. for unsloth, see https://github.com/unslothai/unsloth/wiki#saving-to-gguf for allowed quantization methods. 31 | name: # optional.
defaults to "aikit-model" 32 | ``` 33 | 34 | Example: 35 | 36 | ```yaml 37 | #syntax=ghcr.io/kaito-project/aikit/aikit:latest 38 | apiVersion: v1alpha1 39 | baseModel: unsloth/mistral-7b-instruct-v0.2-bnb-4bit 40 | datasets: 41 | - source: yahma/alpaca-cleaned 42 | type: alpaca 43 | config: 44 | unsloth: 45 | packing: false 46 | maxSeqLength: 2048 47 | loadIn4bit: true 48 | batchSize: 2 49 | gradientAccumulationSteps: 4 50 | warmupSteps: 10 51 | maxSteps: 60 52 | learningRate: 0.0002 53 | loggingSteps: 1 54 | optimizer: adamw_8bit 55 | weightDecay: 0.01 56 | lrSchedulerType: linear 57 | seed: 42 58 | output: 59 | quantize: q4_k_m 60 | name: model 61 | ``` 62 | -------------------------------------------------------------------------------- /website/docs/specs-inference.md: -------------------------------------------------------------------------------- 1 | --- 2 | title: Inference API Specifications 3 | --- 4 | 5 | ## v1alpha1 6 | 7 | ```yaml 8 | apiVersion: # required. only v1alpha1 is supported at the moment 9 | debug: # optional. if set to true, debug logs will be printed 10 | runtime: # optional. defaults to avx. can be "avx", "avx2", "avx512", "cuda" 11 | backends: # optional. list of additional backends. can be "llama-cpp" (default), "exllama2", "diffusers" 12 | models: # required. list of models to build 13 | - name: # required. name of the model 14 | source: # required. source of the model. can be a url or a local file 15 | sha256: # optional. sha256 hash of the model file 16 | promptTemplates: # optional. list of prompt templates for a model 17 | - name: # required. name of the template 18 | template: # required. template string 19 | config: # optional. list of config files 20 | ``` 21 | 22 | Example: 23 | 24 | ```yaml 25 | #syntax=ghcr.io/kaito-project/aikit/aikit:latest 26 | apiVersion: v1alpha1 27 | debug: true 28 | runtime: cuda 29 | models: 30 | - name: llama-2-7b-chat 31 | source: https://huggingface.co/TheBloke/Llama-2-7B-Chat-GGUF/resolve/main/llama-2-7b-chat.Q4_K_M.gguf 32 | sha256: "08a5566d61d7cb6b420c3e4387a39e0078e1f2fe5f055f3a03887385304d4bfa" 33 | promptTemplates: 34 | - name: "llama-2-7b-chat" 35 | template: | 36 | {{if eq .RoleName \"assistant\"}}{{.Content}}{{else}} 37 | [INST] 38 | {{if .SystemPrompt}}{{.SystemPrompt}}{{else if eq .RoleName \"system\"}}<<SYS>>{{.Content}}<</SYS>> 39 | 40 | {{else if .Content}}{{.Content}}{{end}} 41 | [/INST] 42 | {{end}} 43 | config: | 44 | - name: \"llama-2-7b-chat\" 45 | backend: \"llama\" 46 | parameters: 47 | top_k: 80 48 | temperature: 0.2 49 | top_p: 0.7 50 | model: \"llama-2-7b-chat.Q4_K_M.gguf\" 51 | context_size: 4096 52 | roles: 53 | function: 'Function Result:' 54 | assistant_function_call: 'Function Call:' 55 | assistant: 'Assistant:' 56 | user: 'User:' 57 | system: 'System:' 58 | template: 59 | chat_message: \"llama-2-7b-chat\" 60 | system_prompt: \"You are a helpful assistant, below is a conversation, please respond with the next message and do not ask follow-up questions\" 61 | ``` 62 | -------------------------------------------------------------------------------- /website/docs/vision.md: -------------------------------------------------------------------------------- 1 | --- 2 | title: Vision 3 | --- 4 | 5 | Vision is supported through [llama-cpp](llama-cpp.md) and [llava](https://llava.hliu.cc/). 6 | 7 | 8 | ## Example 9 | 10 | :::warning 11 | Please make sure to change syntax to `#syntax=ghcr.io/kaito-project/aikit/aikit:latest` in the examples below. 
12 | ::: 13 | 14 | https://github.com/kaito-project/aikit/blob/main/test/aikitfile-llava.yaml 15 | 16 | ## Demo 17 | 18 | [![Vision with LLaVA 19 | ](https://asciinema.org/a/626553.svg 'Vision with LLaVA')](https://asciinema.org/a/626553) 20 | -------------------------------------------------------------------------------- /website/docusaurus.config.js: -------------------------------------------------------------------------------- 1 | // @ts-check 2 | // `@type` JSDoc annotations allow editor autocompletion and type checking 3 | // (when paired with `@ts-check`). 4 | // There are various equivalent ways to declare your Docusaurus config. 5 | // See: https://docusaurus.io/docs/api/docusaurus-config 6 | 7 | import {themes as prismThemes} from 'prism-react-renderer'; 8 | 9 | /** @type {import('@docusaurus/types').Config} */ 10 | const config = { 11 | title: 'AIKit', 12 | tagline: 'Fine-tune, build, and deploy open-source LLMs easily!', 13 | favicon: 'img/favicon.ico', 14 | headTags: [ 15 | { 16 | tagName: "meta", 17 | attributes: { 18 | // Allow Algolia crawler to index the site 19 | // See https://www.algolia.com/doc/tools/crawler/getting-started/create-crawler/#verify-your-domain. 20 | name: "algolia-site-verification", 21 | content: "58101301D914B63C", 22 | } 23 | }, 24 | ], 25 | 26 | // Set the production url of your site here 27 | url: 'https://kaito-project.github.io', 28 | // Set the /<baseUrl>/ pathname under which your site is served 29 | // For GitHub pages deployment, it is often '/<projectName>/' 30 | baseUrl: '/aikit/docs/', 31 | 32 | // GitHub pages deployment config. 33 | // If you aren't using GitHub pages, you don't need these. 34 | organizationName: 'kaito-project', // Usually your GitHub org/user name. 35 | projectName: 'aikit', // Usually your repo name. 36 | 37 | onBrokenLinks: 'throw', // throw 38 | onBrokenMarkdownLinks: 'warn', 39 | 40 | // Even if you don't use internationalization, you can use this field to set 41 | // useful metadata like html lang. For example, if your site is Chinese, you 42 | // may want to replace "en" with "zh-Hans". 43 | i18n: { 44 | defaultLocale: 'en', 45 | locales: ['en'], 46 | }, 47 | 48 | presets: [ 49 | [ 50 | 'classic', 51 | /** @type {import('@docusaurus/preset-classic').Options} */ 52 | ({ 53 | docs: { 54 | routeBasePath: '/', 55 | sidebarPath: './sidebars.js', 56 | // Please change this to your repo. 57 | // Remove this to remove the "edit this page" links. 
58 | editUrl: 59 | 'https://github.com/kaito-project/aikit/blob/main/website/docs/', 60 | }, 61 | blog: false, 62 | theme: { 63 | customCss: './src/css/custom.css', 64 | }, 65 | }), 66 | ], 67 | ], 68 | 69 | themeConfig: 70 | /** @type {import('@docusaurus/preset-classic').ThemeConfig} */ 71 | ({ 72 | // Replace with your project's social card 73 | image: 'img/logo.png', 74 | navbar: { 75 | title: 'AIKit', 76 | logo: { 77 | alt: 'AIKit Logo', 78 | src: 'img/logo.svg', 79 | }, 80 | items: [ 81 | { 82 | href: 'https://github.com/kaito-project/aikit', 83 | position: 'right', 84 | className: 'header-github-link', 85 | 'aria-label': 'GitHub repository', 86 | }, 87 | ], 88 | }, 89 | footer: { 90 | style: 'dark', 91 | copyright: `Copyright © ${new Date().getFullYear()} Sertac Ozercan`, 92 | }, 93 | prism: { 94 | theme: prismThemes.github, 95 | darkTheme: prismThemes.dracula, 96 | additionalLanguages: ['bash', 'json', 'yaml'], 97 | }, 98 | colorMode: { 99 | defaultMode: 'light', 100 | disableSwitch: false, 101 | respectPrefersColorScheme: true, 102 | }, 103 | announcementBar: { 104 | id: 'announcementBar-1', // Increment on change 105 | content: `⭐️ If you like AIKit, please give it a star on <a target="_blank" rel="noopener noreferrer" href="https://github.com/kaito-project/aikit">GitHub</a>!</a>`, 106 | }, 107 | algolia: { 108 | appId: 'BWYV6PMJ5K', 109 | apiKey: 'e2cfa004b0a812062660e0039aca0bda', 110 | indexName: 'aikit-crawler', 111 | }, 112 | }), 113 | }; 114 | 115 | export default config; 116 | -------------------------------------------------------------------------------- /website/osv-scanner.toml: -------------------------------------------------------------------------------- 1 | [[IgnoredVulns]] 2 | id = "GHSA-pxg6-pf52-xh8x" 3 | reason = "Not applicable to core AIKit; used in static website generation only." 4 | 5 | [[IgnoredVulns]] 6 | id = "GHSA-76c9-3jph-rj3q" 7 | reason = "Not applicable to core AIKit; used in static website generation only." 8 | 9 | [[IgnoredVulns]] 10 | id = "GHSA-rhx6-c78j-4q9w" 11 | reason = "Not applicable to core AIKit; used in static website generation only." 12 | 13 | [[IgnoredVulns]] 14 | id = "GHSA-9wv6-86v2-598j" 15 | reason = "Not applicable to core AIKit; used in static website generation only." 16 | 17 | [[IgnoredVulns]] 18 | id = "GHSA-4v9v-hfq4-rm2v" 19 | reason = "Not applicable to core AIKit; used in static website generation only." 20 | 21 | [[IgnoredVulns]] 22 | id = "GHSA-9jgg-88mc-972h" 23 | reason = "Not applicable to core AIKit; used in static website generation only." 
24 | -------------------------------------------------------------------------------- /website/package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "website", 3 | "version": "0.0.0", 4 | "private": true, 5 | "scripts": { 6 | "docusaurus": "docusaurus", 7 | "start": "docusaurus start", 8 | "build": "docusaurus build", 9 | "swizzle": "docusaurus swizzle", 10 | "deploy": "docusaurus deploy", 11 | "clear": "docusaurus clear", 12 | "serve": "docusaurus serve", 13 | "write-translations": "docusaurus write-translations", 14 | "write-heading-ids": "docusaurus write-heading-ids" 15 | }, 16 | "dependencies": { 17 | "@docusaurus/core": "3.1.0", 18 | "@docusaurus/preset-classic": "3.1.0", 19 | "@mdx-js/react": "^3.0.0", 20 | "clsx": "^2.0.0", 21 | "prism-react-renderer": "^2.3.0", 22 | "react": "^18.0.0", 23 | "react-dom": "^18.0.0" 24 | }, 25 | "devDependencies": { 26 | "@docusaurus/module-type-aliases": "3.1.0", 27 | "@docusaurus/types": "3.1.0" 28 | }, 29 | "browserslist": { 30 | "production": [ 31 | ">0.5%", 32 | "not dead", 33 | "not op_mini all" 34 | ], 35 | "development": [ 36 | "last 3 chrome version", 37 | "last 3 firefox version", 38 | "last 5 safari version" 39 | ] 40 | }, 41 | "engines": { 42 | "node": ">=18.0" 43 | } 44 | } 45 | -------------------------------------------------------------------------------- /website/sidebars.js: -------------------------------------------------------------------------------- 1 | /** 2 | * Creating a sidebar enables you to: 3 | - create an ordered group of docs 4 | - render a sidebar for each doc of that group 5 | - provide next/previous navigation 6 | 7 | The sidebars can be generated from the filesystem, or explicitly defined here. 8 | 9 | Create as many sidebars as you want. 10 | */ 11 | 12 | // @ts-check 13 | 14 | /** @type {import('@docusaurus/plugin-content-docs').SidebarsConfig} */ 15 | const sidebars = { 16 | sidebar: [ 17 | { 18 | type: 'category', 19 | label: 'Getting Started', 20 | collapsed: false, 21 | items: [ 22 | 'intro', 23 | 'quick-start', 24 | 'premade-models', 25 | 'demo', 26 | ], 27 | }, 28 | { 29 | type: 'category', 30 | label: 'Features', 31 | collapsed: false, 32 | items: [ 33 | 'create-images', 34 | 'fine-tune', 35 | 'vision', 36 | 'gpu', 37 | 'kubernetes', 38 | 'security', 39 | ], 40 | }, 41 | { 42 | type: 'category', 43 | label: 'Specifications', 44 | collapsed: false, 45 | items: [ 46 | 'specs-inference', 47 | 'specs-finetune', 48 | ], 49 | }, 50 | { 51 | type: 'category', 52 | label: 'Inference Supported Backends', 53 | collapsed: false, 54 | items: [ 55 | 'llama-cpp', 56 | 'exllama2', 57 | 'diffusion', 58 | ], 59 | }, 60 | { 61 | type: 'category', 62 | label: 'Contributing', 63 | collapsed: false, 64 | items: [ 65 | 'architecture', 66 | 'release', 67 | ], 68 | }, 69 | ], 70 | }; 71 | 72 | export default sidebars; 73 | -------------------------------------------------------------------------------- /website/src/css/custom.css: -------------------------------------------------------------------------------- 1 | /** 2 | * Any CSS included here will be global. The classic template 3 | * bundles Infima by default. Infima is a CSS framework designed to 4 | * work well for content-centric websites. 5 | */ 6 | 7 | /* You can override the default Infima variables here. 
*/ 8 | :root { 9 | --ifm-color-primary: #2e8555; 10 | --ifm-color-primary-dark: #29784c; 11 | --ifm-color-primary-darker: #277148; 12 | --ifm-color-primary-darkest: #205d3b; 13 | --ifm-color-primary-light: #33925d; 14 | --ifm-color-primary-lighter: #359962; 15 | --ifm-color-primary-lightest: #3cad6e; 16 | --ifm-code-font-size: 95%; 17 | --docusaurus-highlighted-code-line-bg: rgba(0, 0, 0, 0.1); 18 | } 19 | 20 | /* For readability concerns, you should choose a lighter palette in dark mode. */ 21 | [data-theme='dark'] { 22 | --ifm-color-primary: #25c2a0; 23 | --ifm-color-primary-dark: #21af90; 24 | --ifm-color-primary-darker: #1fa588; 25 | --ifm-color-primary-darkest: #1a8870; 26 | --ifm-color-primary-light: #29d5b0; 27 | --ifm-color-primary-lighter: #32d8b4; 28 | --ifm-color-primary-lightest: #4fddbf; 29 | --docusaurus-highlighted-code-line-bg: rgba(0, 0, 0, 0.3); 30 | } 31 | 32 | .header-github-link::before { 33 | content: ''; 34 | width: 24px; 35 | height: 24px; 36 | display: flex; 37 | background-color: var(--ifm-navbar-link-color); 38 | mask-image: url("data:image/svg+xml,%3Csvg viewBox='0 0 24 24' xmlns='http://www.w3.org/2000/svg'%3E%3Cpath d='M12 .297c-6.63 0-12 5.373-12 12 0 5.303 3.438 9.8 8.205 11.385.6.113.82-.258.82-.577 0-.285-.01-1.04-.015-2.04-3.338.724-4.042-1.61-4.042-1.61C4.422 18.07 3.633 17.7 3.633 17.7c-1.087-.744.084-.729.084-.729 1.205.084 1.838 1.236 1.838 1.236 1.07 1.835 2.809 1.305 3.495.998.108-.776.417-1.305.76-1.605-2.665-.3-5.466-1.332-5.466-5.93 0-1.31.465-2.38 1.235-3.22-.135-.303-.54-1.523.105-3.176 0 0 1.005-.322 3.3 1.23.96-.267 1.98-.399 3-.405 1.02.006 2.04.138 3 .405 2.28-1.552 3.285-1.23 3.285-1.23.645 1.653.24 2.873.12 3.176.765.84 1.23 1.91 1.23 3.22 0 4.61-2.805 5.625-5.475 5.92.42.36.81 1.096.81 2.22 0 1.606-.015 2.896-.015 3.286 0 .315.21.69.825.57C20.565 22.092 24 17.592 24 12.297c0-6.627-5.373-12-12-12'/%3E%3C/svg%3E"); 39 | transition: background-color var(--ifm-transition-fast) var(--ifm-transition-timing-default); 40 | } 41 | 42 | .header-github-link:hover::before { 43 | background-color: var(--ifm-navbar-link-hover-color); 44 | } -------------------------------------------------------------------------------- /website/static/.nojekyll: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kaito-project/aikit/1a0aa94bb930508fb34e65a3bf76d4ad3b174642/website/static/.nojekyll -------------------------------------------------------------------------------- /website/static/img/architecture.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kaito-project/aikit/1a0aa94bb930508fb34e65a3bf76d4ad3b174642/website/static/img/architecture.png -------------------------------------------------------------------------------- /website/static/img/favicon.ico: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kaito-project/aikit/1a0aa94bb930508fb34e65a3bf76d4ad3b174642/website/static/img/favicon.ico -------------------------------------------------------------------------------- /website/static/img/logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kaito-project/aikit/1a0aa94bb930508fb34e65a3bf76d4ad3b174642/website/static/img/logo.png --------------------------------------------------------------------------------