├── .gitignore
├── .gitmodules
├── checkpoint.sh
├── comfyui
    ├── build-and-push.comfyui.sh
    ├── comfyui.Dockerfile
    ├── env.sh
    ├── preset.rocm-6.3.3.sh
    ├── preset.rocm-6.4.4.sh
    └── readme.md
├── docs
    └── images
    │   └── temperatures.png
├── env.sh
├── llama.cpp
    ├── build-and-push.rocm.sh
    ├── build-and-push.vulkan.sh
    ├── env.sh
    ├── llamacpp-offload-calculator
    │   ├── .gitattributes
    │   ├── .gitignore
    │   ├── ArkProjects.LlamaOffloadCalc.sln
    │   ├── ArkProjects.LlamaOffloadCalc
    │   │   ├── ArkProjects.LlamaOffloadCalc.csproj
    │   │   ├── LLamaDevice.cs
    │   │   ├── LLamaDeviceType.cs
    │   │   ├── LLamaGgufMetadataExtractor.cs
    │   │   ├── LLamaLogsParser.cs
    │   │   ├── Options
    │   │   │   ├── LLamaDeviceOptions.cs
    │   │   │   ├── OffloadCalculationOptions.cs
    │   │   │   ├── OffloadCalculationOptionsValidator.cs
    │   │   │   └── TensorsOffloadRuleOptions.cs
    │   │   ├── Program.cs
    │   │   ├── Properties
    │   │   │   └── launchSettings.json
    │   │   ├── TensorMetadata.cs
    │   │   ├── appsettings.GLM-4.5-Air-UD-Q6_K_XL.yaml
    │   │   ├── appsettings.gpt-oss-120b-F16.yaml
    │   │   └── appsettings.yaml
    │   ├── GGUFSharp
    │   │   ├── .gitignore
    │   │   ├── LICENSE
    │   │   ├── README.md
    │   │   ├── SampleFiles
    │   │   │   ├── FilesList.txt
    │   │   │   ├── example.gguf
    │   │   │   └── genTestFile.py
    │   │   └── src
    │   │   │   └── GGUFSharp
    │   │   │       ├── GGUFSharp.Test
    │   │   │           ├── BasicFeatureTest.cs
    │   │   │           ├── GGUFSharp.Test.csproj
    │   │   │           └── MSTestSettings.cs
    │   │   │       ├── GGUFSharp.sln
    │   │   │       └── GGUFSharp
    │   │   │           ├── GGUFDataTypeEnum.cs
    │   │   │           ├── GGUFFile.cs
    │   │   │           ├── GGUFHeader.cs
    │   │   │           ├── GGUFMetaItem.cs
    │   │   │           ├── GGUFReader.cs
    │   │   │           ├── GGUFSharp.csproj
    │   │   │           ├── GGUFStreamReader.cs
    │   │   │           ├── GGUFTensorInfo.cs
    │   │   │           └── GGUFTensorType.cs
    │   └── readme.md
    ├── preset.rocm-6.3.3.sh
    ├── preset.rocm-6.4.4.sh
    ├── preset.rocm-7.0.0.sh
    └── readme.md
├── pytorch
    ├── build-and-push.torch.sh
    ├── env.sh
    ├── preset.torch-2.7.1-rocm-6.3.3.sh
    ├── preset.torch-2.7.1-rocm-6.4.4.sh
    ├── preset.torch-2.8.0-rocm-6.3.3.sh
    ├── preset.torch-2.8.0-rocm-6.4.4.sh
    ├── preset.torch-2.8.0-rocm-7.0.2.sh
    ├── preset.torch-2.9.0-rocm-7.0.2.sh
    ├── readme.md
    ├── submodules
    │   └── .gitkeep
    └── torch.Dockerfile
├── readme.md
├── rocm
    ├── build-and-push.rocm.sh
    ├── env.sh
    ├── preset.rocm-6.3.3.sh
    ├── preset.rocm-6.4.4.sh
    ├── preset.rocm-7.0.0.sh
    ├── preset.rocm-7.0.2.sh
    ├── readme.md
    ├── rocm.Dockerfile
    └── submodules
    │   └── .gitkeep
└── vllm
    ├── benchmark
        ├── ResultsConverter
        │   ├── .gitignore
        │   ├── ResultsConverter.sln
        │   └── ResultsConverter
        │   │   ├── MarkdownTableBuilder.cs
        │   │   ├── MarkdownTableBuilderExtensions.cs
        │   │   ├── Program.cs
        │   │   ├── Properties
        │   │       └── launchSettings.json
        │   │   ├── ResultsConverter.csproj
        │   │   └── VllmBenchResult.cs
        ├── readme.md
        └── results
        │   ├── app
        │       └── vllm
        │       │   ├── openai-infqps-concurrency1-gemma-3-27b-it-qat-autoawq-20251006-130816.json
        │       │   ├── openai-infqps-concurrency1-gemma-3-27b-it-qat-autoawq-20251007-162504.json
        │       │   ├── openai-infqps-concurrency1-gemma-3-27b-it-qat-autoawq-20251012-111624.json
        │       │   ├── openai-infqps-concurrency1-gemma-3-27b-it-qat-autoawq-20251013-140107.json
        │       │   ├── openai-infqps-concurrency16-gemma-3-27b-it-qat-autoawq-20251012-201017.json
        │       │   ├── openai-infqps-concurrency2-gemma-3-27b-it-qat-autoawq-20251006-132621.json
        │       │   ├── openai-infqps-concurrency2-gemma-3-27b-it-qat-autoawq-20251007-171239.json
        │       │   ├── openai-infqps-concurrency2-gemma-3-27b-it-qat-autoawq-20251012-112842.json
        │       │   ├── openai-infqps-concurrency2-gemma-3-27b-it-qat-autoawq-20251013-141355.json
        │       │   ├── openai-infqps-concurrency3-gemma-3-27b-it-qat-autoawq-20251006-134724.json
        │       │   ├── openai-infqps-concurrency3-gemma-3-27b-it-qat-autoawq-20251007-173243.json
        │       │   ├── openai-infqps-concurrency3-gemma-3-27b-it-qat-autoawq-20251012-114201.json
        │       │   ├── openai-infqps-concurrency3-gemma-3-27b-it-qat-autoawq-20251013-142754.json
        │       │   ├── openai-infqps-concurrency4-gemma-3-27b-it-qat-autoawq-20251006-140759.json
        │       │   ├── openai-infqps-concurrency4-gemma-3-27b-it-qat-autoawq-20251007-175203.json
        │       │   ├── openai-infqps-concurrency4-gemma-3-27b-it-qat-autoawq-20251012-115501.json
        │       │   ├── openai-infqps-concurrency4-gemma-3-27b-it-qat-autoawq-20251013-144145.json
        │       │   └── openai-infqps-concurrency8-gemma-3-27b-it-qat-autoawq-20251012-121023.json
        │   ├── openai-infqps-concurrency1-gemma-3-27b-it-qat-autoawq-20251005-221837.json
        │   ├── openai-infqps-concurrency1-gemma-3-27b-it-qat-autoawq-20251006-130816.json
        │   ├── openai-infqps-concurrency1-gemma-3-27b-it-qat-autoawq-20251007-162504.json
        │   ├── openai-infqps-concurrency1-gemma-3-27b-it-qat-autoawq-20251012-111624.json
        │   ├── openai-infqps-concurrency1-gemma-3-27b-it-qat-autoawq-20251013-140107.json
        │   ├── openai-infqps-concurrency16-gemma-3-27b-it-qat-autoawq-20251012-201017.json
        │   ├── openai-infqps-concurrency2-gemma-3-27b-it-qat-autoawq-20251005-214604.json
        │   ├── openai-infqps-concurrency2-gemma-3-27b-it-qat-autoawq-20251006-132621.json
        │   ├── openai-infqps-concurrency2-gemma-3-27b-it-qat-autoawq-20251007-171239.json
        │   ├── openai-infqps-concurrency2-gemma-3-27b-it-qat-autoawq-20251012-112842.json
        │   ├── openai-infqps-concurrency2-gemma-3-27b-it-qat-autoawq-20251013-141355.json
        │   ├── openai-infqps-concurrency3-gemma-3-27b-it-qat-autoawq-20251005-212640.json
        │   ├── openai-infqps-concurrency3-gemma-3-27b-it-qat-autoawq-20251006-134724.json
        │   ├── openai-infqps-concurrency3-gemma-3-27b-it-qat-autoawq-20251007-173243.json
        │   ├── openai-infqps-concurrency3-gemma-3-27b-it-qat-autoawq-20251012-114201.json
        │   ├── openai-infqps-concurrency3-gemma-3-27b-it-qat-autoawq-20251013-142754.json
        │   ├── openai-infqps-concurrency4-gemma-3-27b-it-qat-autoawq-20251005-210513.json
        │   ├── openai-infqps-concurrency4-gemma-3-27b-it-qat-autoawq-20251006-140759.json
        │   ├── openai-infqps-concurrency4-gemma-3-27b-it-qat-autoawq-20251007-175203.json
        │   ├── openai-infqps-concurrency4-gemma-3-27b-it-qat-autoawq-20251012-115501.json
        │   ├── openai-infqps-concurrency4-gemma-3-27b-it-qat-autoawq-20251013-144145.json
        │   └── openai-infqps-concurrency8-gemma-3-27b-it-qat-autoawq-20251012-121023.json
    ├── build-and-push.vllm.sh
    ├── env.sh
    ├── preset.0.10.2-rocm-6.4.4.sh
    ├── preset.0.11.0-rocm-6.3.3.sh
    ├── preset.0.8.5-rocm-6.3.3.sh
    ├── readme.md
    ├── submodules
        └── .gitkeep
    └── vllm.Dockerfile


/.gitignore:
--------------------------------------------------------------------------------
1 | /**/*.log
2 | 


--------------------------------------------------------------------------------
/.gitmodules:
--------------------------------------------------------------------------------
1 | [submodule "llama.cpp/submodules/llama.cpp"]
2 | 	url = ../../ggml-org/llama.cpp.git
3 | 	path = llama.cpp/submodules/llama.cpp
4 | [submodule "comfyui/submodules/ComfyUI"]
5 | 	path = comfyui/submodules/ComfyUI
6 | 	url = ../../comfyanonymous/ComfyUI.git
7 | 


--------------------------------------------------------------------------------
/checkpoint.sh:
--------------------------------------------------------------------------------
 1 | #/bin/bash
 2 | 
 3 | source ./env.sh
 4 | 
 5 | if ! [ -z "$(git status --porcelain)" ]; then 
 6 |   echo "Workdir is dirty. Exit"
 7 |   exit 10
 8 | fi
 9 | 
10 | TAG_NAME=$(git_get_current_tag)
11 | if [ "$TAG_NAME" == "" ]; then
12 |   TAG_NAME="$(date +%Y%m%d%H%M%S)"
13 |   git tag -a "$TAG_NAME" -m "none"
14 |   echo -e "New tag $TAG_NAME"
15 | else
16 |   echo "Commit already tagged with $TAG_NAME"
17 | fi
18 | 


--------------------------------------------------------------------------------
/comfyui/build-and-push.comfyui.sh:
--------------------------------------------------------------------------------
 1 | #/bin/bash
 2 | set -e
 3 | 
 4 | cd $(dirname $0)
 5 | source ../env.sh
 6 | 
 7 | IMAGE_TAGS=(
 8 |   "$COMFYUI_IMAGE:${COMFYUI_GIT_REF}-torch-${COMFYUI_PYTORCH_VERSION}-rocm-${COMFYUI_ROCM_VERSION}-patch-${REPO_GIT_REF}"
 9 |   "$COMFYUI_IMAGE:${COMFYUI_GIT_REF}-rocm-${COMFYUI_ROCM_VERSION}-patch-${REPO_GIT_REF}"
10 |   "$COMFYUI_IMAGE:${COMFYUI_GIT_REF}-rocm-${COMFYUI_ROCM_VERSION}"
11 |   "$COMFYUI_IMAGE:latest-rocm-${COMFYUI_ROCM_VERSION}"
12 | )
13 | 
14 | if docker_image_pushed ${IMAGE_TAGS[0]}; then
15 |   echo "${IMAGE_TAGS[0]} already in registry. Skip"
16 |   exit 0
17 | fi
18 | 
19 | DOCKER_EXTRA_ARGS=()
20 | for (( i=0; i<${#IMAGE_TAGS[@]}; i++ )); do
21 |   DOCKER_EXTRA_ARGS+=("-t" "${IMAGE_TAGS[$i]}")
22 | done
23 | 
24 | mkdir ./logs || true
25 | docker buildx build ${DOCKER_EXTRA_ARGS[@]} --push \
26 |   --build-arg BASE_PYTORCH_IMAGE=$COMFYUI_TORCH_IMAGE:${COMFYUI_PYTORCH_VERSION}-rocm-${COMFYUI_ROCM_VERSION} \
27 |   --progress=plain --target final -f ./comfyui.Dockerfile --push ./submodules/ComfyUI 2>&1 | tee ./logs/build_$(date +%Y%m%d%H%M%S).log
28 | 


--------------------------------------------------------------------------------
/comfyui/comfyui.Dockerfile:
--------------------------------------------------------------------------------
1 | ARG BASE_PYTORCH_IMAGE=docker.io/mixa3607/pytorch-gfx906:v2.7.1-rocm-6.3.3
2 | 
3 | FROM ${BASE_PYTORCH_IMAGE} AS final
4 | WORKDIR /comfyui
5 | COPY ./requirements.txt ./requirements.txt
6 | RUN sed -i 's|torchaudio||g' requirements.txt && pip install -r requirements.txt
7 | COPY ./ ./
8 | 


--------------------------------------------------------------------------------
/comfyui/env.sh:
--------------------------------------------------------------------------------
 1 | #/bin/bash
 2 | 
 3 | pushd $(dirname ${BASH_SOURCE[0]})
 4 | 
 5 | if [ "$COMFYUI_IMAGE" == "" ]; then
 6 |   COMFYUI_IMAGE=docker.io/mixa3607/comfyui-gfx906
 7 | fi
 8 | 
 9 | if [ "$COMFYUI_TORCH_IMAGE" == "" ]; then
10 |   COMFYUI_TORCH_IMAGE="docker.io/mixa3607/pytorch-gfx906"
11 | fi
12 | if [ "$COMFYUI_ROCM_VERSION" == "" ]; then
13 |   COMFYUI_ROCM_VERSION="6.3.3"
14 | fi
15 | if [ "$COMFYUI_PYTORCH_VERSION" == "" ]; then
16 |   COMFYUI_PYTORCH_VERSION="v2.7.1"
17 | fi
18 | 
19 | if [ "$COMFYUI_GIT_REF" == "" ]; then
20 |   COMFYUI_GIT_REF="$(git_get_current_tag submodules/ComfyUI)"
21 | fi
22 | if [ "$COMFYUI_GIT_REF" == "" ]; then
23 |   COMFYUI_GIT_REF="$(git_get_current_sha submodules/ComfyUI)"
24 | fi
25 | 
26 | popd
27 | 


--------------------------------------------------------------------------------
/comfyui/preset.rocm-6.3.3.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | 
3 | export COMFYUI_ROCM_VERSION="6.3.3"
4 | export COMFYUI_PYTORCH_VERSION="v2.7.1"
5 | 


--------------------------------------------------------------------------------
/comfyui/preset.rocm-6.4.4.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | 
3 | export COMFYUI_ROCM_VERSION="6.4.4"
4 | export COMFYUI_PYTORCH_VERSION="v2.7.1"
5 | 


--------------------------------------------------------------------------------
/comfyui/readme.md:
--------------------------------------------------------------------------------
 1 | # llama.cpp GFX906
 2 | The most powerful and modular diffusion model GUI, api and backend with a graph/nodes interface. https://github.com/comfyanonymous/ComfyUI
 3 | 
 4 | Recommend use `docker.io/mixa3607/comfyui-gfx906:latest-rocm-6.4.4`
 5 | 
 6 | ## Benchmarks
 7 | | tag                                                  | rocm  | comfy   | pytorch | preset | batch | exec time (sec) |
 8 | |------------------------------------------------------|-------|---------|---------|--------|-------|-----------------|
 9 | | v0.3.63-torch-v2.7.1-rocm-6.4.4-patch-20251010004720 | 6.4.4 | v0.3.63 | v2.7.1  | SDXL   | 1     | 33              |
10 | | v0.3.63-torch-v2.7.1-rocm-6.4.4-patch-20251010004720 | 6.4.4 | v0.3.63 | v2.7.1  | SDXL   | 2     | 65              |
11 | | v0.3.63-torch-v2.7.1-rocm-6.4.4-patch-20251010004720 | 6.4.4 | v0.3.63 | v2.7.1  | SD 1.5 | 1     | 3,8             |
12 | | v0.3.63-torch-v2.7.1-rocm-6.4.4-patch-20251010004720 | 6.4.4 | v0.3.63 | v2.7.1  | SD 1.5 | 2     | 7               |
13 | | v0.3.63-torch-v2.7.1-rocm-6.3.3-patch-20251010004720 | 6.3.3 | v0.3.63 | v2.7.1  | SDXL   | 1     | 33              |
14 | | v0.3.63-torch-v2.7.1-rocm-6.3.3-patch-20251010004720 | 6.3.3 | v0.3.63 | v2.7.1  | SDXL   | 2     | 65              |
15 | | v0.3.63-torch-v2.7.1-rocm-6.3.3-patch-20251010004720 | 6.3.3 | v0.3.63 | v2.7.1  | SD 1.5 | 1     | 3,8             |
16 | | v0.3.63-torch-v2.7.1-rocm-6.3.3-patch-20251010004720 | 6.3.3 | v0.3.63 | v2.7.1  | SD 1.5 | 2     | 7               |
17 | 
18 | ## Run
19 | ### Docker
20 | See https://github.com/hartmark/sd-rocm/blob/main/docker-compose.yml
21 | 
22 | ### Kubernetes
23 | Helm chart and samples [mixa3607 charts](https://github.com/mixa3607/charts)
24 | 
25 | ## Build
26 | See build vars in `./env.sh`. You also may use presetis `./preset.rocm-*.sh`. Exec `./build-and-push.comfyui.sh`:
27 | ```bash
28 | $ . preset.rocm-6.4.4.sh
29 | $ ./build-and-push.comfyui.sh
30 | ~/REPOS/mixa3607/llama.cpp-gfx906/rocm ~/REPOS/mixa3607/llama.cpp-gfx906/rocm
31 | ~/REPOS/mixa3607/llama.cpp-gfx906/rocm
32 | ~/REPOS/mixa3607/llama.cpp-gfx906/llama.cpp ~/REPOS/mixa3607/llama.cpp-gfx906/rocm
33 | ~/REPOS/mixa3607/llama.cpp-gfx906/rocm
34 | ~/REPOS/mixa3607/llama.cpp-gfx906/comfyui ~/REPOS/mixa3607/llama.cpp-gfx906/rocm
35 | ~/REPOS/mixa3607/llama.cpp-gfx906/rocm
36 | ~/REPOS/mixa3607/llama.cpp-gfx906/vllm ~/REPOS/mixa3607/llama.cpp-gfx906/rocm
37 | ~/REPOS/mixa3607/llama.cpp-gfx906/rocm
38 | #0 building with "remote" instance using remote driver
39 | #...............
40 | #14 DONE 583.8s
41 | ```
42 | 


--------------------------------------------------------------------------------
/docs/images/temperatures.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mixa3607/ML-gfx906/d95fce7ed5e14ec9cc7b801c668696194a929cda/docs/images/temperatures.png


--------------------------------------------------------------------------------
/env.sh:
--------------------------------------------------------------------------------
 1 | #/bin/bash
 2 | 
 3 | function docker_image_pushed {
 4 |   if docker buildx imagetools inspect "$1" > /dev/null 2> /dev/null; then
 5 |     return 0
 6 |   else
 7 |     return 1
 8 |   fi
 9 | }
10 | 
11 | function git_get_current_tag {
12 |   if [ "$1" != "" ]; then pushd "$1" > /dev/null; fi
13 |   git tag --points-at HEAD | sed 's|+||g'
14 |   if [ "$1" != "" ]; then popd > /dev/null; fi
15 | }
16 | 
17 | function git_get_origin {
18 |   if [ "$1" != "" ]; then pushd "$1" > /dev/null; fi
19 |   git config --get remote.origin.url
20 |   if [ "$1" != "" ]; then popd > /dev/null; fi
21 | }
22 | 
23 | 
24 | function git_get_current_sha {
25 |   if [ "$1" != "" ]; then pushd "$1" > /dev/null; fi
26 |   git rev-parse --short HEAD
27 |   if [ "$1" != "" ]; then popd > /dev/null; fi
28 | }
29 | 
30 | if [ "$REPO_GIT_REF" == "" ]; then
31 |   REPO_GIT_REF="$(git_get_current_tag)"
32 | fi
33 | if [ "$REPO_GIT_REF" == "" ]; then
34 |   REPO_GIT_REF="$(git_get_current_sha)"
35 | fi
36 | 
37 | if [ "$BASE_UBUNTU_REGISTRY" == "" ]; then
38 |   BASE_UBUNTU_REGISTRY=docker.io/library
39 | fi
40 | 
41 | source $(dirname ${BASH_SOURCE[0]})/rocm/env.sh
42 | source $(dirname ${BASH_SOURCE[0]})/llama.cpp/env.sh
43 | source $(dirname ${BASH_SOURCE[0]})/comfyui/env.sh
44 | source $(dirname ${BASH_SOURCE[0]})/vllm/env.sh
45 | source $(dirname ${BASH_SOURCE[0]})/pytorch/env.sh
46 | 


--------------------------------------------------------------------------------
/llama.cpp/build-and-push.rocm.sh:
--------------------------------------------------------------------------------
 1 | #/bin/bash
 2 | set -e
 3 | 
 4 | cd $(dirname $0)
 5 | source ../env.sh
 6 | 
 7 | IMAGE_TAGS=(
 8 |   "$LLAMA_IMAGE:full-${LLAMA_GIT_REF}-rocm-${LLAMA_ROCM_VERSION}-patch-${REPO_GIT_REF}"
 9 |   "$LLAMA_IMAGE:full-${LLAMA_GIT_REF}-rocm-${LLAMA_ROCM_VERSION}"
10 |   "$LLAMA_IMAGE:full-rocm-${LLAMA_ROCM_VERSION}"
11 | )
12 | 
13 | if docker_image_pushed ${IMAGE_TAGS[0]}; then
14 |   echo "${IMAGE_TAGS[0]} already in registry. Skip"
15 |   exit 0
16 | fi
17 | 
18 | DOCKER_EXTRA_ARGS=()
19 | for (( i=0; i<${#IMAGE_TAGS[@]}; i++ )); do
20 |   DOCKER_EXTRA_ARGS+=("-t" "${IMAGE_TAGS[$i]}")
21 | done
22 | 
23 | mkdir ./logs || true
24 | docker buildx build ${DOCKER_EXTRA_ARGS[@]} --push \
25 |   --build-arg BASE_ROCM_DEV_CONTAINER=$PATCHED_ROCM_IMAGE:${LLAMA_ROCM_VERSION}-complete \
26 |   --build-arg ROCM_DOCKER_ARCH=$ROCM_ARCH \
27 |   --build-arg ROCM_VERSION=$LLAMA_ROCM_VERSION \
28 |   --build-arg AMDGPU_VERSION=$LLAMA_ROCM_VERSION \
29 |   --progress=plain --target full -f ./submodules/llama.cpp/.devops/rocm.Dockerfile ./submodules/llama.cpp 2>&1 | tee ./logs/build_$(date +%Y%m%d%H%M%S).log
30 | 


--------------------------------------------------------------------------------
/llama.cpp/build-and-push.vulkan.sh:
--------------------------------------------------------------------------------
 1 | #/bin/bash
 2 | set -e
 3 | 
 4 | cd $(dirname $0)
 5 | source ../env.sh
 6 | 
 7 | IMAGE_TAGS=(
 8 |   "$PATCHED_LLAMA_IMAGE:full-${LLAMA_GIT_REF}-vulkan-patch-${REPO_GIT_REF}"
 9 |   "$PATCHED_LLAMA_IMAGE:full-${LLAMA_GIT_REF}-vulkan"
10 | )
11 | 
12 | if docker_image_pushed ${IMAGE_TAGS[0]}; then
13 |   echo "${IMAGE_TAGS[0]} already in registry. Skip"
14 |   exit 0
15 | fi
16 | 
17 | DOCKER_EXTRA_ARGS=()
18 | for (( i=0; i<${#IMAGE_TAGS[@]}; i++ )); do
19 |   DOCKER_EXTRA_ARGS+=("-t" "${IMAGE_TAGS[$i]}")
20 | done
21 | 
22 | mkdir ./logs || true
23 | docker buildx build ${DOCKER_EXTRA_ARGS[@]} --push \
24 |   --build-arg UBUNTU_VERSION="24.04" \
25 |   --progress=plain --target full -f ./submodules/llama.cpp/.devops/vulkan.Dockerfile ./submodules/llama.cpp 2>&1 | tee ./logs/build_$(date +%Y%m%d%H%M%S).log
26 | 


--------------------------------------------------------------------------------
/llama.cpp/env.sh:
--------------------------------------------------------------------------------
 1 | #/bin/bash
 2 | 
 3 | pushd $(dirname ${BASH_SOURCE[0]})
 4 | 
 5 | if [ "$LLAMA_IMAGE" == "" ]; then
 6 |   LLAMA_IMAGE=docker.io/mixa3607/llama.cpp-gfx906
 7 | fi
 8 | 
 9 | # rocm ver
10 | if [ "$LLAMA_ROCM_VERSION" == "" ]; then
11 |   LLAMA_ROCM_VERSION=7.0.0
12 | fi
13 | 
14 | if [ "$LLAMA_GIT_REF" == "" ]; then
15 |   LLAMA_GIT_REF="$(git_get_current_tag submodules/llama.cpp)"
16 | fi
17 | if [ "$LLAMA_GIT_REF" == "" ]; then
18 |   LLAMA_GIT_REF="$(git_get_current_sha submodules/llama.cpp)"
19 | fi
20 | 
21 | popd
22 | 


--------------------------------------------------------------------------------
/llama.cpp/llamacpp-offload-calculator/.gitattributes:
--------------------------------------------------------------------------------
 1 | ###############################################################################
 2 | # Set default behavior to automatically normalize line endings.
 3 | ###############################################################################
 4 | * text=auto
 5 | 
 6 | ###############################################################################
 7 | # Set default behavior for command prompt diff.
 8 | #
 9 | # This is need for earlier builds of msysgit that does not have it on by
10 | # default for csharp files.
11 | # Note: This is only used by command line
12 | ###############################################################################
13 | #*.cs     diff=csharp
14 | 
15 | ###############################################################################
16 | # Set the merge driver for project and solution files
17 | #
18 | # Merging from the command prompt will add diff markers to the files if there
19 | # are conflicts (Merging from VS is not affected by the settings below, in VS
20 | # the diff markers are never inserted). Diff markers may cause the following 
21 | # file extensions to fail to load in VS. An alternative would be to treat
22 | # these files as binary and thus will always conflict and require user
23 | # intervention with every merge. To do so, just uncomment the entries below
24 | ###############################################################################
25 | #*.sln       merge=binary
26 | #*.csproj    merge=binary
27 | #*.vbproj    merge=binary
28 | #*.vcxproj   merge=binary
29 | #*.vcproj    merge=binary
30 | #*.dbproj    merge=binary
31 | #*.fsproj    merge=binary
32 | #*.lsproj    merge=binary
33 | #*.wixproj   merge=binary
34 | #*.modelproj merge=binary
35 | #*.sqlproj   merge=binary
36 | #*.wwaproj   merge=binary
37 | 
38 | ###############################################################################
39 | # behavior for image files
40 | #
41 | # image files are treated as binary by default.
42 | ###############################################################################
43 | #*.jpg   binary
44 | #*.png   binary
45 | #*.gif   binary
46 | 
47 | ###############################################################################
48 | # diff behavior for common document formats
49 | # 
50 | # Convert binary document formats to text before diffing them. This feature
51 | # is only available from the command line. Turn it on by uncommenting the 
52 | # entries below.
53 | ###############################################################################
54 | #*.doc   diff=astextplain
55 | #*.DOC   diff=astextplain
56 | #*.docx  diff=astextplain
57 | #*.DOCX  diff=astextplain
58 | #*.dot   diff=astextplain
59 | #*.DOT   diff=astextplain
60 | #*.pdf   diff=astextplain
61 | #*.PDF   diff=astextplain
62 | #*.rtf   diff=astextplain
63 | #*.RTF   diff=astextplain
64 | 


--------------------------------------------------------------------------------
/llama.cpp/llamacpp-offload-calculator/.gitignore:
--------------------------------------------------------------------------------
  1 | ## Ignore Visual Studio temporary files, build results, and
  2 | ## files generated by popular Visual Studio add-ons.
  3 | ##
  4 | ## Get latest from https://github.com/github/gitignore/blob/master/VisualStudio.gitignore
  5 | 
  6 | # User-specific files
  7 | *.rsuser
  8 | *.suo
  9 | *.user
 10 | *.userosscache
 11 | *.sln.docstates
 12 | 
 13 | # User-specific files (MonoDevelop/Xamarin Studio)
 14 | *.userprefs
 15 | 
 16 | # Mono auto generated files
 17 | mono_crash.*
 18 | 
 19 | # Build results
 20 | [Dd]ebug/
 21 | [Dd]ebugPublic/
 22 | [Rr]elease/
 23 | [Rr]eleases/
 24 | x64/
 25 | x86/
 26 | [Ww][Ii][Nn]32/
 27 | [Aa][Rr][Mm]/
 28 | [Aa][Rr][Mm]64/
 29 | bld/
 30 | [Bb]in/
 31 | [Oo]bj/
 32 | [Oo]ut/
 33 | [Ll]og/
 34 | [Ll]ogs/
 35 | 
 36 | # Visual Studio 2015/2017 cache/options directory
 37 | .vs/
 38 | # Uncomment if you have tasks that create the project's static files in wwwroot
 39 | #wwwroot/
 40 | 
 41 | # Visual Studio 2017 auto generated files
 42 | Generated\ Files/
 43 | 
 44 | # MSTest test Results
 45 | [Tt]est[Rr]esult*/
 46 | [Bb]uild[Ll]og.*
 47 | 
 48 | # NUnit
 49 | *.VisualState.xml
 50 | TestResult.xml
 51 | nunit-*.xml
 52 | 
 53 | # Build Results of an ATL Project
 54 | [Dd]ebugPS/
 55 | [Rr]eleasePS/
 56 | dlldata.c
 57 | 
 58 | # Benchmark Results
 59 | BenchmarkDotNet.Artifacts/
 60 | 
 61 | # .NET Core
 62 | project.lock.json
 63 | project.fragment.lock.json
 64 | artifacts/
 65 | 
 66 | # ASP.NET Scaffolding
 67 | ScaffoldingReadMe.txt
 68 | 
 69 | # StyleCop
 70 | StyleCopReport.xml
 71 | 
 72 | # Files built by Visual Studio
 73 | *_i.c
 74 | *_p.c
 75 | *_h.h
 76 | *.ilk
 77 | *.meta
 78 | *.obj
 79 | *.iobj
 80 | *.pch
 81 | *.pdb
 82 | *.ipdb
 83 | *.pgc
 84 | *.pgd
 85 | *.rsp
 86 | *.sbr
 87 | *.tlb
 88 | *.tli
 89 | *.tlh
 90 | *.tmp
 91 | *.tmp_proj
 92 | *_wpftmp.csproj
 93 | *.log
 94 | *.vspscc
 95 | *.vssscc
 96 | .builds
 97 | *.pidb
 98 | *.svclog
 99 | *.scc
100 | 
101 | # Chutzpah Test files
102 | _Chutzpah*
103 | 
104 | # Visual C++ cache files
105 | ipch/
106 | *.aps
107 | *.ncb
108 | *.opendb
109 | *.opensdf
110 | *.sdf
111 | *.cachefile
112 | *.VC.db
113 | *.VC.VC.opendb
114 | 
115 | # Visual Studio profiler
116 | *.psess
117 | *.vsp
118 | *.vspx
119 | *.sap
120 | 
121 | # Visual Studio Trace Files
122 | *.e2e
123 | 
124 | # TFS 2012 Local Workspace
125 | $tf/
126 | 
127 | # Guidance Automation Toolkit
128 | *.gpState
129 | 
130 | # ReSharper is a .NET coding add-in
131 | _ReSharper*/
132 | *.[Rr]e[Ss]harper
133 | *.DotSettings.user
134 | 
135 | # TeamCity is a build add-in
136 | _TeamCity*
137 | 
138 | # DotCover is a Code Coverage Tool
139 | *.dotCover
140 | 
141 | # AxoCover is a Code Coverage Tool
142 | .axoCover/*
143 | !.axoCover/settings.json
144 | 
145 | # Coverlet is a free, cross platform Code Coverage Tool
146 | coverage*.json
147 | coverage*.xml
148 | coverage*.info
149 | 
150 | # Visual Studio code coverage results
151 | *.coverage
152 | *.coveragexml
153 | 
154 | # NCrunch
155 | _NCrunch_*
156 | .*crunch*.local.xml
157 | nCrunchTemp_*
158 | 
159 | # MightyMoose
160 | *.mm.*
161 | AutoTest.Net/
162 | 
163 | # Web workbench (sass)
164 | .sass-cache/
165 | 
166 | # Installshield output folder
167 | [Ee]xpress/
168 | 
169 | # DocProject is a documentation generator add-in
170 | DocProject/buildhelp/
171 | DocProject/Help/*.HxT
172 | DocProject/Help/*.HxC
173 | DocProject/Help/*.hhc
174 | DocProject/Help/*.hhk
175 | DocProject/Help/*.hhp
176 | DocProject/Help/Html2
177 | DocProject/Help/html
178 | 
179 | # Click-Once directory
180 | publish/
181 | 
182 | # Publish Web Output
183 | *.[Pp]ublish.xml
184 | *.azurePubxml
185 | # Note: Comment the next line if you want to checkin your web deploy settings,
186 | # but database connection strings (with potential passwords) will be unencrypted
187 | *.pubxml
188 | *.publishproj
189 | 
190 | # Microsoft Azure Web App publish settings. Comment the next line if you want to
191 | # checkin your Azure Web App publish settings, but sensitive information contained
192 | # in these scripts will be unencrypted
193 | PublishScripts/
194 | 
195 | # NuGet Packages
196 | *.nupkg
197 | # NuGet Symbol Packages
198 | *.snupkg
199 | # The packages folder can be ignored because of Package Restore
200 | **/[Pp]ackages/*
201 | # except build/, which is used as an MSBuild target.
202 | !**/[Pp]ackages/build/
203 | # Uncomment if necessary however generally it will be regenerated when needed
204 | #!**/[Pp]ackages/repositories.config
205 | # NuGet v3's project.json files produces more ignorable files
206 | *.nuget.props
207 | *.nuget.targets
208 | 
209 | # Microsoft Azure Build Output
210 | csx/
211 | *.build.csdef
212 | 
213 | # Microsoft Azure Emulator
214 | ecf/
215 | rcf/
216 | 
217 | # Windows Store app package directories and files
218 | AppPackages/
219 | BundleArtifacts/
220 | Package.StoreAssociation.xml
221 | _pkginfo.txt
222 | *.appx
223 | *.appxbundle
224 | *.appxupload
225 | 
226 | # Visual Studio cache files
227 | # files ending in .cache can be ignored
228 | *.[Cc]ache
229 | # but keep track of directories ending in .cache
230 | !?*.[Cc]ache/
231 | 
232 | # Others
233 | ClientBin/
234 | ~$*
235 | *~
236 | *.dbmdl
237 | *.dbproj.schemaview
238 | *.jfm
239 | *.pfx
240 | *.publishsettings
241 | orleans.codegen.cs
242 | 
243 | # Including strong name files can present a security risk
244 | # (https://github.com/github/gitignore/pull/2483#issue-259490424)
245 | #*.snk
246 | 
247 | # Since there are multiple workflows, uncomment next line to ignore bower_components
248 | # (https://github.com/github/gitignore/pull/1529#issuecomment-104372622)
249 | #bower_components/
250 | 
251 | # RIA/Silverlight projects
252 | Generated_Code/
253 | 
254 | # Backup & report files from converting an old project file
255 | # to a newer Visual Studio version. Backup files are not needed,
256 | # because we have git ;-)
257 | _UpgradeReport_Files/
258 | Backup*/
259 | UpgradeLog*.XML
260 | UpgradeLog*.htm
261 | ServiceFabricBackup/
262 | *.rptproj.bak
263 | 
264 | # SQL Server files
265 | *.mdf
266 | *.ldf
267 | *.ndf
268 | 
269 | # Business Intelligence projects
270 | *.rdl.data
271 | *.bim.layout
272 | *.bim_*.settings
273 | *.rptproj.rsuser
274 | *- [Bb]ackup.rdl
275 | *- [Bb]ackup ([0-9]).rdl
276 | *- [Bb]ackup ([0-9][0-9]).rdl
277 | 
278 | # Microsoft Fakes
279 | FakesAssemblies/
280 | 
281 | # GhostDoc plugin setting file
282 | *.GhostDoc.xml
283 | 
284 | # Node.js Tools for Visual Studio
285 | .ntvs_analysis.dat
286 | node_modules/
287 | 
288 | # Visual Studio 6 build log
289 | *.plg
290 | 
291 | # Visual Studio 6 workspace options file
292 | *.opt
293 | 
294 | # Visual Studio 6 auto-generated workspace file (contains which files were open etc.)
295 | *.vbw
296 | 
297 | # Visual Studio LightSwitch build output
298 | **/*.HTMLClient/GeneratedArtifacts
299 | **/*.DesktopClient/GeneratedArtifacts
300 | **/*.DesktopClient/ModelManifest.xml
301 | **/*.Server/GeneratedArtifacts
302 | **/*.Server/ModelManifest.xml
303 | _Pvt_Extensions
304 | 
305 | # Paket dependency manager
306 | .paket/paket.exe
307 | paket-files/
308 | 
309 | # FAKE - F# Make
310 | .fake/
311 | 
312 | # CodeRush personal settings
313 | .cr/personal
314 | 
315 | # Python Tools for Visual Studio (PTVS)
316 | __pycache__/
317 | *.pyc
318 | 
319 | # Cake - Uncomment if you are using it
320 | # tools/**
321 | # !tools/packages.config
322 | 
323 | # Tabs Studio
324 | *.tss
325 | 
326 | # Telerik's JustMock configuration file
327 | *.jmconfig
328 | 
329 | # BizTalk build output
330 | *.btp.cs
331 | *.btm.cs
332 | *.odx.cs
333 | *.xsd.cs
334 | 
335 | # OpenCover UI analysis results
336 | OpenCover/
337 | 
338 | # Azure Stream Analytics local run output
339 | ASALocalRun/
340 | 
341 | # MSBuild Binary and Structured Log
342 | *.binlog
343 | 
344 | # NVidia Nsight GPU debugger configuration file
345 | *.nvuser
346 | 
347 | # MFractors (Xamarin productivity tool) working folder
348 | .mfractor/
349 | 
350 | # Local History for Visual Studio
351 | .localhistory/
352 | 
353 | # BeatPulse healthcheck temp database
354 | healthchecksdb
355 | 
356 | # Backup folder for Package Reference Convert tool in Visual Studio 2017
357 | MigrationBackup/
358 | 
359 | # Ionide (cross platform F# VS Code tools) working folder
360 | .ionide/
361 | 
362 | # Fody - auto-generated XML schema
363 | FodyWeavers.xsd


--------------------------------------------------------------------------------
/llama.cpp/llamacpp-offload-calculator/ArkProjects.LlamaOffloadCalc.sln:
--------------------------------------------------------------------------------
 1 | ﻿
 2 | Microsoft Visual Studio Solution File, Format Version 12.00
 3 | # Visual Studio Version 17
 4 | VisualStudioVersion = 17.13.35931.197 d17.13
 5 | MinimumVisualStudioVersion = 10.0.40219.1
 6 | Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "ArkProjects.LlamaOffloadCalc", "ArkProjects.LlamaOffloadCalc\ArkProjects.LlamaOffloadCalc.csproj", "{AB281ECC-61B1-4575-B34D-F14DEB3814FD}"
 7 | EndProject
 8 | Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "GGUFSharp", "GGUFSharp\src\GGUFSharp\GGUFSharp\GGUFSharp.csproj", "{A3A53FDD-DA1B-68F2-CAB6-5800C258B19E}"
 9 | EndProject
10 | Global
11 | 	GlobalSection(SolutionConfigurationPlatforms) = preSolution
12 | 		Debug|Any CPU = Debug|Any CPU
13 | 		Release|Any CPU = Release|Any CPU
14 | 	EndGlobalSection
15 | 	GlobalSection(ProjectConfigurationPlatforms) = postSolution
16 | 		{AB281ECC-61B1-4575-B34D-F14DEB3814FD}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
17 | 		{AB281ECC-61B1-4575-B34D-F14DEB3814FD}.Debug|Any CPU.Build.0 = Debug|Any CPU
18 | 		{AB281ECC-61B1-4575-B34D-F14DEB3814FD}.Release|Any CPU.ActiveCfg = Release|Any CPU
19 | 		{AB281ECC-61B1-4575-B34D-F14DEB3814FD}.Release|Any CPU.Build.0 = Release|Any CPU
20 | 		{A3A53FDD-DA1B-68F2-CAB6-5800C258B19E}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
21 | 		{A3A53FDD-DA1B-68F2-CAB6-5800C258B19E}.Debug|Any CPU.Build.0 = Debug|Any CPU
22 | 		{A3A53FDD-DA1B-68F2-CAB6-5800C258B19E}.Release|Any CPU.ActiveCfg = Release|Any CPU
23 | 		{A3A53FDD-DA1B-68F2-CAB6-5800C258B19E}.Release|Any CPU.Build.0 = Release|Any CPU
24 | 	EndGlobalSection
25 | 	GlobalSection(SolutionProperties) = preSolution
26 | 		HideSolutionNode = FALSE
27 | 	EndGlobalSection
28 | 	GlobalSection(ExtensibilityGlobals) = postSolution
29 | 		SolutionGuid = {C06FD4ED-87AF-4232-9D9E-D5D6EA618808}
30 | 	EndGlobalSection
31 | EndGlobal
32 | 


--------------------------------------------------------------------------------
/llama.cpp/llamacpp-offload-calculator/ArkProjects.LlamaOffloadCalc/ArkProjects.LlamaOffloadCalc.csproj:
--------------------------------------------------------------------------------
 1 | ﻿<Project Sdk="Microsoft.NET.Sdk">
 2 | 
 3 |   <PropertyGroup>
 4 |     <OutputType>Exe</OutputType>
 5 |     <TargetFramework>net8.0</TargetFramework>
 6 |     <ImplicitUsings>enable</ImplicitUsings>
 7 |     <Nullable>enable</Nullable>
 8 |   </PropertyGroup>
 9 | 
10 |   <ItemGroup>
11 |     <PackageReference Include="FluentValidation" Version="12.0.0" />
12 |     <PackageReference Include="Microsoft.Extensions.Configuration" Version="9.0.8" />
13 |     <PackageReference Include="Microsoft.Extensions.Configuration.Binder" Version="9.0.8" />
14 |     <PackageReference Include="Microsoft.Extensions.Configuration.CommandLine" Version="9.0.8" />
15 |     <PackageReference Include="Microsoft.Extensions.Configuration.EnvironmentVariables" Version="9.0.8" />
16 |     <PackageReference Include="Microsoft.Extensions.Configuration.Json" Version="9.0.8" />
17 |     <PackageReference Include="NetEscapades.Configuration.Yaml" Version="3.1.0" />
18 |   </ItemGroup>
19 | 
20 |   <ItemGroup>
21 |     <ProjectReference Include="..\GGUFSharp\src\GGUFSharp\GGUFSharp\GGUFSharp.csproj" />
22 |   </ItemGroup>
23 | 
24 |   <ItemGroup>
25 |     <None Update="files/**/*">
26 |       <CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
27 |     </None>
28 |   </ItemGroup>
29 | 
30 |   <ItemGroup>
31 |     <None Update="appsettings*.yaml">
32 |       <CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
33 |     </None>
34 |   </ItemGroup>
35 | 
36 | </Project>
37 | 


--------------------------------------------------------------------------------
/llama.cpp/llamacpp-offload-calculator/ArkProjects.LlamaOffloadCalc/LLamaDevice.cs:
--------------------------------------------------------------------------------
 1 | ﻿namespace ArkProjects.LlmCalc;
 2 | 
 3 | public class LLamaDevice
 4 | {
 5 |     public required LLamaDeviceType Type { get; set; }
 6 |     public required string Name { get; set; }
 7 |     public string PciBus { get; set; } = "";
 8 | 
 9 |     public required long TotalSize { get; set; }
10 |     public long ReservedMemory { get; set; }
11 | 
12 |     public List<TensorMetadata> Tensors { get; set; } = [];
13 |     public List<int> Layers { get; set; } = [];
14 |     public double LayersPortion { get; set; } = 0;
15 | 
16 |     public long GetUsedSpace() => ReservedMemory + Tensors.Aggregate(0L, (current, tensor) => current + tensor.Size);
17 | 
18 |     public long GetFreeSpace() => TotalSize - GetUsedSpace();
19 | }


--------------------------------------------------------------------------------
/llama.cpp/llamacpp-offload-calculator/ArkProjects.LlamaOffloadCalc/LLamaDeviceType.cs:
--------------------------------------------------------------------------------
1 | ﻿namespace ArkProjects.LlmCalc;
2 | 
3 | public enum LLamaDeviceType
4 | {
5 |     Unknown,
6 |     GPU,
7 |     CPU,
8 | }


--------------------------------------------------------------------------------
/llama.cpp/llamacpp-offload-calculator/ArkProjects.LlamaOffloadCalc/LLamaGgufMetadataExtractor.cs:
--------------------------------------------------------------------------------
 1 | ﻿using System.Text.RegularExpressions;
 2 | using GGUFSharp;
 3 | 
 4 | namespace ArkProjects.LlmCalc;
 5 | 
 6 | public class LLamaGgufMetadataExtractor
 7 | {
 8 |     private readonly string _ggufModelPath;
 9 |     private readonly Regex _nameMatchRegex = new Regex(@"(?<name>^.+)-(?<current>\d{5})-of-(?<total>\d{5}).gguf");
10 | 
11 |     public LLamaGgufMetadataExtractor(string ggufModelPath)
12 |     {
13 |         _ggufModelPath = ggufModelPath;
14 |     }
15 | 
16 |     public List<TensorMetadata> ExtractMetadata()
17 |     {
18 |         var reader = new GGUFReader();
19 |         var tensorInfos = new List<TensorMetadata>();
20 |         var fileName = Path.GetFileName(_ggufModelPath);
21 |         var match = _nameMatchRegex.Match(fileName);
22 |         if (!match.Success)
23 |         {
24 |             var file = _ggufModelPath;
25 |             Console.WriteLine($"Reading {file}");
26 |             var f = reader.Read(file);
27 |             tensorInfos.AddRange(f.TensorInfos.Select(t => new TensorMetadata(t)));
28 |         }
29 |         else
30 |         {
31 |             var totalParts = int.Parse(match.Groups["total"].Value);
32 |             var name = match.Groups["name"].Value;
33 | 
34 |             for (int i = 1; i <= totalParts; i++)
35 |             {
36 |                 var file = Path.Combine(Path.GetDirectoryName(_ggufModelPath)!,
37 |                     $"{name}-{i:D5}-of-{totalParts:D5}.gguf");
38 |                 Console.WriteLine($"Reading {file}");
39 |                 var f = reader.Read(file);
40 |                 tensorInfos.AddRange(f.TensorInfos.Select(t => new TensorMetadata(t)));
41 |             }
42 |         }
43 | 
44 |         return tensorInfos;
45 |     }
46 | }
47 | 


--------------------------------------------------------------------------------
/llama.cpp/llamacpp-offload-calculator/ArkProjects.LlamaOffloadCalc/LLamaLogsParser.cs:
--------------------------------------------------------------------------------
 1 | ﻿using System.Text.RegularExpressions;
 2 | 
 3 | namespace ArkProjects.LlmCalc;
 4 | 
 5 | public class LLamaLogsParser
 6 | {
 7 |     private readonly string _filePath;
 8 | 
 9 |     public LLamaLogsParser(string filePath)
10 |     {
11 |         _filePath = filePath;
12 |     }
13 | 
14 |     public Dictionary<string, List<int>> ExtractAssignedLayers()
15 |     {
16 |         var regex = new Regex(@"load_tensors: layer +(?<layer>\d+) assigned to device (?<device>\S+), ");
17 |         var result = new Dictionary<string, List<int>>();
18 | 
19 |         var start = -1;
20 |         var lines = File.ReadAllLines(_filePath);
21 |         for (var i = 0; i < lines.Length; i++)
22 |         {
23 |             var line = lines[i];
24 |             var match = regex.Match(line);
25 |             if (start < 0 && match.Success)
26 |             {
27 |                 start = i;
28 |             }
29 | 
30 |             if (match.Success)
31 |             {
32 |                 result.TryAdd(match.Groups["device"].Value, new List<int>());
33 |                 result[match.Groups["device"].Value].Add(int.Parse(match.Groups["layer"].Value));
34 |             }
35 | 
36 | 
37 |             if (start >= 0 && !match.Success)
38 |             {
39 |                 break;
40 |             }
41 |         }
42 | 
43 |         return result;
44 |     }
45 | 
46 |     public Dictionary<string, List<string>> ExtractAssignedTensors()
47 |     {
48 |         var regex = new Regex(@"tensor (?<tensor>\S+) \(.+\) buffer type overridden to (?<device>\S+)");
49 |         var result = new Dictionary<string, List<string>>();
50 | 
51 |         var start = -1;
52 |         var lines = File.ReadAllLines(_filePath);
53 |         for (var i = 0; i < lines.Length; i++)
54 |         {
55 |             var line = lines[i];
56 |             var match = regex.Match(line);
57 |             if (start < 0 && match.Success)
58 |             {
59 |                 start = i;
60 |             }
61 | 
62 |             if (match.Success)
63 |             {
64 |                 result.TryAdd(match.Groups["device"].Value, new List<string>());
65 |                 result[match.Groups["device"].Value].Add(match.Groups["tensor"].Value);
66 |             }
67 | 
68 | 
69 |             if (start >= 0 && !match.Success)
70 |             {
71 |                 break;
72 |             }
73 |         }
74 | 
75 |         return result;
76 |     }
77 | }


--------------------------------------------------------------------------------
/llama.cpp/llamacpp-offload-calculator/ArkProjects.LlamaOffloadCalc/Options/LLamaDeviceOptions.cs:
--------------------------------------------------------------------------------
 1 | ﻿namespace ArkProjects.LlmCalc.Options;
 2 | 
 3 | public class LLamaDeviceOptions
 4 | {
 5 |     public required LLamaDeviceType Type { get; set; }
 6 |     public string PciBus { get; set; } = "";
 7 | 
 8 |     public required long TotalSizeMb { get; set; }
 9 |     public long ReservedMemoryMb { get; set; }
10 | 
11 |     public double LayersPortion { get; set; } = 0;
12 |     public int Id { get; set; }
13 | }


--------------------------------------------------------------------------------
/llama.cpp/llamacpp-offload-calculator/ArkProjects.LlamaOffloadCalc/Options/OffloadCalculationOptions.cs:
--------------------------------------------------------------------------------
 1 | ﻿namespace ArkProjects.LlmCalc.Options;
 2 | 
 3 | public class OffloadCalculationOptions
 4 | {
 5 |     public bool PrintTensorsSize { get; set; } = false;
 6 |     public bool PrintHelmCharConfig { get; set; } = false;
 7 |     public bool PrintCmdConfig { get; set; } = false;
 8 |     public required string GgufFile { get; set; }
 9 |     public required Dictionary<string, LLamaDeviceOptions> Devices { get; set; }
10 |     public required Dictionary<string, TensorsOffloadRuleOptions> OffloadRules { get; set; }
11 | }


--------------------------------------------------------------------------------
/llama.cpp/llamacpp-offload-calculator/ArkProjects.LlamaOffloadCalc/Options/OffloadCalculationOptionsValidator.cs:
--------------------------------------------------------------------------------
 1 | ﻿using FluentValidation;
 2 | 
 3 | namespace ArkProjects.LlmCalc.Options;
 4 | 
 5 | public class OffloadCalculationOptionsValidator : AbstractValidator<OffloadCalculationOptions>
 6 | {
 7 |     public OffloadCalculationOptionsValidator()
 8 |     {
 9 |         RuleFor(x => x.GgufFile)
10 |             .NotEmpty()
11 |             .Must(x => File.Exists(x)).WithMessage("gguf file not exist");
12 | 
13 |         RuleFor(x => x.Devices)
14 |             .Must(x => x.GroupBy(y => y.Value.Id).All(y => y.Count() == 1))
15 |             .WithMessage("Each device must have unique id");
16 |         RuleFor(x => x.OffloadRules)
17 |             .Must(x => x.GroupBy(y => y.Value.Id).All(y => y.Count() == 1))
18 |             .WithMessage("Each offload rule must have unique id");
19 | 
20 |         RuleFor(x => x.Devices)
21 |             .Must(x => x.Count(d => d.Value.Type == LLamaDeviceType.Unknown) == 0)
22 |             .WithMessage("Type must be set for each device");
23 |         RuleFor(x => x.Devices)
24 |             .Must(x => x.Count(d => d.Value.Type == LLamaDeviceType.GPU) >= 1)
25 |             .WithMessage("1 or more GPUs must be defined");
26 |         RuleFor(x => x.Devices)
27 |             .Must(x => x.Count(d => d.Value.Type == LLamaDeviceType.CPU) == 1)
28 |             .WithMessage("Single gpu must be defined");
29 |         RuleFor(x => x.OffloadRules)
30 |             .Must(x => x.Count > 0)
31 |             .WithMessage("1 or more offloading rule must be defined");
32 |     }
33 | }
34 | 


--------------------------------------------------------------------------------
/llama.cpp/llamacpp-offload-calculator/ArkProjects.LlamaOffloadCalc/Options/TensorsOffloadRuleOptions.cs:
--------------------------------------------------------------------------------
1 | ﻿namespace ArkProjects.LlmCalc.Options;
2 | 
3 | public class TensorsOffloadRuleOptions
4 | {
5 |     public required string Regex { get; set; }
6 |     public int Id { get; set; }
7 |     public int Priority { get; set; }
8 | }


--------------------------------------------------------------------------------
/llama.cpp/llamacpp-offload-calculator/ArkProjects.LlamaOffloadCalc/Program.cs:
--------------------------------------------------------------------------------
  1 | ﻿using ArkProjects.LlmCalc;
  2 | using ArkProjects.LlmCalc.Options;
  3 | using FluentValidation;
  4 | using Microsoft.Extensions.Configuration;
  5 | using System.Text;
  6 | using System.Text.RegularExpressions;
  7 | 
  8 | 
  9 | var options = GetOptions(args);
 10 | new OffloadCalculationOptionsValidator().ValidateAndThrow(options);
 11 | 
 12 | var tensorsOffloadRules = options.OffloadRules
 13 |     .Select(x => (rule: x.Value, name: x.Key, regex: new Regex(x.Value.Regex), tensors: new List<TensorMetadata>()))
 14 |     .OrderBy(x => x.rule.Id)
 15 |     .ToArray();
 16 | 
 17 | var devices = options.Devices
 18 |     .OrderBy(x => x.Value.Id)
 19 |     .Select(x => new LLamaDevice()
 20 |     {
 21 |         Name = x.Key,
 22 |         TotalSize = x.Value.TotalSizeMb * 1024 * 1024,
 23 |         ReservedMemory = x.Value.ReservedMemoryMb * 1024 * 1024,
 24 |         Type = x.Value.Type,
 25 |         LayersPortion = x.Value.LayersPortion,
 26 |         PciBus = x.Value.PciBus,
 27 |         Layers = new List<int>(),
 28 |         Tensors = new List<TensorMetadata>()
 29 |     })
 30 |     .ToList();
 31 | {
 32 |     var gpus = devices.Where(x => x.Type == LLamaDeviceType.GPU).ToList();
 33 |     if (gpus.Sum(x => x.LayersPortion) == 0)
 34 |     {
 35 |         var gpuMem = gpus.Sum(x => x.TotalSize);
 36 |         foreach (var gpu in gpus)
 37 |         {
 38 |             gpu.LayersPortion = (double)gpu.TotalSize / gpuMem;
 39 |         }
 40 |     }
 41 | }
 42 | 
 43 | var tensorInfos = new LLamaGgufMetadataExtractor(options.GgufFile)
 44 |     .ExtractMetadata()
 45 |     .Where(x => x.BlkId != -1)
 46 |     .OrderBy(x => x.BlkId)
 47 |     .ThenBy(x => x.Name)
 48 |     .ToList();
 49 | 
 50 | // split layers
 51 | var assignedLayers = new Dictionary<string, List<int>>();
 52 | {
 53 |     var layersCount = tensorInfos.Select(x => x.BlkId).Distinct().Count();
 54 |     var layerIds = tensorInfos.Select(x => x.BlkId).Distinct().OrderBy(x => x).ToList();
 55 |     var s = devices.Where(x => x.LayersPortion > 0).Sum(x => x.LayersPortion);
 56 |     foreach (var device in devices.OrderBy(x => x.LayersPortion))
 57 |     {
 58 |         if (device.LayersPortion <= 0)
 59 |             continue;
 60 |         var c = (int)(layersCount / s * device.LayersPortion);
 61 |         assignedLayers[device.Name] = layerIds.Take(c).ToList();
 62 |         layerIds = layerIds.Skip(c).ToList();
 63 |     }
 64 | 
 65 |     if (layerIds.Count > 0)
 66 |     {
 67 |         assignedLayers[devices.GroupBy(x => x.LayersPortion).MaxBy(x => x.Key)!.Last().Name].AddRange(layerIds);
 68 |     }
 69 | }
 70 | 
 71 | // split tensors
 72 | foreach (var info in tensorInfos)
 73 | {
 74 |     tensorsOffloadRules.FirstOrDefault(x => x.regex.IsMatch(info.Name)).tensors?.Add(info);
 75 | }
 76 | 
 77 | // apply layers
 78 | {
 79 |     foreach (var assignedLayer in assignedLayers)
 80 |     {
 81 |         var device = devices.First(x => x.Name == assignedLayer.Key);
 82 |         device.Layers.AddRange(assignedLayer.Value);
 83 |         device.Tensors.AddRange(tensorInfos.Where(x => assignedLayer.Value.Contains(x.BlkId)));
 84 |     }
 85 | 
 86 |     if (!devices
 87 |             .SelectMany(x => x.Tensors)
 88 |             .OrderBy(x => x.Name)
 89 |             .SequenceEqual(tensorInfos.OrderBy(x => x.Name))
 90 |        )
 91 |     {
 92 |         throw new Exception();
 93 |     }
 94 | }
 95 | 
 96 | // offload tensors
 97 | foreach (var device in devices.Where(x => x.Type == LLamaDeviceType.GPU))
 98 | {
 99 |     var dst = devices.First(x => x.Type == LLamaDeviceType.CPU);
100 |     while (device.GetFreeSpace() < 0)
101 |     {
102 |         var t = tensorsOffloadRules
103 |             .SelectMany(x => x.tensors.Select(y => (x.rule.Priority, y)))
104 |             .OrderByDescending(x => x.Priority)
105 |             .ThenBy(x => x.y.BlkId)
106 |             .ThenBy(x => x.y.Name)
107 |             .Select(x => x.y)
108 |             .First(x => device.Tensors.Contains(x));
109 |         Console.WriteLine($"Move {t.Name,-25} ({t.Size / 1024 / 1024} Mb) from {device.Name} to {dst.Name}");
110 |         device.Tensors.Remove(t);
111 |         dst.Tensors.Add(t);
112 |     }
113 | }
114 | 
115 | 
116 | if (options.PrintTensorsSize)
117 |     PrintTensorsSize(tensorInfos);
118 | PrintDevicesUtilization(devices);
119 | PrintTensorsOffloadResult();
120 | if (options.PrintHelmCharConfig)
121 |     PrintHelmChartConfig(devices);
122 | if (options.PrintCmdConfig)
123 |     PrintCmdConfig(devices);
124 | 
125 | 
126 | return;
127 | 
128 | static void PrintDevicesUtilization(IEnumerable<LLamaDevice> devices)
129 | {
130 |     Console.WriteLine("======= Device memory usage");
131 |     foreach (var device in devices)
132 |     {
133 |         Console.WriteLine($"{device.Name,-10} " +
134 |                           $"{device.GetUsedSpace() / 1024 / 1024} Mb of {device.TotalSize / 1024 / 1024} Mb " +
135 |                           $"({device.Tensors.Aggregate(0L, (current, tensor) => current + tensor.Size) / 1024 / 1024})");
136 |     }
137 | 
138 |     Console.WriteLine();
139 | }
140 | 
141 | static void PrintHelmChartConfig(IReadOnlyList<LLamaDevice> devices)
142 | {
143 |     Console.WriteLine("======= Helm chart config");
144 |     var sb = new StringBuilder();
145 |     sb.Append("extraEnvVars:\n");
146 |     sb.Append($"  - name: LLAMA_ARG_MAIN_GPU\n" +
147 |               $"    value: '0'\n");
148 |     sb.Append($"  - name: LLAMA_ARG_TENSOR_SPLIT\n" +
149 |               $"    value: '{string.Join(',', devices.Select(x => x.Layers.Count))}'\n");
150 |     sb.Append("\n");
151 | 
152 |     sb.Append("modelTensorsOverride:\n");
153 |     foreach (var device in devices.Where(x => x.Type != LLamaDeviceType.GPU && x.Tensors.Count > 0))
154 |     {
155 |         sb.Append($"  - name: {device.Name}\n" +
156 |                   $"    tensors:\n");
157 |         foreach (var tensor in device.Tensors)
158 |         {
159 |             sb.Append($"    - {tensor.Name}\n");
160 |         }
161 |     }
162 | 
163 |     Console.WriteLine(sb);
164 | }
165 | 
166 | static void PrintCmdConfig(IReadOnlyList<LLamaDevice> devices)
167 | {
168 |     Console.WriteLine("======= CMD config");
169 |     var sb = new StringBuilder();
170 |     sb.Append("--main-gpu 0 ");
171 |     sb.Append($"--tensor-split \"{string.Join(',', devices.Select(x => x.Layers.Count))}\" ");
172 |     devices
173 |         .Where(x => x.Type != LLamaDeviceType.GPU && x.Tensors.Count > 0)
174 |         .Select(x => $"--override-tensor \"({string.Join('|', x.Tensors.Select(t => t.Name))})={x.Name}\" ")
175 |         .ToList()
176 |         .ForEach(x => sb.Append(x));
177 |     Console.WriteLine(sb);
178 |     Console.WriteLine();
179 | }
180 | 
181 | static void PrintTensorsSize(IEnumerable<TensorMetadata> tensorInfos)
182 | {
183 |     Console.WriteLine("======= Tensors size");
184 |     foreach (var tensorInfo in tensorInfos.OrderBy(x => x.Size))
185 |     {
186 |         Console.WriteLine($"{tensorInfo.Name,-30} {tensorInfo.Size / 1024 / 1024} Mb");
187 |     }
188 | }
189 | 
190 | void PrintTensorsOffloadResult()
191 | {
192 |     Console.WriteLine("======= Tensors offload result");
193 |     foreach (var t in tensorsOffloadRules)
194 |     {
195 |         var offloadByDevice = devices
196 |             .Select(x => (x.Name, x.Tensors.Count(y => t.tensors.Contains(y))))
197 |             .ToList();
198 |         Console.WriteLine(
199 |             $"Offload {t.name,-24} ({t.rule.Priority}) {(t.tensors.Count - offloadByDevice.Sum(x => x.Item2)).ToString(),-2} " +
200 |             $"({string.Join(", ", offloadByDevice.Select(x => $"{x.Name} = {x.Item2.ToString(),-2}"))}) " +
201 |             $"of {t.tensors.Count}");
202 |     }
203 | 
204 |     Console.WriteLine();
205 | }
206 | 
207 | static OffloadCalculationOptions GetOptions(string[] args)
208 | {
209 |     var mapping = new Dictionary<string, string>()
210 |     {
211 |         { "-e", "environment" }
212 |     };
213 |     string? env;
214 |     // stage 0
215 |     {
216 |         var cfgBuilder = new ConfigurationBuilder()
217 |             .AddJsonFile("appsettings.json", true)
218 |             .AddYamlFile("appsettings.yaml", true)
219 |             .AddYamlFile("appsettings.yml", true)
220 |             .AddEnvironmentVariables()
221 |             .AddCommandLine(args, mapping);
222 | 
223 |         var cfgRoot = cfgBuilder.Build();
224 |         env = cfgRoot["environment"] ?? null;
225 |     }
226 | 
227 |     // stage 1
228 |     {
229 |         var cfgBuilder = new ConfigurationBuilder()
230 |             .AddJsonFile("appsettings.json", true)
231 |             .AddYamlFile("appsettings.yaml", true)
232 |             .AddYamlFile("appsettings.yml", true);
233 | 
234 |         if (!string.IsNullOrWhiteSpace(env))
235 |         {
236 |             cfgBuilder
237 |                 .AddJsonFile($"appsettings.{env}.json", true)
238 |                 .AddYamlFile($"appsettings.{env}.yaml", true)
239 |                 .AddYamlFile($"appsettings.{env}.yml", true);
240 |         }
241 | 
242 |         cfgBuilder
243 |             .AddEnvironmentVariables()
244 |             .AddCommandLine(args, mapping);
245 | 
246 |         var cfgRoot = cfgBuilder.Build();
247 |         return cfgRoot.Get<OffloadCalculationOptions>()!;
248 |     }
249 | }
250 | 


--------------------------------------------------------------------------------
/llama.cpp/llamacpp-offload-calculator/ArkProjects.LlamaOffloadCalc/Properties/launchSettings.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "profiles": {
 3 |     "GLM-4.5-Air-UD-Q6_K_XL": {
 4 |       "commandName": "Project",
 5 |       "commandLineArgs": "-e GLM-4.5-Air-UD-Q6_K_XL"
 6 |     },
 7 |     "gpt-oss-120b-F16": {
 8 |       "commandName": "Project",
 9 |       "commandLineArgs": "-e gpt-oss-120b-F16"
10 |     }
11 |   }
12 | }


--------------------------------------------------------------------------------
/llama.cpp/llamacpp-offload-calculator/ArkProjects.LlamaOffloadCalc/TensorMetadata.cs:
--------------------------------------------------------------------------------
 1 | ﻿using GGUFSharp;
 2 | 
 3 | namespace ArkProjects.LlmCalc;
 4 | 
 5 | public class TensorMetadata
 6 | {
 7 |     public TensorMetadata(GGUFTensorInfo tensorInfo)
 8 |     {
 9 |         TensorInfo = tensorInfo;
10 |         if (Name.StartsWith("blk"))
11 |             BlkId = int.Parse(Name.Split(".").Skip(1).First());
12 |     }
13 | 
14 |     public GGUFTensorInfo TensorInfo { get; }
15 |     public string Name => TensorInfo.Name;
16 |     public long Size => (long)TensorInfo.Size;
17 |     public int BlkId { get; } = -1;
18 | }


--------------------------------------------------------------------------------
/llama.cpp/llamacpp-offload-calculator/ArkProjects.LlamaOffloadCalc/appsettings.GLM-4.5-Air-UD-Q6_K_XL.yaml:
--------------------------------------------------------------------------------
 1 | GgufFile: "\
 2 |   \\\\TRUENAS/trash3/kube-volumes/pvc-38077fbf-f7fa-46d8-9ad0-17c2ba5bf869/hub\
 3 |   /models--unsloth--GLM-4.5-Air-GGUF/snapshots/a5133889a6e29d42a1e71784b2ae8514fb28156f\
 4 |   /UD-Q6_K_XL/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003.gguf"
 5 | 
 6 | Devices:
 7 |   ROCm0:
 8 |     ReservedMemoryMb: 1024
 9 |   ROCm1:
10 |     ReservedMemory: 3584


--------------------------------------------------------------------------------
/llama.cpp/llamacpp-offload-calculator/ArkProjects.LlamaOffloadCalc/appsettings.gpt-oss-120b-F16.yaml:
--------------------------------------------------------------------------------
 1 | GgufFile: "\
 2 |   \\\\TRUENAS/trash3/kube-volumes/pvc-38077fbf-f7fa-46d8-9ad0-17c2ba5bf869/hub\
 3 |   /models--unsloth--gpt-oss-120b-GGUF/snapshots/91daeef64d6b1e1078ad1d007f9efa98526d7bf1\
 4 |   /gpt-oss-120b-F16.gguf"
 5 | 
 6 | Devices:
 7 |   ROCm0:
 8 |     ReservedMemoryMb: 9300
 9 |   ROCm1:
10 |     ReservedMemoryMb: 8500


--------------------------------------------------------------------------------
/llama.cpp/llamacpp-offload-calculator/ArkProjects.LlamaOffloadCalc/appsettings.yaml:
--------------------------------------------------------------------------------
 1 | PrintCmdConfig: true
 2 | PrintHelmCharConfig: true
 3 | PrintTensorsSize: false
 4 | 
 5 | # list from 'llama-server --list-devices' + CPU
 6 | Devices:
 7 |   ROCm0:
 8 |     Id: 1
 9 |     Type: GPU
10 |     TotalSizeMb: 32768
11 |     PciBus: 0000:01:00.0
12 |   ROCm1:
13 |     Id: 2
14 |     Type: GPU
15 |     TotalSizeMb: 32768
16 |     PciBus: 0000:02:00.0
17 |   CPU:
18 |     Id: 3
19 |     Type: CPU
20 |     TotalSizeMb: 131072
21 |     ReservedMemory: 0
22 | 
23 | # offloading rules
24 | OffloadRules:
25 |   ffn_gate_exps:
26 |     Id: 1
27 |     Regex: '^blk\.\d+\.ffn_gate_exps.weight'
28 |     Priority: 10
29 |   ffn_up_exps:
30 |     Id: 2
31 |     Regex: '^blk\.\d+\.ffn_up_exps.weight'
32 |     Priority: 20
33 |   ffn_down_exps:
34 |     Id: 3
35 |     Regex: '^blk\.\d+\.ffn_down_exps.weight'
36 |     Priority: 20


--------------------------------------------------------------------------------
/llama.cpp/llamacpp-offload-calculator/GGUFSharp/.gitignore:
--------------------------------------------------------------------------------
  1 | ## Ignore Visual Studio temporary files, build results, and
  2 | ## files generated by popular Visual Studio add-ons.
  3 | ##
  4 | ## Get latest from https://github.com/github/gitignore/blob/main/VisualStudio.gitignore
  5 | 
  6 | # User-specific files
  7 | *.rsuser
  8 | *.suo
  9 | *.user
 10 | *.userosscache
 11 | *.sln.docstates
 12 | 
 13 | # User-specific files (MonoDevelop/Xamarin Studio)
 14 | *.userprefs
 15 | 
 16 | # Mono auto generated files
 17 | mono_crash.*
 18 | 
 19 | # Build results
 20 | [Dd]ebug/
 21 | [Dd]ebugPublic/
 22 | [Rr]elease/
 23 | [Rr]eleases/
 24 | x64/
 25 | x86/
 26 | [Ww][Ii][Nn]32/
 27 | [Aa][Rr][Mm]/
 28 | [Aa][Rr][Mm]64/
 29 | bld/
 30 | [Bb]in/
 31 | [Oo]bj/
 32 | [Ll]og/
 33 | [Ll]ogs/
 34 | 
 35 | # Visual Studio 2015/2017 cache/options directory
 36 | .vs/
 37 | # Uncomment if you have tasks that create the project's static files in wwwroot
 38 | #wwwroot/
 39 | 
 40 | # Visual Studio 2017 auto generated files
 41 | Generated\ Files/
 42 | 
 43 | # MSTest test Results
 44 | [Tt]est[Rr]esult*/
 45 | [Bb]uild[Ll]og.*
 46 | 
 47 | # NUnit
 48 | *.VisualState.xml
 49 | TestResult.xml
 50 | nunit-*.xml
 51 | 
 52 | # Build Results of an ATL Project
 53 | [Dd]ebugPS/
 54 | [Rr]eleasePS/
 55 | dlldata.c
 56 | 
 57 | # Benchmark Results
 58 | BenchmarkDotNet.Artifacts/
 59 | 
 60 | # .NET Core
 61 | project.lock.json
 62 | project.fragment.lock.json
 63 | artifacts/
 64 | 
 65 | # ASP.NET Scaffolding
 66 | ScaffoldingReadMe.txt
 67 | 
 68 | # StyleCop
 69 | StyleCopReport.xml
 70 | 
 71 | # Files built by Visual Studio
 72 | *_i.c
 73 | *_p.c
 74 | *_h.h
 75 | *.ilk
 76 | *.meta
 77 | *.obj
 78 | *.iobj
 79 | *.pch
 80 | *.pdb
 81 | *.ipdb
 82 | *.pgc
 83 | *.pgd
 84 | *.rsp
 85 | *.sbr
 86 | *.tlb
 87 | *.tli
 88 | *.tlh
 89 | *.tmp
 90 | *.tmp_proj
 91 | *_wpftmp.csproj
 92 | *.log
 93 | *.tlog
 94 | *.vspscc
 95 | *.vssscc
 96 | .builds
 97 | *.pidb
 98 | *.svclog
 99 | *.scc
100 | 
101 | # Chutzpah Test files
102 | _Chutzpah*
103 | 
104 | # Visual C++ cache files
105 | ipch/
106 | *.aps
107 | *.ncb
108 | *.opendb
109 | *.opensdf
110 | *.sdf
111 | *.cachefile
112 | *.VC.db
113 | *.VC.VC.opendb
114 | 
115 | # Visual Studio profiler
116 | *.psess
117 | *.vsp
118 | *.vspx
119 | *.sap
120 | 
121 | # Visual Studio Trace Files
122 | *.e2e
123 | 
124 | # TFS 2012 Local Workspace
125 | $tf/
126 | 
127 | # Guidance Automation Toolkit
128 | *.gpState
129 | 
130 | # ReSharper is a .NET coding add-in
131 | _ReSharper*/
132 | *.[Rr]e[Ss]harper
133 | *.DotSettings.user
134 | 
135 | # TeamCity is a build add-in
136 | _TeamCity*
137 | 
138 | # DotCover is a Code Coverage Tool
139 | *.dotCover
140 | 
141 | # AxoCover is a Code Coverage Tool
142 | .axoCover/*
143 | !.axoCover/settings.json
144 | 
145 | # Coverlet is a free, cross platform Code Coverage Tool
146 | coverage*.json
147 | coverage*.xml
148 | coverage*.info
149 | 
150 | # Visual Studio code coverage results
151 | *.coverage
152 | *.coveragexml
153 | 
154 | # NCrunch
155 | _NCrunch_*
156 | .*crunch*.local.xml
157 | nCrunchTemp_*
158 | 
159 | # MightyMoose
160 | *.mm.*
161 | AutoTest.Net/
162 | 
163 | # Web workbench (sass)
164 | .sass-cache/
165 | 
166 | # Installshield output folder
167 | [Ee]xpress/
168 | 
169 | # DocProject is a documentation generator add-in
170 | DocProject/buildhelp/
171 | DocProject/Help/*.HxT
172 | DocProject/Help/*.HxC
173 | DocProject/Help/*.hhc
174 | DocProject/Help/*.hhk
175 | DocProject/Help/*.hhp
176 | DocProject/Help/Html2
177 | DocProject/Help/html
178 | 
179 | # Click-Once directory
180 | publish/
181 | 
182 | # Publish Web Output
183 | *.[Pp]ublish.xml
184 | *.azurePubxml
185 | # Note: Comment the next line if you want to checkin your web deploy settings,
186 | # but database connection strings (with potential passwords) will be unencrypted
187 | *.pubxml
188 | *.publishproj
189 | 
190 | # Microsoft Azure Web App publish settings. Comment the next line if you want to
191 | # checkin your Azure Web App publish settings, but sensitive information contained
192 | # in these scripts will be unencrypted
193 | PublishScripts/
194 | 
195 | # NuGet Packages
196 | *.nupkg
197 | # NuGet Symbol Packages
198 | *.snupkg
199 | # The packages folder can be ignored because of Package Restore
200 | **/[Pp]ackages/*
201 | # except build/, which is used as an MSBuild target.
202 | !**/[Pp]ackages/build/
203 | # Uncomment if necessary however generally it will be regenerated when needed
204 | #!**/[Pp]ackages/repositories.config
205 | # NuGet v3's project.json files produces more ignorable files
206 | *.nuget.props
207 | *.nuget.targets
208 | 
209 | # Microsoft Azure Build Output
210 | csx/
211 | *.build.csdef
212 | 
213 | # Microsoft Azure Emulator
214 | ecf/
215 | rcf/
216 | 
217 | # Windows Store app package directories and files
218 | AppPackages/
219 | BundleArtifacts/
220 | Package.StoreAssociation.xml
221 | _pkginfo.txt
222 | *.appx
223 | *.appxbundle
224 | *.appxupload
225 | 
226 | # Visual Studio cache files
227 | # files ending in .cache can be ignored
228 | *.[Cc]ache
229 | # but keep track of directories ending in .cache
230 | !?*.[Cc]ache/
231 | 
232 | # Others
233 | ClientBin/
234 | ~$*
235 | *~
236 | *.dbmdl
237 | *.dbproj.schemaview
238 | *.jfm
239 | *.pfx
240 | *.publishsettings
241 | orleans.codegen.cs
242 | 
243 | # Including strong name files can present a security risk
244 | # (https://github.com/github/gitignore/pull/2483#issue-259490424)
245 | #*.snk
246 | 
247 | # Since there are multiple workflows, uncomment next line to ignore bower_components
248 | # (https://github.com/github/gitignore/pull/1529#issuecomment-104372622)
249 | #bower_components/
250 | 
251 | # RIA/Silverlight projects
252 | Generated_Code/
253 | 
254 | # Backup & report files from converting an old project file
255 | # to a newer Visual Studio version. Backup files are not needed,
256 | # because we have git ;-)
257 | _UpgradeReport_Files/
258 | Backup*/
259 | UpgradeLog*.XML
260 | UpgradeLog*.htm
261 | ServiceFabricBackup/
262 | *.rptproj.bak
263 | 
264 | # SQL Server files
265 | *.mdf
266 | *.ldf
267 | *.ndf
268 | 
269 | # Business Intelligence projects
270 | *.rdl.data
271 | *.bim.layout
272 | *.bim_*.settings
273 | *.rptproj.rsuser
274 | *- [Bb]ackup.rdl
275 | *- [Bb]ackup ([0-9]).rdl
276 | *- [Bb]ackup ([0-9][0-9]).rdl
277 | 
278 | # Microsoft Fakes
279 | FakesAssemblies/
280 | 
281 | # GhostDoc plugin setting file
282 | *.GhostDoc.xml
283 | 
284 | # Node.js Tools for Visual Studio
285 | .ntvs_analysis.dat
286 | node_modules/
287 | 
288 | # Visual Studio 6 build log
289 | *.plg
290 | 
291 | # Visual Studio 6 workspace options file
292 | *.opt
293 | 
294 | # Visual Studio 6 auto-generated workspace file (contains which files were open etc.)
295 | *.vbw
296 | 
297 | # Visual Studio 6 auto-generated project file (contains which files were open etc.)
298 | *.vbp
299 | 
300 | # Visual Studio 6 workspace and project file (working project files containing files to include in project)
301 | *.dsw
302 | *.dsp
303 | 
304 | # Visual Studio 6 technical files
305 | *.ncb
306 | *.aps
307 | 
308 | # Visual Studio LightSwitch build output
309 | **/*.HTMLClient/GeneratedArtifacts
310 | **/*.DesktopClient/GeneratedArtifacts
311 | **/*.DesktopClient/ModelManifest.xml
312 | **/*.Server/GeneratedArtifacts
313 | **/*.Server/ModelManifest.xml
314 | _Pvt_Extensions
315 | 
316 | # Paket dependency manager
317 | .paket/paket.exe
318 | paket-files/
319 | 
320 | # FAKE - F# Make
321 | .fake/
322 | 
323 | # CodeRush personal settings
324 | .cr/personal
325 | 
326 | # Python Tools for Visual Studio (PTVS)
327 | __pycache__/
328 | *.pyc
329 | 
330 | # Cake - Uncomment if you are using it
331 | # tools/**
332 | # !tools/packages.config
333 | 
334 | # Tabs Studio
335 | *.tss
336 | 
337 | # Telerik's JustMock configuration file
338 | *.jmconfig
339 | 
340 | # BizTalk build output
341 | *.btp.cs
342 | *.btm.cs
343 | *.odx.cs
344 | *.xsd.cs
345 | 
346 | # OpenCover UI analysis results
347 | OpenCover/
348 | 
349 | # Azure Stream Analytics local run output
350 | ASALocalRun/
351 | 
352 | # MSBuild Binary and Structured Log
353 | *.binlog
354 | 
355 | # NVidia Nsight GPU debugger configuration file
356 | *.nvuser
357 | 
358 | # MFractors (Xamarin productivity tool) working folder
359 | .mfractor/
360 | 
361 | # Local History for Visual Studio
362 | .localhistory/
363 | 
364 | # Visual Studio History (VSHistory) files
365 | .vshistory/
366 | 
367 | # BeatPulse healthcheck temp database
368 | healthchecksdb
369 | 
370 | # Backup folder for Package Reference Convert tool in Visual Studio 2017
371 | MigrationBackup/
372 | 
373 | # Ionide (cross platform F# VS Code tools) working folder
374 | .ionide/
375 | 
376 | # Fody - auto-generated XML schema
377 | FodyWeavers.xsd
378 | 
379 | # VS Code files for those working on multiple tools
380 | .vscode/*
381 | !.vscode/settings.json
382 | !.vscode/tasks.json
383 | !.vscode/launch.json
384 | !.vscode/extensions.json
385 | *.code-workspace
386 | 
387 | # Local History for Visual Studio Code
388 | .history/
389 | 
390 | # Windows Installer files from build outputs
391 | *.cab
392 | *.msi
393 | *.msix
394 | *.msm
395 | *.msp
396 | 
397 | # JetBrains Rider
398 | *.sln.iml
399 | 


--------------------------------------------------------------------------------
/llama.cpp/llamacpp-offload-calculator/GGUFSharp/LICENSE:
--------------------------------------------------------------------------------
 1 | MIT License
 2 | 
 3 | Copyright (c) 2024 Clock Set Bird
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 


--------------------------------------------------------------------------------
/llama.cpp/llamacpp-offload-calculator/GGUFSharp/README.md:
--------------------------------------------------------------------------------
1 | # GGUFSharp
2 | A library for read/write GGUF file
3 | 


--------------------------------------------------------------------------------
/llama.cpp/llamacpp-offload-calculator/GGUFSharp/SampleFiles/FilesList.txt:
--------------------------------------------------------------------------------
1 | bartowski/Phi-3.5-mini-instruct-GGUF/Phi-3.5-mini-instruct-IQ2_M.gguf


--------------------------------------------------------------------------------
/llama.cpp/llamacpp-offload-calculator/GGUFSharp/SampleFiles/example.gguf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mixa3607/ML-gfx906/d95fce7ed5e14ec9cc7b801c668696194a929cda/llama.cpp/llamacpp-offload-calculator/GGUFSharp/SampleFiles/example.gguf


--------------------------------------------------------------------------------
/llama.cpp/llamacpp-offload-calculator/GGUFSharp/SampleFiles/genTestFile.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python3
 2 | import sys
 3 | from pathlib import Path
 4 | 
 5 | import numpy as np
 6 | 
 7 | from gguf import GGUFWriter  # noqa: E402
 8 | 
 9 | 
10 | # Example usage:
11 | def writer_example() -> None:
12 |     # Example usage with a file
13 |     gguf_writer = GGUFWriter("example.gguf", "llama")
14 | 
15 |     gguf_writer.add_block_count(12)
16 |     gguf_writer.add_uint32("answer", 42)  # Write a 32-bit integer
17 |     gguf_writer.add_float32("answer_in_float", 42.0)  # Write a 32-bit float
18 |     gguf_writer.add_custom_alignment(64)
19 | 
20 |     tensor1 = np.ones((32,), dtype=np.float32) * 100.0
21 |     tensor2 = np.ones((64,), dtype=np.float32) * 101.0
22 |     tensor3 = np.ones((96,), dtype=np.float32) * 102.0
23 | 
24 |     gguf_writer.add_tensor("tensor1", tensor1)
25 |     gguf_writer.add_tensor("tensor2", tensor2)
26 |     gguf_writer.add_tensor("tensor3", tensor3)
27 | 
28 |     gguf_writer.write_header_to_file()
29 |     gguf_writer.write_kv_data_to_file()
30 |     gguf_writer.write_tensors_to_file()
31 | 
32 |     gguf_writer.close()
33 | 
34 | 
35 | if __name__ == '__main__':
36 |     writer_example()
37 | 


--------------------------------------------------------------------------------
/llama.cpp/llamacpp-offload-calculator/GGUFSharp/src/GGUFSharp/GGUFSharp.Test/BasicFeatureTest.cs:
--------------------------------------------------------------------------------
 1 | ﻿using System.Diagnostics;
 2 | using System.Linq;
 3 | using System.Runtime.InteropServices;
 4 | 
 5 | namespace GGUFSharp.Test
 6 | {
 7 |     [TestClass]
 8 |     [DoNotParallelize]
 9 |     public sealed class BasicFeatureTest
10 |     {
11 |         private string GGUFFilePath = @"example.gguf";
12 |         private string[] example_meta =
13 |             {
14 |                 "general.architecture:llama",
15 |                 "llama.block_count:GGUF_METADATA_VALUE_TYPE_UINT32",
16 |                 "answer:GGUF_METADATA_VALUE_TYPE_UINT32",
17 |                 "answer_in_float:GGUF_METADATA_VALUE_TYPE_FLOAT32",
18 |                 "general.alignment:GGUF_METADATA_VALUE_TYPE_UINT32"
19 |             };
20 |         private string[] example_tensorInfo =
21 |         {
22 |             "tensor1",
23 |             "tensor2",
24 |             "tensor3"
25 |         };
26 |         [TestMethod]
27 |         public void ReadBasicInfo()
28 |         {
29 |             GGUFReader reader = new GGUFReader();
30 |             var f = reader.Read(GGUFFilePath);
31 |             Assert.IsTrue(f.MetaItems.Select(x=>x.ToString()).SequenceEqual(example_meta));
32 |             Assert.IsTrue(f.TensorInfos.Select(x => x.Name).SequenceEqual(example_tensorInfo));
33 |         }
34 |         [TestMethod]
35 |         public void ReadTensorData()
36 |         {
37 |             GGUFReader reader = new GGUFReader();
38 |             var f=reader.Read(GGUFFilePath);
39 |             using var t1=reader.ReadTensorData(f,f.TensorInfos.FirstOrDefault());
40 |             var data = t1.Memory.Slice(0,(int)f.TensorInfos.First().Size);
41 |             var dataF=MemoryMarshal.Cast<byte,float>(data.Span);
42 |             foreach (var item in dataF)
43 |             {
44 |                 Assert.AreEqual(item, 100);
45 |             }
46 |         }
47 |         
48 |     }
49 | }
50 | 


--------------------------------------------------------------------------------
/llama.cpp/llamacpp-offload-calculator/GGUFSharp/src/GGUFSharp/GGUFSharp.Test/GGUFSharp.Test.csproj:
--------------------------------------------------------------------------------
 1 | <Project Sdk="Microsoft.NET.Sdk">
 2 | 
 3 |   <PropertyGroup>
 4 |     <TargetFramework>net8.0</TargetFramework>
 5 |     <LangVersion>latest</LangVersion>
 6 |     <ImplicitUsings>enable</ImplicitUsings>
 7 |     <Nullable>enable</Nullable>
 8 |   </PropertyGroup>
 9 | 
10 |   <ItemGroup>
11 |     <None Include="..\..\..\SampleFiles\example.gguf" Link="example.gguf">
12 |       <CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
13 |     </None>
14 |   </ItemGroup>
15 | 
16 |   <ItemGroup>
17 |     <PackageReference Include="Microsoft.NET.Test.Sdk" Version="17.11.1" />
18 |     <PackageReference Include="MSTest" Version="3.6.1" />
19 |   </ItemGroup>
20 | 
21 |   <ItemGroup>
22 |     <ProjectReference Include="..\GGUFSharp\GGUFSharp.csproj" />
23 |   </ItemGroup>
24 | 
25 |   <ItemGroup>
26 |     <Using Include="Microsoft.VisualStudio.TestTools.UnitTesting" />
27 |   </ItemGroup>
28 | 
29 | </Project>
30 | 


--------------------------------------------------------------------------------
/llama.cpp/llamacpp-offload-calculator/GGUFSharp/src/GGUFSharp/GGUFSharp.Test/MSTestSettings.cs:
--------------------------------------------------------------------------------
1 | ﻿[assembly: Parallelize(Scope = ExecutionScope.MethodLevel)]
2 | 


--------------------------------------------------------------------------------
/llama.cpp/llamacpp-offload-calculator/GGUFSharp/src/GGUFSharp/GGUFSharp.sln:
--------------------------------------------------------------------------------
 1 | ﻿
 2 | Microsoft Visual Studio Solution File, Format Version 12.00
 3 | # Visual Studio Version 17
 4 | VisualStudioVersion = 17.12.35506.116 d17.12
 5 | MinimumVisualStudioVersion = 10.0.40219.1
 6 | Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "GGUFSharp", "GGUFSharp\GGUFSharp.csproj", "{3A5E61DA-70F6-4132-BCA3-11AB3AFA4281}"
 7 | EndProject
 8 | Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "GGUFSharp.Test", "GGUFSharp.Test\GGUFSharp.Test.csproj", "{31135C56-06FB-42A0-B098-2A166A437A8D}"
 9 | EndProject
10 | Global
11 | 	GlobalSection(SolutionConfigurationPlatforms) = preSolution
12 | 		Debug|Any CPU = Debug|Any CPU
13 | 		Release|Any CPU = Release|Any CPU
14 | 	EndGlobalSection
15 | 	GlobalSection(ProjectConfigurationPlatforms) = postSolution
16 | 		{3A5E61DA-70F6-4132-BCA3-11AB3AFA4281}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
17 | 		{3A5E61DA-70F6-4132-BCA3-11AB3AFA4281}.Debug|Any CPU.Build.0 = Debug|Any CPU
18 | 		{3A5E61DA-70F6-4132-BCA3-11AB3AFA4281}.Release|Any CPU.ActiveCfg = Release|Any CPU
19 | 		{3A5E61DA-70F6-4132-BCA3-11AB3AFA4281}.Release|Any CPU.Build.0 = Release|Any CPU
20 | 		{31135C56-06FB-42A0-B098-2A166A437A8D}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
21 | 		{31135C56-06FB-42A0-B098-2A166A437A8D}.Debug|Any CPU.Build.0 = Debug|Any CPU
22 | 		{31135C56-06FB-42A0-B098-2A166A437A8D}.Release|Any CPU.ActiveCfg = Release|Any CPU
23 | 		{31135C56-06FB-42A0-B098-2A166A437A8D}.Release|Any CPU.Build.0 = Release|Any CPU
24 | 	EndGlobalSection
25 | 	GlobalSection(SolutionProperties) = preSolution
26 | 		HideSolutionNode = FALSE
27 | 	EndGlobalSection
28 | EndGlobal
29 | 


--------------------------------------------------------------------------------
/llama.cpp/llamacpp-offload-calculator/GGUFSharp/src/GGUFSharp/GGUFSharp/GGUFDataTypeEnum.cs:
--------------------------------------------------------------------------------
 1 | ﻿using System;
 2 | using System.Collections.Generic;
 3 | using System.Text;
 4 | 
 5 | namespace GGUFSharp
 6 | {
 7 |     public enum GGUFDataTypeEnum : uint
 8 |     {
 9 |         // The value is a 8-bit unsigned integer.
10 |         GGUF_METADATA_VALUE_TYPE_UINT8 = 0,
11 |         // The value is a 8-bit signed integer.
12 |         GGUF_METADATA_VALUE_TYPE_INT8 = 1,
13 |         // The value is a 16-bit unsigned little-endian integer.
14 |         GGUF_METADATA_VALUE_TYPE_UINT16 = 2,
15 |         // The value is a 16-bit signed little-endian integer.
16 |         GGUF_METADATA_VALUE_TYPE_INT16 = 3,
17 |         // The value is a 32-bit unsigned little-endian integer.
18 |         GGUF_METADATA_VALUE_TYPE_UINT32 = 4,
19 |         // The value is a 32-bit signed little-endian integer.
20 |         GGUF_METADATA_VALUE_TYPE_INT32 = 5,
21 |         // The value is a 32-bit IEEE754 floating point number.
22 |         GGUF_METADATA_VALUE_TYPE_FLOAT32 = 6,
23 |         // The value is a boolean.
24 |         // 1-byte value where 0 is false and 1 is true.
25 |         // Anything else is invalid, and should be treated as either the model being invalid or the reader being buggy.
26 |         GGUF_METADATA_VALUE_TYPE_BOOL = 7,
27 |         // The value is a UTF-8 non-null-terminated string, with length prepended.
28 |         GGUF_METADATA_VALUE_TYPE_STRING = 8,
29 |         // The value is an array of other values, with the length and type prepended.
30 |         ///
31 |         // Arrays can be nested, and the length of the array is the number of elements in the array, not the number of bytes.
32 |         GGUF_METADATA_VALUE_TYPE_ARRAY = 9,
33 |         // The value is a 64-bit unsigned little-endian integer.
34 |         GGUF_METADATA_VALUE_TYPE_UINT64 = 10,
35 |         // The value is a 64-bit signed little-endian integer.
36 |         GGUF_METADATA_VALUE_TYPE_INT64 = 11,
37 |         // The value is a 64-bit IEEE754 floating point number.
38 |         GGUF_METADATA_VALUE_TYPE_FLOAT64 = 12,
39 |     }
40 |     public static class GGUFDataTypeEnumHelper
41 |     {
42 |         public static int GetDataTypeSize(this GGUFDataTypeEnum dateType) => dateType switch
43 |         {
44 |             GGUFDataTypeEnum.GGUF_METADATA_VALUE_TYPE_UINT8 => 1,
45 |             GGUFDataTypeEnum.GGUF_METADATA_VALUE_TYPE_INT8 => 1,
46 |             GGUFDataTypeEnum.GGUF_METADATA_VALUE_TYPE_UINT16 =>2,
47 |             GGUFDataTypeEnum.GGUF_METADATA_VALUE_TYPE_INT16 => 2,
48 |             GGUFDataTypeEnum.GGUF_METADATA_VALUE_TYPE_UINT32 => 4,
49 |             GGUFDataTypeEnum.GGUF_METADATA_VALUE_TYPE_INT32 => 4,
50 |             GGUFDataTypeEnum.GGUF_METADATA_VALUE_TYPE_FLOAT32 => 4,
51 |             GGUFDataTypeEnum.GGUF_METADATA_VALUE_TYPE_BOOL => 1,
52 |             GGUFDataTypeEnum.GGUF_METADATA_VALUE_TYPE_STRING => -1,
53 |             GGUFDataTypeEnum.GGUF_METADATA_VALUE_TYPE_ARRAY => -1,
54 |             GGUFDataTypeEnum.GGUF_METADATA_VALUE_TYPE_UINT64 => 8,
55 |             GGUFDataTypeEnum.GGUF_METADATA_VALUE_TYPE_INT64 => 8,
56 |             GGUFDataTypeEnum.GGUF_METADATA_VALUE_TYPE_FLOAT64 => 8
57 |         };
58 |     }
59 |     
60 | 
61 | }
62 | 


--------------------------------------------------------------------------------
/llama.cpp/llamacpp-offload-calculator/GGUFSharp/src/GGUFSharp/GGUFSharp/GGUFFile.cs:
--------------------------------------------------------------------------------
 1 | ﻿using System;
 2 | using System.Collections.Generic;
 3 | using System.Text;
 4 | 
 5 | namespace GGUFSharp
 6 | {
 7 |     public class GGUFFile
 8 |     {
 9 |         public string FilePath { get; set; }
10 |         public uint Version { get; set; }
11 | 
12 |         public ulong DataStartOffset { get; set; }
13 |         public List<GGUFTensorInfo> TensorInfos { get; set; }
14 |         public List<GGUFMetaItem> MetaItems { get; set; }
15 |     }
16 | }
17 | 


--------------------------------------------------------------------------------
/llama.cpp/llamacpp-offload-calculator/GGUFSharp/src/GGUFSharp/GGUFSharp/GGUFHeader.cs:
--------------------------------------------------------------------------------
 1 | ﻿using System;
 2 | using System.Collections.Generic;
 3 | using System.Runtime.InteropServices;
 4 | using System.Text;
 5 | 
 6 | namespace GGUFSharp
 7 | {
 8 |     //[StructLayout(LayoutKind.Explicit)]
 9 |     public class GGUFHeader
10 |     {
11 |         //[FieldOffset(0)]
12 |         public uint MagicCode;
13 |         //[FieldOffset(4)]
14 |         public uint Version;
15 | 
16 |         //[FieldOffset(8)]
17 |         public ulong TensorCount;
18 | 
19 |         //[FieldOffset(24)]
20 |         public ulong MetaKVCount;
21 |     }
22 | }
23 | 


--------------------------------------------------------------------------------
/llama.cpp/llamacpp-offload-calculator/GGUFSharp/src/GGUFSharp/GGUFSharp/GGUFMetaItem.cs:
--------------------------------------------------------------------------------
 1 | ﻿using System;
 2 | using System.Collections.Generic;
 3 | using System.Linq;
 4 | using System.Text;
 5 | 
 6 | namespace GGUFSharp
 7 | {
 8 |     public   class GGUFMetaItem
 9 |     {
10 |         public GGUFDataTypeEnum DataType { get; set; }
11 |         public GGUFDataTypeEnum? ArrayElementType { get; set; }
12 |         public string Name { get; set; }
13 |         public byte[] RawData { get; set; }
14 |         public string[] ArrayStrings { get; set; }
15 |         public override string ToString()
16 |         {
17 |             StringBuilder sb = new StringBuilder($"{Name}:");
18 |             switch(DataType)
19 |             {
20 |                 case GGUFDataTypeEnum.GGUF_METADATA_VALUE_TYPE_STRING:
21 |                     sb.Append(Encoding.UTF8.GetString(RawData));
22 |                     break;
23 |                 case GGUFDataTypeEnum.GGUF_METADATA_VALUE_TYPE_ARRAY:
24 |                     if (ArrayElementType==GGUFDataTypeEnum.GGUF_METADATA_VALUE_TYPE_STRING)
25 |                     {
26 |                         if (ArrayStrings.Length>10)
27 |                         {
28 |                             sb.Append($"{string.Join(", ", ArrayStrings.Take(10))}...");
29 |                         }
30 |                         else
31 |                         {
32 |                             sb.Append(string.Join(", ", ArrayStrings));
33 |                         }
34 |                     }
35 |                     else
36 |                     {
37 |                         sb.Append($"[{Enum.GetName(typeof(GGUFDataTypeEnum), ArrayElementType)}]");
38 |                     }
39 |                     break;
40 |                 default:
41 |                     sb.Append(Enum.GetName(typeof(GGUFDataTypeEnum), DataType));
42 |                     break;
43 |             };
44 |             return sb.ToString();
45 |         }
46 |     }
47 | }
48 | 


--------------------------------------------------------------------------------
/llama.cpp/llamacpp-offload-calculator/GGUFSharp/src/GGUFSharp/GGUFSharp/GGUFReader.cs:
--------------------------------------------------------------------------------
  1 | ﻿using System;
  2 | using System.Buffers;
  3 | using System.Collections.Generic;
  4 | using System.Diagnostics;
  5 | using System.IO;
  6 | using System.IO.MemoryMappedFiles;
  7 | using System.Linq;
  8 | using System.Runtime.InteropServices;
  9 | using System.Text;
 10 | 
 11 | namespace GGUFSharp
 12 | {
 13 |     public class GGUFReader
 14 |     {
 15 |         public GGUFFile Read(string filePath)
 16 |         {
 17 |             using var fs = MemoryMappedFile.CreateFromFile(filePath);
 18 |             using var s = fs.CreateViewStream(0, 0, MemoryMappedFileAccess.Read);
 19 |             var header = readHeader(s);
 20 |             //using var meta = fs.CreateViewStream(24, 100*1024 * 1024, MemoryMappedFileAccess.Read);
 21 |             var d = readMetaData(s, header.MetaKVCount).ToList();
 22 |             
 23 |             //foreach (var item in d)
 24 |             //{
 25 |             //    Debug.WriteLine($"{item.Name}, {item.ToString()}");
 26 |             //}
 27 | 
 28 |             var t = readTensorData(s, header.TensorCount).ToList();
 29 |             ulong alignment = 32;//TODO: read align from header
 30 | 
 31 | 
 32 |             ulong startOffset = (ulong)s.Position +(alignment-((ulong)s.Position % alignment))% alignment;
 33 |             var sortedItems = t.OrderBy(x => x.Offset).ToList();
 34 |             for (var i = 0; i < sortedItems.Count - 1; i++)
 35 |             {
 36 |                 sortedItems[i].Size = sortedItems[i + 1].Offset - sortedItems[i].Offset;
 37 |             }
 38 |             var last = sortedItems.Last();
 39 |             last.Size = (ulong)new FileInfo(filePath).Length - last.Offset-startOffset;
 40 | 
 41 | 
 42 |             //foreach (var item in t)
 43 |             //{
 44 |             //    Debug.WriteLine($"[Tensor]{item.Name},{item.DimensionCount},{item.TensorType.ToString()},{item.Offset}");
 45 |             //}
 46 |             return new GGUFFile()
 47 |             {
 48 |                 FilePath = filePath,
 49 |                 MetaItems = d,
 50 |                 TensorInfos = sortedItems,
 51 |                 Version = header.Version,
 52 |                 DataStartOffset = startOffset,
 53 |             };
 54 | 
 55 |         }
 56 | 
 57 |         public IMemoryOwner<byte> ReadTensorData(GGUFFile file,GGUFTensorInfo tensor)
 58 |         {
 59 |             using var fs = MemoryMappedFile.CreateFromFile(file.FilePath);
 60 |             using var s = fs.CreateViewStream((long)(file.DataStartOffset+tensor.Offset), (long)tensor.Size, MemoryMappedFileAccess.Read);
 61 |             if (tensor.Size>int.MaxValue)
 62 |             {
 63 |                 throw new NotSupportedException("Not supoorted by now, tensor size shoud not larger than max value of int32");
 64 |             }
 65 |             var om = MemoryPool<byte>.Shared.Rent((int)tensor.Size);
 66 |             //BinaryReader br=new BinaryReader(s);
 67 |             s.Read(om.Memory.Span);
 68 |             return om;
 69 |         }
 70 | 
 71 | 
 72 |         private GGUFHeader readHeader(Stream header)
 73 |         {
 74 |             using BinaryReader br = new BinaryReader(header, Encoding.UTF8, true);
 75 |             GGUFHeader result = new GGUFHeader();
 76 |             result.MagicCode = br.ReadUInt32();
 77 |             if (result.MagicCode != 0x46554747) // "GGUF" in little-endian bytes order
 78 |             {
 79 |                 throw new InvalidOperationException("Invalid magic code");
 80 |             }
 81 |             result.Version = br.ReadUInt32();
 82 |             result.TensorCount = br.ReadUInt64();
 83 |             result.MetaKVCount = br.ReadUInt64();
 84 |             return result;
 85 |         }
 86 | 
 87 |         private IEnumerable<GGUFMetaItem> readMetaData(Stream meta, ulong MetaCount)
 88 |         {
 89 |             using BinaryReader br = new BinaryReader(meta, Encoding.UTF8, true);
 90 |             for (ulong i = 0; i < MetaCount; i++)
 91 |             {
 92 | 
 93 |                 GGUFMetaItem result = new GGUFMetaItem();
 94 |                 result.Name = readString(br);
 95 |                 result.DataType = (GGUFDataTypeEnum)br.ReadUInt32();
 96 |                 int size;
 97 |                 switch (result.DataType)
 98 |                 {
 99 |                     case GGUFDataTypeEnum.GGUF_METADATA_VALUE_TYPE_STRING:
100 |                         size = (int)br.ReadUInt64();
101 |                         break;
102 |                     case GGUFDataTypeEnum.GGUF_METADATA_VALUE_TYPE_ARRAY:
103 |                         GGUFDataTypeEnum elementType = (GGUFDataTypeEnum)br.ReadUInt32();
104 | 
105 |                         if (elementType == GGUFDataTypeEnum.GGUF_METADATA_VALUE_TYPE_ARRAY)
106 |                         {
107 |                             throw new NotSupportedException("Nested array is not supported");
108 |                         }
109 |                         ulong elementCount = br.ReadUInt64();
110 |                         if (elementType == GGUFDataTypeEnum.GGUF_METADATA_VALUE_TYPE_STRING)
111 |                         {
112 |                             result.ArrayStrings = new string[elementCount];
113 |                             result.ArrayElementType = GGUFDataTypeEnum.GGUF_METADATA_VALUE_TYPE_STRING;
114 |                             for (ulong j = 0; j < elementCount; j++)
115 |                             {
116 |                                 result.ArrayStrings[j] = readString(br);
117 |                             }
118 |                             size = 0;
119 |                         }
120 |                         else
121 |                         {
122 |                             result.ArrayElementType = elementType;
123 |                             size = (elementType.GetDataTypeSize() * (int)elementCount);
124 |                         }
125 | 
126 |                         break;
127 |                     default:
128 |                         size = result.DataType.GetDataTypeSize();
129 |                         break;
130 |                 }
131 |                 if (size > 0)
132 |                 {
133 |                     result.RawData = br.ReadBytes(size);
134 |                 }
135 | 
136 | 
137 |                 yield return result;
138 |             }
139 | 
140 |         }
141 |         private IEnumerable<GGUFTensorInfo> readTensorData(Stream stream, ulong tensorCount)
142 |         {
143 |             using BinaryReader br = new BinaryReader(stream, Encoding.UTF8, true);
144 |             for (ulong i = 0; i < tensorCount; i++)
145 |             {
146 |                 GGUFTensorInfo result = new GGUFTensorInfo();
147 |                 result.Name = readString(br);
148 |                 result.DimensionCount = br.ReadUInt32();
149 |                 result.Dimensions = readArray<ulong>(br, result.DimensionCount).ToArray();
150 |                 result.TensorType = (GGUFTensorType)br.ReadUInt32();
151 |                 result.Offset = br.ReadUInt64();
152 |                 yield return result;
153 |             }
154 |         }
155 | 
156 |         private string readString(BinaryReader reader)
157 |         {
158 |             var l = reader.ReadUInt64();
159 |             var x = reader.ReadBytes((int)l);
160 |             return System.Text.Encoding.UTF8.GetString(x);
161 |         }
162 | 
163 |         private Span<T> readArray<T>(BinaryReader reader, UInt64 elementCount = 0) where T : struct
164 |         {
165 |             if (elementCount == 0)
166 |             {
167 |                 elementCount = reader.ReadUInt64();
168 |             }
169 |             int length = Marshal.SizeOf<T>() * (int)elementCount;
170 |             byte[] buffer = new byte[length];
171 |             reader.Read(buffer, 0, length);
172 |             return MemoryMarshal.Cast<byte, T>(buffer);
173 |         }
174 |     }
175 | }
176 | 


--------------------------------------------------------------------------------
/llama.cpp/llamacpp-offload-calculator/GGUFSharp/src/GGUFSharp/GGUFSharp/GGUFSharp.csproj:
--------------------------------------------------------------------------------
1 | ﻿<Project Sdk="Microsoft.NET.Sdk">
2 | 
3 |   <PropertyGroup>
4 |     <TargetFramework>netstandard2.1</TargetFramework>
5 |     <Nullable>enable</Nullable>
6 |   </PropertyGroup>
7 | 
8 | </Project>
9 | 


--------------------------------------------------------------------------------
/llama.cpp/llamacpp-offload-calculator/GGUFSharp/src/GGUFSharp/GGUFSharp/GGUFStreamReader.cs:
--------------------------------------------------------------------------------
 1 | ﻿using System;
 2 | using System.Collections.Generic;
 3 | using System.IO;
 4 | using System.Text;
 5 | 
 6 | namespace GGUFSharp
 7 | {
 8 |     internal class GGUFStreamReader : BinaryReader
 9 |     {
10 |         public GGUFStreamReader(Stream stream) : base(stream)
11 |         {
12 |         }
13 |         
14 |     }
15 | }
16 | 


--------------------------------------------------------------------------------
/llama.cpp/llamacpp-offload-calculator/GGUFSharp/src/GGUFSharp/GGUFSharp/GGUFTensorInfo.cs:
--------------------------------------------------------------------------------
 1 | ﻿using System;
 2 | using System.Collections.Generic;
 3 | using System.Linq;
 4 | using System.Text;
 5 | 
 6 | namespace GGUFSharp
 7 | {
 8 |     public class GGUFTensorInfo
 9 |     {
10 |         public string Name { get; set; }
11 |         public UInt32 DimensionCount { get; set; }
12 |         public UInt64[] Dimensions { get; set; }
13 |         public GGUFTensorType TensorType { get; set; }
14 |         public UInt64 Offset { get; set; }
15 |         public UInt64 Size { get; set; }
16 |         
17 | 
18 |     }
19 | }
20 | 


--------------------------------------------------------------------------------
/llama.cpp/llamacpp-offload-calculator/GGUFSharp/src/GGUFSharp/GGUFSharp/GGUFTensorType.cs:
--------------------------------------------------------------------------------
 1 | ﻿using System;
 2 | using System.Collections.Generic;
 3 | using System.Text;
 4 | 
 5 | namespace GGUFSharp
 6 | {
 7 |     public enum GGUFTensorType:UInt32
 8 |     {
 9 |         GGML_TYPE_F32 = 0,
10 |         GGML_TYPE_F16 = 1,
11 |         GGML_TYPE_Q4_0 = 2,
12 |         GGML_TYPE_Q4_1 = 3,
13 |         // GGML_TYPE_Q4_2 = 4, support has been removed
14 |         // GGML_TYPE_Q4_3 = 5, support has been removed
15 |         GGML_TYPE_Q5_0 = 6,
16 |         GGML_TYPE_Q5_1 = 7,
17 |         GGML_TYPE_Q8_0 = 8,
18 |         GGML_TYPE_Q8_1 = 9,
19 |         GGML_TYPE_Q2_K = 10,
20 |         GGML_TYPE_Q3_K = 11,
21 |         GGML_TYPE_Q4_K = 12,
22 |         GGML_TYPE_Q5_K = 13,
23 |         GGML_TYPE_Q6_K = 14,
24 |         GGML_TYPE_Q8_K = 15,
25 |         GGML_TYPE_IQ2_XXS = 16,
26 |         GGML_TYPE_IQ2_XS = 17,
27 |         GGML_TYPE_IQ3_XXS = 18,
28 |         GGML_TYPE_IQ1_S = 19,
29 |         GGML_TYPE_IQ4_NL = 20,
30 |         GGML_TYPE_IQ3_S = 21,
31 |         GGML_TYPE_IQ2_S = 22,
32 |         GGML_TYPE_IQ4_XS = 23,
33 |         GGML_TYPE_I8 = 24,
34 |         GGML_TYPE_I16 = 25,
35 |         GGML_TYPE_I32 = 26,
36 |         GGML_TYPE_I64 = 27,
37 |         GGML_TYPE_F64 = 28,
38 |         GGML_TYPE_IQ1_M = 29,
39 |         GGML_TYPE_COUNT,
40 |     }
41 | }
42 | 


--------------------------------------------------------------------------------
/llama.cpp/llamacpp-offload-calculator/readme.md:
--------------------------------------------------------------------------------
  1 | # llama.cpp tesnsors offload calculator
  2 | 
  3 | ## Conf
  4 | 
  5 | Supported config formats:
  6 | - json
  7 | - yaml
  8 | - env variables
  9 | - cmd args
 10 | 
 11 | Example:
 12 | ### Base config `appsettings.yaml`
 13 | ```yaml
 14 | # print -ot config for command line running
 15 | PrintCmdConfig: true
 16 | # print yaml for https://github.com/mixa3607/charts/tree/master/charts/llamacpp helm chart
 17 | PrintHelmCharConfig: true
 18 | # print size per tensor
 19 | PrintTensorsSize: false
 20 | 
 21 | # list from 'llama-server --list-devices' + CPU
 22 | Devices: # 1+ gpu and 1 cpu
 23 |   ROCm0: # name from llama.cpp output
 24 |     Id: 1 # used for ordering
 25 |     Type: GPU # GPU/CPU
 26 |     TotalSizeMb: 32768  # memory megabytes
 27 |     PciBus: 0000:01:00.0 # not used
 28 |   ROCm1:
 29 |     Id: 2
 30 |     Type: GPU
 31 |     TotalSizeMb: 32768
 32 |     PciBus: 0000:02:00.0
 33 |   CPU:
 34 |     Id: 3
 35 |     Type: CPU
 36 |     TotalSizeMb: 131072
 37 |     ReservedMemory: 0
 38 | 
 39 | # offloading rules
 40 | OffloadRules:
 41 |   ffn_gate_exps: # name
 42 |     Id: 1 # used for ordering
 43 |     Regex: '^blk\.\d+\.ffn_gate_exps.weight' # regex
 44 |     Priority: 10 # lower priority will be offloaded earlier
 45 |   ffn_up_exps:
 46 |     Id: 2
 47 |     Regex: '^blk\.\d+\.ffn_up_exps.weight'
 48 |     Priority: 20
 49 |   ffn_down_exps:
 50 |     Id: 3
 51 |     Regex: '^blk\.\d+\.ffn_down_exps.weight'
 52 |     Priority: 20
 53 | ```
 54 | 
 55 | ### Per model config `appsettings.noname-model.yaml`
 56 | ```yaml
 57 | GgufFile: "/path/to/noname.gguf"
 58 | 
 59 | Devices:
 60 |   ROCm0:
 61 |     ReservedMemoryMb: 10240 # reserved memory for cache, ctx, etc
 62 |     #LayersPortion: 50 # layers percentage can be set manually
 63 |   ROCm1:
 64 |     ReservedMemoryMb: 10240
 65 |     #LayersPortion: 50
 66 | ```
 67 | 
 68 | For the first start, you need to specify an increased ReservedMemoryMb with which llama.cpp will guaranteed to work.
 69 | ```shell
 70 | $ cd ArkProjects.LlamaOffloadCalc
 71 | $ dotnet run -- -e noname-model
 72 | Reading /path/to/noname.gguf
 73 | Move blk.0.ffn_down_exps.weight (537 Mb) from ROCm0 to CPU
 74 | Move blk.0.ffn_up_exps.weight  (537 Mb) from ROCm0 to CPU
 75 | Move blk.1.ffn_down_exps.weight (537 Mb) from ROCm0 to CPU
 76 | Move blk.1.ffn_up_exps.weight  (537 Mb) from ROCm0 to CPU
 77 | Move blk.2.ffn_down_exps.weight (537 Mb) from ROCm0 to CPU
 78 | Move blk.2.ffn_up_exps.weight  (537 Mb) from ROCm0 to CPU
 79 | Move blk.3.ffn_down_exps.weight (537 Mb) from ROCm0 to CPU
 80 | Move blk.3.ffn_up_exps.weight  (537 Mb) from ROCm0 to CPU
 81 | Move blk.4.ffn_down_exps.weight (537 Mb) from ROCm0 to CPU
 82 | Move blk.4.ffn_up_exps.weight  (537 Mb) from ROCm0 to CPU
 83 | Move blk.5.ffn_down_exps.weight (537 Mb) from ROCm0 to CPU
 84 | Move blk.5.ffn_up_exps.weight  (537 Mb) from ROCm0 to CPU
 85 | Move blk.6.ffn_down_exps.weight (537 Mb) from ROCm0 to CPU
 86 | Move blk.6.ffn_up_exps.weight  (537 Mb) from ROCm0 to CPU
 87 | Move blk.7.ffn_down_exps.weight (537 Mb) from ROCm0 to CPU
 88 | Move blk.18.ffn_down_exps.weight (537 Mb) from ROCm1 to CPU
 89 | Move blk.18.ffn_up_exps.weight (537 Mb) from ROCm1 to CPU
 90 | Move blk.19.ffn_down_exps.weight (537 Mb) from ROCm1 to CPU
 91 | Move blk.19.ffn_up_exps.weight (537 Mb) from ROCm1 to CPU
 92 | Move blk.20.ffn_down_exps.weight (537 Mb) from ROCm1 to CPU
 93 | Move blk.20.ffn_up_exps.weight (537 Mb) from ROCm1 to CPU
 94 | Move blk.21.ffn_down_exps.weight (537 Mb) from ROCm1 to CPU
 95 | Move blk.21.ffn_up_exps.weight (537 Mb) from ROCm1 to CPU
 96 | Move blk.22.ffn_down_exps.weight (537 Mb) from ROCm1 to CPU
 97 | Move blk.22.ffn_up_exps.weight (537 Mb) from ROCm1 to CPU
 98 | Move blk.23.ffn_down_exps.weight (537 Mb) from ROCm1 to CPU
 99 | Move blk.23.ffn_up_exps.weight (537 Mb) from ROCm1 to CPU
100 | Move blk.24.ffn_down_exps.weight (537 Mb) from ROCm1 to CPU
101 | Move blk.24.ffn_up_exps.weight (537 Mb) from ROCm1 to CPU
102 | Move blk.25.ffn_down_exps.weight (537 Mb) from ROCm1 to CPU
103 | ======= Device memory usage
104 | ROCm0      32231 Mb of 32768 Mb (21991)
105 | ROCm1      32231 Mb of 32768 Mb (21991)
106 | CPU        16136 Mb of 131072 Mb (16136)
107 | 
108 | ======= Tensors offload result
109 | Offload ffn_gate_exps            (10) 0  (ROCm0 = 18, ROCm1 = 18, CPU = 0 ) of 36
110 | Offload ffn_up_exps              (20) 0  (ROCm0 = 11, ROCm1 = 11, CPU = 14) of 36
111 | Offload ffn_down_exps            (20) 0  (ROCm0 = 10, ROCm1 = 10, CPU = 16) of 36
112 | 
113 | ======= Helm chart config
114 | extraEnvVars:
115 |   - name: LLAMA_ARG_MAIN_GPU
116 |     value: '0'
117 |   - name: LLAMA_ARG_TENSOR_SPLIT
118 |     value: '18,18,0'
119 | 
120 | modelTensorsOverride:
121 |   - name: CPU
122 |     tensors:
123 |     - blk.0.ffn_down_exps.weight
124 |     - blk.0.ffn_up_exps.weight
125 |     - blk.1.ffn_down_exps.weight
126 |     - blk.1.ffn_up_exps.weight
127 |     - blk.2.ffn_down_exps.weight
128 |     - blk.2.ffn_up_exps.weight
129 |     - blk.3.ffn_down_exps.weight
130 |     - blk.3.ffn_up_exps.weight
131 |     - blk.4.ffn_down_exps.weight
132 |     - blk.4.ffn_up_exps.weight
133 |     - blk.5.ffn_down_exps.weight
134 |     - blk.5.ffn_up_exps.weight
135 |     - blk.6.ffn_down_exps.weight
136 |     - blk.6.ffn_up_exps.weight
137 |     - blk.7.ffn_down_exps.weight
138 |     - blk.18.ffn_down_exps.weight
139 |     - blk.18.ffn_up_exps.weight
140 |     - blk.19.ffn_down_exps.weight
141 |     - blk.19.ffn_up_exps.weight
142 |     - blk.20.ffn_down_exps.weight
143 |     - blk.20.ffn_up_exps.weight
144 |     - blk.21.ffn_down_exps.weight
145 |     - blk.21.ffn_up_exps.weight
146 |     - blk.22.ffn_down_exps.weight
147 |     - blk.22.ffn_up_exps.weight
148 |     - blk.23.ffn_down_exps.weight
149 |     - blk.23.ffn_up_exps.weight
150 |     - blk.24.ffn_down_exps.weight
151 |     - blk.24.ffn_up_exps.weight
152 |     - blk.25.ffn_down_exps.weight
153 | 
154 | ======= CMD config
155 | --main-gpu 0 --tensor-split "18,18,0" --override-tensor "(blk.0.ffn_down_exps.weight|blk.0.ffn_up_exps.weight|blk.1.ffn_down_exps.weight|blk.1.ffn_up_exps.weight|blk.2.ffn_down_exps.weight|blk.2.ffn_up_exps.weight|blk.3.ffn_down_exps.weight|blk.3.ffn_up_exps.weight|blk.4.ffn_down_exps.weight|blk.4.ffn_up_exps.weight|blk.5.ffn_down_exps.weight|blk.5.ffn_up_exps.weight|blk.6.ffn_down_exps.weight|blk.6.ffn_up_exps.weight|blk.7.ffn_down_exps.weight|blk.18.ffn_down_exps.weight|blk.18.ffn_up_exps.weight|blk.19.ffn_down_exps.weight|blk.19.ffn_up_exps.weight|blk.20.ffn_down_exps.weight|blk.20.ffn_up_exps.weight|blk.21.ffn_down_exps.weight|blk.21.ffn_up_exps.weight|blk.22.ffn_down_exps.weight|blk.22.ffn_up_exps.weight|blk.23.ffn_down_exps.weight|blk.23.ffn_up_exps.weight|blk.24.ffn_down_exps.weight|blk.24.ffn_up_exps.weight|blk.25.ffn_down_exps.weight)=CPU"
156 | ```
157 | 
158 | ```
159 | ┌┌┤ Memory Usage ├──────────────────────────────────────────────────────────┐ │  ┌┤ Memory Usage ├──────────────────────────────────────────────────────────┐
160 | ││ VRAM: [     30393 / 32752 MiB     ]  GTT: [        14 / 48256 MiB     ]  │ │  │ VRAM: [     31079 / 32752 MiB     ]  GTT: [        14 / 48256 MiB     ]  │ 
161 | └└──────────────────────────────────────────────────────────────────────────┘ │  └──────────────────────────────────────────────────────────────────────────┘
162 | ```
163 | 
164 | After the stress test, you can reduce ReservedMemoryMb by the amount of free memory from the first run.
165 | 
166 | ```yaml
167 | GgufFile: "/path/to/noname.gguf"
168 | Devices:
169 |   ROCm0:
170 |     ReservedMemoryMb: 9300
171 |   ROCm1:
172 |     ReservedMemoryMb: 8500
173 | ```
174 | 
175 | ```shell
176 | $ cd ArkProjects.LlamaOffloadCalc
177 | $ dotnet run -- -e noname-model
178 | Reading /path/to/noname.gguf
179 | Move blk.0.ffn_down_exps.weight (537 Mb) from ROCm0 to CPU
180 | Move blk.0.ffn_up_exps.weight  (537 Mb) from ROCm0 to CPU
181 | Move blk.1.ffn_down_exps.weight (537 Mb) from ROCm0 to CPU
182 | Move blk.1.ffn_up_exps.weight  (537 Mb) from ROCm0 to CPU
183 | Move blk.2.ffn_down_exps.weight (537 Mb) from ROCm0 to CPU
184 | Move blk.2.ffn_up_exps.weight  (537 Mb) from ROCm0 to CPU
185 | Move blk.3.ffn_down_exps.weight (537 Mb) from ROCm0 to CPU
186 | Move blk.3.ffn_up_exps.weight  (537 Mb) from ROCm0 to CPU
187 | Move blk.4.ffn_down_exps.weight (537 Mb) from ROCm0 to CPU
188 | Move blk.4.ffn_up_exps.weight  (537 Mb) from ROCm0 to CPU
189 | Move blk.5.ffn_down_exps.weight (537 Mb) from ROCm0 to CPU
190 | Move blk.5.ffn_up_exps.weight  (537 Mb) from ROCm0 to CPU
191 | Move blk.6.ffn_down_exps.weight (537 Mb) from ROCm0 to CPU
192 | Move blk.18.ffn_down_exps.weight (537 Mb) from ROCm1 to CPU
193 | Move blk.18.ffn_up_exps.weight (537 Mb) from ROCm1 to CPU
194 | Move blk.19.ffn_down_exps.weight (537 Mb) from ROCm1 to CPU
195 | Move blk.19.ffn_up_exps.weight (537 Mb) from ROCm1 to CPU
196 | Move blk.20.ffn_down_exps.weight (537 Mb) from ROCm1 to CPU
197 | Move blk.20.ffn_up_exps.weight (537 Mb) from ROCm1 to CPU
198 | Move blk.21.ffn_down_exps.weight (537 Mb) from ROCm1 to CPU
199 | Move blk.21.ffn_up_exps.weight (537 Mb) from ROCm1 to CPU
200 | Move blk.22.ffn_down_exps.weight (537 Mb) from ROCm1 to CPU
201 | Move blk.22.ffn_up_exps.weight (537 Mb) from ROCm1 to CPU
202 | Move blk.23.ffn_down_exps.weight (537 Mb) from ROCm1 to CPU
203 | ======= Device memory usage
204 | ROCm0      32366 Mb of 32768 Mb (23066)
205 | ROCm1      32642 Mb of 32768 Mb (24142)
206 | CPU        12909 Mb of 131072 Mb (12909)
207 | 
208 | ======= Tensors offload result
209 | Offload ffn_gate_exps            (10) 0  (ROCm0 = 18, ROCm1 = 18, CPU = 0 ) of 36
210 | Offload ffn_up_exps              (20) 0  (ROCm0 = 12, ROCm1 = 13, CPU = 11) of 36
211 | Offload ffn_down_exps            (20) 0  (ROCm0 = 11, ROCm1 = 12, CPU = 13) of 36
212 | 
213 | ======= Helm chart config
214 | extraEnvVars:
215 |   - name: LLAMA_ARG_MAIN_GPU
216 |     value: '0'
217 |   - name: LLAMA_ARG_TENSOR_SPLIT
218 |     value: '18,18,0'
219 | 
220 | modelTensorsOverride:
221 |   - name: CPU
222 |     tensors:
223 |     - blk.0.ffn_down_exps.weight
224 |     - blk.0.ffn_up_exps.weight
225 |     - blk.1.ffn_down_exps.weight
226 |     - blk.1.ffn_up_exps.weight
227 |     - blk.2.ffn_down_exps.weight
228 |     - blk.2.ffn_up_exps.weight
229 |     - blk.3.ffn_down_exps.weight
230 |     - blk.3.ffn_up_exps.weight
231 |     - blk.4.ffn_down_exps.weight
232 |     - blk.4.ffn_up_exps.weight
233 |     - blk.5.ffn_down_exps.weight
234 |     - blk.5.ffn_up_exps.weight
235 |     - blk.6.ffn_down_exps.weight
236 |     - blk.18.ffn_down_exps.weight
237 |     - blk.18.ffn_up_exps.weight
238 |     - blk.19.ffn_down_exps.weight
239 |     - blk.19.ffn_up_exps.weight
240 |     - blk.20.ffn_down_exps.weight
241 |     - blk.20.ffn_up_exps.weight
242 |     - blk.21.ffn_down_exps.weight
243 |     - blk.21.ffn_up_exps.weight
244 |     - blk.22.ffn_down_exps.weight
245 |     - blk.22.ffn_up_exps.weight
246 |     - blk.23.ffn_down_exps.weight
247 | 
248 | ======= CMD config
249 | --main-gpu 0 --tensor-split "18,18,0" --override-tensor "(blk.0.ffn_down_exps.weight|blk.0.ffn_up_exps.weight|blk.1.ffn_down_exps.weight|blk.1.ffn_up_exps.weight|blk.2.ffn_down_exps.weight|blk.2.ffn_up_exps.weight|blk.3.ffn_down_exps.weight|blk.3.ffn_up_exps.weight|blk.4.ffn_down_exps.weight|blk.4.ffn_up_exps.weight|blk.5.ffn_down_exps.weight|blk.5.ffn_up_exps.weight|blk.6.ffn_down_exps.weight|blk.18.ffn_down_exps.weight|blk.18.ffn_up_exps.weight|blk.19.ffn_down_exps.weight|blk.19.ffn_up_exps.weight|blk.20.ffn_down_exps.weight|blk.20.ffn_up_exps.weight|blk.21.ffn_down_exps.weight|blk.21.ffn_up_exps.weight|blk.22.ffn_down_exps.weight|blk.22.ffn_up_exps.weight|blk.23.ffn_down_exps.weight)=CPU"
250 | ```
251 | 
252 | ```
253 | ┌┌┤ Memory Usage ├──────────────────────────────────────────────────────────┐ │  ┌┤ Memory Usage ├──────────────────────────────────────────────────────────┐
254 | ││ VRAM: [     32545 / 32752 MiB     ]  GTT: [        14 / 48256 MiB     ]  │ │  │ VRAM: [     32155 / 32752 MiB     ]  GTT: [        14 / 48256 MiB     ]  │ 
255 | └└──────────────────────────────────────────────────────────────────────────┘ │  └──────────────────────────────────────────────────────────────────────────┘
256 | 
257 | prompt eval time =   57715.98 ms /  8777 tokens (    6.58 ms per token,   152.07 tokens per second)
258 |        eval time =   66072.62 ms /   878 tokens (   75.25 ms per token,    13.29 tokens per second)
259 |       total time =  123788.60 ms /  9655 tokens
260 | ```
261 | 


--------------------------------------------------------------------------------
/llama.cpp/preset.rocm-6.3.3.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | 
3 | export LLAMA_ROCM_VERSION="6.3.3"
4 | 


--------------------------------------------------------------------------------
/llama.cpp/preset.rocm-6.4.4.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | 
3 | export LLAMA_ROCM_VERSION="6.4.4"
4 | 


--------------------------------------------------------------------------------
/llama.cpp/preset.rocm-7.0.0.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | 
3 | export LLAMA_ROCM_VERSION="7.0.0"
4 | 


--------------------------------------------------------------------------------
/llama.cpp/readme.md:
--------------------------------------------------------------------------------
 1 | # llama.cpp GFX906
 2 | LLM inference in C/C++ https://github.com/ggml-org/llama.cpp
 3 | 
 4 | Recommend use `docker.io/mixa3607/llama.cpp-gfx906:7.0.0-complete`
 5 | 
 6 | Also see [llamacpp-offload-calculator](./llamacpp-offload-calculator/readme.md)
 7 | 
 8 | ## Benchmarks
 9 | ```shell
10 | export PATH="/app:$PATH"
11 | export LD_LIBRARY_PATH="/app:$LD_LIBRARY_PATH"
12 | 
13 | MODEL=/root/.cache/huggingface/hub/models--ggml-org--gemma-3n-E4B-it-GGUF/snapshots/ee0f0cb58a4b9d5b48dd55b576db22eeeeecdd7e/gemma-3n-E4B-it-Q8_0.gguf
14 | MODEL=/root/.cache/huggingface/hub/models--unsloth--gemma-3-12b-it-GGUF/snapshots/a5592d885c8a933e824f80d2eeda84db95ad2712/gemma-3-12b-it-Q8_0.gguf
15 | MODEL=/root/.cache/huggingface/hub/models--bartowski--Qwen_Qwen3-14B-GGUF/snapshots/bd080f768a6401c2d5a7fa53a2e50cd8218a9ce2/Qwen_Qwen3-14B-Q4_K_S.gguf
16 | MODEL=/root/.cache/huggingface/hub/models--bartowski--Qwen_Qwen3-14B-GGUF/snapshots/bd080f768a6401c2d5a7fa53a2e50cd8218a9ce2/Qwen_Qwen3-14B-Q4_0.gguf
17 | MODEL=/root/.cache/huggingface/hub/models--bartowski--Qwen_Qwen3-14B-GGUF/snapshots/bd080f768a6401c2d5a7fa53a2e50cd8218a9ce2/Qwen_Qwen3-14B-bf16.gguf
18 | MODEL=/root/.cache/huggingface/hub/models--ggml-org--gemma-3-27b-it-GGUF/snapshots/f94c25afed0072339c5fa3b705a7b4222afe5f62/gemma-3-27b-it-f16-00001-of-00002.gguf
19 | 
20 | llama-bench --model $MODEL -t 16 --flash-attn 0
21 | ```
22 | 
23 | ```
24 | ggml_cuda_init: GGML_CUDA_FORCE_MMQ:    no
25 | ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
26 | ggml_cuda_init: found 2 ROCm devices:
27 |   Device 0: AMD Radeon Graphics, gfx906:sramecc+:xnack- (0x906), VMM: no, Wave Size: 64
28 |   Device 1: AMD Radeon Graphics, gfx906:sramecc+:xnack- (0x906), VMM: no, Wave Size: 64
29 | load_backend: loaded ROCm backend from /app/libggml-hip.so
30 | load_backend: loaded CPU backend from /app/libggml-cpu-haswell.so
31 | ```
32 | 
33 | | rocm  | llama.cpp | model                                |       size |     params | backend    | ngl |            test |                  t/s |
34 | | ----- | --------- | ------------------------------------ | ---------: | ---------: | ---------- | --: | --------------: | -------------------: |
35 | | 6.3.4 | 982e3472  | gemma3n E4B Q8_0                     |   6.84 GiB |     6.87 B | ROCm       |  99 |           pp512 |        483.29 ± 0.68 |
36 | | 6.3.4 | 982e3472  | gemma3n E4B Q8_0                     |   6.84 GiB |     6.87 B | ROCm       |  99 |           tg128 |         33.48 ± 0.43 |
37 | | 6.3.4 | 982e3472  | gemma3 12B Q8_0                      |  11.64 GiB |    11.77 B | ROCm       |  99 |           pp512 |        246.66 ± 0.07 |
38 | | 6.3.4 | 982e3472  | gemma3 12B Q8_0                      |  11.64 GiB |    11.77 B | ROCm       |  99 |           tg128 |         28.41 ± 0.12 |
39 | | 6.3.4 | 982e3472  | qwen3 14B Q4_K - Small               |   7.98 GiB |    14.77 B | ROCm       |  99 |           pp512 |        242.34 ± 0.15 |
40 | | 6.3.4 | 982e3472  | qwen3 14B Q4_K - Small               |   7.98 GiB |    14.77 B | ROCm       |  99 |           tg128 |         35.87 ± 0.15 |
41 | | 6.3.4 | 982e3472  | qwen3 14B Q4_0                       |   7.95 GiB |    14.77 B | ROCm       |  99 |           pp512 |        574.13 ± 0.28 |
42 | | 6.3.4 | 982e3472  | qwen3 14B Q4_0                       |   7.95 GiB |    14.77 B | ROCm       |  99 |           tg128 |         39.02 ± 0.23 |
43 | | 6.3.4 | 982e3472  | qwen3 14B BF16                       |  27.51 GiB |    14.77 B | ROCm       |  99 |           pp512 |        118.01 ± 0.24 |
44 | | 6.3.4 | 982e3472  | qwen3 14B BF16                       |  27.51 GiB |    14.77 B | ROCm       |  99 |           tg128 |         19.33 ± 0.08 |
45 | | 6.3.4 | 982e3472  | gemma3 27B F16                       |  50.31 GiB |    27.01 B | ROCm       |  99 |           pp512 |        236.51 ± 0.14 |
46 | | 6.3.4 | 982e3472  | gemma3 27B F16                       |  50.31 GiB |    27.01 B | ROCm       |  99 |           tg128 |         10.37 ± 0.04 |
47 | | 6.3.4 | 982e3472  | llama4 17Bx16E (Scout) Q3_K - Medium |  48.19 GiB |   107.77 B | ROCm       |  99 |           pp512 |        160.50 ± 0.81 |
48 | | 6.3.4 | 982e3472  | llama4 17Bx16E (Scout) Q3_K - Medium |  48.19 GiB |   107.77 B | ROCm       |  99 |           tg128 |         22.75 ± 0.07 |
49 | | 6.4.1 | 982e3472  | gemma3n E4B Q8_0                     |   6.84 GiB |     6.87 B | ROCm       |  99 |           pp512 |        606.83 ± 0.97 |
50 | | 6.4.1 | 982e3472  | gemma3n E4B Q8_0                     |   6.84 GiB |     6.87 B | ROCm       |  99 |           tg128 |         33.36 ± 0.23 |
51 | | 6.4.1 | 982e3472  | gemma3 12B Q8_0                      |  11.64 GiB |    11.77 B | ROCm       |  99 |           pp512 |        329.70 ± 0.30 |
52 | | 6.4.1 | 982e3472  | gemma3 12B Q8_0                      |  11.64 GiB |    11.77 B | ROCm       |  99 |           tg128 |         28.58 ± 0.15 |
53 | | 6.4.1 | 982e3472  | qwen3 14B Q4_K - Small               |   7.98 GiB |    14.77 B | ROCm       |  99 |           pp512 |        286.58 ± 0.15 |
54 | | 6.4.1 | 982e3472  | qwen3 14B Q4_K - Small               |   7.98 GiB |    14.77 B | ROCm       |  99 |           tg128 |         36.48 ± 0.11 |
55 | | 6.4.1 | 982e3472  | qwen3 14B Q4_0                       |   7.95 GiB |    14.77 B | ROCm       |  99 |           pp512 |        570.15 ± 0.23 |
56 | | 6.4.1 | 982e3472  | qwen3 14B Q4_0                       |   7.95 GiB |    14.77 B | ROCm       |  99 |           tg128 |         38.94 ± 0.16 |
57 | | 6.4.1 | 982e3472  | qwen3 14B BF16                       |  27.51 GiB |    14.77 B | ROCm       |  99 |           pp512 |        119.03 ± 0.31 |
58 | | 6.4.1 | 982e3472  | qwen3 14B BF16                       |  27.51 GiB |    14.77 B | ROCm       |  99 |           tg128 |         19.46 ± 0.10 |
59 | | 6.4.1 | 982e3472  | gemma3 27B F16                       |  50.31 GiB |    27.01 B | ROCm       |  99 |           pp512 |        238.38 ± 0.26 |
60 | | 6.4.1 | 982e3472  | gemma3 27B F16                       |  50.31 GiB |    27.01 B | ROCm       |  99 |           tg128 |         10.41 ± 0.03 |
61 | | 6.4.1 | 982e3472  | llama4 17Bx16E (Scout) Q3_K - Medium |  48.19 GiB |   107.77 B | ROCm       |  99 |           pp512 |        190.52 ± 0.84 |
62 | | 6.4.1 | 982e3472  | llama4 17Bx16E (Scout) Q3_K - Medium |  48.19 GiB |   107.77 B | ROCm       |  99 |           tg128 |         22.96 ± 0.10 |
63 | 
64 | 
65 | ## Run
66 | ### Docker
67 | See https://github.com/ggml-org/llama.cpp/blob/master/docs/docker.md + https://github.com/ROCm/vllm/blob/main/docs/deployment/docker.md
68 | 
69 | ### Kubernetes
70 | Helm chart and samples [mixa3607 charts](https://github.com/mixa3607/charts)
71 | 
72 | ## Build
73 | See build vars in `./env.sh`. You also may use presetis `./preset.rocm-*.sh`. Exec `./build-and-push.rocm.sh`:
74 | ```bash
75 | $ . preset.rocm-7.0.0.sh
76 | $ ./build-and-push.rocm.sh
77 | ~/REPOS/mixa3607/llama.cpp-gfx906/rocm ~/REPOS/mixa3607/llama.cpp-gfx906/rocm
78 | ~/REPOS/mixa3607/llama.cpp-gfx906/rocm
79 | ~/REPOS/mixa3607/llama.cpp-gfx906/llama.cpp ~/REPOS/mixa3607/llama.cpp-gfx906/rocm
80 | ~/REPOS/mixa3607/llama.cpp-gfx906/rocm
81 | ~/REPOS/mixa3607/llama.cpp-gfx906/comfyui ~/REPOS/mixa3607/llama.cpp-gfx906/rocm
82 | ~/REPOS/mixa3607/llama.cpp-gfx906/rocm
83 | ~/REPOS/mixa3607/llama.cpp-gfx906/vllm ~/REPOS/mixa3607/llama.cpp-gfx906/rocm
84 | ~/REPOS/mixa3607/llama.cpp-gfx906/rocm
85 | #0 building with "remote" instance using remote driver
86 | #...............
87 | #14 DONE 583.8s
88 | ```
89 | 


--------------------------------------------------------------------------------
/pytorch/build-and-push.torch.sh:
--------------------------------------------------------------------------------
 1 | #/bin/bash
 2 | set -e
 3 | 
 4 | cd $(dirname $0)
 5 | source ../env.sh
 6 | 
 7 | IMAGE_TAGS=(
 8 |   "$TORCH_IMAGE:${TORCH_VERSION}-rocm-${TORCH_ROCM_VERSION}-${REPO_GIT_REF}"
 9 |   "$TORCH_IMAGE:${TORCH_VERSION}-rocm-${TORCH_ROCM_VERSION}"
10 | )
11 | 
12 | if docker_image_pushed ${IMAGE_TAGS[0]}; then
13 |   echo "${IMAGE_TAGS[0]} already in registry. Skip"
14 |   exit 0
15 | fi
16 | 
17 | DOCKER_EXTRA_ARGS=()
18 | for (( i=0; i<${#IMAGE_TAGS[@]}; i++ )); do
19 |   DOCKER_EXTRA_ARGS+=("-t" "${IMAGE_TAGS[$i]}")
20 | done
21 | 
22 | mkdir ./logs || true
23 | docker buildx build ${DOCKER_EXTRA_ARGS[@]} --push \
24 |   --build-arg BASE_ROCM_IMAGE="${PATCHED_ROCM_IMAGE}:${TORCH_ROCM_VERSION}-complete" \
25 |   --build-arg ROCM_ARCH="${ROCM_ARCH}" \
26 |   --build-arg PYTORCH_BRANCH="$TORCH_VERSION" \
27 |   --build-arg PYTORCH_VISION_BRANCH="$TORCH_VISION_VERSION" \
28 |   --target final -f ./torch.Dockerfile --progress=plain ./submodules 2>&1 | tee ./logs/build_$(date +%Y%m%d%H%M%S).log
29 | 


--------------------------------------------------------------------------------
/pytorch/env.sh:
--------------------------------------------------------------------------------
 1 | #/bin/bash
 2 | 
 3 | pushd $(dirname ${BASH_SOURCE[0]})
 4 | 
 5 | # rocm version
 6 | if [ "$TORCH_ROCM_VERSION" == "" ];  then TORCH_ROCM_VERSION=6.3.3; fi
 7 | # torch git checkpoint
 8 | if [ "$TORCH_VERSION" == "" ];       then TORCH_VERSION="v2.7.1"; fi
 9 | 
10 | # destination image
11 | if [ "$TORCH_IMAGE" == "" ]; then
12 |   TORCH_IMAGE=docker.io/mixa3607/pytorch-gfx906
13 |   #TORCH_IMAGE=registry.arkprojects.space/apps/pytorch-gfx906
14 | fi
15 | 
16 | popd
17 | 


--------------------------------------------------------------------------------
/pytorch/preset.torch-2.7.1-rocm-6.3.3.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | 
3 | export TORCH_ROCM_VERSION="6.3.3"
4 | export TORCH_VERSION="v2.7.1"
5 | export TORCH_VISION_VERSION="v0.21.0"
6 | 


--------------------------------------------------------------------------------
/pytorch/preset.torch-2.7.1-rocm-6.4.4.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | 
3 | export TORCH_ROCM_VERSION="6.4.4"
4 | export TORCH_VERSION="v2.7.1"
5 | export TORCH_VISION_VERSION="v0.21.0"
6 | 


--------------------------------------------------------------------------------
/pytorch/preset.torch-2.8.0-rocm-6.3.3.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | 
3 | export TORCH_ROCM_VERSION="6.3.3"
4 | export TORCH_VERSION="v2.8.0"
5 | export TORCH_VISION_VERSION="v0.23.0"
6 | 


--------------------------------------------------------------------------------
/pytorch/preset.torch-2.8.0-rocm-6.4.4.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | 
3 | export TORCH_ROCM_VERSION="6.4.4"
4 | export TORCH_VERSION="v2.8.0"
5 | export TORCH_VISION_VERSION="v0.23.0"
6 | 


--------------------------------------------------------------------------------
/pytorch/preset.torch-2.8.0-rocm-7.0.2.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | 
3 | export TORCH_ROCM_VERSION="7.0.2"
4 | export TORCH_VERSION="v2.8.0"
5 | 


--------------------------------------------------------------------------------
/pytorch/preset.torch-2.9.0-rocm-7.0.2.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | 
3 | export TORCH_ROCM_VERSION="7.0.2"
4 | export TORCH_VERSION="v2.9.0"
5 | 


--------------------------------------------------------------------------------
/pytorch/readme.md:
--------------------------------------------------------------------------------
 1 | # PyTorch GFX906
 2 | Tensors and Dynamic neural networks in Python with strong GPU acceleration.
 3 | 
 4 | Packages:
 5 | - torch
 6 | - torchvision
 7 | - torchaudio
 8 | 
 9 | Recommend use `docker.io/mixa3607/pytorch-gfx906:(v2.7.1|v2.8.0)-rocm-6.3.3`
10 | 
11 | ## Build
12 | See build vars in `./env.sh`. You also may use presetis `./preset.*.sh`. Exec `./build-and-push.torch.sh`:
13 | ```bash
14 | $ . preset.torch-2.7.1-rocm-6.3.3.sh
15 | $ ./build-and-push.torch.sh
16 | ~/REPOS/mixa3607/llama.cpp-gfx906/rocm ~/REPOS/mixa3607/llama.cpp-gfx906/rocm
17 | ~/REPOS/mixa3607/llama.cpp-gfx906/rocm
18 | ~/REPOS/mixa3607/llama.cpp-gfx906/llama.cpp ~/REPOS/mixa3607/llama.cpp-gfx906/rocm
19 | ~/REPOS/mixa3607/llama.cpp-gfx906/rocm
20 | ~/REPOS/mixa3607/llama.cpp-gfx906/comfyui ~/REPOS/mixa3607/llama.cpp-gfx906/rocm
21 | ~/REPOS/mixa3607/llama.cpp-gfx906/rocm
22 | ~/REPOS/mixa3607/llama.cpp-gfx906/vllm ~/REPOS/mixa3607/llama.cpp-gfx906/rocm
23 | ~/REPOS/mixa3607/llama.cpp-gfx906/rocm
24 | #0 building with "remote" instance using remote driver
25 | 
26 | #1 [internal] load build definition from rocm.Dockerfile
27 | #1 transferring dockerfile: 4.95kB done
28 | #1 DONE 0.0s
29 | 
30 | #2 [auth] dockerio-proxy/rocm/dev-ubuntu-24.04:pull rocm/dev-ubuntu-24.04:pull token for registry.arkprojects.space
31 | #2 DONE 0.0s
32 | 
33 | #3 [internal] load metadata for docker.io/rocm/dev-ubuntu-24.04:7.0-complete
34 | #3 DONE 1.8s
35 | 
36 | #4 [internal] load .dockerignore
37 | #4 transferring context: 2B done
38 | #...............
39 | #24 exporting to image
40 | #24 pushing layers 6.5s done
41 | #24 pushing manifest for docker.io/mixa3607/rocm-gfx906:7.0.0-20251005035204-complete@sha256:00532f62462e80d51e48b021afb7875af53164455c84dc28b24eb29d39aa0005
42 | #24 pushing manifest for docker.io/mixa3607/rocm-gfx906:7.0.0-20251005035204-complete@sha256:00532f62462e80d51e48b021afb7875af53164455c84dc28b24eb29d39aa0005 3.3s done
43 | #24 pushing layers 2.0s done
44 | #24 pushing manifest for docker.io/mixa3607/rocm-gfx906:7.0.0-complete@sha256:00532f62462e80d51e48b021afb7875af53164455c84dc28b24eb29d39aa0005
45 | #24 pushing manifest for docker.io/mixa3607/rocm-gfx906:7.0.0-complete@sha256:00532f62462e80d51e48b021afb7875af53164455c84dc28b24eb29d39aa0005 2.2s done
46 | #24 DONE 17.6s
47 | ```
48 | 


--------------------------------------------------------------------------------
/pytorch/submodules/.gitkeep:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mixa3607/ML-gfx906/d95fce7ed5e14ec9cc7b801c668696194a929cda/pytorch/submodules/.gitkeep


--------------------------------------------------------------------------------
/pytorch/torch.Dockerfile:
--------------------------------------------------------------------------------
 1 | ARG BASE_ROCM_IMAGE="docker.io/mixa3607/vllm-gfx906:latest"
 2 | ARG ROCM_ARCH="gfx906"
 3 | ARG PYTORCH_REPO="https://github.com/pytorch/pytorch.git"
 4 | ARG PYTORCH_BRANCH="v2.7.1"
 5 | ARG PYTORCH_VISION_REPO="https://github.com/pytorch/vision.git"
 6 | ARG PYTORCH_VISION_BRANCH=""
 7 | ARG PYTORCH_AUDIO_REPO="https://github.com/pytorch/audio.git"
 8 | ARG PYTORCH_AUDIO_BRANCH=""
 9 | 
10 | ############# Base image #############
11 | FROM ${BASE_ROCM_IMAGE} AS rocm_base
12 | # Install basic utilities and Python 3.12
13 | RUN apt-get update && apt-get install -y software-properties-common git python3-pip && \
14 |     add-apt-repository ppa:deadsnakes/ppa && \
15 |     apt-get update -y && \
16 |     apt-get install -y python3.12 python3.12-dev python3.12-venv \
17 |     python3.12-lib2to3 python-is-python3 python3.12-full && \
18 |     update-alternatives --install /usr/bin/python3 python3 /usr/bin/python3.12 1 && \
19 |     update-alternatives --set python3 /usr/bin/python3.12 && \
20 |     ln -sf /usr/bin/python3.12-config /usr/bin/python3-config && \
21 |     python3 -m pip config set global.break-system-packages true && \
22 |     pip install amdsmi==$(cat /opt/ROCM_VERSION_FULL) && \
23 |     true
24 | 
25 | # Set environment variables
26 | ARG ROCM_ARCH
27 | ENV ROCM_ARCH=$ROCM_ARCH
28 | ENV PYTORCH_ROCM_ARCH=$ROCM_ARCH
29 | ENV PATH=/opt/rocm/llvm/bin:$PATH
30 | ENV ROCM_PATH=/opt/rocm
31 | ENV LD_LIBRARY_PATH=/opt/rocm/lib:/usr/local/lib:
32 | 
33 | ############# Build torch #############
34 | FROM rocm_base AS build_torch
35 | RUN pip install setuptools wheel packaging cmake ninja setuptools_scm jinja2
36 | 
37 | WORKDIR /build/pytorch
38 | ARG PYTORCH_REPO
39 | ARG PYTORCH_BRANCH
40 | RUN git clone --depth 1 --recurse-submodules --shallow-submodules --jobs 4 --branch "${PYTORCH_BRANCH}" "${PYTORCH_REPO}" .
41 | RUN pip install -r requirements.txt
42 | RUN sed -i 's|7e29c325d5bd33ba896ddb106f5d4fc7d715274dca7fe937f724fffa82017838|1e9b3dddf0c7fc07131c6f0f5266129e83ce2331f459fa2be8c63f4ae91b0f5b|g' cmake/External/aotriton.cmake && \
43 |     python3 tools/amd_build/build_amd.py && \
44 |     CMAKE_PREFIX_PATH=$(python3 -c 'import sys; print(sys.prefix)') python3 setup.py bdist_wheel --dist-dir=/dist && \
45 |     pip install /dist/*.whl
46 | 
47 | ############# Build vision #############
48 | FROM build_torch AS build_vision
49 | WORKDIR /build/vision
50 | ARG PYTORCH_VISION_REPO
51 | ARG PYTORCH_VISION_BRANCH
52 | RUN if [ "${PYTORCH_VISION_BRANCH}" = "" ]; then \
53 |       git clone --depth 1 --recurse-submodules --shallow-submodules --jobs 4 "${PYTORCH_VISION_REPO}" . && \
54 |       git fetch --depth=1 origin "$(cat /build/pytorch/.github/ci_commit_pins/vision.txt)" && \ 
55 |       git checkout "$(cat /build/pytorch/.github/ci_commit_pins/vision.txt)" && \
56 |       git reset --hard FETCH_HEAD; \
57 |     else \
58 |       git clone --depth 1 --recurse-submodules --shallow-submodules --jobs 4 --branch "${PYTORCH_VISION_BRANCH}" "${PYTORCH_VISION_REPO}" . ; \
59 |     fi
60 | RUN python3 setup.py bdist_wheel --dist-dir=/dist && \
61 |     pip install /dist/*.whl
62 | 
63 | ############# Build audio #############
64 | FROM build_torch AS build_audio
65 | WORKDIR /build/audio
66 | ARG PYTORCH_AUDIO_REPO
67 | ARG PYTORCH_AUDIO_BRANCH
68 | RUN if [ "${PYTORCH_AUDIO_BRANCH}" = "" ]; then \
69 |       git clone --depth 1 --recurse-submodules --shallow-submodules --jobs 4 "${PYTORCH_AUDIO_REPO}" . && \
70 |       git fetch --depth=1 origin "$(cat /build/pytorch/.github/ci_commit_pins/audio.txt)" && \ 
71 |       git checkout "$(cat /build/pytorch/.github/ci_commit_pins/audio.txt)" && \
72 |       git reset --hard FETCH_HEAD; \
73 |     else \
74 |       git clone --depth 1 --recurse-submodules --shallow-submodules --jobs 4 --branch "${PYTORCH_AUDIO_BRANCH}" "${PYTORCH_AUDIO_REPO}" . ; \
75 |     fi
76 | RUN python3 setup.py bdist_wheel --dist-dir=/dist && \
77 |     pip install /dist/*.whl
78 | 
79 | ############# Install all #############
80 | FROM rocm_base AS final
81 | RUN --mount=type=bind,from=build_torch,src=/dist/,target=/dist_torch \
82 |     --mount=type=bind,from=build_vision,src=/dist/,target=/dist_vision \
83 |     --mount=type=bind,from=build_audio,src=/dist/,target=/dist_audio \
84 |     pip install /dist_torch/*.whl /dist_vision/torchvision-*.whl /dist_audio/torchaudio-*.whl && \
85 |     true
86 | 
87 | CMD ["/bin/bash"]
88 | 


--------------------------------------------------------------------------------
/readme.md:
--------------------------------------------------------------------------------
 1 | # ML software for deprecated GFX906 arch
 2 | 
 3 | ## Prebuild images
 4 | ### Images
 5 | | Name | Source | Status | Docs |
 6 | | ---- | ------ | ------ | ---- |
 7 | | ROCm | [ROCm](https://github.com/ROCm/ROCm), [rocBLAS](https://github.com/ROCm/rocBLAS) | OK | [readme](./rocm/readme.md) |
 8 | | llama.cpp | [llama.cpp](https://github.com/ggml-org/llama.cpp) | OK | [readme](./llama.cpp/readme.md) |
 9 | | ComfyUI | [ComfyUI](https://github.com/comfyanonymous/ComfyUI) | OK | [readme](./comfyui/readme.md) |
10 | | VLLM | [VLLM](https://github.com/nlzy/vllm-gfx906), [triton](https://github.com/nlzy/triton-gfx906) | OK | [readme](./vllm/readme.md) |
11 | 
12 | ### Deps graph
13 | ```mermaid
14 | flowchart TD
15 |   rocm-src[docker.io/rocm/dev-ubuntu-24.04] --> rocm[docker.io/mixa3607/rocm-gfx906] 
16 |   rocm --> llama[docker.io/mixa3607/llama.cpp-gfx906]
17 |   rocm --> torch[docker.io/mixa3607/pytorch-gfx906]
18 |   torch --> comfyui[docker.io/mixa3607/comfyui-gfx906]
19 |   torch --> vllm[docker.io/mixa3607/vllm-gfx906]
20 | ```
21 | 
22 | ## Perf tuning
23 | Changing smcPPTable/TdcLimitGfx 350 => 150 reduced the hotspot by 10+- degrees with almost no drop in performance in vllm ([table in vllm](./vllm/readme.md#benchmarks))
24 | 
25 | ```console
26 | $ upp -p /sys/class/drm/card${GPU_ID}/device/pp_table set --write smcPPTable/TdcLimitGfx=150
27 | Changing smcPPTable.TdcLimitGfx of type H from 330 to 150 at 0x1fe
28 | Committing changes to '/sys/class/drm/card1/device/pp_table'.
29 | ```
30 | <img src="./docs/images/temperatures.png" alt="temperatures" width="400"/>
31 | 
32 | ## Environment
33 | All software tested on Lenovo RD450X with 256G mem and 2x MI50 32G (x16 + x8). For cooling gpus used [AMD Instinct MI50 blower fan adapter (thingiverse)](https://www.thingiverse.com/thing:7153218).
34 | 
35 | ## RVS
36 | ```shell
37 | cd /opt/rocm-6.4.1/bin
38 | apt update
39 | apt install -y rocm-validation-suite
40 | echo 'actions:
41 | - name: gst-581Tflops-4K4K8K-rand-bf16
42 |   device: all
43 |   module: gst
44 |   log_interval: 3000
45 |   ramp_interval: 5000
46 |   duration: 15000
47 |   hot_calls: 1000
48 |   copy_matrix: false
49 |   target_stress: 581000
50 |   matrix_size_a: 4864
51 |   matrix_size_b: 4096
52 |   matrix_size_c: 8192
53 |   matrix_init: rand
54 |   data_type: bf16_r
55 |   lda: 8320
56 |   ldb: 8320
57 |   ldc: 4992
58 |   ldd: 4992
59 |   transa: 1
60 |   transb: 0
61 |   alpha: 1
62 |   beta: 0' > ~/gst-581Tflops-4K4K8K-rand-bf16.conf
63 | ./rvs -c ~/gst-581Tflops-4K4K8K-rand-bf16.conf
64 | ```
65 | 
66 | 
67 | 
68 | 


--------------------------------------------------------------------------------
/rocm/build-and-push.rocm.sh:
--------------------------------------------------------------------------------
 1 | #/bin/bash
 2 | set -e
 3 | 
 4 | cd $(dirname $0)
 5 | source ../env.sh
 6 | 
 7 | IMAGE_TAGS=(
 8 |   "$PATCHED_ROCM_IMAGE:${ROCM_VERSION}-${REPO_GIT_REF}-complete"
 9 |   "$PATCHED_ROCM_IMAGE:${ROCM_VERSION}-complete"
10 | )
11 | 
12 | if docker_image_pushed ${IMAGE_TAGS[0]}; then
13 |   echo "${IMAGE_TAGS[0]} already in registry. Skip"
14 |   exit 0
15 | fi
16 | 
17 | DOCKER_EXTRA_ARGS=()
18 | for (( i=0; i<${#IMAGE_TAGS[@]}; i++ )); do
19 |   DOCKER_EXTRA_ARGS+=("-t" "${IMAGE_TAGS[$i]}")
20 | done
21 | 
22 | mkdir ./logs || true
23 | docker buildx build ${DOCKER_EXTRA_ARGS[@]} --push \
24 |   --build-arg BASE_ROCM_IMAGE="${BASE_ROCM_IMAGE}:${ROCM_IMAGE_VER}-complete" \
25 |   --build-arg ROCM_ARCH="${ROCM_ARCH}" \
26 |   --target final -f ./rocm.Dockerfile --progress=plain ./submodules 2>&1 | tee ./logs/build_$(date +%Y%m%d%H%M%S).log
27 | 


--------------------------------------------------------------------------------
/rocm/env.sh:
--------------------------------------------------------------------------------
 1 | #/bin/bash
 2 | 
 3 | pushd $(dirname ${BASH_SOURCE[0]})
 4 | 
 5 | # value from tag https://hub.docker.com/r/rocm/dev-ubuntu-24.04/tags e.g. 7.0/6.4.4
 6 | if [ "$ROCM_VERSION" == "" ]; then
 7 |   ROCM_VERSION=6.3.3
 8 | fi
 9 | if [ "$ROCM_IMAGE_VER" == "" ]; then
10 |   ROCM_IMAGE_VER=6.3.3
11 | fi
12 | 
13 | # target arch
14 | if [ "$ROCM_ARCH" == "" ]; then
15 |   ROCM_ARCH=gfx906
16 | fi
17 | 
18 | # source image
19 | if [ "$BASE_ROCM_IMAGE" == "" ]; then
20 |   BASE_ROCM_IMAGE=docker.io/rocm/dev-ubuntu-24.04
21 | fi
22 | 
23 | # destination image
24 | if [ "$PATCHED_ROCM_IMAGE" == "" ]; then
25 |   PATCHED_ROCM_IMAGE=docker.io/mixa3607/rocm-gfx906
26 |   #PATCHED_ROCM_IMAGE=registry.arkprojects.space/apps/rocm-gfx906
27 | fi
28 | 
29 | popd
30 | 


--------------------------------------------------------------------------------
/rocm/preset.rocm-6.3.3.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | 
3 | export ROCM_VERSION="6.3.3"
4 | export ROCM_IMAGE_VER="6.3.3"
5 | 


--------------------------------------------------------------------------------
/rocm/preset.rocm-6.4.4.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | 
3 | export ROCM_VERSION="6.4.4"
4 | export ROCM_IMAGE_VER="6.4.4"
5 | 


--------------------------------------------------------------------------------
/rocm/preset.rocm-7.0.0.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | 
3 | export ROCM_VERSION="7.0.0"
4 | export ROCM_IMAGE_VER="7.0"
5 | 


--------------------------------------------------------------------------------
/rocm/preset.rocm-7.0.2.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | 
3 | export ROCM_VERSION="7.0.2"
4 | export ROCM_IMAGE_VER="7.0.2"
5 | 


--------------------------------------------------------------------------------
/rocm/readme.md:
--------------------------------------------------------------------------------
 1 | # ROCm GFX906
 2 | Open software stack that includes programming models, tools, compilers, libraries, and runtimes for AI and HPC solution development on AMD GPUs.
 3 | In 6.4+ gfx906 support was dropped but may be manually compiled.
 4 | 
 5 | At this moment rebuild:
 6 | - rccl
 7 | - rocblas+tensile
 8 | 
 9 | Recommend use `docker.io/mixa3607/rocm-gfx906:6.4.4-complete`
10 | 
11 | ## Run
12 | ### Docker
13 | TODO
14 | 
15 | ### Kubernetes
16 | ```yaml
17 | apiVersion: apps/v1
18 | kind: Deployment
19 | metadata:
20 |   name: rocmdev
21 |   namespace: ns-vllm
22 |   labels:
23 |     app: rocmdev
24 | spec:
25 |   strategy:
26 |     type: Recreate
27 |   replicas: 1
28 |   selector:
29 |     matchLabels:
30 |       app: rocmdev
31 |   template:
32 |     metadata:
33 |       labels:
34 |         app: rocmdev
35 |     spec:
36 |       containers:
37 |         - name: rocmdev
38 |           image: docker.io.mixa3607/rocm-gfx906:7.0.0-20251005035204-complete
39 |           imagePullPolicy: Always
40 |           securityContext:
41 |             privileged: true
42 |             runAsNonRoot: false
43 |             runAsGroup: 0
44 |             runAsUser: 0
45 |           command: [ "/bin/bash", "-c" ]
46 |           args:
47 |             - "apt install tmux wget -y; wget https://gist.githubusercontent.com/mixa3607/1e6d3ee7d87b018484cf80c7928b4c33/raw/.tmux.conf -O ~/.tmux.conf; while true; do sleep 1s; done;"
48 |             #- sleep inf
49 | ```
50 | 
51 | ## Build
52 | See build vars in `./env.sh`. You also may use presetis `./preset.rocm-*.sh`. Exec `./build-and-push.rocm.sh`:
53 | ```bash
54 | $ . preset.rocm-7.0.0.sh
55 | $ ./build-and-push.rocm.sh
56 | ~/REPOS/mixa3607/llama.cpp-gfx906/rocm ~/REPOS/mixa3607/llama.cpp-gfx906/rocm
57 | ~/REPOS/mixa3607/llama.cpp-gfx906/rocm
58 | ~/REPOS/mixa3607/llama.cpp-gfx906/llama.cpp ~/REPOS/mixa3607/llama.cpp-gfx906/rocm
59 | ~/REPOS/mixa3607/llama.cpp-gfx906/rocm
60 | ~/REPOS/mixa3607/llama.cpp-gfx906/comfyui ~/REPOS/mixa3607/llama.cpp-gfx906/rocm
61 | ~/REPOS/mixa3607/llama.cpp-gfx906/rocm
62 | ~/REPOS/mixa3607/llama.cpp-gfx906/vllm ~/REPOS/mixa3607/llama.cpp-gfx906/rocm
63 | ~/REPOS/mixa3607/llama.cpp-gfx906/rocm
64 | #0 building with "remote" instance using remote driver
65 | 
66 | #1 [internal] load build definition from rocm.Dockerfile
67 | #1 transferring dockerfile: 4.95kB done
68 | #1 DONE 0.0s
69 | 
70 | #2 [auth] dockerio-proxy/rocm/dev-ubuntu-24.04:pull rocm/dev-ubuntu-24.04:pull token for registry.arkprojects.space
71 | #2 DONE 0.0s
72 | 
73 | #3 [internal] load metadata for docker.io/rocm/dev-ubuntu-24.04:7.0-complete
74 | #3 DONE 1.8s
75 | 
76 | #4 [internal] load .dockerignore
77 | #4 transferring context: 2B done
78 | #...............
79 | #24 exporting to image
80 | #24 pushing layers 6.5s done
81 | #24 pushing manifest for docker.io/mixa3607/rocm-gfx906:7.0.0-20251005035204-complete@sha256:00532f62462e80d51e48b021afb7875af53164455c84dc28b24eb29d39aa0005
82 | #24 pushing manifest for docker.io/mixa3607/rocm-gfx906:7.0.0-20251005035204-complete@sha256:00532f62462e80d51e48b021afb7875af53164455c84dc28b24eb29d39aa0005 3.3s done
83 | #24 pushing layers 2.0s done
84 | #24 pushing manifest for docker.io/mixa3607/rocm-gfx906:7.0.0-complete@sha256:00532f62462e80d51e48b021afb7875af53164455c84dc28b24eb29d39aa0005
85 | #24 pushing manifest for docker.io/mixa3607/rocm-gfx906:7.0.0-complete@sha256:00532f62462e80d51e48b021afb7875af53164455c84dc28b24eb29d39aa0005 2.2s done
86 | #24 DONE 17.6s
87 | ```
88 | 


--------------------------------------------------------------------------------
/rocm/rocm.Dockerfile:
--------------------------------------------------------------------------------
 1 | ARG ROCM_ARCH="gfx906"
 2 | ARG BASE_ROCM_IMAGE="rocm/dev-ubuntu-24.04:6.4.4-complete"
 3 | ARG ROCBLAS_REPO="https://github.com/ROCm/rocBLAS"
 4 | ARG TENSILE_REPO="https://github.com/ROCm/Tensile"
 5 | ARG RCCL_REPO="https://github.com/ROCm/rccl"
 6 | 
 7 | ############# Base image #############
 8 | FROM ${BASE_ROCM_IMAGE} AS rocm_base
 9 | # ROCm ver
10 | RUN ROCM_VERSION_MAJOR=$(ls /opt/ | sed -nE 's|rocm-([0-9]+)\.([0-9]+)\.([0-9]+)|\1|1p') && \
11 |     ROCM_VERSION_MINOR=$(ls /opt/ | sed -nE 's|rocm-([0-9]+)\.([0-9]+)\.([0-9]+)|\2|1p') && \
12 |     ROCM_VERSION_PATCH=$(ls /opt/ | sed -nE 's|rocm-([0-9]+)\.([0-9]+)\.([0-9]+)|\3|1p') && \
13 |     echo "$ROCM_VERSION_MAJOR" > /opt/ROCM_VERSION_MAJOR && \
14 |     echo "$ROCM_VERSION_MINOR" > /opt/ROCM_VERSION_MINOR && \
15 |     echo "$ROCM_VERSION_PATCH" > /opt/ROCM_VERSION_PATCH && \
16 |     echo "$ROCM_VERSION_MAJOR.$ROCM_VERSION_MINOR" > /opt/ROCM_VERSION && \
17 |     echo "$ROCM_VERSION_MAJOR.$ROCM_VERSION_MINOR.$ROCM_VERSION_PATCH" > /opt/ROCM_VERSION_FULL && \
18 |     echo "Detected rocm version is $(cat /opt/ROCM_VERSION_FULL)" && \
19 |     true
20 | 
21 | ############# Build base #############
22 | FROM rocm_base AS build_base
23 | RUN apt-get update && apt-get install -y git cmake libfmt-dev
24 | WORKDIR /rebuild-deps
25 | 
26 | ############# Build rocBLAS #############
27 | FROM build_base AS build_rocblas
28 | ARG ROCBLAS_REPO
29 | ARG TENSILE_REPO
30 | RUN git clone --depth 1 --branch rocm-$(cat /opt/ROCM_VERSION_FULL) ${ROCBLAS_REPO} rocBLAS && \
31 |     git clone --depth 1 --branch rocm-$(cat /opt/ROCM_VERSION_FULL) ${TENSILE_REPO} Tensile && \
32 |     true
33 | 
34 | WORKDIR /rebuild-deps/rocBLAS
35 | ARG ROCM_ARCH
36 | ENV PACKAGE_NAME=rocblas
37 | RUN dpkg -s ${PACKAGE_NAME}
38 | RUN ./install.sh --dependencies --rmake_invoked
39 | RUN export INSTALLED_PACKAGE_VERSION=$(dpkg -s ${PACKAGE_NAME} | sed -nE 's|^ *Version: (.+)$|\1|p') && \
40 |     echo "Installed package version is \"$INSTALLED_PACKAGE_VERSION\"" && \
41 |     export ROCM_LIBPATCH_VERSION=$(echo "$INSTALLED_PACKAGE_VERSION" | sed -E 's|^([0-9]+)\.([0-9]+)\.([0-9]+)\.([0-9]+)-(.*)|\4|1') && \
42 |     echo "Set ROCM_LIBPATCH_VERSION to \"$ROCM_LIBPATCH_VERSION\"" && \
43 |     export CPACK_DEBIAN_PACKAGE_RELEASE=$(echo "$INSTALLED_PACKAGE_VERSION" | sed -E 's|^([0-9]+)\.([0-9]+)\.([0-9]+)\.([0-9]+)-(.*)|\5|1') && \
44 |     echo "Set CPACK_DEBIAN_PACKAGE_RELEASE to \"$CPACK_DEBIAN_PACKAGE_RELEASE\"" && \
45 |     python3 ./rmake.py \
46 |       --install_invoked \
47 |       --build_dir=$(realpath ./build) \
48 |       --src_path=$(realpath .) \
49 |       --architecture ${ROCM_ARCH} \
50 |       --test_local_path=$(realpath ../Tensile) && \
51 |     cd ./build/release  && \
52 |     make package && \
53 |     mkdir -p /dist && cp *.deb /dist && \
54 |     true
55 | RUN cd ./build/release && \
56 |     export INSTALLED_PACKAGE_VERSION=$(dpkg -s ${PACKAGE_NAME} | sed -nE 's|^ *Version: (.+)$|\1|p') && \
57 |     export BUILDED_PACKAGE_VERSION=$(dpkg -I /dist/${PACKAGE_NAME}_*.deb | sed -nE 's|^ *Version: (.+)$|\1|p') && \
58 |     if [ "$BUILDED_PACKAGE_VERSION" != "$INSTALLED_PACKAGE_VERSION" ]; then echo "ERR: Builded version is $BUILDED_PACKAGE_VERSION but expected $INSTALLED_PACKAGE_VERSION"; exit 10; fi && \
59 |     true
60 | 
61 | ############# Build rccl #############
62 | FROM build_base AS build_rccl
63 | ARG RCCL_REPO
64 | RUN git clone --depth 1 --branch rocm-$(cat /opt/ROCM_VERSION_FULL) ${RCCL_REPO} rccl && \
65 |     true
66 | 
67 | WORKDIR /rebuild-deps/rccl
68 | ARG ROCM_ARCH
69 | ENV PACKAGE_NAME=rccl
70 | RUN dpkg -s ${PACKAGE_NAME}
71 | RUN export INSTALLED_PACKAGE_VERSION=$(dpkg -s ${PACKAGE_NAME} | sed -nE 's|^ *Version: (.+)$|\1|p') && \
72 |     echo "Installed package version is \"$INSTALLED_PACKAGE_VERSION\"" && \
73 |     export ROCM_LIBPATCH_VERSION=$(echo "$INSTALLED_PACKAGE_VERSION" | sed -E 's|^([0-9]+)\.([0-9]+)\.([0-9]+)\.([0-9]+)-(.*)|\4|1') && \
74 |     echo "Set ROCM_LIBPATCH_VERSION to \"$ROCM_LIBPATCH_VERSION\"" && \
75 |     export CPACK_DEBIAN_PACKAGE_RELEASE=$(echo "$INSTALLED_PACKAGE_VERSION" | sed -E 's|^([0-9]+)\.([0-9]+)\.([0-9]+)\.([0-9]+)-(.*)|\5|1') && \
76 |     echo "Set CPACK_DEBIAN_PACKAGE_RELEASE to \"$CPACK_DEBIAN_PACKAGE_RELEASE\"" && \
77 |     ./install.sh --package_build --amdgpu_targets ${ROCM_ARCH} && \
78 |     mkdir -p /dist && cp ./build/release/*.deb /dist && \
79 |     true
80 | RUN cd ./build/release && \
81 |     export INSTALLED_PACKAGE_VERSION=$(dpkg -s ${PACKAGE_NAME} | sed -nE 's|^ *Version: (.+)$|\1|p') && \
82 |     export BUILDED_PACKAGE_VERSION=$(dpkg -I /dist/${PACKAGE_NAME}_*.deb | sed -nE 's|^ *Version: (.+)$|\1|p') && \
83 |     if [ "$BUILDED_PACKAGE_VERSION" != "$INSTALLED_PACKAGE_VERSION" ]; then echo "ERR: Builded version is $BUILDED_PACKAGE_VERSION but expected $INSTALLED_PACKAGE_VERSION"; exit 10; fi && \
84 |     true
85 | 
86 | ############# Patched image #############
87 | FROM rocm_base AS final
88 | RUN apt-get update && apt-get install -y libfmt-dev
89 | # Install rocblas 
90 | RUN --mount=type=bind,from=build_rocblas,src=/dist/,target=/dist \
91 |     dpkg -i /dist/*.deb
92 | # Install rccl
93 | RUN --mount=type=bind,from=build_rccl,src=/dist/,target=/dist \
94 |     dpkg -i /dist/*.deb
95 | 
96 | # Validate apt deps state
97 | RUN apt-get install
98 | 


--------------------------------------------------------------------------------
/rocm/submodules/.gitkeep:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mixa3607/ML-gfx906/d95fce7ed5e14ec9cc7b801c668696194a929cda/rocm/submodules/.gitkeep


--------------------------------------------------------------------------------
/vllm/benchmark/ResultsConverter/.gitignore:
--------------------------------------------------------------------------------
  1 | ## Ignore Visual Studio temporary files, build results, and
  2 | ## files generated by popular Visual Studio add-ons.
  3 | ##
  4 | ## Get latest from https://github.com/github/gitignore/blob/master/VisualStudio.gitignore
  5 | 
  6 | # User-specific files
  7 | *.rsuser
  8 | *.suo
  9 | *.user
 10 | *.userosscache
 11 | *.sln.docstates
 12 | 
 13 | # User-specific files (MonoDevelop/Xamarin Studio)
 14 | *.userprefs
 15 | 
 16 | # Mono auto generated files
 17 | mono_crash.*
 18 | 
 19 | # Build results
 20 | [Dd]ebug/
 21 | [Dd]ebugPublic/
 22 | [Rr]elease/
 23 | [Rr]eleases/
 24 | x64/
 25 | x86/
 26 | [Ww][Ii][Nn]32/
 27 | [Aa][Rr][Mm]/
 28 | [Aa][Rr][Mm]64/
 29 | bld/
 30 | [Bb]in/
 31 | [Oo]bj/
 32 | [Oo]ut/
 33 | [Ll]og/
 34 | [Ll]ogs/
 35 | 
 36 | # Visual Studio 2015/2017 cache/options directory
 37 | .vs/
 38 | # Uncomment if you have tasks that create the project's static files in wwwroot
 39 | #wwwroot/
 40 | 
 41 | # Visual Studio 2017 auto generated files
 42 | Generated\ Files/
 43 | 
 44 | # MSTest test Results
 45 | [Tt]est[Rr]esult*/
 46 | [Bb]uild[Ll]og.*
 47 | 
 48 | # NUnit
 49 | *.VisualState.xml
 50 | TestResult.xml
 51 | nunit-*.xml
 52 | 
 53 | # Build Results of an ATL Project
 54 | [Dd]ebugPS/
 55 | [Rr]eleasePS/
 56 | dlldata.c
 57 | 
 58 | # Benchmark Results
 59 | BenchmarkDotNet.Artifacts/
 60 | 
 61 | # .NET Core
 62 | project.lock.json
 63 | project.fragment.lock.json
 64 | artifacts/
 65 | 
 66 | # ASP.NET Scaffolding
 67 | ScaffoldingReadMe.txt
 68 | 
 69 | # StyleCop
 70 | StyleCopReport.xml
 71 | 
 72 | # Files built by Visual Studio
 73 | *_i.c
 74 | *_p.c
 75 | *_h.h
 76 | *.ilk
 77 | *.meta
 78 | *.obj
 79 | *.iobj
 80 | *.pch
 81 | *.pdb
 82 | *.ipdb
 83 | *.pgc
 84 | *.pgd
 85 | *.rsp
 86 | *.sbr
 87 | *.tlb
 88 | *.tli
 89 | *.tlh
 90 | *.tmp
 91 | *.tmp_proj
 92 | *_wpftmp.csproj
 93 | *.log
 94 | *.vspscc
 95 | *.vssscc
 96 | .builds
 97 | *.pidb
 98 | *.svclog
 99 | *.scc
100 | 
101 | # Chutzpah Test files
102 | _Chutzpah*
103 | 
104 | # Visual C++ cache files
105 | ipch/
106 | *.aps
107 | *.ncb
108 | *.opendb
109 | *.opensdf
110 | *.sdf
111 | *.cachefile
112 | *.VC.db
113 | *.VC.VC.opendb
114 | 
115 | # Visual Studio profiler
116 | *.psess
117 | *.vsp
118 | *.vspx
119 | *.sap
120 | 
121 | # Visual Studio Trace Files
122 | *.e2e
123 | 
124 | # TFS 2012 Local Workspace
125 | $tf/
126 | 
127 | # Guidance Automation Toolkit
128 | *.gpState
129 | 
130 | # ReSharper is a .NET coding add-in
131 | _ReSharper*/
132 | *.[Rr]e[Ss]harper
133 | *.DotSettings.user
134 | 
135 | # TeamCity is a build add-in
136 | _TeamCity*
137 | 
138 | # DotCover is a Code Coverage Tool
139 | *.dotCover
140 | 
141 | # AxoCover is a Code Coverage Tool
142 | .axoCover/*
143 | !.axoCover/settings.json
144 | 
145 | # Coverlet is a free, cross platform Code Coverage Tool
146 | coverage*.json
147 | coverage*.xml
148 | coverage*.info
149 | 
150 | # Visual Studio code coverage results
151 | *.coverage
152 | *.coveragexml
153 | 
154 | # NCrunch
155 | _NCrunch_*
156 | .*crunch*.local.xml
157 | nCrunchTemp_*
158 | 
159 | # MightyMoose
160 | *.mm.*
161 | AutoTest.Net/
162 | 
163 | # Web workbench (sass)
164 | .sass-cache/
165 | 
166 | # Installshield output folder
167 | [Ee]xpress/
168 | 
169 | # DocProject is a documentation generator add-in
170 | DocProject/buildhelp/
171 | DocProject/Help/*.HxT
172 | DocProject/Help/*.HxC
173 | DocProject/Help/*.hhc
174 | DocProject/Help/*.hhk
175 | DocProject/Help/*.hhp
176 | DocProject/Help/Html2
177 | DocProject/Help/html
178 | 
179 | # Click-Once directory
180 | publish/
181 | 
182 | # Publish Web Output
183 | *.[Pp]ublish.xml
184 | *.azurePubxml
185 | # Note: Comment the next line if you want to checkin your web deploy settings,
186 | # but database connection strings (with potential passwords) will be unencrypted
187 | *.pubxml
188 | *.publishproj
189 | 
190 | # Microsoft Azure Web App publish settings. Comment the next line if you want to
191 | # checkin your Azure Web App publish settings, but sensitive information contained
192 | # in these scripts will be unencrypted
193 | PublishScripts/
194 | 
195 | # NuGet Packages
196 | *.nupkg
197 | # NuGet Symbol Packages
198 | *.snupkg
199 | # The packages folder can be ignored because of Package Restore
200 | **/[Pp]ackages/*
201 | # except build/, which is used as an MSBuild target.
202 | !**/[Pp]ackages/build/
203 | # Uncomment if necessary however generally it will be regenerated when needed
204 | #!**/[Pp]ackages/repositories.config
205 | # NuGet v3's project.json files produces more ignorable files
206 | *.nuget.props
207 | *.nuget.targets
208 | 
209 | # Microsoft Azure Build Output
210 | csx/
211 | *.build.csdef
212 | 
213 | # Microsoft Azure Emulator
214 | ecf/
215 | rcf/
216 | 
217 | # Windows Store app package directories and files
218 | AppPackages/
219 | BundleArtifacts/
220 | Package.StoreAssociation.xml
221 | _pkginfo.txt
222 | *.appx
223 | *.appxbundle
224 | *.appxupload
225 | 
226 | # Visual Studio cache files
227 | # files ending in .cache can be ignored
228 | *.[Cc]ache
229 | # but keep track of directories ending in .cache
230 | !?*.[Cc]ache/
231 | 
232 | # Others
233 | ClientBin/
234 | ~$*
235 | *~
236 | *.dbmdl
237 | *.dbproj.schemaview
238 | *.jfm
239 | *.pfx
240 | *.publishsettings
241 | orleans.codegen.cs
242 | 
243 | # Including strong name files can present a security risk
244 | # (https://github.com/github/gitignore/pull/2483#issue-259490424)
245 | #*.snk
246 | 
247 | # Since there are multiple workflows, uncomment next line to ignore bower_components
248 | # (https://github.com/github/gitignore/pull/1529#issuecomment-104372622)
249 | #bower_components/
250 | 
251 | # RIA/Silverlight projects
252 | Generated_Code/
253 | 
254 | # Backup & report files from converting an old project file
255 | # to a newer Visual Studio version. Backup files are not needed,
256 | # because we have git ;-)
257 | _UpgradeReport_Files/
258 | Backup*/
259 | UpgradeLog*.XML
260 | UpgradeLog*.htm
261 | ServiceFabricBackup/
262 | *.rptproj.bak
263 | 
264 | # SQL Server files
265 | *.mdf
266 | *.ldf
267 | *.ndf
268 | 
269 | # Business Intelligence projects
270 | *.rdl.data
271 | *.bim.layout
272 | *.bim_*.settings
273 | *.rptproj.rsuser
274 | *- [Bb]ackup.rdl
275 | *- [Bb]ackup ([0-9]).rdl
276 | *- [Bb]ackup ([0-9][0-9]).rdl
277 | 
278 | # Microsoft Fakes
279 | FakesAssemblies/
280 | 
281 | # GhostDoc plugin setting file
282 | *.GhostDoc.xml
283 | 
284 | # Node.js Tools for Visual Studio
285 | .ntvs_analysis.dat
286 | node_modules/
287 | 
288 | # Visual Studio 6 build log
289 | *.plg
290 | 
291 | # Visual Studio 6 workspace options file
292 | *.opt
293 | 
294 | # Visual Studio 6 auto-generated workspace file (contains which files were open etc.)
295 | *.vbw
296 | 
297 | # Visual Studio LightSwitch build output
298 | **/*.HTMLClient/GeneratedArtifacts
299 | **/*.DesktopClient/GeneratedArtifacts
300 | **/*.DesktopClient/ModelManifest.xml
301 | **/*.Server/GeneratedArtifacts
302 | **/*.Server/ModelManifest.xml
303 | _Pvt_Extensions
304 | 
305 | # Paket dependency manager
306 | .paket/paket.exe
307 | paket-files/
308 | 
309 | # FAKE - F# Make
310 | .fake/
311 | 
312 | # CodeRush personal settings
313 | .cr/personal
314 | 
315 | # Python Tools for Visual Studio (PTVS)
316 | __pycache__/
317 | *.pyc
318 | 
319 | # Cake - Uncomment if you are using it
320 | # tools/**
321 | # !tools/packages.config
322 | 
323 | # Tabs Studio
324 | *.tss
325 | 
326 | # Telerik's JustMock configuration file
327 | *.jmconfig
328 | 
329 | # BizTalk build output
330 | *.btp.cs
331 | *.btm.cs
332 | *.odx.cs
333 | *.xsd.cs
334 | 
335 | # OpenCover UI analysis results
336 | OpenCover/
337 | 
338 | # Azure Stream Analytics local run output
339 | ASALocalRun/
340 | 
341 | # MSBuild Binary and Structured Log
342 | *.binlog
343 | 
344 | # NVidia Nsight GPU debugger configuration file
345 | *.nvuser
346 | 
347 | # MFractors (Xamarin productivity tool) working folder
348 | .mfractor/
349 | 
350 | # Local History for Visual Studio
351 | .localhistory/
352 | 
353 | # BeatPulse healthcheck temp database
354 | healthchecksdb
355 | 
356 | # Backup folder for Package Reference Convert tool in Visual Studio 2017
357 | MigrationBackup/
358 | 
359 | # Ionide (cross platform F# VS Code tools) working folder
360 | .ionide/
361 | 
362 | # Fody - auto-generated XML schema
363 | FodyWeavers.xsd


--------------------------------------------------------------------------------
/vllm/benchmark/ResultsConverter/ResultsConverter.sln:
--------------------------------------------------------------------------------
 1 | ﻿
 2 | Microsoft Visual Studio Solution File, Format Version 12.00
 3 | # Visual Studio Version 17
 4 | VisualStudioVersion = 17.14.36414.22 d17.14
 5 | MinimumVisualStudioVersion = 10.0.40219.1
 6 | Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "ResultsConverter", "ResultsConverter\ResultsConverter.csproj", "{F1ADC5F6-4208-4BF3-9612-A30E48364174}"
 7 | EndProject
 8 | Global
 9 | 	GlobalSection(SolutionConfigurationPlatforms) = preSolution
10 | 		Debug|Any CPU = Debug|Any CPU
11 | 		Release|Any CPU = Release|Any CPU
12 | 	EndGlobalSection
13 | 	GlobalSection(ProjectConfigurationPlatforms) = postSolution
14 | 		{F1ADC5F6-4208-4BF3-9612-A30E48364174}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
15 | 		{F1ADC5F6-4208-4BF3-9612-A30E48364174}.Debug|Any CPU.Build.0 = Debug|Any CPU
16 | 		{F1ADC5F6-4208-4BF3-9612-A30E48364174}.Release|Any CPU.ActiveCfg = Release|Any CPU
17 | 		{F1ADC5F6-4208-4BF3-9612-A30E48364174}.Release|Any CPU.Build.0 = Release|Any CPU
18 | 	EndGlobalSection
19 | 	GlobalSection(SolutionProperties) = preSolution
20 | 		HideSolutionNode = FALSE
21 | 	EndGlobalSection
22 | 	GlobalSection(ExtensibilityGlobals) = postSolution
23 | 		SolutionGuid = {41BCD38E-4CD3-453E-9363-9DAE08F9519C}
24 | 	EndGlobalSection
25 | EndGlobal
26 | 


--------------------------------------------------------------------------------
/vllm/benchmark/ResultsConverter/ResultsConverter/MarkdownTableBuilder.cs:
--------------------------------------------------------------------------------
  1 | ﻿using System.Text;
  2 | 
  3 | // https://github.com/marcolink/MarkdownTable
  4 | namespace MarkdownTable
  5 | {
  6 |     public class MarkdownTableBuilder
  7 |     {
  8 |         private string[] header = { };
  9 |         private readonly List<object[]> rows = new List<object[]>();
 10 | 
 11 |         private readonly char verticalChar;
 12 |         private readonly char horizontalChar;
 13 |         private readonly char outerBorderChar;
 14 |         private readonly int padding;
 15 |         private readonly int minColumnWidth;
 16 |         private readonly StringBuilder rowBuilder;
 17 | 
 18 |         private enum Align
 19 |         {
 20 |             Left,
 21 |             Right,
 22 |             Center
 23 |         }
 24 | 
 25 |         public MarkdownTableBuilder()
 26 |         {
 27 |             rowBuilder = new StringBuilder();
 28 |             horizontalChar = '-';
 29 |             outerBorderChar = ' ';
 30 |             verticalChar = '|';
 31 |             padding = 1;
 32 |         }
 33 | 
 34 |         #region Interface
 35 | 
 36 |         public MarkdownTableBuilder WithHeader(params string[] header)
 37 |         {
 38 |             this.header = header;
 39 |             return this;
 40 |         }
 41 | 
 42 |         public MarkdownTableBuilder WithRow(params object[] row)
 43 |         {
 44 |             rows.Add(row);
 45 |             return this;
 46 |         }
 47 | 
 48 |         public MarkdownTableBuilder Clear()
 49 |         {
 50 |             header = new string[] { };
 51 |             rows.Clear();
 52 |             return this;
 53 |         }
 54 | 
 55 |         public override string ToString()
 56 |         {
 57 |             var output = new StringBuilder();
 58 |             var maxCols = MaxColumns();
 59 | 
 60 |             if (header.Length > 0)
 61 |             {
 62 |                 output.AppendLine(Row(header, maxCols));
 63 |             }
 64 | 
 65 |             output.AppendLine(HorizontalLine());
 66 | 
 67 |             rows.ForEach(row => { output.AppendLine(Row(row, maxCols)); });
 68 | 
 69 |             return output.ToString();
 70 |         }
 71 | 
 72 |         #endregion
 73 | 
 74 |         #region Calculation
 75 | 
 76 |         private int ColumnWidth(int index)
 77 |         {
 78 |             var width = 1;
 79 | 
 80 |             if (header != null && index < header.Length)
 81 |             {
 82 |                 width = header[index].Length;
 83 |             }
 84 | 
 85 |             return Column(index).Length == 0
 86 |                 ? 1
 87 |                 : Math.Max(width,
 88 |                     Column(index).Max(r => r != null ? r.Length : 0));
 89 |         }
 90 | 
 91 |         private int[] SizeRow()
 92 |         {
 93 |             var row = new List<int>();
 94 |             var maxCols = MaxColumns();
 95 |             for (var i = 0; i < maxCols; i++)
 96 |             {
 97 |                 row.Add(ColumnWidth(i));
 98 |             }
 99 | 
100 |             return row.ToArray();
101 |         }
102 | 
103 |         private Align[] AlignmentRow()
104 |         {
105 |             var row = new List<Align>();
106 |             var maxCols = MaxColumns();
107 | 
108 |             for (var i = 0; i < maxCols; i++)
109 |             {
110 |                 var alignment = Align.Left;
111 | 
112 |                 row.Add(alignment);
113 |             }
114 | 
115 |             return row.ToArray();
116 |         }
117 | 
118 |         private int MaxColumns()
119 |         {
120 |             var result = 0;
121 |             if (header != null)
122 |             {
123 |                 result = header.Length;
124 |             }
125 | 
126 |             rows.ForEach(row => { result = Math.Max(row.Length, result); });
127 |             return result;
128 |         }
129 | 
130 |         private string[] Column(int index)
131 |         {
132 |             var column = new List<string>();
133 |             rows.ForEach(row => { column.Add(index < row.Length ? row[index].ToString() : null); });
134 |             return column.ToArray();
135 |         }
136 | 
137 |         #endregion
138 | 
139 |         #region Creation
140 | 
141 |         private static string Fill(int size, char fillChar = ' ')
142 |         {
143 |             return new string(fillChar, Math.Max(size, 0));
144 |         }
145 | 
146 |         private string HorizontalLine()
147 |         {
148 |             var format = Fill(1, outerBorderChar) + "{0}" + Fill(1, outerBorderChar);
149 |             var content = SizeRow()
150 |                 .Select(col => Fill(col + 2 * padding, horizontalChar))
151 |                 .Aggregate((a, b) => a + Fill(1, verticalChar) + b);
152 |             return string.Format(format, content);
153 |         }
154 | 
155 |         private string Row(object[] row, int maxCols, Align align = Align.Left)
156 |         {
157 |             rowBuilder.Length = 0;
158 |             rowBuilder.Append(outerBorderChar);
159 | 
160 |             for (var i = 0; i < row.Length; i++)
161 |             {
162 |                 var maxColWidth = ColumnWidth(i);
163 |                 var format = "{0,-" + maxColWidth + "}";
164 | 
165 |                 rowBuilder.Append(Fill(padding));
166 |                 rowBuilder.Append(string.Format(format, row[i]));
167 |                 rowBuilder.Append(Fill(padding));
168 |                 rowBuilder.Append(i == maxCols - 1 ? outerBorderChar : verticalChar);
169 |             }
170 | 
171 |             var j = row.Length - 1;
172 |             while (j++ < maxCols - 1)
173 |             {
174 |                 var maxColWidth = ColumnWidth(j);
175 |                 rowBuilder.Append(Fill(maxColWidth + 2 * padding));
176 |                 rowBuilder.Append(j == maxCols - 1 ? outerBorderChar : verticalChar);
177 |             }
178 | 
179 |             return rowBuilder.ToString();
180 |         }
181 | 
182 |         #endregion
183 |     }
184 | }


--------------------------------------------------------------------------------
/vllm/benchmark/ResultsConverter/ResultsConverter/MarkdownTableBuilderExtensions.cs:
--------------------------------------------------------------------------------
 1 | ﻿
 2 | // https://github.com/marcolink/MarkdownTable
 3 | namespace MarkdownTable
 4 | {
 5 |     public static class MarkdownTableBuilderExtensions
 6 |     {
 7 |         public static string ToMardownTableString<T>(this IEnumerable<T> rows)
 8 |         {
 9 |             var builder = new MarkdownTableBuilder();
10 |             var properties = typeof(T).GetProperties().Where(p => p.PropertyType.IsRenderable()).ToArray();
11 |             var fields = typeof(T).GetFields().Where(f => f.FieldType.IsRenderable()).ToArray();
12 | 
13 |             builder.WithHeader(properties.Select(p => p.Name).Concat(fields.Select(f => f.Name)).ToArray());
14 | 
15 |             foreach (var row in rows)
16 |             {
17 |                 builder.WithRow(properties.Select(p => p.GetValue(row, null))
18 |                     .Concat(fields.Select(f => f.GetValue(row))).ToArray());
19 |             }
20 | 
21 |             return builder.ToString();
22 |         }
23 | 
24 |         private static bool IsRenderable(this Type type)
25 |         {
26 |             return type.IsNumeric()
27 |                    || Type.GetTypeCode(type) == TypeCode.String
28 |                    || Type.GetTypeCode(type) == TypeCode.Boolean;
29 |         }
30 | 
31 |         private static bool IsNumeric(this Type type)
32 |         {
33 |             switch (Type.GetTypeCode(type))
34 |             {
35 |                 case TypeCode.Decimal:
36 |                 case TypeCode.Double:
37 |                 case TypeCode.Single:
38 |                 case TypeCode.Byte:
39 |                 case TypeCode.Int16:
40 |                 case TypeCode.Int32:
41 |                 case TypeCode.Int64:
42 |                 case TypeCode.SByte:
43 |                 case TypeCode.UInt16:
44 |                 case TypeCode.UInt32:
45 |                 case TypeCode.UInt64:
46 |                     return true;
47 |                 case TypeCode.Object:
48 |                     if (type.IsGenericType && type.GetGenericTypeDefinition() == typeof(Nullable<>))
49 |                     {
50 |                         return Nullable.GetUnderlyingType(type).IsNumeric();
51 |                     }
52 |                     return false;
53 |                 default:
54 |                     return false;
55 |             }
56 |         }
57 |     }
58 | }


--------------------------------------------------------------------------------
/vllm/benchmark/ResultsConverter/ResultsConverter/Program.cs:
--------------------------------------------------------------------------------
 1 | ﻿// See https://aka.ms/new-console-template for more information
 2 | 
 3 | using System.CommandLine;
 4 | using System.Text.Json;
 5 | 
 6 | Console.WriteLine("Hello, World!");
 7 | var resultsDirOpt = new Option<string>("--results-dir", "-i")
 8 | {
 9 |     Required = true,
10 | };
11 | 
12 | var genTableCommand = new Command("gen-table", "Generate md table from results")
13 | {
14 |     Options = { resultsDirOpt }
15 | };
16 | genTableCommand.SetAction(result => GenerateTable(result.GetRequiredValue(resultsDirOpt)));
17 | 
18 | var rootCommand = new RootCommand();
19 | rootCommand.Subcommands.Add(genTableCommand);
20 | return rootCommand.Parse(args).Invoke();
21 | 
22 | 
23 | static void GenerateTable(string resultsDir)
24 | {
25 |     var results = new List<VllmBenchResult>();
26 |     foreach (var file in Directory.GetFiles(resultsDir, "*.json", SearchOption.TopDirectoryOnly))
27 |     {
28 |         Console.WriteLine($"Reading {file}");
29 |         results.Add(JsonSerializer.Deserialize<VllmBenchResult>(File.ReadAllText(file))!);
30 |     }
31 | 
32 |     results = results.OrderBy(x => x.Date).ToList();
33 | 
34 |     var table = new MarkdownTable.MarkdownTableBuilder();
35 |     table.WithHeader("date", "rocm", "torch", "vllm",
36 |         "triton", "TP", "PwrCap", "Model", "Prompts",
37 |         "Threads", "Duration", "RPS",
38 |         "Output TPS", "Total TPS", "About");
39 |     foreach (var result in results)
40 |     {
41 |         var fields = new List<string>();
42 |         fields.Add(result.Date);
43 |         fields.Add(result.MetadataRocmVer);
44 |         fields.Add(result.MetadataTorchVer);
45 |         fields.Add(result.MetadataVllmVer);
46 |         fields.Add(result.MetadataTritonVer);
47 |         fields.Add(result.MetadataTensorParallelism);
48 |         fields.Add(result.MetadataPowerCap);
49 |         fields.Add(result.ModelId);
50 |         fields.Add(result.NumPrompts.ToString());
51 |         fields.Add(result.MaxConcurrency.ToString());
52 | 
53 |         fields.Add(TimeSpan.FromSeconds(result.Duration).ToString());
54 |         fields.Add(result.RequestThroughput.ToString("N2"));
55 |         fields.Add(result.OutputThroughput.ToString("N2"));
56 |         fields.Add(result.TotalTokenThroughput.ToString("N2"));
57 | 
58 |         fields.Add(result.MetadataAbout);
59 |         //fields.Add(result.MetadataBenchmarkAuthor);
60 | 
61 |         table.WithRow(fields.ToArray());
62 |     }
63 | 
64 |     Console.WriteLine(table.ToString());
65 | }
66 | 


--------------------------------------------------------------------------------
/vllm/benchmark/ResultsConverter/ResultsConverter/Properties/launchSettings.json:
--------------------------------------------------------------------------------
1 | {
2 |   "profiles": {
3 |     "ResultsConverter": {
4 |       "commandName": "Project",
5 |       "commandLineArgs": "gen-table -i \\\\wsl.localhost\\Ubuntu\\home\\mixa3607\\REPOS\\mixa3607\\llama.cpp-gfx906\\vllm\\benchmark\\results"
6 |     }
7 |   }
8 | }


--------------------------------------------------------------------------------
/vllm/benchmark/ResultsConverter/ResultsConverter/ResultsConverter.csproj:
--------------------------------------------------------------------------------
 1 | ﻿<Project Sdk="Microsoft.NET.Sdk">
 2 | 
 3 |   <PropertyGroup>
 4 |     <OutputType>Exe</OutputType>
 5 |     <TargetFramework>net8.0</TargetFramework>
 6 |     <ImplicitUsings>enable</ImplicitUsings>
 7 |     <Nullable>enable</Nullable>
 8 |   </PropertyGroup>
 9 | 
10 |   <ItemGroup>
11 |     <PackageReference Include="System.CommandLine" Version="2.0.0-rc.1.25451.107" />
12 |   </ItemGroup>
13 | 
14 | </Project>
15 | 


--------------------------------------------------------------------------------
/vllm/benchmark/ResultsConverter/ResultsConverter/VllmBenchResult.cs:
--------------------------------------------------------------------------------
  1 | ﻿using System.Text.Json.Serialization;
  2 | 
  3 | public partial class VllmBenchResult
  4 | {
  5 |     [JsonPropertyName("date")]
  6 |     public string Date { get; set; }
  7 | 
  8 |     [JsonPropertyName("endpoint_type")]
  9 |     public string EndpointType { get; set; }
 10 | 
 11 |     [JsonPropertyName("label")]
 12 |     public object Label { get; set; }
 13 | 
 14 |     [JsonPropertyName("model_id")]
 15 |     public string ModelId { get; set; }
 16 | 
 17 |     [JsonPropertyName("tokenizer_id")]
 18 |     public string TokenizerId { get; set; }
 19 | 
 20 |     [JsonPropertyName("num_prompts")]
 21 |     public long NumPrompts { get; set; }
 22 | 
 23 |     [JsonPropertyName("metadata.rocm_ver")]
 24 |     public string MetadataRocmVer { get; set; }
 25 | 
 26 |     [JsonPropertyName("metadata.torch_ver")]
 27 |     public string MetadataTorchVer { get; set; }
 28 | 
 29 |     [JsonPropertyName("metadata.vision_ver")]
 30 |     public string MetadataVisionVer { get; set; }
 31 | 
 32 |     [JsonPropertyName("metadata.vllm_ver")]
 33 |     public string MetadataVllmVer { get; set; }
 34 | 
 35 |     [JsonPropertyName("metadata.triton_ver")]
 36 |     public string MetadataTritonVer { get; set; }
 37 | 
 38 |     [JsonPropertyName("metadata.image")]
 39 |     public string MetadataImage { get; set; }
 40 | 
 41 |     [JsonPropertyName("metadata.tensor_parallelism")]
 42 |     public string MetadataTensorParallelism { get; set; }
 43 | 
 44 |     [JsonPropertyName("metadata.about")]
 45 |     public string MetadataAbout { get; set; }
 46 | 
 47 |     [JsonPropertyName("metadata.benchmark_author")]
 48 |     public string MetadataBenchmarkAuthor { get; set; }
 49 | 
 50 |     [JsonPropertyName("metadata.power_cap")]
 51 |     public string MetadataPowerCap { get; set; }
 52 | 
 53 |     [JsonPropertyName("request_rate")]
 54 |     public string RequestRate { get; set; }
 55 | 
 56 |     [JsonPropertyName("burstiness")]
 57 |     public double Burstiness { get; set; }
 58 | 
 59 |     [JsonPropertyName("max_concurrency")]
 60 |     public long MaxConcurrency { get; set; }
 61 | 
 62 |     [JsonPropertyName("duration")]
 63 |     public double Duration { get; set; }
 64 | 
 65 |     [JsonPropertyName("completed")]
 66 |     public long Completed { get; set; }
 67 | 
 68 |     [JsonPropertyName("total_input_tokens")]
 69 |     public long TotalInputTokens { get; set; }
 70 | 
 71 |     [JsonPropertyName("total_output_tokens")]
 72 |     public long TotalOutputTokens { get; set; }
 73 | 
 74 |     [JsonPropertyName("request_throughput")]
 75 |     public double RequestThroughput { get; set; }
 76 | 
 77 |     [JsonPropertyName("request_goodput")]
 78 |     public object RequestGoodput { get; set; }
 79 | 
 80 |     [JsonPropertyName("output_throughput")]
 81 |     public double OutputThroughput { get; set; }
 82 | 
 83 |     [JsonPropertyName("total_token_throughput")]
 84 |     public double TotalTokenThroughput { get; set; }
 85 | 
 86 |     [JsonPropertyName("input_lens")]
 87 |     public long[] InputLens { get; set; }
 88 | 
 89 |     [JsonPropertyName("output_lens")]
 90 |     public long[] OutputLens { get; set; }
 91 | 
 92 |     [JsonPropertyName("ttfts")]
 93 |     public double[] Ttfts { get; set; }
 94 | 
 95 |     [JsonPropertyName("itls")]
 96 |     public double[][] Itls { get; set; }
 97 | 
 98 |     [JsonPropertyName("generated_texts")]
 99 |     public string[] GeneratedTexts { get; set; }
100 | 
101 |     [JsonPropertyName("errors")]
102 |     public string[] Errors { get; set; }
103 | 
104 |     [JsonPropertyName("mean_ttft_ms")]
105 |     public double MeanTtftMs { get; set; }
106 | 
107 |     [JsonPropertyName("median_ttft_ms")]
108 |     public double MedianTtftMs { get; set; }
109 | 
110 |     [JsonPropertyName("std_ttft_ms")]
111 |     public double StdTtftMs { get; set; }
112 | 
113 |     [JsonPropertyName("p99_ttft_ms")]
114 |     public double P99TtftMs { get; set; }
115 | 
116 |     [JsonPropertyName("mean_tpot_ms")]
117 |     public double MeanTpotMs { get; set; }
118 | 
119 |     [JsonPropertyName("median_tpot_ms")]
120 |     public double MedianTpotMs { get; set; }
121 | 
122 |     [JsonPropertyName("std_tpot_ms")]
123 |     public double StdTpotMs { get; set; }
124 | 
125 |     [JsonPropertyName("p99_tpot_ms")]
126 |     public double P99TpotMs { get; set; }
127 | 
128 |     [JsonPropertyName("mean_itl_ms")]
129 |     public double MeanItlMs { get; set; }
130 | 
131 |     [JsonPropertyName("median_itl_ms")]
132 |     public double MedianItlMs { get; set; }
133 | 
134 |     [JsonPropertyName("std_itl_ms")]
135 |     public double StdItlMs { get; set; }
136 | 
137 |     [JsonPropertyName("p99_itl_ms")]
138 |     public double P99ItlMs { get; set; }
139 | }


--------------------------------------------------------------------------------
/vllm/benchmark/readme.md:
--------------------------------------------------------------------------------
 1 | # vLLM benchmark
 2 | Run all commands in same pod
 3 | 
 4 | ### 1. fill env vars
 5 | ```sh
 6 | export VLLM_USE_V1=1                                        # vllm serve only. Required for gemma3
 7 | export VLLM_SLEEP_WHEN_IDLE=1                               # vllm serve only. Reduce CPU usage when vLLM is idle
 8 | export HUGGING_FACE_HUB_TOKEN=hf_XXXXXXXXXXXXXXXXXXXXXXX    # vllm serve only. HF api token
 9 | export POWER_CAP=225                                        # AMD GPU power cap
10 | export TENSOR_PARALLELISM=2                                 # GPUs count. 1/2/4/8
11 | export BENCHMARK_AUTHOR=mixa3607                            # author
12 | export ABOUT="tested on rd450x 256G inside k3s in lxc"      # misc info
13 | #export IMAGE_NAME="XXXX"                                   # set if not in env
14 | ```
15 | 
16 | ### 2. Run vllm
17 | ```sh
18 | # Run vllm with gemma3 27B in 4 bit quant
19 | vllm serve gaunernst/gemma-3-27b-it-qat-autoawq --tensor-parallel-size $TENSOR_PARALLELISM --max-model-len 8K
20 | ```
21 | 
22 | ### 3. Run benchmarks
23 | ```sh
24 | # Set power cap and run benchmarks
25 | amd-smi set --power-cap $POWER_CAP
26 | echo -e '75 1\n100 2\n125 3\n150 4' | while read SETUP; do
27 |   SETUP=($SETUP)
28 |   vllm bench serve \
29 |     --model gaunernst/gemma-3-27b-it-qat-autoawq \
30 |     --host 127.0.0.1 \
31 |     --num-prompts ${SETUP[0]} --max-concurrency ${SETUP[1]} \
32 |     --dataset-name random --random-input-len 1024 --random-output-len 512 --random-range-ratio 0.1 \
33 |     --save-detailed --save-result --metadata \
34 |       metadata.rocm_ver="$(cat /opt/ROCM_VERSION_FULL)" \
35 |       metadata.torch_ver="$(pip show torch | sed -nE 's|^Version: (.+)|\1|p')" \
36 |       metadata.vision_ver="$(pip show torchvision | sed -nE 's|^Version: (.+)|\1|p')" \
37 |       metadata.vllm_ver="$(pip show vllm | sed -nE 's|^Version: (.+)|\1|p')" \
38 |       metadata.triton_ver="$(pip show triton | sed -nE 's|^Version: (.+)|\1|p')" \
39 |       metadata.image="$IMAGE_NAME" \
40 |       metadata.tensor_parallelism="$TENSOR_PARALLELISM" \
41 |       metadata.about="$ABOUT" \
42 |       metadata.benchmark_author="$BENCHMARK_AUTHOR" \
43 |       metadata.tensor_parallelism="$TENSOR_PARALLELISM" \
44 |       metadata.power_cap="$POWER_CAP"
45 | done
46 | ```
47 | 
48 | ### 4. Copy results from pod
49 | ```sh
50 | kubectl exec -n ns-vllm pods/$(kubectl get pods -n ns-vllm -l app=vllm -o jsonpath='{.items[].metadata.name}') -- bash -c 'tar -zcvf - /app/vllm/*.json .' | tar -zxvf - -C results/
51 | ```
52 | 
53 | ### 3. Generate table
54 | ```sh
55 | dotnet run --project ./ResultsConverter/ResultsConverter/ResultsConverter.csproj -- gen-table -i ./results/
56 | ```
57 | 


--------------------------------------------------------------------------------
/vllm/build-and-push.vllm.sh:
--------------------------------------------------------------------------------
 1 | #/bin/bash
 2 | set -e
 3 | 
 4 | cd $(dirname $0)
 5 | source ../env.sh
 6 | 
 7 | IMAGE_TAGS=(
 8 |   "$VLLM_IMAGE:${VLLM_PRESET_NAME}-${REPO_GIT_REF}"
 9 |   "$VLLM_IMAGE:${VLLM_PRESET_NAME}"
10 | )
11 | 
12 | if docker_image_pushed ${IMAGE_TAGS[0]}; then
13 |   echo "${IMAGE_TAGS[0]} already in registry. Skip"
14 |   exit 0
15 | fi
16 | 
17 | DOCKER_EXTRA_ARGS=()
18 | for (( i=0; i<${#IMAGE_TAGS[@]}; i++ )); do
19 |   DOCKER_EXTRA_ARGS+=("-t" "${IMAGE_TAGS[$i]}")
20 | done
21 | 
22 | mkdir ./logs || true
23 | docker buildx build ${DOCKER_EXTRA_ARGS[@]} --push \
24 |   --build-arg BASE_PYTORCH_IMAGE=$TORCH_IMAGE:${VLLM_PYTORCH_VERSION}-rocm-${VLLM_ROCM_VERSION} \
25 |   --build-arg VLLM_BRANCH=$VLLM_BRANCH \
26 |   --build-arg TRITON_BRANCH=$VLLM_TRITON_BRANCH \
27 |   --progress=plain --target final -f ./vllm.Dockerfile ./submodules 2>&1 | tee ./logs/build_$(date +%Y%m%d%H%M%S).log
28 | 


--------------------------------------------------------------------------------
/vllm/env.sh:
--------------------------------------------------------------------------------
 1 | #/bin/bash
 2 | 
 3 | pushd $(dirname ${BASH_SOURCE[0]})
 4 | 
 5 | if [ "$VLLM_IMAGE" == "" ]; then
 6 |   VLLM_IMAGE=docker.io/mixa3607/vllm-gfx906
 7 |   #VLLM_IMAGE=registry.arkprojects.space/apps/vllm-gfx906
 8 | fi
 9 | 
10 | if [ "$VLLM_PRESET_NAME" == "" ];           then VLLM_PRESET_NAME=default; fi
11 | # vllm git checkpoint
12 | if [ "$VLLM_BRANCH" == "" ];                then VLLM_BRANCH="v0.10.2"; fi
13 | # triton git checkpoint
14 | if [ "$VLLM_TRITON_BRANCH" == "" ];         then VLLM_TRITON_BRANCH="v3.4.x"; fi
15 | # rocm version
16 | if [ "$VLLM_ROCM_VERSION" == "" ];          then VLLM_ROCM_VERSION=6.4.4; fi
17 | # torch git checkpoint
18 | if [ "$VLLM_PYTORCH_VERSION" == "" ];       then VLLM_PYTORCH_VERSION="v2.7.1"; fi
19 | 
20 | popd
21 | 


--------------------------------------------------------------------------------
/vllm/preset.0.10.2-rocm-6.4.4.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | 
3 | export VLLM_PRESET_NAME="0.10.2-rocm-6.4.4"
4 | export VLLM_ROCM_VERSION="6.4.4"
5 | export VLLM_PYTORCH_VERSION="v2.7.1"
6 | export VLLM_BRANCH="gfx906/v0.10.2"
7 | export VLLM_TRITON_BRANCH="gfx906/v3.3.x"
8 | 


--------------------------------------------------------------------------------
/vllm/preset.0.11.0-rocm-6.3.3.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | 
3 | export VLLM_PRESET_NAME="0.11.0-rocm-6.3.3"
4 | export VLLM_ROCM_VERSION="6.3.3"
5 | export VLLM_PYTORCH_VERSION="v2.8.0"
6 | export VLLM_BRANCH="gfx906/v0.11.0"
7 | export VLLM_TRITON_BRANCH="gfx906/v3.4.x"
8 | 


--------------------------------------------------------------------------------
/vllm/preset.0.8.5-rocm-6.3.3.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | 
3 | export VLLM_PRESET_NAME="0.8.5-rocm-6.3.3"
4 | export VLLM_ROCM_VERSION="6.3.3"
5 | export VLLM_PYTORCH_VERSION="v2.7.1"
6 | export VLLM_BRANCH="v0.8.5+gfx906"
7 | export VLLM_TRITON_BRANCH="gfx906/v3.3.x"
8 | 


--------------------------------------------------------------------------------
/vllm/readme.md:
--------------------------------------------------------------------------------
  1 | # VLLM GFX906
  2 | Used forks by https://github.com/nlzy:
  3 | - https://github.com/nlzy/vllm-gfx906
  4 | - https://github.com/nlzy/triton-gfx906
  5 | 
  6 | ## Benchmarks
  7 | 
  8 | Methodology [benchmark](./benchmark/readme.md)
  9 | 
 10 |   date            | rocm  | torch              | vllm                        | triton            | TP | PwrCap | Model                                | Prompts | Threads | Duration         | RPS  | Output TPS | Total TPS | About                                    
 11 |  -----------------|-------|--------------------|-----------------------------|-------------------|----|--------|--------------------------------------|---------|---------|------------------|------|------------|-----------|----------------------------------------- 
 12 |   20251005-210513 | 6.4.4 | 2.7.1a0+gite2d141d | 0.1.dev1+gceec3eaf6.rocm644 | 3.3.0+git2b5c6ef9 | 2  | 150    | gaunernst/gemma-3-27b-it-qat-autoawq | 150     | 4       | 00:20:10.3265325 | 0.12 | 58.77      | 186.03    | tested on rd450x 256G inside k3s in lxc  
 13 |   20251005-212640 | 6.4.4 | 2.7.1a0+gite2d141d | 0.1.dev1+gceec3eaf6.rocm644 | 3.3.0+git2b5c6ef9 | 2  | 150    | gaunernst/gemma-3-27b-it-qat-autoawq | 125     | 3       | 00:20:00.2988691 | 0.10 | 48.18      | 154.96    | tested on rd450x 256G inside k3s in lxc  
 14 |   20251005-214604 | 6.4.4 | 2.7.1a0+gite2d141d | 0.1.dev1+gceec3eaf6.rocm644 | 3.3.0+git2b5c6ef9 | 2  | 150    | gaunernst/gemma-3-27b-it-qat-autoawq | 100     | 2       | 00:18:05.4545212 | 0.09 | 41.81      | 136.23    | tested on rd450x 256G inside k3s in lxc  
 15 |   20251005-221837 | 6.4.4 | 2.7.1a0+gite2d141d | 0.1.dev1+gceec3eaf6.rocm644 | 3.3.0+git2b5c6ef9 | 2  | 150    | gaunernst/gemma-3-27b-it-qat-autoawq | 75      | 1       | 00:27:37.0155547 | 0.05 | 21.18      | 67.61     | tested on rd450x 256G inside k3s in lxc  
 16 |   20251006-130816 | 6.3.3 | 2.7.1a0+gite2d141d | 0.1.dev1+gceec3eaf6.rocm633 | 3.3.0+git2b5c6ef9 | 2  | 150    | gaunernst/gemma-3-27b-it-qat-autoawq | 75      | 1       | 00:19:16.0905731 | 0.06 | 19.44      | 86.00     | tested on rd450x 256G inside k3s in lxc  
 17 |   20251006-132621 | 6.3.3 | 2.7.1a0+gite2d141d | 0.1.dev1+gceec3eaf6.rocm633 | 3.3.0+git2b5c6ef9 | 2  | 150    | gaunernst/gemma-3-27b-it-qat-autoawq | 100     | 2       | 00:17:29.1542989 | 0.10 | 41.52      | 139.21    | tested on rd450x 256G inside k3s in lxc  
 18 |   20251006-134724 | 6.3.3 | 2.7.1a0+gite2d141d | 0.1.dev1+gceec3eaf6.rocm633 | 3.3.0+git2b5c6ef9 | 2  | 150    | gaunernst/gemma-3-27b-it-qat-autoawq | 125     | 3       | 00:20:06.5979349 | 0.10 | 48.32      | 154.54    | tested on rd450x 256G inside k3s in lxc  
 19 |   20251006-140759 | 6.3.3 | 2.7.1a0+gite2d141d | 0.1.dev1+gceec3eaf6.rocm633 | 3.3.0+git2b5c6ef9 | 2  | 150    | gaunernst/gemma-3-27b-it-qat-autoawq | 150     | 4       | 00:19:38.5187576 | 0.13 | 57.69      | 188.37    | tested on rd450x 256G inside k3s in lxc  
 20 |   20251007-162504 | 6.4.4 | 2.7.1a0+gite2d141d | 0.1.dev1+gceec3eaf6.rocm644 | 3.3.0+git2b5c6ef9 | 2  | 225    | gaunernst/gemma-3-27b-it-qat-autoawq | 75      | 1       | 00:19:22.7926510 | 0.06 | 20.08      | 86.25     | tested on rd450x 256G inside k3s in lxc  
 21 |   20251007-171239 | 6.4.4 | 2.7.1a0+gite2d141d | 0.1.dev1+gceec3eaf6.rocm644 | 3.3.0+git2b5c6ef9 | 2  | 225    | gaunernst/gemma-3-27b-it-qat-autoawq | 100     | 2       | 00:16:02.6107616 | 0.10 | 44.64      | 151.11    | tested on rd450x 256G inside k3s in lxc  
 22 |   20251007-173243 | 6.4.4 | 2.7.1a0+gite2d141d | 0.1.dev1+gceec3eaf6.rocm644 | 3.3.0+git2b5c6ef9 | 2  | 225    | gaunernst/gemma-3-27b-it-qat-autoawq | 125     | 3       | 00:19:21.7160991 | 0.11 | 50.35      | 160.67    | tested on rd450x 256G inside k3s in lxc  
 23 |   20251007-175203 | 6.4.4 | 2.7.1a0+gite2d141d | 0.1.dev1+gceec3eaf6.rocm644 | 3.3.0+git2b5c6ef9 | 2  | 225    | gaunernst/gemma-3-27b-it-qat-autoawq | 150     | 4       | 00:18:17.4322852 | 0.14 | 60.88      | 201.22    | tested on rd450x 256G inside k3s in lxc  
 24 |   20251012-111624 | 6.3.3 | 2.8.0a0+gitba56102 | 0.11.0+gfx906.rocm633       | 3.4.0+gite4f3b253 | 2  | 225    | gaunernst/gemma-3-27b-it-qat-autoawq | 75      | 1       | 00:21:05.0039699 | 0.06 | 16.07      | 76.89     | tested on rd450x 256G inside k3s in lxc  
 25 |   20251012-112842 | 6.3.3 | 2.8.0a0+gitba56102 | 0.11.0+gfx906.rocm633       | 3.4.0+gite4f3b253 | 2  | 225    | gaunernst/gemma-3-27b-it-qat-autoawq | 100     | 2       | 00:11:41.9394741 | 0.14 | 35.56      | 181.57    | tested on rd450x 256G inside k3s in lxc  
 26 |   20251012-114201 | 6.3.3 | 2.8.0a0+gitba56102 | 0.11.0+gfx906.rocm633       | 3.4.0+gite4f3b253 | 2  | 225    | gaunernst/gemma-3-27b-it-qat-autoawq | 125     | 3       | 00:12:43.8522526 | 0.16 | 41.50      | 209.29    | tested on rd450x 256G inside k3s in lxc  
 27 |   20251012-115501 | 6.3.3 | 2.8.0a0+gitba56102 | 0.11.0+gfx906.rocm633       | 3.4.0+gite4f3b253 | 2  | 225    | gaunernst/gemma-3-27b-it-qat-autoawq | 150     | 4       | 00:12:24.0047521 | 0.20 | 48.33      | 255.35    | tested on rd450x 256G inside k3s in lxc  
 28 |   20251012-121023 | 6.3.3 | 2.8.0a0+gitba56102 | 0.11.0+gfx906.rocm633       | 3.4.0+gite4f3b253 | 2  | 225    | gaunernst/gemma-3-27b-it-qat-autoawq | 200     | 8       | 00:13:31.6286220 | 0.25 | 54.78      | 308.18    | tested on rd450x 256G inside k3s in lxc  
 29 |   20251012-201017 | 6.3.3 | 2.8.0a0+gitba56102 | 0.11.0+gfx906.rocm633       | 3.4.0+gite4f3b253 | 2  | 225    | gaunernst/gemma-3-27b-it-qat-autoawq | 100     | 16      | 00:07:22.8462734 | 0.23 | 54.01      | 285.44    | tested on rd450x 256G inside k3s in lxc  
 30 |   20251013-140107 | 6.3.3 | 2.8.0a0+gitba56102 | 0.11.0+gfx906.rocm633       | 3.4.0+gite4f3b253 | 2  | 225    | gaunernst/gemma-3-27b-it-qat-autoawq | 75      | 1       | 00:20:08.6821350 | 0.06 | 15.66      | 79.31     | TdcLimitGfx=150                          
 31 |   20251013-141355 | 6.3.3 | 2.8.0a0+gitba56102 | 0.11.0+gfx906.rocm633       | 3.4.0+gite4f3b253 | 2  | 225    | gaunernst/gemma-3-27b-it-qat-autoawq | 100     | 2       | 00:12:11.5680303 | 0.14 | 34.12      | 174.22    | TdcLimitGfx=150                          
 32 |   20251013-142754 | 6.3.3 | 2.8.0a0+gitba56102 | 0.11.0+gfx906.rocm633       | 3.4.0+gite4f3b253 | 2  | 225    | gaunernst/gemma-3-27b-it-qat-autoawq | 125     | 3       | 00:13:21.9331666 | 0.16 | 39.53      | 199.35    | TdcLimitGfx=150                          
 33 |   20251013-144145 | 6.3.3 | 2.8.0a0+gitba56102 | 0.11.0+gfx906.rocm633       | 3.4.0+gite4f3b253 | 2  | 225    | gaunernst/gemma-3-27b-it-qat-autoawq | 150     | 4       | 00:13:13.4142282 | 0.19 | 46.02      | 240.14    | TdcLimitGfx=150                          
 34 | 
 35 | ## Run
 36 | 
 37 | ## DockerHub images
 38 | > ghcr.io registry is deprecated. Use https://hub.docker.com/r/mixa3607/vllm-gfx906 instead
 39 | 
 40 | Vers compatibility table:
 41 | | ROCm | PyTorch | vLLM | triton | model | text | images | misc |
 42 | | ----- | ------- | ---- | ------ | ----- | ---- | ------ | ----|
 43 | | 7.0.0 | ⛔     | ⛔   | ⛔     | ⛔   | ⛔   | ⛔    | can't build any torch ver with llvm20 |
 44 | | 6.3.3 | 2.7.1 | 0.10.2 | 3.3.0 | gaunernst/gemma-3-27b-it-qat-autoawq | ✅️ | ✅️ | ok |
 45 | | 6.4.4 | 2.7.1 | 0.10.2 | 3.3.0 | gaunernst/gemma-3-27b-it-qat-autoawq | ✅️ | ⛔ | requests with images throw exception |
 46 | | 6.3.3 | 2.8.0 | 0.11.0 | 3.4.0 | gaunernst/gemma-3-27b-it-qat-autoawq | ✅️ | ✅️ | ok |
 47 | | 6.4.4 | 2.8.0 | 0.11.0 | 3.4.0 | gaunernst/gemma-3-27b-it-qat-autoawq | ⛔ | ⛔ | all requests throw exception |
 48 | 
 49 | Recommend use `docker.io/mixa3607/vllm-gfx906:0.11.0-rocm-6.3.3`
 50 | 
 51 | ### Docker
 52 | Basics from amd https://github.com/ROCm/vllm/blob/main/docs/deployment/docker.md
 53 | 
 54 | ### Kubernetes
 55 | ```yaml
 56 | ---
 57 | apiVersion: v1
 58 | kind: PersistentVolumeClaim
 59 | metadata:
 60 |   name: vllm-models
 61 |   namespace: ns-vllm
 62 | spec:
 63 |   accessModes:
 64 |     - ReadWriteOnce
 65 |   volumeMode: Filesystem
 66 |   storageClassName: nfs-ssd-1
 67 |   resources:
 68 |     requests:
 69 |       storage: 64Gi
 70 | ---
 71 | apiVersion: apps/v1
 72 | kind: Deployment
 73 | metadata:
 74 |   name: vllm
 75 |   namespace: ns-vllm
 76 |   labels:
 77 |     app: vllm
 78 | spec:
 79 |   strategy:
 80 |     type: Recreate
 81 |   replicas: 1
 82 |   selector:
 83 |     matchLabels:
 84 |       app: vllm
 85 |   template:
 86 |     metadata:
 87 |       labels:
 88 |         app: vllm
 89 |     spec:
 90 |       volumes:
 91 |         - name: models-volume
 92 |           persistentVolumeClaim:
 93 |             claimName: vllm-models
 94 |         - name: dev-kfd
 95 |           hostPath:
 96 |             path: /dev/kfd
 97 |         - name: dev-dri
 98 |           hostPath:
 99 |             path: /dev/dri
100 |         - name: shm
101 |           emptyDir:
102 |             medium: Memory
103 |             sizeLimit: 32G
104 |       containers:
105 |         - name: vllm
106 |           image: docker.io/mixa3607/vllm-gfx906:ella
107 |           imagePullPolicy: Always
108 |           securityContext:
109 |             privileged: true
110 |             runAsNonRoot: false
111 |             runAsGroup: 0
112 |             runAsUser: 0
113 |             seccompProfile:
114 |               type: Unconfined
115 |             capabilities:
116 |               add:
117 |                 - SYS_PTRACE
118 |           command: [ "/bin/bash", "-c" ]
119 |           args:
120 |             #- "while true; do sleep 1s; done;"
121 |             - |
122 |               export VLLM_USE_V1=1
123 |               export HUGGING_FACE_HUB_TOKEN=hf_XXXXXXXXXXXXXXXXXXXXXXX
124 |               exec vllm serve gaunernst/gemma-3-27b-it-qat-autoawq --tensor-parallel-size 2 --max-model-len 16K
125 |           ports:
126 |             - containerPort: 8000
127 |           resources:
128 |             limits:
129 |               memory: 64G
130 |             requests:
131 |               cpu: "6"
132 |               memory: 6G
133 |           volumeMounts:
134 |             - mountPath: /root/.cache/huggingface
135 |               name: models-volume
136 |             - name: shm
137 |               mountPath: /dev/shm
138 |             - name: dev-kfd
139 |               mountPath: /dev/kfd
140 |             - name: dev-dri
141 |               mountPath: /dev/dri
142 | ```
143 | 
144 | ## Gemma3 AWQ patch for 0.11.0
145 | ```bash
146 | echo '
147 | --- /usr/local/lib/python3.12/dist-packages/vllm/config/model.py        2025-10-12 13:22:53.000000000 +0000
148 | +++ /usr/local/lib/python3.12/dist-packages/vllm/config/model.py        2025-10-12 13:59:26.271776131 +0000
149 | @@ -1586,6 +1586,7 @@
150 |      "plamo2": "Numerical instability. Please use bfloat16 or float32 instead.",
151 |      "glm4": "Numerical instability. Please use bfloat16 or float32 instead.",
152 |  }
153 | +_FLOAT16_NOT_SUPPORTED_MODELS = {}
154 | 
155 | 
156 |  def _is_valid_dtype(model_type: str, dtype: torch.dtype):' | patch -d/ -p0
157 | 
158 | echo '
159 | --- /usr/local/lib/python3.12/dist-packages/vllm/model_executor/models/gemma3.py
160 | +++ /usr/local/lib/python3.12/dist-packages/vllm/model_executor/models/gemma3.py
161 | @@ -329,6 +329,9 @@ class Gemma3DecoderLayer(nn.Module):
162 |          residual: Optional[torch.Tensor],
163 |          **kwargs,
164 |      ) -> tuple[torch.Tensor, torch.Tensor]:
165 | +        # https://github.com/huggingface/transformers/pull/36832
166 | +        if hidden_states.dtype == torch.float16:
167 | +            hidden_states = hidden_states.clamp_(-65504, 65504)
168 |          if residual is None:
169 |              residual = hidden_states
170 |              hidden_states = self.input_layernorm(hidden_states)
171 | @@ -341,11 +344,15 @@ class Gemma3DecoderLayer(nn.Module):
172 |              **kwargs,
173 |          )
174 |          hidden_states = self.post_attention_layernorm(hidden_states)
175 | +        if hidden_states.dtype == torch.float16:
176 | +            hidden_states = hidden_states.clamp_(-65504, 65504)
177 | 
178 |          hidden_states, residual = self.pre_feedforward_layernorm(
179 |              hidden_states, residual)
180 |          hidden_states = self.mlp(hidden_states)
181 |          hidden_states = self.post_feedforward_layernorm(hidden_states)
182 | +        if hidden_states.dtype == torch.float16:
183 | +            hidden_states = hidden_states.clamp_(-65504, 65504)
184 |          return hidden_states, residual
185 | 
186 | 
187 | @@ -552,4 +559,4 @@ class Gemma3ForCausalLM(nn.Module, SupportsLoRA, SupportsPP):
188 |              skip_prefixes=(["lm_head."]
189 |                             if self.config.tie_word_embeddings else None),
190 |          )
191 | -        return loader.load_weights(weights)
192 | +        return loader.load_weights(weights)' | patch -d/ -p0
193 | ```
194 | 
195 | 
196 | ## Build
197 | See build vars in `./env.sh`. You also may use presetis `./preset.*.sh`. Exec `./build-and-push.vllm.sh`:
198 | ```bash
199 | $ . preset.0.11.0-rocm-6.3.3.sh
200 | $ ./build-and-push.vllm.sh
201 | ~/REPOS/mixa3607/llama.cpp-gfx906/rocm ~/REPOS/mixa3607/llama.cpp-gfx906/rocm
202 | ~/REPOS/mixa3607/llama.cpp-gfx906/rocm
203 | ~/REPOS/mixa3607/llama.cpp-gfx906/llama.cpp ~/REPOS/mixa3607/llama.cpp-gfx906/rocm
204 | ~/REPOS/mixa3607/llama.cpp-gfx906/rocm
205 | ~/REPOS/mixa3607/llama.cpp-gfx906/comfyui ~/REPOS/mixa3607/llama.cpp-gfx906/rocm
206 | ~/REPOS/mixa3607/llama.cpp-gfx906/rocm
207 | ~/REPOS/mixa3607/llama.cpp-gfx906/vllm ~/REPOS/mixa3607/llama.cpp-gfx906/rocm
208 | ~/REPOS/mixa3607/llama.cpp-gfx906/rocm
209 | #0 building with "remote" instance using remote driver
210 | #...............
211 | #14 DONE 583.8s
212 | ```
213 | 


--------------------------------------------------------------------------------
/vllm/submodules/.gitkeep:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mixa3607/ML-gfx906/d95fce7ed5e14ec9cc7b801c668696194a929cda/vllm/submodules/.gitkeep


--------------------------------------------------------------------------------
/vllm/vllm.Dockerfile:
--------------------------------------------------------------------------------
 1 | ARG BASE_PYTORCH_IMAGE="docker.io/mixa3607/pytorch-gfx906:v2.7.1-rocm-6.3.3"
 2 | ARG VLLM_REPO="https://github.com/nlzy/vllm-gfx906.git"
 3 | ARG VLLM_BRANCH="main"
 4 | ARG TRITON_REPO="https://github.com/nlzy/triton-gfx906.git"
 5 | ARG TRITON_BRANCH="main"
 6 | 
 7 | ############# Base image #############
 8 | FROM ${BASE_PYTORCH_IMAGE} AS rocm_base
 9 | # Install basic utilities and Python 3.12
10 | RUN pip install amdsmi==$(cat /opt/ROCM_VERSION_FULL)
11 | 
12 | # Set environment variables
13 | ENV PYTORCH_ROCM_ARCH=$ROCM_ARCH
14 | ENV LD_LIBRARY_PATH=/opt/rocm/lib:/usr/local/lib:
15 | ENV RAY_EXPERIMENTAL_NOSET_ROCR_VISIBLE_DEVICES=1
16 | ENV TOKENIZERS_PARALLELISM=false
17 | ENV HIP_FORCE_DEV_KERNARG=1
18 | ENV VLLM_TARGET_DEVICE=rocm
19 | 
20 | ############# Build base #############
21 | FROM rocm_base AS build_base
22 | RUN pip3 install ninja 'cmake<4' wheel pybind11 setuptools_scm
23 | 
24 | ############# Build triton #############
25 | FROM build_base AS build_triton
26 | ARG TRITON_REPO
27 | ARG TRITON_BRANCH
28 | WORKDIR /app
29 | RUN git clone --depth 1 --recurse-submodules --shallow-submodules --jobs 4 --branch ${TRITON_BRANCH} ${TRITON_REPO} triton
30 | WORKDIR /app/triton
31 | # "if" used for diff between triton 3.3.0<=>3.4.0
32 | RUN if [ ! -f setup.py ]; then cd python; fi; python3 setup.py bdist_wheel --dist-dir=/dist
33 | RUN ls /dist
34 | 
35 | ############# Build vllm #############
36 | FROM build_base AS build_vllm
37 | ARG VLLM_REPO
38 | ARG VLLM_BRANCH
39 | WORKDIR /app
40 | RUN git clone --depth 1 --recurse-submodules --shallow-submodules --jobs 4 --branch ${VLLM_BRANCH} ${VLLM_REPO} vllm
41 | WORKDIR /app/vllm
42 | RUN pip install -r requirements/rocm.txt
43 | RUN python3 setup.py bdist_wheel --dist-dir=/dist
44 | RUN ls /dist 
45 | 
46 | ############# Install all #############
47 | FROM rocm_base AS final
48 | WORKDIR /app/vllm
49 | RUN --mount=type=bind,from=build_vllm,src=/app/vllm/requirements,target=/app/vllm/requirements \
50 |     --mount=type=bind,from=build_vllm,src=/dist/,target=/dist_vllm \
51 |     --mount=type=bind,from=build_triton,src=/dist/,target=/dist_triton \
52 |     pip install /dist_triton/*.whl /dist_vllm/*.whl && \
53 |     pip install -r requirements/rocm.txt && \
54 |     pip install opentelemetry-sdk opentelemetry-api opentelemetry-semantic-conventions-ai opentelemetry-exporter-otlp && \
55 |     true
56 | 
57 | CMD ["/bin/bash"]
58 | 


--------------------------------------------------------------------------------