├── .gitignore
├── .gitmodules
├── checkpoint.sh
├── comfyui
│   ├── build-and-push.comfyui.sh
│   ├── comfyui.Dockerfile
│   ├── env.sh
│   ├── preset.rocm-6.3.3.sh
│   ├── preset.rocm-6.4.4.sh
│   └── readme.md
├── docs
│   └── images
│       └── temperatures.png
├── env.sh
├── llama.cpp
│   ├── build-and-push.rocm.sh
│   ├── build-and-push.vulkan.sh
│   ├── env.sh
│   ├── llamacpp-offload-calculator
│   │   ├── .gitattributes
│   │   ├── .gitignore
│   │   ├── ArkProjects.LlamaOffloadCalc.sln
│   │   ├── ArkProjects.LlamaOffloadCalc
│   │   │   ├── ArkProjects.LlamaOffloadCalc.csproj
│   │   │   ├── LLamaDevice.cs
│   │   │   ├── LLamaDeviceType.cs
│   │   │   ├── LLamaGgufMetadataExtractor.cs
│   │   │   ├── LLamaLogsParser.cs
│   │   │   ├── Options
│   │   │   │   ├── LLamaDeviceOptions.cs
│   │   │   │   ├── OffloadCalculationOptions.cs
│   │   │   │   ├── OffloadCalculationOptionsValidator.cs
│   │   │   │   └── TensorsOffloadRuleOptions.cs
│   │   │   ├── Program.cs
│   │   │   ├── Properties
│   │   │   │   └── launchSettings.json
│   │   │   ├── TensorMetadata.cs
│   │   │   ├── appsettings.GLM-4.5-Air-UD-Q6_K_XL.yaml
│   │   │   ├── appsettings.gpt-oss-120b-F16.yaml
│   │   │   └── appsettings.yaml
│   │   ├── GGUFSharp
│   │   │   ├── .gitignore
│   │   │   ├── LICENSE
│   │   │   ├── README.md
│   │   │   ├── SampleFiles
│   │   │   │   ├── FilesList.txt
│   │   │   │   ├── example.gguf
│   │   │   │   └── genTestFile.py
│   │   │   └── src
│   │   │       └── GGUFSharp
│   │   │           ├── GGUFSharp.Test
│   │   │           │   ├── BasicFeatureTest.cs
│   │   │           │   ├── GGUFSharp.Test.csproj
│   │   │           │   └── MSTestSettings.cs
│   │   │           ├── GGUFSharp.sln
│   │   │           └── GGUFSharp
│   │   │               ├── GGUFDataTypeEnum.cs
│   │   │               ├── GGUFFile.cs
│   │   │               ├── GGUFHeader.cs
│   │   │               ├── GGUFMetaItem.cs
│   │   │               ├── GGUFReader.cs
│   │   │               ├── GGUFSharp.csproj
│   │   │               ├── GGUFStreamReader.cs
│   │   │               ├── GGUFTensorInfo.cs
│   │   │               └── GGUFTensorType.cs
│   │   └── readme.md
│   ├── preset.rocm-6.3.3.sh
│   ├── preset.rocm-6.4.4.sh
│   ├── preset.rocm-7.0.0.sh
│   └── readme.md
├── pytorch
│   ├── build-and-push.torch.sh
│   ├── env.sh
│   ├── preset.torch-2.7.1-rocm-6.3.3.sh
│   ├── preset.torch-2.7.1-rocm-6.4.4.sh
│   ├── preset.torch-2.8.0-rocm-6.3.3.sh
│   ├── preset.torch-2.8.0-rocm-6.4.4.sh
│   ├── preset.torch-2.8.0-rocm-7.0.2.sh
│   ├── preset.torch-2.9.0-rocm-7.0.2.sh
│   ├── readme.md
│   ├── submodules
│   │   └── .gitkeep
│   └── torch.Dockerfile
├── readme.md
├── rocm
│   ├── build-and-push.rocm.sh
│   ├── env.sh
│   ├── preset.rocm-6.3.3.sh
│   ├── preset.rocm-6.4.4.sh
│   ├── preset.rocm-7.0.0.sh
│   ├── preset.rocm-7.0.2.sh
│   ├── readme.md
│   ├── rocm.Dockerfile
│   └── submodules
│       └── .gitkeep
└── vllm
    ├── benchmark
    │   ├── ResultsConverter
    │   │   ├── .gitignore
    │   │   ├── ResultsConverter.sln
    │   │   └── ResultsConverter
    │   │       ├── MarkdownTableBuilder.cs
    │   │       ├── MarkdownTableBuilderExtensions.cs
    │   │       ├── Program.cs
    │   │       ├── Properties
    │   │       │   └── launchSettings.json
    │   │       ├── ResultsConverter.csproj
    │   │       └── VllmBenchResult.cs
    │   ├── readme.md
    │   └── results
    │       ├── app
    │       │   └── vllm
    │       │       ├── openai-infqps-concurrency1-gemma-3-27b-it-qat-autoawq-20251006-130816.json
    │       │       ├── openai-infqps-concurrency1-gemma-3-27b-it-qat-autoawq-20251007-162504.json
    │       │       ├── openai-infqps-concurrency1-gemma-3-27b-it-qat-autoawq-20251012-111624.json
    │       │       ├── openai-infqps-concurrency1-gemma-3-27b-it-qat-autoawq-20251013-140107.json
    │       │       ├── openai-infqps-concurrency16-gemma-3-27b-it-qat-autoawq-20251012-201017.json
    │       │       ├── openai-infqps-concurrency2-gemma-3-27b-it-qat-autoawq-20251006-132621.json
    │       │       ├── openai-infqps-concurrency2-gemma-3-27b-it-qat-autoawq-20251007-171239.json
    │       │       ├── openai-infqps-concurrency2-gemma-3-27b-it-qat-autoawq-20251012-112842.json
    │       │       ├── openai-infqps-concurrency2-gemma-3-27b-it-qat-autoawq-20251013-141355.json
    │       │       ├── openai-infqps-concurrency3-gemma-3-27b-it-qat-autoawq-20251006-134724.json
    │       │       ├── openai-infqps-concurrency3-gemma-3-27b-it-qat-autoawq-20251007-173243.json
    │       │       ├── openai-infqps-concurrency3-gemma-3-27b-it-qat-autoawq-20251012-114201.json
    │       │       ├── openai-infqps-concurrency3-gemma-3-27b-it-qat-autoawq-20251013-142754.json
    │       │       ├── openai-infqps-concurrency4-gemma-3-27b-it-qat-autoawq-20251006-140759.json
    │       │       ├── openai-infqps-concurrency4-gemma-3-27b-it-qat-autoawq-20251007-175203.json
    │       │       ├── openai-infqps-concurrency4-gemma-3-27b-it-qat-autoawq-20251012-115501.json
    │       │       ├── openai-infqps-concurrency4-gemma-3-27b-it-qat-autoawq-20251013-144145.json
    │       │       └── openai-infqps-concurrency8-gemma-3-27b-it-qat-autoawq-20251012-121023.json
    │       ├── openai-infqps-concurrency1-gemma-3-27b-it-qat-autoawq-20251005-221837.json
    │       ├── openai-infqps-concurrency1-gemma-3-27b-it-qat-autoawq-20251006-130816.json
    │       ├── openai-infqps-concurrency1-gemma-3-27b-it-qat-autoawq-20251007-162504.json
    │       ├── openai-infqps-concurrency1-gemma-3-27b-it-qat-autoawq-20251012-111624.json
    │       ├── openai-infqps-concurrency1-gemma-3-27b-it-qat-autoawq-20251013-140107.json
    │       ├── openai-infqps-concurrency16-gemma-3-27b-it-qat-autoawq-20251012-201017.json
    │       ├── openai-infqps-concurrency2-gemma-3-27b-it-qat-autoawq-20251005-214604.json
    │       ├── openai-infqps-concurrency2-gemma-3-27b-it-qat-autoawq-20251006-132621.json
    │       ├── openai-infqps-concurrency2-gemma-3-27b-it-qat-autoawq-20251007-171239.json
    │       ├── openai-infqps-concurrency2-gemma-3-27b-it-qat-autoawq-20251012-112842.json
    │       ├── openai-infqps-concurrency2-gemma-3-27b-it-qat-autoawq-20251013-141355.json
    │       ├── openai-infqps-concurrency3-gemma-3-27b-it-qat-autoawq-20251005-212640.json
    │       ├── openai-infqps-concurrency3-gemma-3-27b-it-qat-autoawq-20251006-134724.json
    │       ├── openai-infqps-concurrency3-gemma-3-27b-it-qat-autoawq-20251007-173243.json
    │       ├── openai-infqps-concurrency3-gemma-3-27b-it-qat-autoawq-20251012-114201.json
    │       ├── openai-infqps-concurrency3-gemma-3-27b-it-qat-autoawq-20251013-142754.json
    │       ├── openai-infqps-concurrency4-gemma-3-27b-it-qat-autoawq-20251005-210513.json
    │       ├── openai-infqps-concurrency4-gemma-3-27b-it-qat-autoawq-20251006-140759.json
    │       ├── openai-infqps-concurrency4-gemma-3-27b-it-qat-autoawq-20251007-175203.json
    │       ├── openai-infqps-concurrency4-gemma-3-27b-it-qat-autoawq-20251012-115501.json
    │       ├── openai-infqps-concurrency4-gemma-3-27b-it-qat-autoawq-20251013-144145.json
    │       └── openai-infqps-concurrency8-gemma-3-27b-it-qat-autoawq-20251012-121023.json
    ├── build-and-push.vllm.sh
    ├── env.sh
    ├── preset.0.10.2-rocm-6.4.4.sh
    ├── preset.0.11.0-rocm-6.3.3.sh
    ├── preset.0.8.5-rocm-6.3.3.sh
    ├── readme.md
    ├── submodules
    │   └── .gitkeep
    └── vllm.Dockerfile
/.gitignore:
--------------------------------------------------------------------------------
1 | /**/*.log
2 |
--------------------------------------------------------------------------------
/.gitmodules:
--------------------------------------------------------------------------------
1 | [submodule "llama.cpp/submodules/llama.cpp"]
2 | url = ../../ggml-org/llama.cpp.git
3 | path = llama.cpp/submodules/llama.cpp
4 | [submodule "comfyui/submodules/ComfyUI"]
5 | path = comfyui/submodules/ComfyUI
6 | url = ../../comfyanonymous/ComfyUI.git
7 |
--------------------------------------------------------------------------------
/checkpoint.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | source ./env.sh
4 |
5 | if [ -n "$(git status --porcelain)" ]; then
6 | echo "Workdir is dirty. Exiting"
7 | exit 10
8 | fi
9 |
10 | TAG_NAME=$(git_get_current_tag)
11 | if [ "$TAG_NAME" == "" ]; then
12 | TAG_NAME="$(date +%Y%m%d%H%M%S)"
13 | git tag -a "$TAG_NAME" -m "none"
14 | echo -e "New tag $TAG_NAME"
15 | else
16 | echo "Commit already tagged with $TAG_NAME"
17 | fi
18 |
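19 | # Example (illustrative): on a clean, untagged HEAD this creates an annotated tag
20 | # named like 20251013140107 (from `date +%Y%m%d%H%M%S`) and prints "New tag 20251013140107".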
--------------------------------------------------------------------------------
/comfyui/build-and-push.comfyui.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | set -e
3 |
4 | cd "$(dirname "$0")"
5 | source ../env.sh
6 |
7 | IMAGE_TAGS=(
8 | "$COMFYUI_IMAGE:${COMFYUI_GIT_REF}-torch-${COMFYUI_PYTORCH_VERSION}-rocm-${COMFYUI_ROCM_VERSION}-patch-${REPO_GIT_REF}"
9 | "$COMFYUI_IMAGE:${COMFYUI_GIT_REF}-rocm-${COMFYUI_ROCM_VERSION}-patch-${REPO_GIT_REF}"
10 | "$COMFYUI_IMAGE:${COMFYUI_GIT_REF}-rocm-${COMFYUI_ROCM_VERSION}"
11 | "$COMFYUI_IMAGE:latest-rocm-${COMFYUI_ROCM_VERSION}"
12 | )
13 |
14 | if docker_image_pushed ${IMAGE_TAGS[0]}; then
15 | echo "${IMAGE_TAGS[0]} already in registry. Skip"
16 | exit 0
17 | fi
18 |
19 | DOCKER_EXTRA_ARGS=()
20 | for (( i=0; i<${#IMAGE_TAGS[@]}; i++ )); do
21 | DOCKER_EXTRA_ARGS+=("-t" "${IMAGE_TAGS[$i]}")
22 | done
23 |
24 | mkdir -p ./logs
25 | docker buildx build "${DOCKER_EXTRA_ARGS[@]}" --push \
26 | --build-arg BASE_PYTORCH_IMAGE=$COMFYUI_TORCH_IMAGE:${COMFYUI_PYTORCH_VERSION}-rocm-${COMFYUI_ROCM_VERSION} \
27 | --progress=plain --target final -f ./comfyui.Dockerfile ./submodules/ComfyUI 2>&1 | tee ./logs/build_$(date +%Y%m%d%H%M%S).log
28 |
--------------------------------------------------------------------------------
/comfyui/comfyui.Dockerfile:
--------------------------------------------------------------------------------
1 | ARG BASE_PYTORCH_IMAGE=docker.io/mixa3607/pytorch-gfx906:v2.7.1-rocm-6.3.3
2 |
3 | FROM ${BASE_PYTORCH_IMAGE} AS final
4 | WORKDIR /comfyui
5 | COPY ./requirements.txt ./requirements.txt
6 | RUN sed -i 's|torchaudio||g' requirements.txt && pip install -r requirements.txt
7 | COPY ./ ./
8 |
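9 | # Built by ./build-and-push.comfyui.sh; a standalone build sketch (illustrative, arg values as in that script):
10 | #   docker buildx build --build-arg BASE_PYTORCH_IMAGE=docker.io/mixa3607/pytorch-gfx906:v2.7.1-rocm-6.4.4 \
11 | #     --target final -f ./comfyui.Dockerfile ./submodules/ComfyUI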
--------------------------------------------------------------------------------
/comfyui/env.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | pushd $(dirname ${BASH_SOURCE[0]})
4 |
5 | if [ "$COMFYUI_IMAGE" == "" ]; then
6 | COMFYUI_IMAGE=docker.io/mixa3607/comfyui-gfx906
7 | fi
8 |
9 | if [ "$COMFYUI_TORCH_IMAGE" == "" ]; then
10 | COMFYUI_TORCH_IMAGE="docker.io/mixa3607/pytorch-gfx906"
11 | fi
12 | if [ "$COMFYUI_ROCM_VERSION" == "" ]; then
13 | COMFYUI_ROCM_VERSION="6.3.3"
14 | fi
15 | if [ "$COMFYUI_PYTORCH_VERSION" == "" ]; then
16 | COMFYUI_PYTORCH_VERSION="v2.7.1"
17 | fi
18 |
19 | if [ "$COMFYUI_GIT_REF" == "" ]; then
20 | COMFYUI_GIT_REF="$(git_get_current_tag submodules/ComfyUI)"
21 | fi
22 | if [ "$COMFYUI_GIT_REF" == "" ]; then
23 | COMFYUI_GIT_REF="$(git_get_current_sha submodules/ComfyUI)"
24 | fi
25 |
26 | popd
27 |
--------------------------------------------------------------------------------
/comfyui/preset.rocm-6.3.3.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | export COMFYUI_ROCM_VERSION="6.3.3"
4 | export COMFYUI_PYTORCH_VERSION="v2.7.1"
5 |
--------------------------------------------------------------------------------
/comfyui/preset.rocm-6.4.4.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | export COMFYUI_ROCM_VERSION="6.4.4"
4 | export COMFYUI_PYTORCH_VERSION="v2.7.1"
5 |
--------------------------------------------------------------------------------
/comfyui/readme.md:
--------------------------------------------------------------------------------
1 | # ComfyUI GFX906
2 | The most powerful and modular diffusion model GUI, API and backend with a graph/nodes interface. https://github.com/comfyanonymous/ComfyUI
3 |
4 | Recommended image: `docker.io/mixa3607/comfyui-gfx906:latest-rocm-6.4.4`
5 |
6 | ## Benchmarks
7 | | tag | rocm | comfy | pytorch | preset | batch | exec time (sec) |
8 | |------------------------------------------------------|-------|---------|---------|--------|-------|-----------------|
9 | | v0.3.63-torch-v2.7.1-rocm-6.4.4-patch-20251010004720 | 6.4.4 | v0.3.63 | v2.7.1 | SDXL | 1 | 33 |
10 | | v0.3.63-torch-v2.7.1-rocm-6.4.4-patch-20251010004720 | 6.4.4 | v0.3.63 | v2.7.1 | SDXL | 2 | 65 |
11 | | v0.3.63-torch-v2.7.1-rocm-6.4.4-patch-20251010004720 | 6.4.4 | v0.3.63 | v2.7.1 | SD 1.5 | 1 | 3.8 |
12 | | v0.3.63-torch-v2.7.1-rocm-6.4.4-patch-20251010004720 | 6.4.4 | v0.3.63 | v2.7.1 | SD 1.5 | 2 | 7 |
13 | | v0.3.63-torch-v2.7.1-rocm-6.3.3-patch-20251010004720 | 6.3.3 | v0.3.63 | v2.7.1 | SDXL | 1 | 33 |
14 | | v0.3.63-torch-v2.7.1-rocm-6.3.3-patch-20251010004720 | 6.3.3 | v0.3.63 | v2.7.1 | SDXL | 2 | 65 |
15 | | v0.3.63-torch-v2.7.1-rocm-6.3.3-patch-20251010004720 | 6.3.3 | v0.3.63 | v2.7.1 | SD 1.5 | 1 | 3.8 |
16 | | v0.3.63-torch-v2.7.1-rocm-6.3.3-patch-20251010004720 | 6.3.3 | v0.3.63 | v2.7.1 | SD 1.5 | 2 | 7 |
17 |
18 | ## Run
19 | ### Docker
20 | See https://github.com/hartmark/sd-rocm/blob/main/docker-compose.yml
21 |
22 | ### Kubernetes
23 | Helm chart and samples: [mixa3607 charts](https://github.com/mixa3607/charts)
24 |
25 | ## Build
26 | See build vars in `./env.sh`. You may also use the presets `./preset.rocm-*.sh`. Then run `./build-and-push.comfyui.sh`:
27 | ```bash
28 | $ . preset.rocm-6.4.4.sh
29 | $ ./build-and-push.comfyui.sh
30 | ~/REPOS/mixa3607/llama.cpp-gfx906/rocm ~/REPOS/mixa3607/llama.cpp-gfx906/rocm
31 | ~/REPOS/mixa3607/llama.cpp-gfx906/rocm
32 | ~/REPOS/mixa3607/llama.cpp-gfx906/llama.cpp ~/REPOS/mixa3607/llama.cpp-gfx906/rocm
33 | ~/REPOS/mixa3607/llama.cpp-gfx906/rocm
34 | ~/REPOS/mixa3607/llama.cpp-gfx906/comfyui ~/REPOS/mixa3607/llama.cpp-gfx906/rocm
35 | ~/REPOS/mixa3607/llama.cpp-gfx906/rocm
36 | ~/REPOS/mixa3607/llama.cpp-gfx906/vllm ~/REPOS/mixa3607/llama.cpp-gfx906/rocm
37 | ~/REPOS/mixa3607/llama.cpp-gfx906/rocm
38 | #0 building with "remote" instance using remote driver
39 | #...............
40 | #14 DONE 583.8s
41 | ```
42 |
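43 | For reference, a minimal `docker run` sketch for the Docker option above (assumptions: the image keeps ComfyUI's stock `main.py` entrypoint in `/comfyui` and its default port 8188; the linked compose file remains the canonical reference):
44 | ```bash
45 | docker run -d --name comfyui \
46 |   --device /dev/kfd --device /dev/dri --group-add video \
47 |   -p 8188:8188 \
48 |   docker.io/mixa3607/comfyui-gfx906:latest-rocm-6.4.4 \
49 |   python main.py --listen 0.0.0.0
50 | ```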
--------------------------------------------------------------------------------
/docs/images/temperatures.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mixa3607/ML-gfx906/d95fce7ed5e14ec9cc7b801c668696194a929cda/docs/images/temperatures.png
--------------------------------------------------------------------------------
/env.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | function docker_image_pushed {
4 | if docker buildx imagetools inspect "$1" > /dev/null 2> /dev/null; then
5 | return 0
6 | else
7 | return 1
8 | fi
9 | }
10 |
11 | function git_get_current_tag {
12 | if [ "$1" != "" ]; then pushd "$1" > /dev/null; fi
13 | git tag --points-at HEAD | sed 's|+||g'
14 | if [ "$1" != "" ]; then popd > /dev/null; fi
15 | }
16 |
17 | function git_get_origin {
18 | if [ "$1" != "" ]; then pushd "$1" > /dev/null; fi
19 | git config --get remote.origin.url
20 | if [ "$1" != "" ]; then popd > /dev/null; fi
21 | }
22 |
23 |
24 | function git_get_current_sha {
25 | if [ "$1" != "" ]; then pushd "$1" > /dev/null; fi
26 | git rev-parse --short HEAD
27 | if [ "$1" != "" ]; then popd > /dev/null; fi
28 | }
29 |
30 | if [ "$REPO_GIT_REF" == "" ]; then
31 | REPO_GIT_REF="$(git_get_current_tag)"
32 | fi
33 | if [ "$REPO_GIT_REF" == "" ]; then
34 | REPO_GIT_REF="$(git_get_current_sha)"
35 | fi
36 |
37 | if [ "$BASE_UBUNTU_REGISTRY" == "" ]; then
38 | BASE_UBUNTU_REGISTRY=docker.io/library
39 | fi
40 |
41 | source $(dirname ${BASH_SOURCE[0]})/rocm/env.sh
42 | source $(dirname ${BASH_SOURCE[0]})/llama.cpp/env.sh
43 | source $(dirname ${BASH_SOURCE[0]})/comfyui/env.sh
44 | source $(dirname ${BASH_SOURCE[0]})/vllm/env.sh
45 | source $(dirname ${BASH_SOURCE[0]})/pytorch/env.sh
46 |
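47 | # Usage sketch (hypothetical session; these helpers are consumed by the build scripts):
48 | #   source ./env.sh
49 | #   docker_image_pushed docker.io/mixa3607/llama.cpp-gfx906:full-rocm-7.0.0 && echo "already pushed"
50 | #   git_get_current_tag llama.cpp/submodules/llama.cpp   # empty if HEAD is not tagged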
--------------------------------------------------------------------------------
/llama.cpp/build-and-push.rocm.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | set -e
3 |
4 | cd "$(dirname "$0")"
5 | source ../env.sh
6 |
7 | IMAGE_TAGS=(
8 | "$LLAMA_IMAGE:full-${LLAMA_GIT_REF}-rocm-${LLAMA_ROCM_VERSION}-patch-${REPO_GIT_REF}"
9 | "$LLAMA_IMAGE:full-${LLAMA_GIT_REF}-rocm-${LLAMA_ROCM_VERSION}"
10 | "$LLAMA_IMAGE:full-rocm-${LLAMA_ROCM_VERSION}"
11 | )
12 |
13 | if docker_image_pushed ${IMAGE_TAGS[0]}; then
14 | echo "${IMAGE_TAGS[0]} already in registry. Skip"
15 | exit 0
16 | fi
17 |
18 | DOCKER_EXTRA_ARGS=()
19 | for (( i=0; i<${#IMAGE_TAGS[@]}; i++ )); do
20 | DOCKER_EXTRA_ARGS+=("-t" "${IMAGE_TAGS[$i]}")
21 | done
22 |
23 | mkdir -p ./logs
24 | docker buildx build "${DOCKER_EXTRA_ARGS[@]}" --push \
25 | --build-arg BASE_ROCM_DEV_CONTAINER=$PATCHED_ROCM_IMAGE:${LLAMA_ROCM_VERSION}-complete \
26 | --build-arg ROCM_DOCKER_ARCH=$ROCM_ARCH \
27 | --build-arg ROCM_VERSION=$LLAMA_ROCM_VERSION \
28 | --build-arg AMDGPU_VERSION=$LLAMA_ROCM_VERSION \
29 | --progress=plain --target full -f ./submodules/llama.cpp/.devops/rocm.Dockerfile ./submodules/llama.cpp 2>&1 | tee ./logs/build_$(date +%Y%m%d%H%M%S).log
30 |
--------------------------------------------------------------------------------
/llama.cpp/build-and-push.vulkan.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | set -e
3 |
4 | cd "$(dirname "$0")"
5 | source ../env.sh
6 |
7 | IMAGE_TAGS=(
8 | "$PATCHED_LLAMA_IMAGE:full-${LLAMA_GIT_REF}-vulkan-patch-${REPO_GIT_REF}"
9 | "$PATCHED_LLAMA_IMAGE:full-${LLAMA_GIT_REF}-vulkan"
10 | )
11 |
12 | if docker_image_pushed ${IMAGE_TAGS[0]}; then
13 | echo "${IMAGE_TAGS[0]} already in registry. Skip"
14 | exit 0
15 | fi
16 |
17 | DOCKER_EXTRA_ARGS=()
18 | for (( i=0; i<${#IMAGE_TAGS[@]}; i++ )); do
19 | DOCKER_EXTRA_ARGS+=("-t" "${IMAGE_TAGS[$i]}")
20 | done
21 |
22 | mkdir -p ./logs
23 | docker buildx build "${DOCKER_EXTRA_ARGS[@]}" --push \
24 | --build-arg UBUNTU_VERSION="24.04" \
25 | --progress=plain --target full -f ./submodules/llama.cpp/.devops/vulkan.Dockerfile ./submodules/llama.cpp 2>&1 | tee ./logs/build_$(date +%Y%m%d%H%M%S).log
26 |
--------------------------------------------------------------------------------
/llama.cpp/env.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | pushd $(dirname ${BASH_SOURCE[0]})
4 |
5 | if [ "$LLAMA_IMAGE" == "" ]; then
6 | LLAMA_IMAGE=docker.io/mixa3607/llama.cpp-gfx906
7 | fi
8 |
9 | # rocm ver
10 | if [ "$LLAMA_ROCM_VERSION" == "" ]; then
11 | LLAMA_ROCM_VERSION=7.0.0
12 | fi
13 |
14 | if [ "$LLAMA_GIT_REF" == "" ]; then
15 | LLAMA_GIT_REF="$(git_get_current_tag submodules/llama.cpp)"
16 | fi
17 | if [ "$LLAMA_GIT_REF" == "" ]; then
18 | LLAMA_GIT_REF="$(git_get_current_sha submodules/llama.cpp)"
19 | fi
20 |
21 | popd
22 |
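23 | # Values above are defaulted only when empty, so a preset (expected to export the
24 | # LLAMA_* vars) can override them first, e.g.:
25 | #   . ./preset.rocm-6.4.4.sh && . ./env.sh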
--------------------------------------------------------------------------------
/llama.cpp/llamacpp-offload-calculator/.gitattributes:
--------------------------------------------------------------------------------
1 | ###############################################################################
2 | # Set default behavior to automatically normalize line endings.
3 | ###############################################################################
4 | * text=auto
5 |
6 | ###############################################################################
7 | # Set default behavior for command prompt diff.
8 | #
9 | # This is need for earlier builds of msysgit that does not have it on by
10 | # default for csharp files.
11 | # Note: This is only used by command line
12 | ###############################################################################
13 | #*.cs diff=csharp
14 |
15 | ###############################################################################
16 | # Set the merge driver for project and solution files
17 | #
18 | # Merging from the command prompt will add diff markers to the files if there
19 | # are conflicts (Merging from VS is not affected by the settings below, in VS
20 | # the diff markers are never inserted). Diff markers may cause the following
21 | # file extensions to fail to load in VS. An alternative would be to treat
22 | # these files as binary and thus will always conflict and require user
23 | # intervention with every merge. To do so, just uncomment the entries below
24 | ###############################################################################
25 | #*.sln merge=binary
26 | #*.csproj merge=binary
27 | #*.vbproj merge=binary
28 | #*.vcxproj merge=binary
29 | #*.vcproj merge=binary
30 | #*.dbproj merge=binary
31 | #*.fsproj merge=binary
32 | #*.lsproj merge=binary
33 | #*.wixproj merge=binary
34 | #*.modelproj merge=binary
35 | #*.sqlproj merge=binary
36 | #*.wwaproj merge=binary
37 |
38 | ###############################################################################
39 | # behavior for image files
40 | #
41 | # image files are treated as binary by default.
42 | ###############################################################################
43 | #*.jpg binary
44 | #*.png binary
45 | #*.gif binary
46 |
47 | ###############################################################################
48 | # diff behavior for common document formats
49 | #
50 | # Convert binary document formats to text before diffing them. This feature
51 | # is only available from the command line. Turn it on by uncommenting the
52 | # entries below.
53 | ###############################################################################
54 | #*.doc diff=astextplain
55 | #*.DOC diff=astextplain
56 | #*.docx diff=astextplain
57 | #*.DOCX diff=astextplain
58 | #*.dot diff=astextplain
59 | #*.DOT diff=astextplain
60 | #*.pdf diff=astextplain
61 | #*.PDF diff=astextplain
62 | #*.rtf diff=astextplain
63 | #*.RTF diff=astextplain
64 |
--------------------------------------------------------------------------------
/llama.cpp/llamacpp-offload-calculator/.gitignore:
--------------------------------------------------------------------------------
1 | ## Ignore Visual Studio temporary files, build results, and
2 | ## files generated by popular Visual Studio add-ons.
3 | ##
4 | ## Get latest from https://github.com/github/gitignore/blob/master/VisualStudio.gitignore
5 |
6 | # User-specific files
7 | *.rsuser
8 | *.suo
9 | *.user
10 | *.userosscache
11 | *.sln.docstates
12 |
13 | # User-specific files (MonoDevelop/Xamarin Studio)
14 | *.userprefs
15 |
16 | # Mono auto generated files
17 | mono_crash.*
18 |
19 | # Build results
20 | [Dd]ebug/
21 | [Dd]ebugPublic/
22 | [Rr]elease/
23 | [Rr]eleases/
24 | x64/
25 | x86/
26 | [Ww][Ii][Nn]32/
27 | [Aa][Rr][Mm]/
28 | [Aa][Rr][Mm]64/
29 | bld/
30 | [Bb]in/
31 | [Oo]bj/
32 | [Oo]ut/
33 | [Ll]og/
34 | [Ll]ogs/
35 |
36 | # Visual Studio 2015/2017 cache/options directory
37 | .vs/
38 | # Uncomment if you have tasks that create the project's static files in wwwroot
39 | #wwwroot/
40 |
41 | # Visual Studio 2017 auto generated files
42 | Generated\ Files/
43 |
44 | # MSTest test Results
45 | [Tt]est[Rr]esult*/
46 | [Bb]uild[Ll]og.*
47 |
48 | # NUnit
49 | *.VisualState.xml
50 | TestResult.xml
51 | nunit-*.xml
52 |
53 | # Build Results of an ATL Project
54 | [Dd]ebugPS/
55 | [Rr]eleasePS/
56 | dlldata.c
57 |
58 | # Benchmark Results
59 | BenchmarkDotNet.Artifacts/
60 |
61 | # .NET Core
62 | project.lock.json
63 | project.fragment.lock.json
64 | artifacts/
65 |
66 | # ASP.NET Scaffolding
67 | ScaffoldingReadMe.txt
68 |
69 | # StyleCop
70 | StyleCopReport.xml
71 |
72 | # Files built by Visual Studio
73 | *_i.c
74 | *_p.c
75 | *_h.h
76 | *.ilk
77 | *.meta
78 | *.obj
79 | *.iobj
80 | *.pch
81 | *.pdb
82 | *.ipdb
83 | *.pgc
84 | *.pgd
85 | *.rsp
86 | *.sbr
87 | *.tlb
88 | *.tli
89 | *.tlh
90 | *.tmp
91 | *.tmp_proj
92 | *_wpftmp.csproj
93 | *.log
94 | *.vspscc
95 | *.vssscc
96 | .builds
97 | *.pidb
98 | *.svclog
99 | *.scc
100 |
101 | # Chutzpah Test files
102 | _Chutzpah*
103 |
104 | # Visual C++ cache files
105 | ipch/
106 | *.aps
107 | *.ncb
108 | *.opendb
109 | *.opensdf
110 | *.sdf
111 | *.cachefile
112 | *.VC.db
113 | *.VC.VC.opendb
114 |
115 | # Visual Studio profiler
116 | *.psess
117 | *.vsp
118 | *.vspx
119 | *.sap
120 |
121 | # Visual Studio Trace Files
122 | *.e2e
123 |
124 | # TFS 2012 Local Workspace
125 | $tf/
126 |
127 | # Guidance Automation Toolkit
128 | *.gpState
129 |
130 | # ReSharper is a .NET coding add-in
131 | _ReSharper*/
132 | *.[Rr]e[Ss]harper
133 | *.DotSettings.user
134 |
135 | # TeamCity is a build add-in
136 | _TeamCity*
137 |
138 | # DotCover is a Code Coverage Tool
139 | *.dotCover
140 |
141 | # AxoCover is a Code Coverage Tool
142 | .axoCover/*
143 | !.axoCover/settings.json
144 |
145 | # Coverlet is a free, cross platform Code Coverage Tool
146 | coverage*.json
147 | coverage*.xml
148 | coverage*.info
149 |
150 | # Visual Studio code coverage results
151 | *.coverage
152 | *.coveragexml
153 |
154 | # NCrunch
155 | _NCrunch_*
156 | .*crunch*.local.xml
157 | nCrunchTemp_*
158 |
159 | # MightyMoose
160 | *.mm.*
161 | AutoTest.Net/
162 |
163 | # Web workbench (sass)
164 | .sass-cache/
165 |
166 | # Installshield output folder
167 | [Ee]xpress/
168 |
169 | # DocProject is a documentation generator add-in
170 | DocProject/buildhelp/
171 | DocProject/Help/*.HxT
172 | DocProject/Help/*.HxC
173 | DocProject/Help/*.hhc
174 | DocProject/Help/*.hhk
175 | DocProject/Help/*.hhp
176 | DocProject/Help/Html2
177 | DocProject/Help/html
178 |
179 | # Click-Once directory
180 | publish/
181 |
182 | # Publish Web Output
183 | *.[Pp]ublish.xml
184 | *.azurePubxml
185 | # Note: Comment the next line if you want to checkin your web deploy settings,
186 | # but database connection strings (with potential passwords) will be unencrypted
187 | *.pubxml
188 | *.publishproj
189 |
190 | # Microsoft Azure Web App publish settings. Comment the next line if you want to
191 | # checkin your Azure Web App publish settings, but sensitive information contained
192 | # in these scripts will be unencrypted
193 | PublishScripts/
194 |
195 | # NuGet Packages
196 | *.nupkg
197 | # NuGet Symbol Packages
198 | *.snupkg
199 | # The packages folder can be ignored because of Package Restore
200 | **/[Pp]ackages/*
201 | # except build/, which is used as an MSBuild target.
202 | !**/[Pp]ackages/build/
203 | # Uncomment if necessary however generally it will be regenerated when needed
204 | #!**/[Pp]ackages/repositories.config
205 | # NuGet v3's project.json files produces more ignorable files
206 | *.nuget.props
207 | *.nuget.targets
208 |
209 | # Microsoft Azure Build Output
210 | csx/
211 | *.build.csdef
212 |
213 | # Microsoft Azure Emulator
214 | ecf/
215 | rcf/
216 |
217 | # Windows Store app package directories and files
218 | AppPackages/
219 | BundleArtifacts/
220 | Package.StoreAssociation.xml
221 | _pkginfo.txt
222 | *.appx
223 | *.appxbundle
224 | *.appxupload
225 |
226 | # Visual Studio cache files
227 | # files ending in .cache can be ignored
228 | *.[Cc]ache
229 | # but keep track of directories ending in .cache
230 | !?*.[Cc]ache/
231 |
232 | # Others
233 | ClientBin/
234 | ~$*
235 | *~
236 | *.dbmdl
237 | *.dbproj.schemaview
238 | *.jfm
239 | *.pfx
240 | *.publishsettings
241 | orleans.codegen.cs
242 |
243 | # Including strong name files can present a security risk
244 | # (https://github.com/github/gitignore/pull/2483#issue-259490424)
245 | #*.snk
246 |
247 | # Since there are multiple workflows, uncomment next line to ignore bower_components
248 | # (https://github.com/github/gitignore/pull/1529#issuecomment-104372622)
249 | #bower_components/
250 |
251 | # RIA/Silverlight projects
252 | Generated_Code/
253 |
254 | # Backup & report files from converting an old project file
255 | # to a newer Visual Studio version. Backup files are not needed,
256 | # because we have git ;-)
257 | _UpgradeReport_Files/
258 | Backup*/
259 | UpgradeLog*.XML
260 | UpgradeLog*.htm
261 | ServiceFabricBackup/
262 | *.rptproj.bak
263 |
264 | # SQL Server files
265 | *.mdf
266 | *.ldf
267 | *.ndf
268 |
269 | # Business Intelligence projects
270 | *.rdl.data
271 | *.bim.layout
272 | *.bim_*.settings
273 | *.rptproj.rsuser
274 | *- [Bb]ackup.rdl
275 | *- [Bb]ackup ([0-9]).rdl
276 | *- [Bb]ackup ([0-9][0-9]).rdl
277 |
278 | # Microsoft Fakes
279 | FakesAssemblies/
280 |
281 | # GhostDoc plugin setting file
282 | *.GhostDoc.xml
283 |
284 | # Node.js Tools for Visual Studio
285 | .ntvs_analysis.dat
286 | node_modules/
287 |
288 | # Visual Studio 6 build log
289 | *.plg
290 |
291 | # Visual Studio 6 workspace options file
292 | *.opt
293 |
294 | # Visual Studio 6 auto-generated workspace file (contains which files were open etc.)
295 | *.vbw
296 |
297 | # Visual Studio LightSwitch build output
298 | **/*.HTMLClient/GeneratedArtifacts
299 | **/*.DesktopClient/GeneratedArtifacts
300 | **/*.DesktopClient/ModelManifest.xml
301 | **/*.Server/GeneratedArtifacts
302 | **/*.Server/ModelManifest.xml
303 | _Pvt_Extensions
304 |
305 | # Paket dependency manager
306 | .paket/paket.exe
307 | paket-files/
308 |
309 | # FAKE - F# Make
310 | .fake/
311 |
312 | # CodeRush personal settings
313 | .cr/personal
314 |
315 | # Python Tools for Visual Studio (PTVS)
316 | __pycache__/
317 | *.pyc
318 |
319 | # Cake - Uncomment if you are using it
320 | # tools/**
321 | # !tools/packages.config
322 |
323 | # Tabs Studio
324 | *.tss
325 |
326 | # Telerik's JustMock configuration file
327 | *.jmconfig
328 |
329 | # BizTalk build output
330 | *.btp.cs
331 | *.btm.cs
332 | *.odx.cs
333 | *.xsd.cs
334 |
335 | # OpenCover UI analysis results
336 | OpenCover/
337 |
338 | # Azure Stream Analytics local run output
339 | ASALocalRun/
340 |
341 | # MSBuild Binary and Structured Log
342 | *.binlog
343 |
344 | # NVidia Nsight GPU debugger configuration file
345 | *.nvuser
346 |
347 | # MFractors (Xamarin productivity tool) working folder
348 | .mfractor/
349 |
350 | # Local History for Visual Studio
351 | .localhistory/
352 |
353 | # BeatPulse healthcheck temp database
354 | healthchecksdb
355 |
356 | # Backup folder for Package Reference Convert tool in Visual Studio 2017
357 | MigrationBackup/
358 |
359 | # Ionide (cross platform F# VS Code tools) working folder
360 | .ionide/
361 |
362 | # Fody - auto-generated XML schema
363 | FodyWeavers.xsd
--------------------------------------------------------------------------------
/llama.cpp/llamacpp-offload-calculator/ArkProjects.LlamaOffloadCalc.sln:
--------------------------------------------------------------------------------
1 |
2 | Microsoft Visual Studio Solution File, Format Version 12.00
3 | # Visual Studio Version 17
4 | VisualStudioVersion = 17.13.35931.197
5 | MinimumVisualStudioVersion = 10.0.40219.1
6 | Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "ArkProjects.LlamaOffloadCalc", "ArkProjects.LlamaOffloadCalc\ArkProjects.LlamaOffloadCalc.csproj", "{AB281ECC-61B1-4575-B34D-F14DEB3814FD}"
7 | EndProject
8 | Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "GGUFSharp", "GGUFSharp\src\GGUFSharp\GGUFSharp\GGUFSharp.csproj", "{A3A53FDD-DA1B-68F2-CAB6-5800C258B19E}"
9 | EndProject
10 | Global
11 | GlobalSection(SolutionConfigurationPlatforms) = preSolution
12 | Debug|Any CPU = Debug|Any CPU
13 | Release|Any CPU = Release|Any CPU
14 | EndGlobalSection
15 | GlobalSection(ProjectConfigurationPlatforms) = postSolution
16 | {AB281ECC-61B1-4575-B34D-F14DEB3814FD}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
17 | {AB281ECC-61B1-4575-B34D-F14DEB3814FD}.Debug|Any CPU.Build.0 = Debug|Any CPU
18 | {AB281ECC-61B1-4575-B34D-F14DEB3814FD}.Release|Any CPU.ActiveCfg = Release|Any CPU
19 | {AB281ECC-61B1-4575-B34D-F14DEB3814FD}.Release|Any CPU.Build.0 = Release|Any CPU
20 | {A3A53FDD-DA1B-68F2-CAB6-5800C258B19E}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
21 | {A3A53FDD-DA1B-68F2-CAB6-5800C258B19E}.Debug|Any CPU.Build.0 = Debug|Any CPU
22 | {A3A53FDD-DA1B-68F2-CAB6-5800C258B19E}.Release|Any CPU.ActiveCfg = Release|Any CPU
23 | {A3A53FDD-DA1B-68F2-CAB6-5800C258B19E}.Release|Any CPU.Build.0 = Release|Any CPU
24 | EndGlobalSection
25 | GlobalSection(SolutionProperties) = preSolution
26 | HideSolutionNode = FALSE
27 | EndGlobalSection
28 | GlobalSection(ExtensibilityGlobals) = postSolution
29 | SolutionGuid = {C06FD4ED-87AF-4232-9D9E-D5D6EA618808}
30 | EndGlobalSection
31 | EndGlobal
32 |
--------------------------------------------------------------------------------
/llama.cpp/llamacpp-offload-calculator/ArkProjects.LlamaOffloadCalc/ArkProjects.LlamaOffloadCalc.csproj:
--------------------------------------------------------------------------------
1 | <Project Sdk="Microsoft.NET.Sdk">
2 |
3 | <PropertyGroup>
4 | <OutputType>Exe</OutputType>
5 | <TargetFramework>net8.0</TargetFramework>
6 | <ImplicitUsings>enable</ImplicitUsings>
7 | <Nullable>enable</Nullable>
8 | </PropertyGroup>
9 |
10 | <ItemGroup>
11 | <!-- NuGet package references were stripped from this dump. Program.cs needs at least
12 | FluentValidation, the Microsoft.Extensions.Configuration Json/EnvironmentVariables/
13 | CommandLine/Binder providers, and a YAML configuration provider. -->
14 | </ItemGroup>
15 |
16 | <ItemGroup>
17 | <ProjectReference Include="..\GGUFSharp\src\GGUFSharp\GGUFSharp\GGUFSharp.csproj" />
18 | </ItemGroup>
19 |
20 | <!-- Reconstructed: copy the settings files next to the binary (original globs were lost). -->
21 | <ItemGroup>
22 | <None Update="appsettings.yaml">
23 | <CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
24 | </None>
25 | <None Update="appsettings.*.yaml">
26 | <CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
27 | </None>
28 | </ItemGroup>
29 |
30 | </Project>
--------------------------------------------------------------------------------
/llama.cpp/llamacpp-offload-calculator/ArkProjects.LlamaOffloadCalc/LLamaDevice.cs:
--------------------------------------------------------------------------------
1 | namespace ArkProjects.LlmCalc;
2 |
3 | public class LLamaDevice
4 | {
5 | public required LLamaDeviceType Type { get; set; }
6 | public required string Name { get; set; }
7 | public string PciBus { get; set; } = "";
8 |
9 | public required long TotalSize { get; set; }
10 | public long ReservedMemory { get; set; }
11 |
12 | public List<TensorMetadata> Tensors { get; set; } = [];
13 | public List<int> Layers { get; set; } = [];
14 | public double LayersPortion { get; set; } = 0;
15 |
16 | public long GetUsedSpace() => ReservedMemory + Tensors.Aggregate(0L, (current, tensor) => current + tensor.Size);
17 |
18 | public long GetFreeSpace() => TotalSize - GetUsedSpace();
19 | }
--------------------------------------------------------------------------------
/llama.cpp/llamacpp-offload-calculator/ArkProjects.LlamaOffloadCalc/LLamaDeviceType.cs:
--------------------------------------------------------------------------------
1 | namespace ArkProjects.LlmCalc;
2 |
3 | public enum LLamaDeviceType
4 | {
5 | Unknown,
6 | GPU,
7 | CPU,
8 | }
--------------------------------------------------------------------------------
/llama.cpp/llamacpp-offload-calculator/ArkProjects.LlamaOffloadCalc/LLamaGgufMetadataExtractor.cs:
--------------------------------------------------------------------------------
1 | using System.Text.RegularExpressions;
2 | using GGUFSharp;
3 |
4 | namespace ArkProjects.LlmCalc;
5 |
6 | public class LLamaGgufMetadataExtractor
7 | {
8 | private readonly string _ggufModelPath;
9 | private readonly Regex _nameMatchRegex = new Regex(@"(?<name>^.+)-(?<part>\d{5})-of-(?<total>\d{5})\.gguf");
10 |
11 | public LLamaGgufMetadataExtractor(string ggufModelPath)
12 | {
13 | _ggufModelPath = ggufModelPath;
14 | }
15 |
16 | public List<TensorMetadata> ExtractMetadata()
17 | {
18 | var reader = new GGUFReader();
19 | var tensorInfos = new List<TensorMetadata>();
20 | var fileName = Path.GetFileName(_ggufModelPath);
21 | var match = _nameMatchRegex.Match(fileName);
22 | if (!match.Success)
23 | {
24 | var file = _ggufModelPath;
25 | Console.WriteLine($"Reading {file}");
26 | var f = reader.Read(file);
27 | tensorInfos.AddRange(f.TensorInfos.Select(t => new TensorMetadata(t)));
28 | }
29 | else
30 | {
31 | var totalParts = int.Parse(match.Groups["total"].Value);
32 | var name = match.Groups["name"].Value;
33 |
34 | for (int i = 1; i <= totalParts; i++)
35 | {
36 | var file = Path.Combine(Path.GetDirectoryName(_ggufModelPath)!,
37 | $"{name}-{i:D5}-of-{totalParts:D5}.gguf");
38 | Console.WriteLine($"Reading {file}");
39 | var f = reader.Read(file);
40 | tensorInfos.AddRange(f.TensorInfos.Select(t => new TensorMetadata(t)));
41 | }
42 | }
43 |
44 | return tensorInfos;
45 | }
46 | }
47 |
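48 | // Note: handles both single-file models and multi-part models named like
49 | // "GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003.gguf"; all parts are read and merged.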
--------------------------------------------------------------------------------
/llama.cpp/llamacpp-offload-calculator/ArkProjects.LlamaOffloadCalc/LLamaLogsParser.cs:
--------------------------------------------------------------------------------
1 | using System.Text.RegularExpressions;
2 |
3 | namespace ArkProjects.LlmCalc;
4 |
5 | public class LLamaLogsParser
6 | {
7 | private readonly string _filePath;
8 |
9 | public LLamaLogsParser(string filePath)
10 | {
11 | _filePath = filePath;
12 | }
13 |
14 | public Dictionary<string, List<int>> ExtractAssignedLayers()
15 | {
16 | var regex = new Regex(@"load_tensors: layer +(?<layer>\d+) assigned to device (?<device>\S+), ");
17 | var result = new Dictionary<string, List<int>>();
18 |
19 | var start = -1;
20 | var lines = File.ReadAllLines(_filePath);
21 | for (var i = 0; i < lines.Length; i++)
22 | {
23 | var line = lines[i];
24 | var match = regex.Match(line);
25 | if (start < 0 && match.Success)
26 | {
27 | start = i;
28 | }
29 |
30 | if (match.Success)
31 | {
32 | result.TryAdd(match.Groups["device"].Value, new List<int>());
33 | result[match.Groups["device"].Value].Add(int.Parse(match.Groups["layer"].Value));
34 | }
35 |
36 |
37 | if (start >= 0 && !match.Success)
38 | {
39 | break;
40 | }
41 | }
42 |
43 | return result;
44 | }
45 |
46 | public Dictionary<string, List<string>> ExtractAssignedTensors()
47 | {
48 | var regex = new Regex(@"tensor (?<tensor>\S+) \(.+\) buffer type overridden to (?<device>\S+)");
49 | var result = new Dictionary<string, List<string>>();
50 |
51 | var start = -1;
52 | var lines = File.ReadAllLines(_filePath);
53 | for (var i = 0; i < lines.Length; i++)
54 | {
55 | var line = lines[i];
56 | var match = regex.Match(line);
57 | if (start < 0 && match.Success)
58 | {
59 | start = i;
60 | }
61 |
62 | if (match.Success)
63 | {
64 | result.TryAdd(match.Groups["device"].Value, new List<string>());
65 | result[match.Groups["device"].Value].Add(match.Groups["tensor"].Value);
66 | }
67 |
68 |
69 | if (start >= 0 && !match.Success)
70 | {
71 | break;
72 | }
73 | }
74 |
75 | return result;
76 | }
77 | }
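78 |
79 | // The regexes above target llama.cpp server log lines shaped like (illustrative):
80 | //   load_tensors: layer 12 assigned to device ROCm0, ...
81 | //   tensor blk.12.ffn_up_exps.weight (...) buffer type overridden to CPU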
--------------------------------------------------------------------------------
/llama.cpp/llamacpp-offload-calculator/ArkProjects.LlamaOffloadCalc/Options/LLamaDeviceOptions.cs:
--------------------------------------------------------------------------------
1 | namespace ArkProjects.LlmCalc.Options;
2 |
3 | public class LLamaDeviceOptions
4 | {
5 | public required LLamaDeviceType Type { get; set; }
6 | public string PciBus { get; set; } = "";
7 |
8 | public required long TotalSizeMb { get; set; }
9 | public long ReservedMemoryMb { get; set; }
10 |
11 | public double LayersPortion { get; set; } = 0;
12 | public int Id { get; set; }
13 | }
--------------------------------------------------------------------------------
/llama.cpp/llamacpp-offload-calculator/ArkProjects.LlamaOffloadCalc/Options/OffloadCalculationOptions.cs:
--------------------------------------------------------------------------------
1 | namespace ArkProjects.LlmCalc.Options;
2 |
3 | public class OffloadCalculationOptions
4 | {
5 | public bool PrintTensorsSize { get; set; } = false;
6 | public bool PrintHelmChartConfig { get; set; } = false;
7 | public bool PrintCmdConfig { get; set; } = false;
8 | public required string GgufFile { get; set; }
9 | public required Dictionary<string, LLamaDeviceOptions> Devices { get; set; }
10 | public required Dictionary<string, TensorsOffloadRuleOptions> OffloadRules { get; set; }
11 | }
--------------------------------------------------------------------------------
/llama.cpp/llamacpp-offload-calculator/ArkProjects.LlamaOffloadCalc/Options/OffloadCalculationOptionsValidator.cs:
--------------------------------------------------------------------------------
1 | using FluentValidation;
2 |
3 | namespace ArkProjects.LlmCalc.Options;
4 |
5 | public class OffloadCalculationOptionsValidator : AbstractValidator<OffloadCalculationOptions>
6 | {
7 | public OffloadCalculationOptionsValidator()
8 | {
9 | RuleFor(x => x.GgufFile)
10 | .NotEmpty()
11 | .Must(x => File.Exists(x)).WithMessage("gguf file does not exist");
12 |
13 | RuleFor(x => x.Devices)
14 | .Must(x => x.GroupBy(y => y.Value.Id).All(y => y.Count() == 1))
15 | .WithMessage("Each device must have unique id");
16 | RuleFor(x => x.OffloadRules)
17 | .Must(x => x.GroupBy(y => y.Value.Id).All(y => y.Count() == 1))
18 | .WithMessage("Each offload rule must have unique id");
19 |
20 | RuleFor(x => x.Devices)
21 | .Must(x => x.Count(d => d.Value.Type == LLamaDeviceType.Unknown) == 0)
22 | .WithMessage("Type must be set for each device");
23 | RuleFor(x => x.Devices)
24 | .Must(x => x.Count(d => d.Value.Type == LLamaDeviceType.GPU) >= 1)
25 | .WithMessage("At least one GPU must be defined");
26 | RuleFor(x => x.Devices)
27 | .Must(x => x.Count(d => d.Value.Type == LLamaDeviceType.CPU) == 1)
28 | .WithMessage("Exactly one CPU device must be defined");
29 | RuleFor(x => x.OffloadRules)
30 | .Must(x => x.Count > 0)
31 | .WithMessage("At least one offload rule must be defined");
32 | }
33 | }
34 |
--------------------------------------------------------------------------------
/llama.cpp/llamacpp-offload-calculator/ArkProjects.LlamaOffloadCalc/Options/TensorsOffloadRuleOptions.cs:
--------------------------------------------------------------------------------
1 | namespace ArkProjects.LlmCalc.Options;
2 |
3 | public class TensorsOffloadRuleOptions
4 | {
5 | public required string Regex { get; set; }
6 | public int Id { get; set; }
7 | public int Priority { get; set; }
8 | }
--------------------------------------------------------------------------------
/llama.cpp/llamacpp-offload-calculator/ArkProjects.LlamaOffloadCalc/Program.cs:
--------------------------------------------------------------------------------
1 | using ArkProjects.LlmCalc;
2 | using ArkProjects.LlmCalc.Options;
3 | using FluentValidation;
4 | using Microsoft.Extensions.Configuration;
5 | using System.Text;
6 | using System.Text.RegularExpressions;
7 |
8 |
9 | var options = GetOptions(args);
10 | new OffloadCalculationOptionsValidator().ValidateAndThrow(options);
11 |
12 | var tensorsOffloadRules = options.OffloadRules
13 | .Select(x => (rule: x.Value, name: x.Key, regex: new Regex(x.Value.Regex), tensors: new List<TensorMetadata>()))
14 | .OrderBy(x => x.rule.Id)
15 | .ToArray();
16 |
17 | var devices = options.Devices
18 | .OrderBy(x => x.Value.Id)
19 | .Select(x => new LLamaDevice()
20 | {
21 | Name = x.Key,
22 | TotalSize = x.Value.TotalSizeMb * 1024 * 1024,
23 | ReservedMemory = x.Value.ReservedMemoryMb * 1024 * 1024,
24 | Type = x.Value.Type,
25 | LayersPortion = x.Value.LayersPortion,
26 | PciBus = x.Value.PciBus,
27 | Layers = new List<int>(),
28 | Tensors = new List<TensorMetadata>()
29 | })
30 | .ToList();
31 | {
32 | var gpus = devices.Where(x => x.Type == LLamaDeviceType.GPU).ToList();
33 | if (gpus.Sum(x => x.LayersPortion) == 0)
34 | {
35 | var gpuMem = gpus.Sum(x => x.TotalSize);
36 | foreach (var gpu in gpus)
37 | {
38 | gpu.LayersPortion = (double)gpu.TotalSize / gpuMem;
39 | }
40 | }
41 | }
42 |
43 | var tensorInfos = new LLamaGgufMetadataExtractor(options.GgufFile)
44 | .ExtractMetadata()
45 | .Where(x => x.BlkId != -1)
46 | .OrderBy(x => x.BlkId)
47 | .ThenBy(x => x.Name)
48 | .ToList();
49 |
50 | // split layers
51 | var assignedLayers = new Dictionary<string, List<int>>();
52 | {
53 | var layersCount = tensorInfos.Select(x => x.BlkId).Distinct().Count();
54 | var layerIds = tensorInfos.Select(x => x.BlkId).Distinct().OrderBy(x => x).ToList();
55 | var s = devices.Where(x => x.LayersPortion > 0).Sum(x => x.LayersPortion);
56 | foreach (var device in devices.OrderBy(x => x.LayersPortion))
57 | {
58 | if (device.LayersPortion <= 0)
59 | continue;
60 | var c = (int)(layersCount / s * device.LayersPortion);
61 | assignedLayers[device.Name] = layerIds.Take(c).ToList();
62 | layerIds = layerIds.Skip(c).ToList();
63 | }
64 |
65 | if (layerIds.Count > 0)
66 | {
67 | assignedLayers[devices.GroupBy(x => x.LayersPortion).MaxBy(x => x.Key)!.Last().Name].AddRange(layerIds);
68 | }
69 | }
70 |
71 | // split tensors
72 | foreach (var info in tensorInfos)
73 | {
74 | tensorsOffloadRules.FirstOrDefault(x => x.regex.IsMatch(info.Name)).tensors?.Add(info);
75 | }
76 |
77 | // apply layers
78 | {
79 | foreach (var assignedLayer in assignedLayers)
80 | {
81 | var device = devices.First(x => x.Name == assignedLayer.Key);
82 | device.Layers.AddRange(assignedLayer.Value);
83 | device.Tensors.AddRange(tensorInfos.Where(x => assignedLayer.Value.Contains(x.BlkId)));
84 | }
85 |
86 | if (!devices
87 | .SelectMany(x => x.Tensors)
88 | .OrderBy(x => x.Name)
89 | .SequenceEqual(tensorInfos.OrderBy(x => x.Name))
90 | )
91 | {
92 | throw new Exception("Layer assignment did not cover every tensor exactly once");
93 | }
94 | }
95 |
96 | // offload tensors
97 | foreach (var device in devices.Where(x => x.Type == LLamaDeviceType.GPU))
98 | {
99 | var dst = devices.First(x => x.Type == LLamaDeviceType.CPU);
100 | while (device.GetFreeSpace() < 0)
101 | {
102 | var t = tensorsOffloadRules
103 | .SelectMany(x => x.tensors.Select(y => (x.rule.Priority, y)))
104 | .OrderByDescending(x => x.Priority)
105 | .ThenBy(x => x.y.BlkId)
106 | .ThenBy(x => x.y.Name)
107 | .Select(x => x.y)
108 | .First(x => device.Tensors.Contains(x));
109 | Console.WriteLine($"Move {t.Name,-25} ({t.Size / 1024 / 1024} Mb) from {device.Name} to {dst.Name}");
110 | device.Tensors.Remove(t);
111 | dst.Tensors.Add(t);
112 | }
113 | }
114 |
115 |
116 | if (options.PrintTensorsSize)
117 | PrintTensorsSize(tensorInfos);
118 | PrintDevicesUtilization(devices);
119 | PrintTensorsOffloadResult();
120 | if (options.PrintHelmChartConfig)
121 | PrintHelmChartConfig(devices);
122 | if (options.PrintCmdConfig)
123 | PrintCmdConfig(devices);
124 |
125 |
126 | return;
127 |
128 | static void PrintDevicesUtilization(IEnumerable<LLamaDevice> devices)
129 | {
130 | Console.WriteLine("======= Device memory usage");
131 | foreach (var device in devices)
132 | {
133 | Console.WriteLine($"{device.Name,-10} " +
134 | $"{device.GetUsedSpace() / 1024 / 1024} Mb of {device.TotalSize / 1024 / 1024} Mb " +
135 | $"({device.Tensors.Aggregate(0L, (current, tensor) => current + tensor.Size) / 1024 / 1024})");
136 | }
137 |
138 | Console.WriteLine();
139 | }
140 |
141 | static void PrintHelmChartConfig(IReadOnlyList<LLamaDevice> devices)
142 | {
143 | Console.WriteLine("======= Helm chart config");
144 | var sb = new StringBuilder();
145 | sb.Append("extraEnvVars:\n");
146 | sb.Append($" - name: LLAMA_ARG_MAIN_GPU\n" +
147 | $" value: '0'\n");
148 | sb.Append($" - name: LLAMA_ARG_TENSOR_SPLIT\n" +
149 | $" value: '{string.Join(',', devices.Select(x => x.Layers.Count))}'\n");
150 | sb.Append("\n");
151 |
152 | sb.Append("modelTensorsOverride:\n");
153 | foreach (var device in devices.Where(x => x.Type != LLamaDeviceType.GPU && x.Tensors.Count > 0))
154 | {
155 | sb.Append($" - name: {device.Name}\n" +
156 | $" tensors:\n");
157 | foreach (var tensor in device.Tensors)
158 | {
159 | sb.Append($" - {tensor.Name}\n");
160 | }
161 | }
162 |
163 | Console.WriteLine(sb);
164 | }
165 |
166 | static void PrintCmdConfig(IReadOnlyList<LLamaDevice> devices)
167 | {
168 | Console.WriteLine("======= CMD config");
169 | var sb = new StringBuilder();
170 | sb.Append("--main-gpu 0 ");
171 | sb.Append($"--tensor-split \"{string.Join(',', devices.Select(x => x.Layers.Count))}\" ");
172 | devices
173 | .Where(x => x.Type != LLamaDeviceType.GPU && x.Tensors.Count > 0)
174 | .Select(x => $"--override-tensor \"({string.Join('|', x.Tensors.Select(t => t.Name))})={x.Name}\" ")
175 | .ToList()
176 | .ForEach(x => sb.Append(x));
177 | Console.WriteLine(sb);
178 | Console.WriteLine();
179 | }
180 |
181 | static void PrintTensorsSize(IEnumerable<TensorMetadata> tensorInfos)
182 | {
183 | Console.WriteLine("======= Tensors size");
184 | foreach (var tensorInfo in tensorInfos.OrderBy(x => x.Size))
185 | {
186 | Console.WriteLine($"{tensorInfo.Name,-30} {tensorInfo.Size / 1024 / 1024} Mb");
187 | }
188 | }
189 |
190 | void PrintTensorsOffloadResult()
191 | {
192 | Console.WriteLine("======= Tensors offload result");
193 | foreach (var t in tensorsOffloadRules)
194 | {
195 | var offloadByDevice = devices
196 | .Select(x => (x.Name, x.Tensors.Count(y => t.tensors.Contains(y))))
197 | .ToList();
198 | Console.WriteLine(
199 | $"Offload {t.name,-24} ({t.rule.Priority}) {(t.tensors.Count - offloadByDevice.Sum(x => x.Item2)).ToString(),-2} " +
200 | $"({string.Join(", ", offloadByDevice.Select(x => $"{x.Name} = {x.Item2.ToString(),-2}"))}) " +
201 | $"of {t.tensors.Count}");
202 | }
203 |
204 | Console.WriteLine();
205 | }
206 |
207 | static OffloadCalculationOptions GetOptions(string[] args)
208 | {
209 | var mapping = new Dictionary<string, string>()
210 | {
211 | { "-e", "environment" }
212 | };
213 | string? env;
214 | // stage 0
215 | {
216 | var cfgBuilder = new ConfigurationBuilder()
217 | .AddJsonFile("appsettings.json", true)
218 | .AddYamlFile("appsettings.yaml", true)
219 | .AddYamlFile("appsettings.yml", true)
220 | .AddEnvironmentVariables()
221 | .AddCommandLine(args, mapping);
222 |
223 | var cfgRoot = cfgBuilder.Build();
224 | env = cfgRoot["environment"] ?? null;
225 | }
226 |
227 | // stage 1
228 | {
229 | var cfgBuilder = new ConfigurationBuilder()
230 | .AddJsonFile("appsettings.json", true)
231 | .AddYamlFile("appsettings.yaml", true)
232 | .AddYamlFile("appsettings.yml", true);
233 |
234 | if (!string.IsNullOrWhiteSpace(env))
235 | {
236 | cfgBuilder
237 | .AddJsonFile($"appsettings.{env}.json", true)
238 | .AddYamlFile($"appsettings.{env}.yaml", true)
239 | .AddYamlFile($"appsettings.{env}.yml", true);
240 | }
241 |
242 | cfgBuilder
243 | .AddEnvironmentVariables()
244 | .AddCommandLine(args, mapping);
245 |
246 | var cfgRoot = cfgBuilder.Build();
247 | return cfgRoot.Get<OffloadCalculationOptions>()!;
248 | }
249 | }
250 |
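251 | // Invocation sketch (mirrors Properties/launchSettings.json): dotnet run -- -e gpt-oss-120b-F16
252 | // "-e <env>" maps to "environment" and selects the appsettings.<env>.{json,yaml,yml} overlay.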
--------------------------------------------------------------------------------
/llama.cpp/llamacpp-offload-calculator/ArkProjects.LlamaOffloadCalc/Properties/launchSettings.json:
--------------------------------------------------------------------------------
1 | {
2 | "profiles": {
3 | "GLM-4.5-Air-UD-Q6_K_XL": {
4 | "commandName": "Project",
5 | "commandLineArgs": "-e GLM-4.5-Air-UD-Q6_K_XL"
6 | },
7 | "gpt-oss-120b-F16": {
8 | "commandName": "Project",
9 | "commandLineArgs": "-e gpt-oss-120b-F16"
10 | }
11 | }
12 | }
--------------------------------------------------------------------------------
/llama.cpp/llamacpp-offload-calculator/ArkProjects.LlamaOffloadCalc/TensorMetadata.cs:
--------------------------------------------------------------------------------
1 | using GGUFSharp;
2 |
3 | namespace ArkProjects.LlmCalc;
4 |
5 | public class TensorMetadata
6 | {
7 | public TensorMetadata(GGUFTensorInfo tensorInfo)
8 | {
9 | TensorInfo = tensorInfo;
10 | if (Name.StartsWith("blk"))
11 | BlkId = int.Parse(Name.Split(".").Skip(1).First());
12 | }
13 |
14 | public GGUFTensorInfo TensorInfo { get; }
15 | public string Name => TensorInfo.Name;
16 | public long Size => (long)TensorInfo.Size;
17 | public int BlkId { get; } = -1;
18 | }
--------------------------------------------------------------------------------
/llama.cpp/llamacpp-offload-calculator/ArkProjects.LlamaOffloadCalc/appsettings.GLM-4.5-Air-UD-Q6_K_XL.yaml:
--------------------------------------------------------------------------------
1 | GgufFile: "\
2 | \\\\TRUENAS/trash3/kube-volumes/pvc-38077fbf-f7fa-46d8-9ad0-17c2ba5bf869/hub\
3 | /models--unsloth--GLM-4.5-Air-GGUF/snapshots/a5133889a6e29d42a1e71784b2ae8514fb28156f\
4 | /UD-Q6_K_XL/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003.gguf"
5 |
6 | Devices:
7 | ROCm0:
8 | ReservedMemoryMb: 1024
9 | ROCm1:
10 | ReservedMemoryMb: 3584
--------------------------------------------------------------------------------
/llama.cpp/llamacpp-offload-calculator/ArkProjects.LlamaOffloadCalc/appsettings.gpt-oss-120b-F16.yaml:
--------------------------------------------------------------------------------
1 | GgufFile: "\
2 | \\\\TRUENAS/trash3/kube-volumes/pvc-38077fbf-f7fa-46d8-9ad0-17c2ba5bf869/hub\
3 | /models--unsloth--gpt-oss-120b-GGUF/snapshots/91daeef64d6b1e1078ad1d007f9efa98526d7bf1\
4 | /gpt-oss-120b-F16.gguf"
5 |
6 | Devices:
7 | ROCm0:
8 | ReservedMemoryMb: 9300
9 | ROCm1:
10 | ReservedMemoryMb: 8500
--------------------------------------------------------------------------------
/llama.cpp/llamacpp-offload-calculator/ArkProjects.LlamaOffloadCalc/appsettings.yaml:
--------------------------------------------------------------------------------
1 | PrintCmdConfig: true
2 | PrintHelmChartConfig: true
3 | PrintTensorsSize: false
4 |
5 | # list from 'llama-server --list-devices' + CPU
6 | Devices:
7 | ROCm0:
8 | Id: 1
9 | Type: GPU
10 | TotalSizeMb: 32768
11 | PciBus: 0000:01:00.0
12 | ROCm1:
13 | Id: 2
14 | Type: GPU
15 | TotalSizeMb: 32768
16 | PciBus: 0000:02:00.0
17 | CPU:
18 | Id: 3
19 | Type: CPU
20 | TotalSizeMb: 131072
21 | ReservedMemoryMb: 0
22 |
23 | # offloading rules
24 | OffloadRules:
25 | ffn_gate_exps:
26 | Id: 1
27 | Regex: '^blk\.\d+\.ffn_gate_exps\.weight'
28 | Priority: 10
29 | ffn_up_exps:
30 | Id: 2
31 | Regex: '^blk\.\d+\.ffn_up_exps\.weight'
32 | Priority: 20
33 | ffn_down_exps:
34 | Id: 3
35 | Regex: '^blk\.\d+\.ffn_down_exps\.weight'
36 | Priority: 20
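37 |
38 | # Example: 'blk.12.ffn_up_exps.weight' matches the ffn_up_exps rule. When a GPU is
39 | # over capacity, tensors from higher-Priority rules are moved to CPU first, so
40 | # ffn_up_exps/ffn_down_exps (20) are offloaded before ffn_gate_exps (10).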
--------------------------------------------------------------------------------
/llama.cpp/llamacpp-offload-calculator/GGUFSharp/.gitignore:
--------------------------------------------------------------------------------
1 | ## Ignore Visual Studio temporary files, build results, and
2 | ## files generated by popular Visual Studio add-ons.
3 | ##
4 | ## Get latest from https://github.com/github/gitignore/blob/main/VisualStudio.gitignore
5 |
6 | # User-specific files
7 | *.rsuser
8 | *.suo
9 | *.user
10 | *.userosscache
11 | *.sln.docstates
12 |
13 | # User-specific files (MonoDevelop/Xamarin Studio)
14 | *.userprefs
15 |
16 | # Mono auto generated files
17 | mono_crash.*
18 |
19 | # Build results
20 | [Dd]ebug/
21 | [Dd]ebugPublic/
22 | [Rr]elease/
23 | [Rr]eleases/
24 | x64/
25 | x86/
26 | [Ww][Ii][Nn]32/
27 | [Aa][Rr][Mm]/
28 | [Aa][Rr][Mm]64/
29 | bld/
30 | [Bb]in/
31 | [Oo]bj/
32 | [Ll]og/
33 | [Ll]ogs/
34 |
35 | # Visual Studio 2015/2017 cache/options directory
36 | .vs/
37 | # Uncomment if you have tasks that create the project's static files in wwwroot
38 | #wwwroot/
39 |
40 | # Visual Studio 2017 auto generated files
41 | Generated\ Files/
42 |
43 | # MSTest test Results
44 | [Tt]est[Rr]esult*/
45 | [Bb]uild[Ll]og.*
46 |
47 | # NUnit
48 | *.VisualState.xml
49 | TestResult.xml
50 | nunit-*.xml
51 |
52 | # Build Results of an ATL Project
53 | [Dd]ebugPS/
54 | [Rr]eleasePS/
55 | dlldata.c
56 |
57 | # Benchmark Results
58 | BenchmarkDotNet.Artifacts/
59 |
60 | # .NET Core
61 | project.lock.json
62 | project.fragment.lock.json
63 | artifacts/
64 |
65 | # ASP.NET Scaffolding
66 | ScaffoldingReadMe.txt
67 |
68 | # StyleCop
69 | StyleCopReport.xml
70 |
71 | # Files built by Visual Studio
72 | *_i.c
73 | *_p.c
74 | *_h.h
75 | *.ilk
76 | *.meta
77 | *.obj
78 | *.iobj
79 | *.pch
80 | *.pdb
81 | *.ipdb
82 | *.pgc
83 | *.pgd
84 | *.rsp
85 | *.sbr
86 | *.tlb
87 | *.tli
88 | *.tlh
89 | *.tmp
90 | *.tmp_proj
91 | *_wpftmp.csproj
92 | *.log
93 | *.tlog
94 | *.vspscc
95 | *.vssscc
96 | .builds
97 | *.pidb
98 | *.svclog
99 | *.scc
100 |
101 | # Chutzpah Test files
102 | _Chutzpah*
103 |
104 | # Visual C++ cache files
105 | ipch/
106 | *.aps
107 | *.ncb
108 | *.opendb
109 | *.opensdf
110 | *.sdf
111 | *.cachefile
112 | *.VC.db
113 | *.VC.VC.opendb
114 |
115 | # Visual Studio profiler
116 | *.psess
117 | *.vsp
118 | *.vspx
119 | *.sap
120 |
121 | # Visual Studio Trace Files
122 | *.e2e
123 |
124 | # TFS 2012 Local Workspace
125 | $tf/
126 |
127 | # Guidance Automation Toolkit
128 | *.gpState
129 |
130 | # ReSharper is a .NET coding add-in
131 | _ReSharper*/
132 | *.[Rr]e[Ss]harper
133 | *.DotSettings.user
134 |
135 | # TeamCity is a build add-in
136 | _TeamCity*
137 |
138 | # DotCover is a Code Coverage Tool
139 | *.dotCover
140 |
141 | # AxoCover is a Code Coverage Tool
142 | .axoCover/*
143 | !.axoCover/settings.json
144 |
145 | # Coverlet is a free, cross platform Code Coverage Tool
146 | coverage*.json
147 | coverage*.xml
148 | coverage*.info
149 |
150 | # Visual Studio code coverage results
151 | *.coverage
152 | *.coveragexml
153 |
154 | # NCrunch
155 | _NCrunch_*
156 | .*crunch*.local.xml
157 | nCrunchTemp_*
158 |
159 | # MightyMoose
160 | *.mm.*
161 | AutoTest.Net/
162 |
163 | # Web workbench (sass)
164 | .sass-cache/
165 |
166 | # Installshield output folder
167 | [Ee]xpress/
168 |
169 | # DocProject is a documentation generator add-in
170 | DocProject/buildhelp/
171 | DocProject/Help/*.HxT
172 | DocProject/Help/*.HxC
173 | DocProject/Help/*.hhc
174 | DocProject/Help/*.hhk
175 | DocProject/Help/*.hhp
176 | DocProject/Help/Html2
177 | DocProject/Help/html
178 |
179 | # Click-Once directory
180 | publish/
181 |
182 | # Publish Web Output
183 | *.[Pp]ublish.xml
184 | *.azurePubxml
185 | # Note: Comment the next line if you want to checkin your web deploy settings,
186 | # but database connection strings (with potential passwords) will be unencrypted
187 | *.pubxml
188 | *.publishproj
189 |
190 | # Microsoft Azure Web App publish settings. Comment the next line if you want to
191 | # checkin your Azure Web App publish settings, but sensitive information contained
192 | # in these scripts will be unencrypted
193 | PublishScripts/
194 |
195 | # NuGet Packages
196 | *.nupkg
197 | # NuGet Symbol Packages
198 | *.snupkg
199 | # The packages folder can be ignored because of Package Restore
200 | **/[Pp]ackages/*
201 | # except build/, which is used as an MSBuild target.
202 | !**/[Pp]ackages/build/
203 | # Uncomment if necessary however generally it will be regenerated when needed
204 | #!**/[Pp]ackages/repositories.config
205 | # NuGet v3's project.json files produces more ignorable files
206 | *.nuget.props
207 | *.nuget.targets
208 |
209 | # Microsoft Azure Build Output
210 | csx/
211 | *.build.csdef
212 |
213 | # Microsoft Azure Emulator
214 | ecf/
215 | rcf/
216 |
217 | # Windows Store app package directories and files
218 | AppPackages/
219 | BundleArtifacts/
220 | Package.StoreAssociation.xml
221 | _pkginfo.txt
222 | *.appx
223 | *.appxbundle
224 | *.appxupload
225 |
226 | # Visual Studio cache files
227 | # files ending in .cache can be ignored
228 | *.[Cc]ache
229 | # but keep track of directories ending in .cache
230 | !?*.[Cc]ache/
231 |
232 | # Others
233 | ClientBin/
234 | ~$*
235 | *~
236 | *.dbmdl
237 | *.dbproj.schemaview
238 | *.jfm
239 | *.pfx
240 | *.publishsettings
241 | orleans.codegen.cs
242 |
243 | # Including strong name files can present a security risk
244 | # (https://github.com/github/gitignore/pull/2483#issue-259490424)
245 | #*.snk
246 |
247 | # Since there are multiple workflows, uncomment next line to ignore bower_components
248 | # (https://github.com/github/gitignore/pull/1529#issuecomment-104372622)
249 | #bower_components/
250 |
251 | # RIA/Silverlight projects
252 | Generated_Code/
253 |
254 | # Backup & report files from converting an old project file
255 | # to a newer Visual Studio version. Backup files are not needed,
256 | # because we have git ;-)
257 | _UpgradeReport_Files/
258 | Backup*/
259 | UpgradeLog*.XML
260 | UpgradeLog*.htm
261 | ServiceFabricBackup/
262 | *.rptproj.bak
263 |
264 | # SQL Server files
265 | *.mdf
266 | *.ldf
267 | *.ndf
268 |
269 | # Business Intelligence projects
270 | *.rdl.data
271 | *.bim.layout
272 | *.bim_*.settings
273 | *.rptproj.rsuser
274 | *- [Bb]ackup.rdl
275 | *- [Bb]ackup ([0-9]).rdl
276 | *- [Bb]ackup ([0-9][0-9]).rdl
277 |
278 | # Microsoft Fakes
279 | FakesAssemblies/
280 |
281 | # GhostDoc plugin setting file
282 | *.GhostDoc.xml
283 |
284 | # Node.js Tools for Visual Studio
285 | .ntvs_analysis.dat
286 | node_modules/
287 |
288 | # Visual Studio 6 build log
289 | *.plg
290 |
291 | # Visual Studio 6 workspace options file
292 | *.opt
293 |
294 | # Visual Studio 6 auto-generated workspace file (contains which files were open etc.)
295 | *.vbw
296 |
297 | # Visual Studio 6 auto-generated project file (contains which files were open etc.)
298 | *.vbp
299 |
300 | # Visual Studio 6 workspace and project file (working project files containing files to include in project)
301 | *.dsw
302 | *.dsp
303 |
304 | # Visual Studio 6 technical files
305 | *.ncb
306 | *.aps
307 |
308 | # Visual Studio LightSwitch build output
309 | **/*.HTMLClient/GeneratedArtifacts
310 | **/*.DesktopClient/GeneratedArtifacts
311 | **/*.DesktopClient/ModelManifest.xml
312 | **/*.Server/GeneratedArtifacts
313 | **/*.Server/ModelManifest.xml
314 | _Pvt_Extensions
315 |
316 | # Paket dependency manager
317 | .paket/paket.exe
318 | paket-files/
319 |
320 | # FAKE - F# Make
321 | .fake/
322 |
323 | # CodeRush personal settings
324 | .cr/personal
325 |
326 | # Python Tools for Visual Studio (PTVS)
327 | __pycache__/
328 | *.pyc
329 |
330 | # Cake - Uncomment if you are using it
331 | # tools/**
332 | # !tools/packages.config
333 |
334 | # Tabs Studio
335 | *.tss
336 |
337 | # Telerik's JustMock configuration file
338 | *.jmconfig
339 |
340 | # BizTalk build output
341 | *.btp.cs
342 | *.btm.cs
343 | *.odx.cs
344 | *.xsd.cs
345 |
346 | # OpenCover UI analysis results
347 | OpenCover/
348 |
349 | # Azure Stream Analytics local run output
350 | ASALocalRun/
351 |
352 | # MSBuild Binary and Structured Log
353 | *.binlog
354 |
355 | # NVidia Nsight GPU debugger configuration file
356 | *.nvuser
357 |
358 | # MFractors (Xamarin productivity tool) working folder
359 | .mfractor/
360 |
361 | # Local History for Visual Studio
362 | .localhistory/
363 |
364 | # Visual Studio History (VSHistory) files
365 | .vshistory/
366 |
367 | # BeatPulse healthcheck temp database
368 | healthchecksdb
369 |
370 | # Backup folder for Package Reference Convert tool in Visual Studio 2017
371 | MigrationBackup/
372 |
373 | # Ionide (cross platform F# VS Code tools) working folder
374 | .ionide/
375 |
376 | # Fody - auto-generated XML schema
377 | FodyWeavers.xsd
378 |
379 | # VS Code files for those working on multiple tools
380 | .vscode/*
381 | !.vscode/settings.json
382 | !.vscode/tasks.json
383 | !.vscode/launch.json
384 | !.vscode/extensions.json
385 | *.code-workspace
386 |
387 | # Local History for Visual Studio Code
388 | .history/
389 |
390 | # Windows Installer files from build outputs
391 | *.cab
392 | *.msi
393 | *.msix
394 | *.msm
395 | *.msp
396 |
397 | # JetBrains Rider
398 | *.sln.iml
399 |
--------------------------------------------------------------------------------
/llama.cpp/llamacpp-offload-calculator/GGUFSharp/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) 2024 Clock Set Bird
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 |
--------------------------------------------------------------------------------
/llama.cpp/llamacpp-offload-calculator/GGUFSharp/README.md:
--------------------------------------------------------------------------------
1 | # GGUFSharp
2 | A library for reading and writing GGUF files.
3 |
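4 | A minimal read sketch, mirroring `src/GGUFSharp/GGUFSharp.Test/BasicFeatureTest.cs` (it assumes the first tensor holds F32 data, as in `SampleFiles/example.gguf`):
5 |
6 | ```csharp
7 | using System;
8 | using System.Linq;
9 | using System.Runtime.InteropServices;
10 | using GGUFSharp;
11 |
12 | var reader = new GGUFReader();
13 | var file = reader.Read("example.gguf");
14 |
15 | // Metadata KV pairs and tensor infos are parsed up front.
16 | foreach (var meta in file.MetaItems)
17 |     Console.WriteLine(meta); // e.g. "general.architecture:llama"
18 |
19 | // Pull one tensor's raw bytes and view them as floats.
20 | var info = file.TensorInfos.First();
21 | using var owner = reader.ReadTensorData(file, info);
22 | var floats = MemoryMarshal.Cast<byte, float>(owner.Memory.Span.Slice(0, (int)info.Size));
23 | Console.WriteLine($"{info.Name}: {floats.Length} values");
24 | ```
25 |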
--------------------------------------------------------------------------------
/llama.cpp/llamacpp-offload-calculator/GGUFSharp/SampleFiles/FilesList.txt:
--------------------------------------------------------------------------------
1 | bartowski/Phi-3.5-mini-instruct-GGUF/Phi-3.5-mini-instruct-IQ2_M.gguf
--------------------------------------------------------------------------------
/llama.cpp/llamacpp-offload-calculator/GGUFSharp/SampleFiles/example.gguf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mixa3607/ML-gfx906/d95fce7ed5e14ec9cc7b801c668696194a929cda/llama.cpp/llamacpp-offload-calculator/GGUFSharp/SampleFiles/example.gguf
--------------------------------------------------------------------------------
/llama.cpp/llamacpp-offload-calculator/GGUFSharp/SampleFiles/genTestFile.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | import sys
3 | from pathlib import Path
4 |
5 | import numpy as np
6 |
7 | from gguf import GGUFWriter # noqa: E402
8 |
9 |
10 | # Example usage:
11 | def writer_example() -> None:
12 | # Example usage with a file
13 | gguf_writer = GGUFWriter("example.gguf", "llama")
14 |
15 | gguf_writer.add_block_count(12)
16 | gguf_writer.add_uint32("answer", 42) # Write a 32-bit integer
17 | gguf_writer.add_float32("answer_in_float", 42.0) # Write a 32-bit float
18 | gguf_writer.add_custom_alignment(64)
19 |
20 | tensor1 = np.ones((32,), dtype=np.float32) * 100.0
21 | tensor2 = np.ones((64,), dtype=np.float32) * 101.0
22 | tensor3 = np.ones((96,), dtype=np.float32) * 102.0
23 |
24 | gguf_writer.add_tensor("tensor1", tensor1)
25 | gguf_writer.add_tensor("tensor2", tensor2)
26 | gguf_writer.add_tensor("tensor3", tensor3)
27 |
28 | gguf_writer.write_header_to_file()
29 | gguf_writer.write_kv_data_to_file()
30 | gguf_writer.write_tensors_to_file()
31 |
32 | gguf_writer.close()
33 |
34 |
35 | if __name__ == '__main__':
36 | writer_example()
37 |
--------------------------------------------------------------------------------
/llama.cpp/llamacpp-offload-calculator/GGUFSharp/src/GGUFSharp/GGUFSharp.Test/BasicFeatureTest.cs:
--------------------------------------------------------------------------------
1 | using System.Diagnostics;
2 | using System.Linq;
3 | using System.Runtime.InteropServices;
4 |
5 | namespace GGUFSharp.Test
6 | {
7 | [TestClass]
8 | [DoNotParallelize]
9 | public sealed class BasicFeatureTest
10 | {
11 | private string GGUFFilePath = @"example.gguf";
12 | private string[] example_meta =
13 | {
14 | "general.architecture:llama",
15 | "llama.block_count:GGUF_METADATA_VALUE_TYPE_UINT32",
16 | "answer:GGUF_METADATA_VALUE_TYPE_UINT32",
17 | "answer_in_float:GGUF_METADATA_VALUE_TYPE_FLOAT32",
18 | "general.alignment:GGUF_METADATA_VALUE_TYPE_UINT32"
19 | };
20 | private string[] example_tensorInfo =
21 | {
22 | "tensor1",
23 | "tensor2",
24 | "tensor3"
25 | };
26 | [TestMethod]
27 | public void ReadBasicInfo()
28 | {
29 | GGUFReader reader = new GGUFReader();
30 | var f = reader.Read(GGUFFilePath);
31 | Assert.IsTrue(f.MetaItems.Select(x=>x.ToString()).SequenceEqual(example_meta));
32 | Assert.IsTrue(f.TensorInfos.Select(x => x.Name).SequenceEqual(example_tensorInfo));
33 | }
34 | [TestMethod]
35 | public void ReadTensorData()
36 | {
37 | GGUFReader reader = new GGUFReader();
38 | var f=reader.Read(GGUFFilePath);
39 | using var t1 = reader.ReadTensorData(f, f.TensorInfos.First());
40 | var data = t1.Memory.Slice(0, (int)f.TensorInfos.First().Size);
41 | var dataF = MemoryMarshal.Cast<byte, float>(data.Span);
42 | foreach (var item in dataF)
43 | {
44 | Assert.AreEqual(100f, item);
45 | }
46 | }
47 |
48 | }
49 | }
50 |
--------------------------------------------------------------------------------
/llama.cpp/llamacpp-offload-calculator/GGUFSharp/src/GGUFSharp/GGUFSharp.Test/GGUFSharp.Test.csproj:
--------------------------------------------------------------------------------
1 | <Project Sdk="Microsoft.NET.Sdk">
2 |
3 |   <PropertyGroup>
4 |     <TargetFramework>net8.0</TargetFramework>
5 |     <LangVersion>latest</LangVersion>
6 |     <ImplicitUsings>enable</ImplicitUsings>
7 |     <Nullable>enable</Nullable>
8 |   </PropertyGroup>
9 |
10 |   <ItemGroup>
11 |     <None Update="example.gguf">
12 |       <CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
13 |     </None>
14 |   </ItemGroup>
15 |
16 |   <ItemGroup>
17 |     <ProjectReference Include="..\GGUFSharp\GGUFSharp.csproj" />
18 |   </ItemGroup>
19 |
20 | </Project>
--------------------------------------------------------------------------------
/llama.cpp/llamacpp-offload-calculator/GGUFSharp/src/GGUFSharp/GGUFSharp.Test/MSTestSettings.cs:
--------------------------------------------------------------------------------
1 | [assembly: Parallelize(Scope = ExecutionScope.MethodLevel)]
2 |
--------------------------------------------------------------------------------
/llama.cpp/llamacpp-offload-calculator/GGUFSharp/src/GGUFSharp/GGUFSharp.sln:
--------------------------------------------------------------------------------
1 |
2 | Microsoft Visual Studio Solution File, Format Version 12.00
3 | # Visual Studio Version 17
4 | VisualStudioVersion = 17.12.35506.116
5 | MinimumVisualStudioVersion = 10.0.40219.1
6 | Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "GGUFSharp", "GGUFSharp\GGUFSharp.csproj", "{3A5E61DA-70F6-4132-BCA3-11AB3AFA4281}"
7 | EndProject
8 | Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "GGUFSharp.Test", "GGUFSharp.Test\GGUFSharp.Test.csproj", "{31135C56-06FB-42A0-B098-2A166A437A8D}"
9 | EndProject
10 | Global
11 | GlobalSection(SolutionConfigurationPlatforms) = preSolution
12 | Debug|Any CPU = Debug|Any CPU
13 | Release|Any CPU = Release|Any CPU
14 | EndGlobalSection
15 | GlobalSection(ProjectConfigurationPlatforms) = postSolution
16 | {3A5E61DA-70F6-4132-BCA3-11AB3AFA4281}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
17 | {3A5E61DA-70F6-4132-BCA3-11AB3AFA4281}.Debug|Any CPU.Build.0 = Debug|Any CPU
18 | {3A5E61DA-70F6-4132-BCA3-11AB3AFA4281}.Release|Any CPU.ActiveCfg = Release|Any CPU
19 | {3A5E61DA-70F6-4132-BCA3-11AB3AFA4281}.Release|Any CPU.Build.0 = Release|Any CPU
20 | {31135C56-06FB-42A0-B098-2A166A437A8D}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
21 | {31135C56-06FB-42A0-B098-2A166A437A8D}.Debug|Any CPU.Build.0 = Debug|Any CPU
22 | {31135C56-06FB-42A0-B098-2A166A437A8D}.Release|Any CPU.ActiveCfg = Release|Any CPU
23 | {31135C56-06FB-42A0-B098-2A166A437A8D}.Release|Any CPU.Build.0 = Release|Any CPU
24 | EndGlobalSection
25 | GlobalSection(SolutionProperties) = preSolution
26 | HideSolutionNode = FALSE
27 | EndGlobalSection
28 | EndGlobal
29 |
--------------------------------------------------------------------------------
/llama.cpp/llamacpp-offload-calculator/GGUFSharp/src/GGUFSharp/GGUFSharp/GGUFDataTypeEnum.cs:
--------------------------------------------------------------------------------
1 | using System;
2 | using System.Collections.Generic;
3 | using System.Text;
4 |
5 | namespace GGUFSharp
6 | {
7 | public enum GGUFDataTypeEnum : uint
8 | {
9 | // The value is a 8-bit unsigned integer.
10 | GGUF_METADATA_VALUE_TYPE_UINT8 = 0,
11 | // The value is a 8-bit signed integer.
12 | GGUF_METADATA_VALUE_TYPE_INT8 = 1,
13 | // The value is a 16-bit unsigned little-endian integer.
14 | GGUF_METADATA_VALUE_TYPE_UINT16 = 2,
15 | // The value is a 16-bit signed little-endian integer.
16 | GGUF_METADATA_VALUE_TYPE_INT16 = 3,
17 | // The value is a 32-bit unsigned little-endian integer.
18 | GGUF_METADATA_VALUE_TYPE_UINT32 = 4,
19 | // The value is a 32-bit signed little-endian integer.
20 | GGUF_METADATA_VALUE_TYPE_INT32 = 5,
21 | // The value is a 32-bit IEEE754 floating point number.
22 | GGUF_METADATA_VALUE_TYPE_FLOAT32 = 6,
23 | // The value is a boolean.
24 | // 1-byte value where 0 is false and 1 is true.
25 | // Anything else is invalid, and should be treated as either the model being invalid or the reader being buggy.
26 | GGUF_METADATA_VALUE_TYPE_BOOL = 7,
27 | // The value is a UTF-8 non-null-terminated string, with length prepended.
28 | GGUF_METADATA_VALUE_TYPE_STRING = 8,
29 | // The value is an array of other values, with the length and type prepended.
30 | //
31 | // Arrays can be nested, and the length of the array is the number of elements in the array, not the number of bytes.
32 | GGUF_METADATA_VALUE_TYPE_ARRAY = 9,
33 | // The value is a 64-bit unsigned little-endian integer.
34 | GGUF_METADATA_VALUE_TYPE_UINT64 = 10,
35 | // The value is a 64-bit signed little-endian integer.
36 | GGUF_METADATA_VALUE_TYPE_INT64 = 11,
37 | // The value is a 64-bit IEEE754 floating point number.
38 | GGUF_METADATA_VALUE_TYPE_FLOAT64 = 12,
39 | }
40 | public static class GGUFDataTypeEnumHelper
41 | {
42 | public static int GetDataTypeSize(this GGUFDataTypeEnum dataType) => dataType switch
43 | {
44 | GGUFDataTypeEnum.GGUF_METADATA_VALUE_TYPE_UINT8 => 1,
45 | GGUFDataTypeEnum.GGUF_METADATA_VALUE_TYPE_INT8 => 1,
46 | GGUFDataTypeEnum.GGUF_METADATA_VALUE_TYPE_UINT16 => 2,
47 | GGUFDataTypeEnum.GGUF_METADATA_VALUE_TYPE_INT16 => 2,
48 | GGUFDataTypeEnum.GGUF_METADATA_VALUE_TYPE_UINT32 => 4,
49 | GGUFDataTypeEnum.GGUF_METADATA_VALUE_TYPE_INT32 => 4,
50 | GGUFDataTypeEnum.GGUF_METADATA_VALUE_TYPE_FLOAT32 => 4,
51 | GGUFDataTypeEnum.GGUF_METADATA_VALUE_TYPE_BOOL => 1,
52 | GGUFDataTypeEnum.GGUF_METADATA_VALUE_TYPE_STRING => -1,
53 | GGUFDataTypeEnum.GGUF_METADATA_VALUE_TYPE_ARRAY => -1,
54 | GGUFDataTypeEnum.GGUF_METADATA_VALUE_TYPE_UINT64 => 8,
55 | GGUFDataTypeEnum.GGUF_METADATA_VALUE_TYPE_INT64 => 8,
56 | GGUFDataTypeEnum.GGUF_METADATA_VALUE_TYPE_FLOAT64 => 8
57 | };
58 | }
59 |
60 |
61 | }
62 |
--------------------------------------------------------------------------------
/llama.cpp/llamacpp-offload-calculator/GGUFSharp/src/GGUFSharp/GGUFSharp/GGUFFile.cs:
--------------------------------------------------------------------------------
1 | using System;
2 | using System.Collections.Generic;
3 | using System.Text;
4 |
5 | namespace GGUFSharp
6 | {
7 | public class GGUFFile
8 | {
9 | public string FilePath { get; set; }
10 | public uint Version { get; set; }
11 |
12 | public ulong DataStartOffset { get; set; }
13 | public List<GGUFTensorInfo> TensorInfos { get; set; }
14 | public List<GGUFMetaItem> MetaItems { get; set; }
15 | }
16 | }
17 |
--------------------------------------------------------------------------------
/llama.cpp/llamacpp-offload-calculator/GGUFSharp/src/GGUFSharp/GGUFSharp/GGUFHeader.cs:
--------------------------------------------------------------------------------
1 | using System;
2 | using System.Collections.Generic;
3 | using System.Runtime.InteropServices;
4 | using System.Text;
5 |
6 | namespace GGUFSharp
7 | {
8 | //[StructLayout(LayoutKind.Explicit)]
9 | public class GGUFHeader
10 | {
11 | //[FieldOffset(0)]
12 | public uint MagicCode;
13 | //[FieldOffset(4)]
14 | public uint Version;
15 |
16 | //[FieldOffset(8)]
17 | public ulong TensorCount;
18 |
19 | //[FieldOffset(16)]
20 | public ulong MetaKVCount;
21 | }
22 | }
23 |
--------------------------------------------------------------------------------
/llama.cpp/llamacpp-offload-calculator/GGUFSharp/src/GGUFSharp/GGUFSharp/GGUFMetaItem.cs:
--------------------------------------------------------------------------------
1 | using System;
2 | using System.Collections.Generic;
3 | using System.Linq;
4 | using System.Text;
5 |
6 | namespace GGUFSharp
7 | {
8 | public class GGUFMetaItem
9 | {
10 | public GGUFDataTypeEnum DataType { get; set; }
11 | public GGUFDataTypeEnum? ArrayElementType { get; set; }
12 | public string Name { get; set; }
13 | public byte[] RawData { get; set; }
14 | public string[] ArrayStrings { get; set; }
15 | public override string ToString()
16 | {
17 | StringBuilder sb = new StringBuilder($"{Name}:");
18 | switch(DataType)
19 | {
20 | case GGUFDataTypeEnum.GGUF_METADATA_VALUE_TYPE_STRING:
21 | sb.Append(Encoding.UTF8.GetString(RawData));
22 | break;
23 | case GGUFDataTypeEnum.GGUF_METADATA_VALUE_TYPE_ARRAY:
24 | if (ArrayElementType==GGUFDataTypeEnum.GGUF_METADATA_VALUE_TYPE_STRING)
25 | {
26 | if (ArrayStrings.Length>10)
27 | {
28 | sb.Append($"{string.Join(", ", ArrayStrings.Take(10))}...");
29 | }
30 | else
31 | {
32 | sb.Append(string.Join(", ", ArrayStrings));
33 | }
34 | }
35 | else
36 | {
37 | sb.Append($"[{Enum.GetName(typeof(GGUFDataTypeEnum), ArrayElementType)}]");
38 | }
39 | break;
40 | default:
41 | sb.Append(Enum.GetName(typeof(GGUFDataTypeEnum), DataType));
42 | break;
43 | }
44 | return sb.ToString();
45 | }
46 | }
47 | }
48 |
--------------------------------------------------------------------------------
/llama.cpp/llamacpp-offload-calculator/GGUFSharp/src/GGUFSharp/GGUFSharp/GGUFReader.cs:
--------------------------------------------------------------------------------
1 | using System;
2 | using System.Buffers;
3 | using System.Collections.Generic;
4 | using System.Diagnostics;
5 | using System.IO;
6 | using System.IO.MemoryMappedFiles;
7 | using System.Linq;
8 | using System.Runtime.InteropServices;
9 | using System.Text;
10 |
11 | namespace GGUFSharp
12 | {
13 | public class GGUFReader
14 | {
15 | public GGUFFile Read(string filePath)
16 | {
17 | using var fs = MemoryMappedFile.CreateFromFile(filePath);
18 | using var s = fs.CreateViewStream(0, 0, MemoryMappedFileAccess.Read);
19 | var header = readHeader(s);
20 | //using var meta = fs.CreateViewStream(24, 100*1024 * 1024, MemoryMappedFileAccess.Read);
21 | var d = readMetaData(s, header.MetaKVCount).ToList();
22 |
23 | //foreach (var item in d)
24 | //{
25 | // Debug.WriteLine($"{item.Name}, {item.ToString()}");
26 | //}
27 |
28 | var t = readTensorData(s, header.TensorCount).ToList();
29 | ulong alignment = 32; // TODO: read general.alignment from the metadata instead of assuming the default
30 | // Tensor data starts at the first aligned offset after the header/metadata/tensor-info section.
31 | ulong startOffset = (ulong)s.Position + (alignment - ((ulong)s.Position % alignment)) % alignment;
32 | // GGUF stores only offsets; recover each tensor's size from the gap to the next offset.
33 | var sortedItems = t.OrderBy(x => x.Offset).ToList();
34 | for (var i = 0; i < sortedItems.Count - 1; i++)
35 | {
36 | sortedItems[i].Size = sortedItems[i + 1].Offset - sortedItems[i].Offset;
37 | }
38 | var last = sortedItems.Last();
39 | last.Size = (ulong)new FileInfo(filePath).Length - last.Offset - startOffset;
40 |
41 |
42 | //foreach (var item in t)
43 | //{
44 | // Debug.WriteLine($"[Tensor]{item.Name},{item.DimensionCount},{item.TensorType.ToString()},{item.Offset}");
45 | //}
46 | return new GGUFFile()
47 | {
48 | FilePath = filePath,
49 | MetaItems = d,
50 | TensorInfos = sortedItems,
51 | Version = header.Version,
52 | DataStartOffset = startOffset,
53 | };
54 |
55 | }
56 |
57 | public IMemoryOwner<byte> ReadTensorData(GGUFFile file, GGUFTensorInfo tensor)
58 | {
59 | using var fs = MemoryMappedFile.CreateFromFile(file.FilePath);
60 | using var s = fs.CreateViewStream((long)(file.DataStartOffset + tensor.Offset), (long)tensor.Size, MemoryMappedFileAccess.Read);
61 | if (tensor.Size > int.MaxValue)
62 | {
63 | throw new NotSupportedException("Not supported yet: tensor size must not exceed int.MaxValue");
64 | }
65 | var om = MemoryPool<byte>.Shared.Rent((int)tensor.Size);
66 | // Rented buffers may be larger than requested; callers must slice to tensor.Size.
67 | s.Read(om.Memory.Span);
68 | return om;
69 | }
70 |
71 |
72 | private GGUFHeader readHeader(Stream header)
73 | {
74 | using BinaryReader br = new BinaryReader(header, Encoding.UTF8, true);
75 | GGUFHeader result = new GGUFHeader();
76 | result.MagicCode = br.ReadUInt32();
77 | if (result.MagicCode != 0x46554747) // "GGUF" in little-endian byte order
78 | {
79 | throw new InvalidOperationException("Invalid magic code");
80 | }
81 | result.Version = br.ReadUInt32();
82 | result.TensorCount = br.ReadUInt64();
83 | result.MetaKVCount = br.ReadUInt64();
84 | return result;
85 | }
86 |
87 | private IEnumerable<GGUFMetaItem> readMetaData(Stream meta, ulong MetaCount)
88 | {
89 | using BinaryReader br = new BinaryReader(meta, Encoding.UTF8, true);
90 | for (ulong i = 0; i < MetaCount; i++)
91 | {
92 |
93 | GGUFMetaItem result = new GGUFMetaItem();
94 | result.Name = readString(br);
95 | result.DataType = (GGUFDataTypeEnum)br.ReadUInt32();
96 | int size;
97 | switch (result.DataType)
98 | {
99 | case GGUFDataTypeEnum.GGUF_METADATA_VALUE_TYPE_STRING:
100 | size = (int)br.ReadUInt64();
101 | break;
102 | case GGUFDataTypeEnum.GGUF_METADATA_VALUE_TYPE_ARRAY:
103 | GGUFDataTypeEnum elementType = (GGUFDataTypeEnum)br.ReadUInt32();
104 |
105 | if (elementType == GGUFDataTypeEnum.GGUF_METADATA_VALUE_TYPE_ARRAY)
106 | {
107 | throw new NotSupportedException("Nested array is not supported");
108 | }
109 | ulong elementCount = br.ReadUInt64();
110 | if (elementType == GGUFDataTypeEnum.GGUF_METADATA_VALUE_TYPE_STRING)
111 | {
112 | result.ArrayStrings = new string[elementCount];
113 | result.ArrayElementType = GGUFDataTypeEnum.GGUF_METADATA_VALUE_TYPE_STRING;
114 | for (ulong j = 0; j < elementCount; j++)
115 | {
116 | result.ArrayStrings[j] = readString(br);
117 | }
118 | size = 0;
119 | }
120 | else
121 | {
122 | result.ArrayElementType = elementType;
123 | size = (elementType.GetDataTypeSize() * (int)elementCount);
124 | }
125 |
126 | break;
127 | default:
128 | size = result.DataType.GetDataTypeSize();
129 | break;
130 | }
131 | if (size > 0)
132 | {
133 | result.RawData = br.ReadBytes(size);
134 | }
135 |
136 |
137 | yield return result;
138 | }
139 |
140 | }
141 | private IEnumerable<GGUFTensorInfo> readTensorData(Stream stream, ulong tensorCount)
142 | {
143 | using BinaryReader br = new BinaryReader(stream, Encoding.UTF8, true);
144 | for (ulong i = 0; i < tensorCount; i++)
145 | {
146 | GGUFTensorInfo result = new GGUFTensorInfo();
147 | result.Name = readString(br);
148 | result.DimensionCount = br.ReadUInt32();
149 | result.Dimensions = readArray<ulong>(br, result.DimensionCount).ToArray();
150 | result.TensorType = (GGUFTensorType)br.ReadUInt32();
151 | result.Offset = br.ReadUInt64();
152 | yield return result;
153 | }
154 | }
155 |
156 | private string readString(BinaryReader reader)
157 | {
158 | var l = reader.ReadUInt64();
159 | var x = reader.ReadBytes((int)l);
160 | return System.Text.Encoding.UTF8.GetString(x);
161 | }
162 |
163 | private Span<T> readArray<T>(BinaryReader reader, UInt64 elementCount = 0) where T : struct
164 | {
165 | if (elementCount == 0)
166 | {
167 | elementCount = reader.ReadUInt64();
168 | }
169 | int length = Marshal.SizeOf<T>() * (int)elementCount;
170 | byte[] buffer = new byte[length];
171 | reader.Read(buffer, 0, length);
172 | return MemoryMarshal.Cast<byte, T>(buffer);
173 | }
174 | }
175 | }
176 |
--------------------------------------------------------------------------------
/llama.cpp/llamacpp-offload-calculator/GGUFSharp/src/GGUFSharp/GGUFSharp/GGUFSharp.csproj:
--------------------------------------------------------------------------------
1 | <Project Sdk="Microsoft.NET.Sdk">
2 |
3 |   <PropertyGroup>
4 |     <TargetFramework>netstandard2.1</TargetFramework>
5 |     <Nullable>enable</Nullable>
6 |   </PropertyGroup>
7 |
8 | </Project>
--------------------------------------------------------------------------------
/llama.cpp/llamacpp-offload-calculator/GGUFSharp/src/GGUFSharp/GGUFSharp/GGUFStreamReader.cs:
--------------------------------------------------------------------------------
1 | using System;
2 | using System.Collections.Generic;
3 | using System.IO;
4 | using System.Text;
5 |
6 | namespace GGUFSharp
7 | {
8 | internal class GGUFStreamReader : BinaryReader
9 | {
10 | public GGUFStreamReader(Stream stream) : base(stream)
11 | {
12 | }
13 |
14 | }
15 | }
16 |
--------------------------------------------------------------------------------
/llama.cpp/llamacpp-offload-calculator/GGUFSharp/src/GGUFSharp/GGUFSharp/GGUFTensorInfo.cs:
--------------------------------------------------------------------------------
1 | using System;
2 | using System.Collections.Generic;
3 | using System.Linq;
4 | using System.Text;
5 |
6 | namespace GGUFSharp
7 | {
8 | public class GGUFTensorInfo
9 | {
10 | public string Name { get; set; }
11 | public UInt32 DimensionCount { get; set; }
12 | public UInt64[] Dimensions { get; set; }
13 | public GGUFTensorType TensorType { get; set; }
14 | public UInt64 Offset { get; set; }
15 | public UInt64 Size { get; set; }
16 |
17 |
18 | }
19 | }
20 |
--------------------------------------------------------------------------------
/llama.cpp/llamacpp-offload-calculator/GGUFSharp/src/GGUFSharp/GGUFSharp/GGUFTensorType.cs:
--------------------------------------------------------------------------------
1 | using System;
2 | using System.Collections.Generic;
3 | using System.Text;
4 |
5 | namespace GGUFSharp
6 | {
7 | public enum GGUFTensorType:UInt32
8 | {
9 | GGML_TYPE_F32 = 0,
10 | GGML_TYPE_F16 = 1,
11 | GGML_TYPE_Q4_0 = 2,
12 | GGML_TYPE_Q4_1 = 3,
13 | // GGML_TYPE_Q4_2 = 4, support has been removed
14 | // GGML_TYPE_Q4_3 = 5, support has been removed
15 | GGML_TYPE_Q5_0 = 6,
16 | GGML_TYPE_Q5_1 = 7,
17 | GGML_TYPE_Q8_0 = 8,
18 | GGML_TYPE_Q8_1 = 9,
19 | GGML_TYPE_Q2_K = 10,
20 | GGML_TYPE_Q3_K = 11,
21 | GGML_TYPE_Q4_K = 12,
22 | GGML_TYPE_Q5_K = 13,
23 | GGML_TYPE_Q6_K = 14,
24 | GGML_TYPE_Q8_K = 15,
25 | GGML_TYPE_IQ2_XXS = 16,
26 | GGML_TYPE_IQ2_XS = 17,
27 | GGML_TYPE_IQ3_XXS = 18,
28 | GGML_TYPE_IQ1_S = 19,
29 | GGML_TYPE_IQ4_NL = 20,
30 | GGML_TYPE_IQ3_S = 21,
31 | GGML_TYPE_IQ2_S = 22,
32 | GGML_TYPE_IQ4_XS = 23,
33 | GGML_TYPE_I8 = 24,
34 | GGML_TYPE_I16 = 25,
35 | GGML_TYPE_I32 = 26,
36 | GGML_TYPE_I64 = 27,
37 | GGML_TYPE_F64 = 28,
38 | GGML_TYPE_IQ1_M = 29,
39 | GGML_TYPE_COUNT,
40 | }
41 | }
42 |
--------------------------------------------------------------------------------
/llama.cpp/llamacpp-offload-calculator/readme.md:
--------------------------------------------------------------------------------
1 | # llama.cpp tensors offload calculator
2 |
3 | ## Config
4 |
5 | Supported config formats:
6 | - json
7 | - yaml
8 | - env variables
9 | - cmd args
10 |
11 | Example:
12 | ### Base config `appsettings.yaml`
13 | ```yaml
14 | # print -ot config for command line running
15 | PrintCmdConfig: true
16 | # print yaml for https://github.com/mixa3607/charts/tree/master/charts/llamacpp helm chart
17 | PrintHelmCharConfig: true
18 | # print size per tensor
19 | PrintTensorsSize: false
20 |
21 | # list from 'llama-server --list-devices' + CPU
22 | Devices: # 1+ gpu and 1 cpu
23 | ROCm0: # name from llama.cpp output
24 | Id: 1 # used for ordering
25 | Type: GPU # GPU/CPU
26 | TotalSizeMb: 32768 # memory megabytes
27 | PciBus: 0000:01:00.0 # not used
28 | ROCm1:
29 | Id: 2
30 | Type: GPU
31 | TotalSizeMb: 32768
32 | PciBus: 0000:02:00.0
33 | CPU:
34 | Id: 3
35 | Type: CPU
36 | TotalSizeMb: 131072
37 | ReservedMemory: 0
38 |
39 | # offloading rules
40 | OffloadRules:
41 | ffn_gate_exps: # name
42 | Id: 1 # used for ordering
43 | Regex: '^blk\.\d+\.ffn_gate_exps.weight' # regex
44 | Priority: 10 # lower priority will be offloaded earlier
45 | ffn_up_exps:
46 | Id: 2
47 | Regex: '^blk\.\d+\.ffn_up_exps.weight'
48 | Priority: 20
49 | ffn_down_exps:
50 | Id: 3
51 | Regex: '^blk\.\d+\.ffn_down_exps.weight'
52 | Priority: 20
53 | ```
54 |
55 | ### Per model config `appsettings.noname-model.yaml`
56 | ```yaml
57 | GgufFile: "/path/to/noname.gguf"
58 |
59 | Devices:
60 | ROCm0:
61 | ReservedMemoryMb: 10240 # reserved memory for cache, ctx, etc
62 | #LayersPortion: 50 # layers percentage can be set manually
63 | ROCm1:
64 | ReservedMemoryMb: 10240
65 | #LayersPortion: 50
66 | ```
67 |
68 | For the first run, specify a generous ReservedMemoryMb that llama.cpp is guaranteed to work with.
69 | ```shell
70 | $ cd ArkProjects.LlamaOffloadCalc
71 | $ dotnet run -- -e noname-model
72 | Reading /path/to/noname.gguf
73 | Move blk.0.ffn_down_exps.weight (537 Mb) from ROCm0 to CPU
74 | Move blk.0.ffn_up_exps.weight (537 Mb) from ROCm0 to CPU
75 | Move blk.1.ffn_down_exps.weight (537 Mb) from ROCm0 to CPU
76 | Move blk.1.ffn_up_exps.weight (537 Mb) from ROCm0 to CPU
77 | Move blk.2.ffn_down_exps.weight (537 Mb) from ROCm0 to CPU
78 | Move blk.2.ffn_up_exps.weight (537 Mb) from ROCm0 to CPU
79 | Move blk.3.ffn_down_exps.weight (537 Mb) from ROCm0 to CPU
80 | Move blk.3.ffn_up_exps.weight (537 Mb) from ROCm0 to CPU
81 | Move blk.4.ffn_down_exps.weight (537 Mb) from ROCm0 to CPU
82 | Move blk.4.ffn_up_exps.weight (537 Mb) from ROCm0 to CPU
83 | Move blk.5.ffn_down_exps.weight (537 Mb) from ROCm0 to CPU
84 | Move blk.5.ffn_up_exps.weight (537 Mb) from ROCm0 to CPU
85 | Move blk.6.ffn_down_exps.weight (537 Mb) from ROCm0 to CPU
86 | Move blk.6.ffn_up_exps.weight (537 Mb) from ROCm0 to CPU
87 | Move blk.7.ffn_down_exps.weight (537 Mb) from ROCm0 to CPU
88 | Move blk.18.ffn_down_exps.weight (537 Mb) from ROCm1 to CPU
89 | Move blk.18.ffn_up_exps.weight (537 Mb) from ROCm1 to CPU
90 | Move blk.19.ffn_down_exps.weight (537 Mb) from ROCm1 to CPU
91 | Move blk.19.ffn_up_exps.weight (537 Mb) from ROCm1 to CPU
92 | Move blk.20.ffn_down_exps.weight (537 Mb) from ROCm1 to CPU
93 | Move blk.20.ffn_up_exps.weight (537 Mb) from ROCm1 to CPU
94 | Move blk.21.ffn_down_exps.weight (537 Mb) from ROCm1 to CPU
95 | Move blk.21.ffn_up_exps.weight (537 Mb) from ROCm1 to CPU
96 | Move blk.22.ffn_down_exps.weight (537 Mb) from ROCm1 to CPU
97 | Move blk.22.ffn_up_exps.weight (537 Mb) from ROCm1 to CPU
98 | Move blk.23.ffn_down_exps.weight (537 Mb) from ROCm1 to CPU
99 | Move blk.23.ffn_up_exps.weight (537 Mb) from ROCm1 to CPU
100 | Move blk.24.ffn_down_exps.weight (537 Mb) from ROCm1 to CPU
101 | Move blk.24.ffn_up_exps.weight (537 Mb) from ROCm1 to CPU
102 | Move blk.25.ffn_down_exps.weight (537 Mb) from ROCm1 to CPU
103 | ======= Device memory usage
104 | ROCm0 32231 Mb of 32768 Mb (21991)
105 | ROCm1 32231 Mb of 32768 Mb (21991)
106 | CPU 16136 Mb of 131072 Mb (16136)
107 |
108 | ======= Tensors offload result
109 | Offload ffn_gate_exps (10) 0 (ROCm0 = 18, ROCm1 = 18, CPU = 0 ) of 36
110 | Offload ffn_up_exps (20) 0 (ROCm0 = 11, ROCm1 = 11, CPU = 14) of 36
111 | Offload ffn_down_exps (20) 0 (ROCm0 = 10, ROCm1 = 10, CPU = 16) of 36
112 |
113 | ======= Helm chart config
114 | extraEnvVars:
115 | - name: LLAMA_ARG_MAIN_GPU
116 | value: '0'
117 | - name: LLAMA_ARG_TENSOR_SPLIT
118 | value: '18,18,0'
119 |
120 | modelTensorsOverride:
121 | - name: CPU
122 | tensors:
123 | - blk.0.ffn_down_exps.weight
124 | - blk.0.ffn_up_exps.weight
125 | - blk.1.ffn_down_exps.weight
126 | - blk.1.ffn_up_exps.weight
127 | - blk.2.ffn_down_exps.weight
128 | - blk.2.ffn_up_exps.weight
129 | - blk.3.ffn_down_exps.weight
130 | - blk.3.ffn_up_exps.weight
131 | - blk.4.ffn_down_exps.weight
132 | - blk.4.ffn_up_exps.weight
133 | - blk.5.ffn_down_exps.weight
134 | - blk.5.ffn_up_exps.weight
135 | - blk.6.ffn_down_exps.weight
136 | - blk.6.ffn_up_exps.weight
137 | - blk.7.ffn_down_exps.weight
138 | - blk.18.ffn_down_exps.weight
139 | - blk.18.ffn_up_exps.weight
140 | - blk.19.ffn_down_exps.weight
141 | - blk.19.ffn_up_exps.weight
142 | - blk.20.ffn_down_exps.weight
143 | - blk.20.ffn_up_exps.weight
144 | - blk.21.ffn_down_exps.weight
145 | - blk.21.ffn_up_exps.weight
146 | - blk.22.ffn_down_exps.weight
147 | - blk.22.ffn_up_exps.weight
148 | - blk.23.ffn_down_exps.weight
149 | - blk.23.ffn_up_exps.weight
150 | - blk.24.ffn_down_exps.weight
151 | - blk.24.ffn_up_exps.weight
152 | - blk.25.ffn_down_exps.weight
153 |
154 | ======= CMD config
155 | --main-gpu 0 --tensor-split "18,18,0" --override-tensor "(blk.0.ffn_down_exps.weight|blk.0.ffn_up_exps.weight|blk.1.ffn_down_exps.weight|blk.1.ffn_up_exps.weight|blk.2.ffn_down_exps.weight|blk.2.ffn_up_exps.weight|blk.3.ffn_down_exps.weight|blk.3.ffn_up_exps.weight|blk.4.ffn_down_exps.weight|blk.4.ffn_up_exps.weight|blk.5.ffn_down_exps.weight|blk.5.ffn_up_exps.weight|blk.6.ffn_down_exps.weight|blk.6.ffn_up_exps.weight|blk.7.ffn_down_exps.weight|blk.18.ffn_down_exps.weight|blk.18.ffn_up_exps.weight|blk.19.ffn_down_exps.weight|blk.19.ffn_up_exps.weight|blk.20.ffn_down_exps.weight|blk.20.ffn_up_exps.weight|blk.21.ffn_down_exps.weight|blk.21.ffn_up_exps.weight|blk.22.ffn_down_exps.weight|blk.22.ffn_up_exps.weight|blk.23.ffn_down_exps.weight|blk.23.ffn_up_exps.weight|blk.24.ffn_down_exps.weight|blk.24.ffn_up_exps.weight|blk.25.ffn_down_exps.weight)=CPU"
156 | ```
157 |
158 | ```
159 | ┌┌┤ Memory Usage ├──────────────────────────────────────────────────────────┐ │ ┌┤ Memory Usage ├──────────────────────────────────────────────────────────┐
160 | ││ VRAM: [ 30393 / 32752 MiB ] GTT: [ 14 / 48256 MiB ] │ │ │ VRAM: [ 31079 / 32752 MiB ] GTT: [ 14 / 48256 MiB ] │
161 | └└──────────────────────────────────────────────────────────────────────────┘ │ └──────────────────────────────────────────────────────────────────────────┘
162 | ```
163 |
164 | After a stress test, you can reduce ReservedMemoryMb by the amount of memory that stayed free during the first run (see the sketch at the end of this page).
165 |
166 | ```yaml
167 | GgufFile: "/path/to/noname.gguf"
168 | Devices:
169 | ROCm0:
170 | ReservedMemoryMb: 9300
171 | ROCm1:
172 | ReservedMemoryMb: 8500
173 | ```
174 |
175 | ```shell
176 | $ cd ArkProjects.LlamaOffloadCalc
177 | $ dotnet run -- -e noname-model
178 | Reading /path/to/noname.gguf
179 | Move blk.0.ffn_down_exps.weight (537 Mb) from ROCm0 to CPU
180 | Move blk.0.ffn_up_exps.weight (537 Mb) from ROCm0 to CPU
181 | Move blk.1.ffn_down_exps.weight (537 Mb) from ROCm0 to CPU
182 | Move blk.1.ffn_up_exps.weight (537 Mb) from ROCm0 to CPU
183 | Move blk.2.ffn_down_exps.weight (537 Mb) from ROCm0 to CPU
184 | Move blk.2.ffn_up_exps.weight (537 Mb) from ROCm0 to CPU
185 | Move blk.3.ffn_down_exps.weight (537 Mb) from ROCm0 to CPU
186 | Move blk.3.ffn_up_exps.weight (537 Mb) from ROCm0 to CPU
187 | Move blk.4.ffn_down_exps.weight (537 Mb) from ROCm0 to CPU
188 | Move blk.4.ffn_up_exps.weight (537 Mb) from ROCm0 to CPU
189 | Move blk.5.ffn_down_exps.weight (537 Mb) from ROCm0 to CPU
190 | Move blk.5.ffn_up_exps.weight (537 Mb) from ROCm0 to CPU
191 | Move blk.6.ffn_down_exps.weight (537 Mb) from ROCm0 to CPU
192 | Move blk.18.ffn_down_exps.weight (537 Mb) from ROCm1 to CPU
193 | Move blk.18.ffn_up_exps.weight (537 Mb) from ROCm1 to CPU
194 | Move blk.19.ffn_down_exps.weight (537 Mb) from ROCm1 to CPU
195 | Move blk.19.ffn_up_exps.weight (537 Mb) from ROCm1 to CPU
196 | Move blk.20.ffn_down_exps.weight (537 Mb) from ROCm1 to CPU
197 | Move blk.20.ffn_up_exps.weight (537 Mb) from ROCm1 to CPU
198 | Move blk.21.ffn_down_exps.weight (537 Mb) from ROCm1 to CPU
199 | Move blk.21.ffn_up_exps.weight (537 Mb) from ROCm1 to CPU
200 | Move blk.22.ffn_down_exps.weight (537 Mb) from ROCm1 to CPU
201 | Move blk.22.ffn_up_exps.weight (537 Mb) from ROCm1 to CPU
202 | Move blk.23.ffn_down_exps.weight (537 Mb) from ROCm1 to CPU
203 | ======= Device memory usage
204 | ROCm0 32366 Mb of 32768 Mb (23066)
205 | ROCm1 32642 Mb of 32768 Mb (24142)
206 | CPU 12909 Mb of 131072 Mb (12909)
207 |
208 | ======= Tensors offload result
209 | Offload ffn_gate_exps (10) 0 (ROCm0 = 18, ROCm1 = 18, CPU = 0 ) of 36
210 | Offload ffn_up_exps (20) 0 (ROCm0 = 12, ROCm1 = 13, CPU = 11) of 36
211 | Offload ffn_down_exps (20) 0 (ROCm0 = 11, ROCm1 = 12, CPU = 13) of 36
212 |
213 | ======= Helm chart config
214 | extraEnvVars:
215 | - name: LLAMA_ARG_MAIN_GPU
216 | value: '0'
217 | - name: LLAMA_ARG_TENSOR_SPLIT
218 | value: '18,18,0'
219 |
220 | modelTensorsOverride:
221 | - name: CPU
222 | tensors:
223 | - blk.0.ffn_down_exps.weight
224 | - blk.0.ffn_up_exps.weight
225 | - blk.1.ffn_down_exps.weight
226 | - blk.1.ffn_up_exps.weight
227 | - blk.2.ffn_down_exps.weight
228 | - blk.2.ffn_up_exps.weight
229 | - blk.3.ffn_down_exps.weight
230 | - blk.3.ffn_up_exps.weight
231 | - blk.4.ffn_down_exps.weight
232 | - blk.4.ffn_up_exps.weight
233 | - blk.5.ffn_down_exps.weight
234 | - blk.5.ffn_up_exps.weight
235 | - blk.6.ffn_down_exps.weight
236 | - blk.18.ffn_down_exps.weight
237 | - blk.18.ffn_up_exps.weight
238 | - blk.19.ffn_down_exps.weight
239 | - blk.19.ffn_up_exps.weight
240 | - blk.20.ffn_down_exps.weight
241 | - blk.20.ffn_up_exps.weight
242 | - blk.21.ffn_down_exps.weight
243 | - blk.21.ffn_up_exps.weight
244 | - blk.22.ffn_down_exps.weight
245 | - blk.22.ffn_up_exps.weight
246 | - blk.23.ffn_down_exps.weight
247 |
248 | ======= CMD config
249 | --main-gpu 0 --tensor-split "18,18,0" --override-tensor "(blk.0.ffn_down_exps.weight|blk.0.ffn_up_exps.weight|blk.1.ffn_down_exps.weight|blk.1.ffn_up_exps.weight|blk.2.ffn_down_exps.weight|blk.2.ffn_up_exps.weight|blk.3.ffn_down_exps.weight|blk.3.ffn_up_exps.weight|blk.4.ffn_down_exps.weight|blk.4.ffn_up_exps.weight|blk.5.ffn_down_exps.weight|blk.5.ffn_up_exps.weight|blk.6.ffn_down_exps.weight|blk.18.ffn_down_exps.weight|blk.18.ffn_up_exps.weight|blk.19.ffn_down_exps.weight|blk.19.ffn_up_exps.weight|blk.20.ffn_down_exps.weight|blk.20.ffn_up_exps.weight|blk.21.ffn_down_exps.weight|blk.21.ffn_up_exps.weight|blk.22.ffn_down_exps.weight|blk.22.ffn_up_exps.weight|blk.23.ffn_down_exps.weight)=CPU"
250 | ```
251 |
252 | ```
253 | ┌┌┤ Memory Usage ├──────────────────────────────────────────────────────────┐ │ ┌┤ Memory Usage ├──────────────────────────────────────────────────────────┐
254 | ││ VRAM: [ 32545 / 32752 MiB ] GTT: [ 14 / 48256 MiB ] │ │ │ VRAM: [ 32155 / 32752 MiB ] GTT: [ 14 / 48256 MiB ] │
255 | └└──────────────────────────────────────────────────────────────────────────┘ │ └──────────────────────────────────────────────────────────────────────────┘
256 |
257 | prompt eval time = 57715.98 ms / 8777 tokens ( 6.58 ms per token, 152.07 tokens per second)
258 | eval time = 66072.62 ms / 878 tokens ( 75.25 ms per token, 13.29 tokens per second)
259 | total time = 123788.60 ms / 9655 tokens
260 | ```
261 |
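262 | A back-of-the-envelope sketch of the ReservedMemoryMb adjustment described above (illustrative only; `NextReservedMb` and the 512 Mb safety margin are my own, not part of the tool):
263 |
264 | ```csharp
265 | using System;
266 |
267 | // Hypothetical helper: derive a tighter ReservedMemoryMb from the free VRAM observed under load.
268 | static int NextReservedMb(int currentReservedMb, int freeVramMb, int safetyMarginMb = 512)
269 |     => Math.Max(0, currentReservedMb - freeVramMb + safetyMarginMb);
270 |
271 | // First run on ROCm0: ReservedMemoryMb=10240, the gauge showed 30393 of 32752 MiB used.
272 | Console.WriteLine(NextReservedMb(10240, 32752 - 30393)); // 8393 Mb; round up for extra headroom
273 | ```
274 |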
--------------------------------------------------------------------------------
/llama.cpp/preset.rocm-6.3.3.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | export LLAMA_ROCM_VERSION="6.3.3"
4 |
--------------------------------------------------------------------------------
/llama.cpp/preset.rocm-6.4.4.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | export LLAMA_ROCM_VERSION="6.4.4"
4 |
--------------------------------------------------------------------------------
/llama.cpp/preset.rocm-7.0.0.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | export LLAMA_ROCM_VERSION="7.0.0"
4 |
--------------------------------------------------------------------------------
/llama.cpp/readme.md:
--------------------------------------------------------------------------------
1 | # llama.cpp GFX906
2 | LLM inference in C/C++ https://github.com/ggml-org/llama.cpp
3 |
4 | Recommended image: `docker.io/mixa3607/llama.cpp-gfx906:7.0.0-complete`
5 |
6 | Also see [llamacpp-offload-calculator](./llamacpp-offload-calculator/readme.md)
7 |
8 | ## Benchmarks
9 | ```shell
10 | export PATH="/app:$PATH"
11 | export LD_LIBRARY_PATH="/app:$LD_LIBRARY_PATH"
12 |
13 | MODEL=/root/.cache/huggingface/hub/models--ggml-org--gemma-3n-E4B-it-GGUF/snapshots/ee0f0cb58a4b9d5b48dd55b576db22eeeeecdd7e/gemma-3n-E4B-it-Q8_0.gguf
14 | MODEL=/root/.cache/huggingface/hub/models--unsloth--gemma-3-12b-it-GGUF/snapshots/a5592d885c8a933e824f80d2eeda84db95ad2712/gemma-3-12b-it-Q8_0.gguf
15 | MODEL=/root/.cache/huggingface/hub/models--bartowski--Qwen_Qwen3-14B-GGUF/snapshots/bd080f768a6401c2d5a7fa53a2e50cd8218a9ce2/Qwen_Qwen3-14B-Q4_K_S.gguf
16 | MODEL=/root/.cache/huggingface/hub/models--bartowski--Qwen_Qwen3-14B-GGUF/snapshots/bd080f768a6401c2d5a7fa53a2e50cd8218a9ce2/Qwen_Qwen3-14B-Q4_0.gguf
17 | MODEL=/root/.cache/huggingface/hub/models--bartowski--Qwen_Qwen3-14B-GGUF/snapshots/bd080f768a6401c2d5a7fa53a2e50cd8218a9ce2/Qwen_Qwen3-14B-bf16.gguf
18 | MODEL=/root/.cache/huggingface/hub/models--ggml-org--gemma-3-27b-it-GGUF/snapshots/f94c25afed0072339c5fa3b705a7b4222afe5f62/gemma-3-27b-it-f16-00001-of-00002.gguf
19 |
20 | llama-bench --model $MODEL -t 16 --flash-attn 0
21 | ```
22 |
23 | ```
24 | ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
25 | ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
26 | ggml_cuda_init: found 2 ROCm devices:
27 | Device 0: AMD Radeon Graphics, gfx906:sramecc+:xnack- (0x906), VMM: no, Wave Size: 64
28 | Device 1: AMD Radeon Graphics, gfx906:sramecc+:xnack- (0x906), VMM: no, Wave Size: 64
29 | load_backend: loaded ROCm backend from /app/libggml-hip.so
30 | load_backend: loaded CPU backend from /app/libggml-cpu-haswell.so
31 | ```
32 |
33 | | rocm | llama.cpp | model | size | params | backend | ngl | test | t/s |
34 | | ----- | --------- | ------------------------------------ | ---------: | ---------: | ---------- | --: | --------------: | -------------------: |
35 | | 6.3.4 | 982e3472 | gemma3n E4B Q8_0 | 6.84 GiB | 6.87 B | ROCm | 99 | pp512 | 483.29 ± 0.68 |
36 | | 6.3.4 | 982e3472 | gemma3n E4B Q8_0 | 6.84 GiB | 6.87 B | ROCm | 99 | tg128 | 33.48 ± 0.43 |
37 | | 6.3.4 | 982e3472 | gemma3 12B Q8_0 | 11.64 GiB | 11.77 B | ROCm | 99 | pp512 | 246.66 ± 0.07 |
38 | | 6.3.4 | 982e3472 | gemma3 12B Q8_0 | 11.64 GiB | 11.77 B | ROCm | 99 | tg128 | 28.41 ± 0.12 |
39 | | 6.3.4 | 982e3472 | qwen3 14B Q4_K - Small | 7.98 GiB | 14.77 B | ROCm | 99 | pp512 | 242.34 ± 0.15 |
40 | | 6.3.4 | 982e3472 | qwen3 14B Q4_K - Small | 7.98 GiB | 14.77 B | ROCm | 99 | tg128 | 35.87 ± 0.15 |
41 | | 6.3.4 | 982e3472 | qwen3 14B Q4_0 | 7.95 GiB | 14.77 B | ROCm | 99 | pp512 | 574.13 ± 0.28 |
42 | | 6.3.4 | 982e3472 | qwen3 14B Q4_0 | 7.95 GiB | 14.77 B | ROCm | 99 | tg128 | 39.02 ± 0.23 |
43 | | 6.3.4 | 982e3472 | qwen3 14B BF16 | 27.51 GiB | 14.77 B | ROCm | 99 | pp512 | 118.01 ± 0.24 |
44 | | 6.3.4 | 982e3472 | qwen3 14B BF16 | 27.51 GiB | 14.77 B | ROCm | 99 | tg128 | 19.33 ± 0.08 |
45 | | 6.3.4 | 982e3472 | gemma3 27B F16 | 50.31 GiB | 27.01 B | ROCm | 99 | pp512 | 236.51 ± 0.14 |
46 | | 6.3.4 | 982e3472 | gemma3 27B F16 | 50.31 GiB | 27.01 B | ROCm | 99 | tg128 | 10.37 ± 0.04 |
47 | | 6.3.4 | 982e3472 | llama4 17Bx16E (Scout) Q3_K - Medium | 48.19 GiB | 107.77 B | ROCm | 99 | pp512 | 160.50 ± 0.81 |
48 | | 6.3.4 | 982e3472 | llama4 17Bx16E (Scout) Q3_K - Medium | 48.19 GiB | 107.77 B | ROCm | 99 | tg128 | 22.75 ± 0.07 |
49 | | 6.4.1 | 982e3472 | gemma3n E4B Q8_0 | 6.84 GiB | 6.87 B | ROCm | 99 | pp512 | 606.83 ± 0.97 |
50 | | 6.4.1 | 982e3472 | gemma3n E4B Q8_0 | 6.84 GiB | 6.87 B | ROCm | 99 | tg128 | 33.36 ± 0.23 |
51 | | 6.4.1 | 982e3472 | gemma3 12B Q8_0 | 11.64 GiB | 11.77 B | ROCm | 99 | pp512 | 329.70 ± 0.30 |
52 | | 6.4.1 | 982e3472 | gemma3 12B Q8_0 | 11.64 GiB | 11.77 B | ROCm | 99 | tg128 | 28.58 ± 0.15 |
53 | | 6.4.1 | 982e3472 | qwen3 14B Q4_K - Small | 7.98 GiB | 14.77 B | ROCm | 99 | pp512 | 286.58 ± 0.15 |
54 | | 6.4.1 | 982e3472 | qwen3 14B Q4_K - Small | 7.98 GiB | 14.77 B | ROCm | 99 | tg128 | 36.48 ± 0.11 |
55 | | 6.4.1 | 982e3472 | qwen3 14B Q4_0 | 7.95 GiB | 14.77 B | ROCm | 99 | pp512 | 570.15 ± 0.23 |
56 | | 6.4.1 | 982e3472 | qwen3 14B Q4_0 | 7.95 GiB | 14.77 B | ROCm | 99 | tg128 | 38.94 ± 0.16 |
57 | | 6.4.1 | 982e3472 | qwen3 14B BF16 | 27.51 GiB | 14.77 B | ROCm | 99 | pp512 | 119.03 ± 0.31 |
58 | | 6.4.1 | 982e3472 | qwen3 14B BF16 | 27.51 GiB | 14.77 B | ROCm | 99 | tg128 | 19.46 ± 0.10 |
59 | | 6.4.1 | 982e3472 | gemma3 27B F16 | 50.31 GiB | 27.01 B | ROCm | 99 | pp512 | 238.38 ± 0.26 |
60 | | 6.4.1 | 982e3472 | gemma3 27B F16 | 50.31 GiB | 27.01 B | ROCm | 99 | tg128 | 10.41 ± 0.03 |
61 | | 6.4.1 | 982e3472 | llama4 17Bx16E (Scout) Q3_K - Medium | 48.19 GiB | 107.77 B | ROCm | 99 | pp512 | 190.52 ± 0.84 |
62 | | 6.4.1 | 982e3472 | llama4 17Bx16E (Scout) Q3_K - Medium | 48.19 GiB | 107.77 B | ROCm | 99 | tg128 | 22.96 ± 0.10 |
63 |
64 |
65 | ## Run
66 | ### Docker
67 | See https://github.com/ggml-org/llama.cpp/blob/master/docs/docker.md + https://github.com/ROCm/vllm/blob/main/docs/deployment/docker.md
68 |
69 | ### Kubernetes
70 | Helm chart and samples [mixa3607 charts](https://github.com/mixa3607/charts)
71 |
72 | ## Build
73 | See the build vars in `./env.sh`. You may also use the presets `./preset.rocm-*.sh`. Then run `./build-and-push.rocm.sh`:
74 | ```bash
75 | $ . preset.rocm-7.0.0.sh
76 | $ ./build-and-push.rocm.sh
77 | ~/REPOS/mixa3607/llama.cpp-gfx906/rocm ~/REPOS/mixa3607/llama.cpp-gfx906/rocm
78 | ~/REPOS/mixa3607/llama.cpp-gfx906/rocm
79 | ~/REPOS/mixa3607/llama.cpp-gfx906/llama.cpp ~/REPOS/mixa3607/llama.cpp-gfx906/rocm
80 | ~/REPOS/mixa3607/llama.cpp-gfx906/rocm
81 | ~/REPOS/mixa3607/llama.cpp-gfx906/comfyui ~/REPOS/mixa3607/llama.cpp-gfx906/rocm
82 | ~/REPOS/mixa3607/llama.cpp-gfx906/rocm
83 | ~/REPOS/mixa3607/llama.cpp-gfx906/vllm ~/REPOS/mixa3607/llama.cpp-gfx906/rocm
84 | ~/REPOS/mixa3607/llama.cpp-gfx906/rocm
85 | #0 building with "remote" instance using remote driver
86 | #...............
87 | #14 DONE 583.8s
88 | ```
89 |
--------------------------------------------------------------------------------
/pytorch/build-and-push.torch.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | set -e
3 |
4 | cd $(dirname $0)
5 | source ../env.sh
6 |
7 | IMAGE_TAGS=(
8 | "$TORCH_IMAGE:${TORCH_VERSION}-rocm-${TORCH_ROCM_VERSION}-${REPO_GIT_REF}"
9 | "$TORCH_IMAGE:${TORCH_VERSION}-rocm-${TORCH_ROCM_VERSION}"
10 | )
11 |
12 | if docker_image_pushed ${IMAGE_TAGS[0]}; then
13 | echo "${IMAGE_TAGS[0]} already in registry. Skip"
14 | exit 0
15 | fi
16 |
17 | DOCKER_EXTRA_ARGS=()
18 | for (( i=0; i<${#IMAGE_TAGS[@]}; i++ )); do
19 | DOCKER_EXTRA_ARGS+=("-t" "${IMAGE_TAGS[$i]}")
20 | done
21 |
22 | mkdir -p ./logs
23 | docker buildx build ${DOCKER_EXTRA_ARGS[@]} --push \
24 | --build-arg BASE_ROCM_IMAGE="${PATCHED_ROCM_IMAGE}:${TORCH_ROCM_VERSION}-complete" \
25 | --build-arg ROCM_ARCH="${ROCM_ARCH}" \
26 | --build-arg PYTORCH_BRANCH="$TORCH_VERSION" \
27 | --build-arg PYTORCH_VISION_BRANCH="$TORCH_VISION_VERSION" \
28 | --target final -f ./torch.Dockerfile --progress=plain ./submodules 2>&1 | tee ./logs/build_$(date +%Y%m%d%H%M%S).log
29 |
--------------------------------------------------------------------------------
/pytorch/env.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | pushd $(dirname ${BASH_SOURCE[0]})
4 |
5 | # rocm version
6 | if [ "$TORCH_ROCM_VERSION" == "" ]; then TORCH_ROCM_VERSION=6.3.3; fi
7 | # torch git checkpoint
8 | if [ "$TORCH_VERSION" == "" ]; then TORCH_VERSION="v2.7.1"; fi
9 |
10 | # destination image
11 | if [ "$TORCH_IMAGE" == "" ]; then
12 | TORCH_IMAGE=docker.io/mixa3607/pytorch-gfx906
13 | #TORCH_IMAGE=registry.arkprojects.space/apps/pytorch-gfx906
14 | fi
15 |
16 | popd
17 |
--------------------------------------------------------------------------------
/pytorch/preset.torch-2.7.1-rocm-6.3.3.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | export TORCH_ROCM_VERSION="6.3.3"
4 | export TORCH_VERSION="v2.7.1"
5 | export TORCH_VISION_VERSION="v0.21.0"
6 |
--------------------------------------------------------------------------------
/pytorch/preset.torch-2.7.1-rocm-6.4.4.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | export TORCH_ROCM_VERSION="6.4.4"
4 | export TORCH_VERSION="v2.7.1"
5 | export TORCH_VISION_VERSION="v0.21.0"
6 |
--------------------------------------------------------------------------------
/pytorch/preset.torch-2.8.0-rocm-6.3.3.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | export TORCH_ROCM_VERSION="6.3.3"
4 | export TORCH_VERSION="v2.8.0"
5 | export TORCH_VISION_VERSION="v0.23.0"
6 |
--------------------------------------------------------------------------------
/pytorch/preset.torch-2.8.0-rocm-6.4.4.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | export TORCH_ROCM_VERSION="6.4.4"
4 | export TORCH_VERSION="v2.8.0"
5 | export TORCH_VISION_VERSION="v0.23.0"
6 |
--------------------------------------------------------------------------------
/pytorch/preset.torch-2.8.0-rocm-7.0.2.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | export TORCH_ROCM_VERSION="7.0.2"
4 | export TORCH_VERSION="v2.8.0"
5 |
--------------------------------------------------------------------------------
/pytorch/preset.torch-2.9.0-rocm-7.0.2.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | export TORCH_ROCM_VERSION="7.0.2"
4 | export TORCH_VERSION="v2.9.0"
5 |
--------------------------------------------------------------------------------
/pytorch/readme.md:
--------------------------------------------------------------------------------
1 | # PyTorch GFX906
2 | Tensors and Dynamic neural networks in Python with strong GPU acceleration.
3 |
4 | Packages:
5 | - torch
6 | - torchvision
7 | - torchaudio
8 |
9 | Recommended images: `docker.io/mixa3607/pytorch-gfx906:(v2.7.1|v2.8.0)-rocm-6.3.3`
10 |
11 | ## Build
12 | See the build vars in `./env.sh`. You may also use the presets `./preset.*.sh`. Then run `./build-and-push.torch.sh`:
13 | ```bash
14 | $ . preset.torch-2.7.1-rocm-6.3.3.sh
15 | $ ./build-and-push.torch.sh
16 | ~/REPOS/mixa3607/llama.cpp-gfx906/rocm ~/REPOS/mixa3607/llama.cpp-gfx906/rocm
17 | ~/REPOS/mixa3607/llama.cpp-gfx906/rocm
18 | ~/REPOS/mixa3607/llama.cpp-gfx906/llama.cpp ~/REPOS/mixa3607/llama.cpp-gfx906/rocm
19 | ~/REPOS/mixa3607/llama.cpp-gfx906/rocm
20 | ~/REPOS/mixa3607/llama.cpp-gfx906/comfyui ~/REPOS/mixa3607/llama.cpp-gfx906/rocm
21 | ~/REPOS/mixa3607/llama.cpp-gfx906/rocm
22 | ~/REPOS/mixa3607/llama.cpp-gfx906/vllm ~/REPOS/mixa3607/llama.cpp-gfx906/rocm
23 | ~/REPOS/mixa3607/llama.cpp-gfx906/rocm
24 | #0 building with "remote" instance using remote driver
25 |
26 | #1 [internal] load build definition from rocm.Dockerfile
27 | #1 transferring dockerfile: 4.95kB done
28 | #1 DONE 0.0s
29 |
30 | #2 [auth] dockerio-proxy/rocm/dev-ubuntu-24.04:pull rocm/dev-ubuntu-24.04:pull token for registry.arkprojects.space
31 | #2 DONE 0.0s
32 |
33 | #3 [internal] load metadata for docker.io/rocm/dev-ubuntu-24.04:7.0-complete
34 | #3 DONE 1.8s
35 |
36 | #4 [internal] load .dockerignore
37 | #4 transferring context: 2B done
38 | #...............
39 | #24 exporting to image
40 | #24 pushing layers 6.5s done
41 | #24 pushing manifest for docker.io/mixa3607/rocm-gfx906:7.0.0-20251005035204-complete@sha256:00532f62462e80d51e48b021afb7875af53164455c84dc28b24eb29d39aa0005
42 | #24 pushing manifest for docker.io/mixa3607/rocm-gfx906:7.0.0-20251005035204-complete@sha256:00532f62462e80d51e48b021afb7875af53164455c84dc28b24eb29d39aa0005 3.3s done
43 | #24 pushing layers 2.0s done
44 | #24 pushing manifest for docker.io/mixa3607/rocm-gfx906:7.0.0-complete@sha256:00532f62462e80d51e48b021afb7875af53164455c84dc28b24eb29d39aa0005
45 | #24 pushing manifest for docker.io/mixa3607/rocm-gfx906:7.0.0-complete@sha256:00532f62462e80d51e48b021afb7875af53164455c84dc28b24eb29d39aa0005 2.2s done
46 | #24 DONE 17.6s
47 | ```
48 |
--------------------------------------------------------------------------------
/pytorch/submodules/.gitkeep:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mixa3607/ML-gfx906/d95fce7ed5e14ec9cc7b801c668696194a929cda/pytorch/submodules/.gitkeep
--------------------------------------------------------------------------------
/pytorch/torch.Dockerfile:
--------------------------------------------------------------------------------
1 | ARG BASE_ROCM_IMAGE="docker.io/mixa3607/vllm-gfx906:latest"
2 | ARG ROCM_ARCH="gfx906"
3 | ARG PYTORCH_REPO="https://github.com/pytorch/pytorch.git"
4 | ARG PYTORCH_BRANCH="v2.7.1"
5 | ARG PYTORCH_VISION_REPO="https://github.com/pytorch/vision.git"
6 | ARG PYTORCH_VISION_BRANCH=""
7 | ARG PYTORCH_AUDIO_REPO="https://github.com/pytorch/audio.git"
8 | ARG PYTORCH_AUDIO_BRANCH=""
9 |
10 | ############# Base image #############
11 | FROM ${BASE_ROCM_IMAGE} AS rocm_base
12 | # Install basic utilities and Python 3.12
13 | RUN apt-get update && apt-get install -y software-properties-common git python3-pip && \
14 | add-apt-repository ppa:deadsnakes/ppa && \
15 | apt-get update -y && \
16 | apt-get install -y python3.12 python3.12-dev python3.12-venv \
17 | python3.12-lib2to3 python-is-python3 python3.12-full && \
18 | update-alternatives --install /usr/bin/python3 python3 /usr/bin/python3.12 1 && \
19 | update-alternatives --set python3 /usr/bin/python3.12 && \
20 | ln -sf /usr/bin/python3.12-config /usr/bin/python3-config && \
21 | python3 -m pip config set global.break-system-packages true && \
22 | pip install amdsmi==$(cat /opt/ROCM_VERSION_FULL) && \
23 | true
24 |
25 | # Set environment variables
26 | ARG ROCM_ARCH
27 | ENV ROCM_ARCH=$ROCM_ARCH
28 | ENV PYTORCH_ROCM_ARCH=$ROCM_ARCH
29 | ENV PATH=/opt/rocm/llvm/bin:$PATH
30 | ENV ROCM_PATH=/opt/rocm
31 | ENV LD_LIBRARY_PATH=/opt/rocm/lib:/usr/local/lib:
32 |
33 | ############# Build torch #############
34 | FROM rocm_base AS build_torch
35 | RUN pip install setuptools wheel packaging cmake ninja setuptools_scm jinja2
36 |
37 | WORKDIR /build/pytorch
38 | ARG PYTORCH_REPO
39 | ARG PYTORCH_BRANCH
40 | RUN git clone --depth 1 --recurse-submodules --shallow-submodules --jobs 4 --branch "${PYTORCH_BRANCH}" "${PYTORCH_REPO}" .
41 | RUN pip install -r requirements.txt
42 | RUN sed -i 's|7e29c325d5bd33ba896ddb106f5d4fc7d715274dca7fe937f724fffa82017838|1e9b3dddf0c7fc07131c6f0f5266129e83ce2331f459fa2be8c63f4ae91b0f5b|g' cmake/External/aotriton.cmake && \
43 | python3 tools/amd_build/build_amd.py && \
44 | CMAKE_PREFIX_PATH=$(python3 -c 'import sys; print(sys.prefix)') python3 setup.py bdist_wheel --dist-dir=/dist && \
45 | pip install /dist/*.whl
46 |
47 | ############# Build vision #############
48 | FROM build_torch AS build_vision
49 | WORKDIR /build/vision
50 | ARG PYTORCH_VISION_REPO
51 | ARG PYTORCH_VISION_BRANCH
52 | RUN if [ "${PYTORCH_VISION_BRANCH}" = "" ]; then \
53 | git clone --depth 1 --recurse-submodules --shallow-submodules --jobs 4 "${PYTORCH_VISION_REPO}" . && \
54 | git fetch --depth=1 origin "$(cat /build/pytorch/.github/ci_commit_pins/vision.txt)" && \
55 | git checkout "$(cat /build/pytorch/.github/ci_commit_pins/vision.txt)" && \
56 | git reset --hard FETCH_HEAD; \
57 | else \
58 | git clone --depth 1 --recurse-submodules --shallow-submodules --jobs 4 --branch "${PYTORCH_VISION_BRANCH}" "${PYTORCH_VISION_REPO}" . ; \
59 | fi
60 | RUN python3 setup.py bdist_wheel --dist-dir=/dist && \
61 | pip install /dist/*.whl
62 |
63 | ############# Build audio #############
64 | FROM build_torch AS build_audio
65 | WORKDIR /build/audio
66 | ARG PYTORCH_AUDIO_REPO
67 | ARG PYTORCH_AUDIO_BRANCH
68 | RUN if [ "${PYTORCH_AUDIO_BRANCH}" = "" ]; then \
69 | git clone --depth 1 --recurse-submodules --shallow-submodules --jobs 4 "${PYTORCH_AUDIO_REPO}" . && \
70 | git fetch --depth=1 origin "$(cat /build/pytorch/.github/ci_commit_pins/audio.txt)" && \
71 | git checkout "$(cat /build/pytorch/.github/ci_commit_pins/audio.txt)" && \
72 | git reset --hard FETCH_HEAD; \
73 | else \
74 | git clone --depth 1 --recurse-submodules --shallow-submodules --jobs 4 --branch "${PYTORCH_AUDIO_BRANCH}" "${PYTORCH_AUDIO_REPO}" . ; \
75 | fi
76 | RUN python3 setup.py bdist_wheel --dist-dir=/dist && \
77 | pip install /dist/*.whl
78 |
79 | ############# Install all #############
80 | FROM rocm_base AS final
81 | RUN --mount=type=bind,from=build_torch,src=/dist/,target=/dist_torch \
82 | --mount=type=bind,from=build_vision,src=/dist/,target=/dist_vision \
83 | --mount=type=bind,from=build_audio,src=/dist/,target=/dist_audio \
84 | pip install /dist_torch/*.whl /dist_vision/torchvision-*.whl /dist_audio/torchaudio-*.whl && \
85 | true
86 |
87 | CMD ["/bin/bash"]
88 |
--------------------------------------------------------------------------------
/readme.md:
--------------------------------------------------------------------------------
1 | # ML software for the deprecated GFX906 arch
2 |
3 | ## Prebuilt images
4 | ### Images
5 | | Name | Source | Status | Docs |
6 | | ---- | ------ | ------ | ---- |
7 | | ROCm | [ROCm](https://github.com/ROCm/ROCm), [rocBLAS](https://github.com/ROCm/rocBLAS) | OK | [readme](./rocm/readme.md) |
8 | | llama.cpp | [llama.cpp](https://github.com/ggml-org/llama.cpp) | OK | [readme](./llama.cpp/readme.md) |
9 | | ComfyUI | [ComfyUI](https://github.com/comfyanonymous/ComfyUI) | OK | [readme](./comfyui/readme.md) |
10 | | VLLM | [VLLM](https://github.com/nlzy/vllm-gfx906), [triton](https://github.com/nlzy/triton-gfx906) | OK | [readme](./vllm/readme.md) |
11 |
12 | ### Deps graph
13 | ```mermaid
14 | flowchart TD
15 | rocm-src[docker.io/rocm/dev-ubuntu-24.04] --> rocm[docker.io/mixa3607/rocm-gfx906]
16 | rocm --> llama[docker.io/mixa3607/llama.cpp-gfx906]
17 | rocm --> torch[docker.io/mixa3607/pytorch-gfx906]
18 | torch --> comfyui[docker.io/mixa3607/comfyui-gfx906]
19 | torch --> vllm[docker.io/mixa3607/vllm-gfx906]
20 | ```
21 |
22 | ## Perf tuning
23 | Lowering `smcPPTable/TdcLimitGfx` from 350 to 150 reduced the hotspot temperature by about 10 degrees with almost no performance drop in vllm ([table in vllm](./vllm/readme.md#benchmarks))
24 |
25 | ```console
26 | $ upp -p /sys/class/drm/card${GPU_ID}/device/pp_table set --write smcPPTable/TdcLimitGfx=150
27 | Changing smcPPTable.TdcLimitGfx of type H from 330 to 150 at 0x1fe
28 | Committing changes to '/sys/class/drm/card1/device/pp_table'.
29 | ```
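The change does not survive a reboot or driver reload. A minimal sketch for reapplying it to every card at startup, assuming the same `upp` tool shown above is on PATH and each card exposes a writable `pp_table` (the 150 A limit is the value from the example):

```bash
#!/bin/bash
# Lower TdcLimitGfx on every AMD GPU that exposes a powerplay table.
set -e
for table in /sys/class/drm/card*/device/pp_table; do
  [ -w "$table" ] || continue  # skip cards without a writable table
  upp -p "$table" set --write smcPPTable/TdcLimitGfx=150
done
```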
30 |
31 |
32 | ## Environment
33 | All software was tested on a Lenovo RD450X with 256G RAM and 2x MI50 32G (x16 + x8). The GPUs are cooled with an [AMD Instinct MI50 blower fan adapter (thingiverse)](https://www.thingiverse.com/thing:7153218).
34 |
35 | ## RVS (ROCm Validation Suite)
36 | ```shell
37 | cd /opt/rocm-6.4.1/bin
38 | apt update
39 | apt install -y rocm-validation-suite
40 | echo 'actions:
41 | - name: gst-581Tflops-4K4K8K-rand-bf16
42 | device: all
43 | module: gst
44 | log_interval: 3000
45 | ramp_interval: 5000
46 | duration: 15000
47 | hot_calls: 1000
48 | copy_matrix: false
49 | target_stress: 581000
50 | matrix_size_a: 4864
51 | matrix_size_b: 4096
52 | matrix_size_c: 8192
53 | matrix_init: rand
54 | data_type: bf16_r
55 | lda: 8320
56 | ldb: 8320
57 | ldc: 4992
58 | ldd: 4992
59 | transa: 1
60 | transb: 0
61 | alpha: 1
62 | beta: 0' > ~/gst-581Tflops-4K4K8K-rand-bf16.conf
63 | ./rvs -c ~/gst-581Tflops-4K4K8K-rand-bf16.conf
64 | ```
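While the stress test runs, temperatures and power draw can be watched from a second shell with `rocm-smi`, which ships with ROCm:

```bash
# Poll temperature and power once per second during the RVS run
watch -n 1 rocm-smi --showtemp --showpower
```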
65 |
66 |
67 |
68 |
--------------------------------------------------------------------------------
/rocm/build-and-push.rocm.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | set -e
3 |
4 | cd "$(dirname "$0")"
5 | source ../env.sh
6 |
7 | IMAGE_TAGS=(
8 | "$PATCHED_ROCM_IMAGE:${ROCM_VERSION}-${REPO_GIT_REF}-complete"
9 | "$PATCHED_ROCM_IMAGE:${ROCM_VERSION}-complete"
10 | )
11 |
12 | if docker_image_pushed ${IMAGE_TAGS[0]}; then
13 | echo "${IMAGE_TAGS[0]} already in registry. Skip"
14 | exit 0
15 | fi
16 |
17 | DOCKER_EXTRA_ARGS=()
18 | for (( i=0; i<${#IMAGE_TAGS[@]}; i++ )); do
19 | DOCKER_EXTRA_ARGS+=("-t" "${IMAGE_TAGS[$i]}")
20 | done
21 |
22 | mkdir -p ./logs
23 | docker buildx build "${DOCKER_EXTRA_ARGS[@]}" --push \
24 | --build-arg BASE_ROCM_IMAGE="${BASE_ROCM_IMAGE}:${ROCM_IMAGE_VER}-complete" \
25 | --build-arg ROCM_ARCH="${ROCM_ARCH}" \
26 | --target final -f ./rocm.Dockerfile --progress=plain ./submodules 2>&1 | tee ./logs/build_$(date +%Y%m%d%H%M%S).log
27 |
--------------------------------------------------------------------------------
/rocm/env.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | pushd "$(dirname "${BASH_SOURCE[0]}")"
4 |
5 | # value from tags at https://hub.docker.com/r/rocm/dev-ubuntu-24.04/tags, e.g. 7.0 or 6.4.4
6 | if [ "$ROCM_VERSION" == "" ]; then
7 | ROCM_VERSION=6.3.3
8 | fi
9 | if [ "$ROCM_IMAGE_VER" == "" ]; then
10 | ROCM_IMAGE_VER=6.3.3
11 | fi
12 |
13 | # target arch
14 | if [ "$ROCM_ARCH" == "" ]; then
15 | ROCM_ARCH=gfx906
16 | fi
17 |
18 | # source image
19 | if [ "$BASE_ROCM_IMAGE" == "" ]; then
20 | BASE_ROCM_IMAGE=docker.io/rocm/dev-ubuntu-24.04
21 | fi
22 |
23 | # destination image
24 | if [ "$PATCHED_ROCM_IMAGE" == "" ]; then
25 | PATCHED_ROCM_IMAGE=docker.io/mixa3607/rocm-gfx906
26 | #PATCHED_ROCM_IMAGE=registry.arkprojects.space/apps/rocm-gfx906
27 | fi
28 |
29 | popd
30 |
--------------------------------------------------------------------------------
/rocm/preset.rocm-6.3.3.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | export ROCM_VERSION="6.3.3"
4 | export ROCM_IMAGE_VER="6.3.3"
5 |
--------------------------------------------------------------------------------
/rocm/preset.rocm-6.4.4.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | export ROCM_VERSION="6.4.4"
4 | export ROCM_IMAGE_VER="6.4.4"
5 |
--------------------------------------------------------------------------------
/rocm/preset.rocm-7.0.0.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | export ROCM_VERSION="7.0.0"
4 | export ROCM_IMAGE_VER="7.0"
5 |
--------------------------------------------------------------------------------
/rocm/preset.rocm-7.0.2.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | export ROCM_VERSION="7.0.2"
4 | export ROCM_IMAGE_VER="7.0.2"
5 |
--------------------------------------------------------------------------------
/rocm/readme.md:
--------------------------------------------------------------------------------
1 | # ROCm GFX906
2 | An open software stack of programming models, tools, compilers, libraries, and runtimes for AI and HPC development on AMD GPUs.
3 | ROCm 6.4+ dropped gfx906 support, but the affected libraries can still be compiled manually.
4 |
5 | At the moment the following components are rebuilt:
6 | - rccl
7 | - rocblas+tensile
8 |
9 | The recommended image is `docker.io/mixa3607/rocm-gfx906:6.4.4-complete`.
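To verify that the rebuilt rocBLAS actually carries gfx906 kernels, the Tensile library files can be listed inside the container (a sketch; the path is the standard rocBLAS install layout):

```bash
# gfx906 entries should show up among the Tensile library files
ls /opt/rocm/lib/rocblas/library/ | grep gfx906 | head
```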
10 |
11 | ## Run
12 | ### Docker
13 | TODO
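In the meantime, a minimal sketch for plain Docker (the device and security flags are the usual ones for ROCm containers; adjust the tag as needed):

```bash
docker run -it --rm \
  --device=/dev/kfd --device=/dev/dri \
  --security-opt seccomp=unconfined --group-add video \
  docker.io/mixa3607/rocm-gfx906:6.4.4-complete \
  rocm-smi
```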
14 |
15 | ### Kubernetes
16 | ```yaml
17 | apiVersion: apps/v1
18 | kind: Deployment
19 | metadata:
20 | name: rocmdev
21 | namespace: ns-vllm
22 | labels:
23 | app: rocmdev
24 | spec:
25 | strategy:
26 | type: Recreate
27 | replicas: 1
28 | selector:
29 | matchLabels:
30 | app: rocmdev
31 | template:
32 | metadata:
33 | labels:
34 | app: rocmdev
35 | spec:
36 | containers:
37 | - name: rocmdev
38 |           image: docker.io/mixa3607/rocm-gfx906:7.0.0-20251005035204-complete
39 | imagePullPolicy: Always
40 | securityContext:
41 | privileged: true
42 | runAsNonRoot: false
43 | runAsGroup: 0
44 | runAsUser: 0
45 | command: [ "/bin/bash", "-c" ]
46 | args:
47 | - "apt install tmux wget -y; wget https://gist.githubusercontent.com/mixa3607/1e6d3ee7d87b018484cf80c7928b4c33/raw/.tmux.conf -O ~/.tmux.conf; while true; do sleep 1s; done;"
48 | #- sleep inf
49 | ```
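Once the deployment is up, a shell can be attached to its pod, e.g.:

```bash
# kubectl resolves deploy/rocmdev to one of the deployment's pods
kubectl -n ns-vllm exec -it deploy/rocmdev -- /bin/bash
```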
50 |
51 | ## Build
52 | See build vars in `./env.sh`. You may also use the presets `./preset.rocm-*.sh`. Then exec `./build-and-push.rocm.sh`:
53 | ```console
54 | $ . preset.rocm-7.0.0.sh
55 | $ ./build-and-push.rocm.sh
56 | ~/REPOS/mixa3607/llama.cpp-gfx906/rocm ~/REPOS/mixa3607/llama.cpp-gfx906/rocm
57 | ~/REPOS/mixa3607/llama.cpp-gfx906/rocm
58 | ~/REPOS/mixa3607/llama.cpp-gfx906/llama.cpp ~/REPOS/mixa3607/llama.cpp-gfx906/rocm
59 | ~/REPOS/mixa3607/llama.cpp-gfx906/rocm
60 | ~/REPOS/mixa3607/llama.cpp-gfx906/comfyui ~/REPOS/mixa3607/llama.cpp-gfx906/rocm
61 | ~/REPOS/mixa3607/llama.cpp-gfx906/rocm
62 | ~/REPOS/mixa3607/llama.cpp-gfx906/vllm ~/REPOS/mixa3607/llama.cpp-gfx906/rocm
63 | ~/REPOS/mixa3607/llama.cpp-gfx906/rocm
64 | #0 building with "remote" instance using remote driver
65 |
66 | #1 [internal] load build definition from rocm.Dockerfile
67 | #1 transferring dockerfile: 4.95kB done
68 | #1 DONE 0.0s
69 |
70 | #2 [auth] dockerio-proxy/rocm/dev-ubuntu-24.04:pull rocm/dev-ubuntu-24.04:pull token for registry.arkprojects.space
71 | #2 DONE 0.0s
72 |
73 | #3 [internal] load metadata for docker.io/rocm/dev-ubuntu-24.04:7.0-complete
74 | #3 DONE 1.8s
75 |
76 | #4 [internal] load .dockerignore
77 | #4 transferring context: 2B done
78 | #...............
79 | #24 exporting to image
80 | #24 pushing layers 6.5s done
81 | #24 pushing manifest for docker.io/mixa3607/rocm-gfx906:7.0.0-20251005035204-complete@sha256:00532f62462e80d51e48b021afb7875af53164455c84dc28b24eb29d39aa0005
82 | #24 pushing manifest for docker.io/mixa3607/rocm-gfx906:7.0.0-20251005035204-complete@sha256:00532f62462e80d51e48b021afb7875af53164455c84dc28b24eb29d39aa0005 3.3s done
83 | #24 pushing layers 2.0s done
84 | #24 pushing manifest for docker.io/mixa3607/rocm-gfx906:7.0.0-complete@sha256:00532f62462e80d51e48b021afb7875af53164455c84dc28b24eb29d39aa0005
85 | #24 pushing manifest for docker.io/mixa3607/rocm-gfx906:7.0.0-complete@sha256:00532f62462e80d51e48b021afb7875af53164455c84dc28b24eb29d39aa0005 2.2s done
86 | #24 DONE 17.6s
87 | ```
88 |
--------------------------------------------------------------------------------
/rocm/rocm.Dockerfile:
--------------------------------------------------------------------------------
1 | ARG ROCM_ARCH="gfx906"
2 | ARG BASE_ROCM_IMAGE="rocm/dev-ubuntu-24.04:6.4.4-complete"
3 | ARG ROCBLAS_REPO="https://github.com/ROCm/rocBLAS"
4 | ARG TENSILE_REPO="https://github.com/ROCm/Tensile"
5 | ARG RCCL_REPO="https://github.com/ROCm/rccl"
6 |
7 | ############# Base image #############
8 | FROM ${BASE_ROCM_IMAGE} AS rocm_base
9 | # Detect the installed ROCm version from the /opt/rocm-X.Y.Z directory name and cache it in /opt/ROCM_VERSION* for later stages
10 | RUN ROCM_VERSION_MAJOR=$(ls /opt/ | sed -nE 's|rocm-([0-9]+)\.([0-9]+)\.([0-9]+)|\1|1p') && \
11 | ROCM_VERSION_MINOR=$(ls /opt/ | sed -nE 's|rocm-([0-9]+)\.([0-9]+)\.([0-9]+)|\2|1p') && \
12 | ROCM_VERSION_PATCH=$(ls /opt/ | sed -nE 's|rocm-([0-9]+)\.([0-9]+)\.([0-9]+)|\3|1p') && \
13 | echo "$ROCM_VERSION_MAJOR" > /opt/ROCM_VERSION_MAJOR && \
14 | echo "$ROCM_VERSION_MINOR" > /opt/ROCM_VERSION_MINOR && \
15 | echo "$ROCM_VERSION_PATCH" > /opt/ROCM_VERSION_PATCH && \
16 | echo "$ROCM_VERSION_MAJOR.$ROCM_VERSION_MINOR" > /opt/ROCM_VERSION && \
17 | echo "$ROCM_VERSION_MAJOR.$ROCM_VERSION_MINOR.$ROCM_VERSION_PATCH" > /opt/ROCM_VERSION_FULL && \
18 | echo "Detected rocm version is $(cat /opt/ROCM_VERSION_FULL)" && \
19 | true
20 |
21 | ############# Build base #############
22 | FROM rocm_base AS build_base
23 | RUN apt-get update && apt-get install -y git cmake libfmt-dev
24 | WORKDIR /rebuild-deps
25 |
26 | ############# Build rocBLAS #############
27 | FROM build_base AS build_rocblas
28 | ARG ROCBLAS_REPO
29 | ARG TENSILE_REPO
30 | RUN git clone --depth 1 --branch rocm-$(cat /opt/ROCM_VERSION_FULL) ${ROCBLAS_REPO} rocBLAS && \
31 | git clone --depth 1 --branch rocm-$(cat /opt/ROCM_VERSION_FULL) ${TENSILE_REPO} Tensile && \
32 | true
33 |
34 | WORKDIR /rebuild-deps/rocBLAS
35 | ARG ROCM_ARCH
36 | ENV PACKAGE_NAME=rocblas
37 | RUN dpkg -s ${PACKAGE_NAME}
38 | RUN ./install.sh --dependencies --rmake_invoked
39 | RUN export INSTALLED_PACKAGE_VERSION=$(dpkg -s ${PACKAGE_NAME} | sed -nE 's|^ *Version: (.+)$|\1|p') && \
40 | echo "Installed package version is \"$INSTALLED_PACKAGE_VERSION\"" && \
41 | export ROCM_LIBPATCH_VERSION=$(echo "$INSTALLED_PACKAGE_VERSION" | sed -E 's|^([0-9]+)\.([0-9]+)\.([0-9]+)\.([0-9]+)-(.*)|\4|1') && \
42 | echo "Set ROCM_LIBPATCH_VERSION to \"$ROCM_LIBPATCH_VERSION\"" && \
43 | export CPACK_DEBIAN_PACKAGE_RELEASE=$(echo "$INSTALLED_PACKAGE_VERSION" | sed -E 's|^([0-9]+)\.([0-9]+)\.([0-9]+)\.([0-9]+)-(.*)|\5|1') && \
44 | echo "Set CPACK_DEBIAN_PACKAGE_RELEASE to \"$CPACK_DEBIAN_PACKAGE_RELEASE\"" && \
45 | python3 ./rmake.py \
46 | --install_invoked \
47 | --build_dir=$(realpath ./build) \
48 | --src_path=$(realpath .) \
49 | --architecture ${ROCM_ARCH} \
50 | --test_local_path=$(realpath ../Tensile) && \
51 | cd ./build/release && \
52 | make package && \
53 | mkdir -p /dist && cp *.deb /dist && \
54 | true
55 | RUN cd ./build/release && \
56 | export INSTALLED_PACKAGE_VERSION=$(dpkg -s ${PACKAGE_NAME} | sed -nE 's|^ *Version: (.+)$|\1|p') && \
57 | export BUILDED_PACKAGE_VERSION=$(dpkg -I /dist/${PACKAGE_NAME}_*.deb | sed -nE 's|^ *Version: (.+)$|\1|p') && \
58 | if [ "$BUILDED_PACKAGE_VERSION" != "$INSTALLED_PACKAGE_VERSION" ]; then echo "ERR: Builded version is $BUILDED_PACKAGE_VERSION but expected $INSTALLED_PACKAGE_VERSION"; exit 10; fi && \
59 | true
60 |
61 | ############# Build rccl #############
62 | FROM build_base AS build_rccl
63 | ARG RCCL_REPO
64 | RUN git clone --depth 1 --branch rocm-$(cat /opt/ROCM_VERSION_FULL) ${RCCL_REPO} rccl && \
65 | true
66 |
67 | WORKDIR /rebuild-deps/rccl
68 | ARG ROCM_ARCH
69 | ENV PACKAGE_NAME=rccl
70 | RUN dpkg -s ${PACKAGE_NAME}
71 | RUN export INSTALLED_PACKAGE_VERSION=$(dpkg -s ${PACKAGE_NAME} | sed -nE 's|^ *Version: (.+)$|\1|p') && \
72 | echo "Installed package version is \"$INSTALLED_PACKAGE_VERSION\"" && \
73 | export ROCM_LIBPATCH_VERSION=$(echo "$INSTALLED_PACKAGE_VERSION" | sed -E 's|^([0-9]+)\.([0-9]+)\.([0-9]+)\.([0-9]+)-(.*)|\4|1') && \
74 | echo "Set ROCM_LIBPATCH_VERSION to \"$ROCM_LIBPATCH_VERSION\"" && \
75 | export CPACK_DEBIAN_PACKAGE_RELEASE=$(echo "$INSTALLED_PACKAGE_VERSION" | sed -E 's|^([0-9]+)\.([0-9]+)\.([0-9]+)\.([0-9]+)-(.*)|\5|1') && \
76 | echo "Set CPACK_DEBIAN_PACKAGE_RELEASE to \"$CPACK_DEBIAN_PACKAGE_RELEASE\"" && \
77 | ./install.sh --package_build --amdgpu_targets ${ROCM_ARCH} && \
78 | mkdir -p /dist && cp ./build/release/*.deb /dist && \
79 | true
80 | RUN cd ./build/release && \
81 | export INSTALLED_PACKAGE_VERSION=$(dpkg -s ${PACKAGE_NAME} | sed -nE 's|^ *Version: (.+)$|\1|p') && \
82 | export BUILDED_PACKAGE_VERSION=$(dpkg -I /dist/${PACKAGE_NAME}_*.deb | sed -nE 's|^ *Version: (.+)$|\1|p') && \
83 | if [ "$BUILDED_PACKAGE_VERSION" != "$INSTALLED_PACKAGE_VERSION" ]; then echo "ERR: Builded version is $BUILDED_PACKAGE_VERSION but expected $INSTALLED_PACKAGE_VERSION"; exit 10; fi && \
84 | true
85 |
86 | ############# Patched image #############
87 | FROM rocm_base AS final
88 | RUN apt-get update && apt-get install -y libfmt-dev
89 | # Install rocblas
90 | RUN --mount=type=bind,from=build_rocblas,src=/dist/,target=/dist \
91 | dpkg -i /dist/*.deb
92 | # Install rccl
93 | RUN --mount=type=bind,from=build_rccl,src=/dist/,target=/dist \
94 | dpkg -i /dist/*.deb
95 |
96 | # Validate apt dependency state (apt-get install with no arguments fails if dpkg -i left broken dependencies)
97 | RUN apt-get install
98 |
--------------------------------------------------------------------------------
/rocm/submodules/.gitkeep:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mixa3607/ML-gfx906/d95fce7ed5e14ec9cc7b801c668696194a929cda/rocm/submodules/.gitkeep
--------------------------------------------------------------------------------
/vllm/benchmark/ResultsConverter/.gitignore:
--------------------------------------------------------------------------------
1 | ## Ignore Visual Studio temporary files, build results, and
2 | ## files generated by popular Visual Studio add-ons.
3 | ##
4 | ## Get latest from https://github.com/github/gitignore/blob/master/VisualStudio.gitignore
5 |
6 | # User-specific files
7 | *.rsuser
8 | *.suo
9 | *.user
10 | *.userosscache
11 | *.sln.docstates
12 |
13 | # User-specific files (MonoDevelop/Xamarin Studio)
14 | *.userprefs
15 |
16 | # Mono auto generated files
17 | mono_crash.*
18 |
19 | # Build results
20 | [Dd]ebug/
21 | [Dd]ebugPublic/
22 | [Rr]elease/
23 | [Rr]eleases/
24 | x64/
25 | x86/
26 | [Ww][Ii][Nn]32/
27 | [Aa][Rr][Mm]/
28 | [Aa][Rr][Mm]64/
29 | bld/
30 | [Bb]in/
31 | [Oo]bj/
32 | [Oo]ut/
33 | [Ll]og/
34 | [Ll]ogs/
35 |
36 | # Visual Studio 2015/2017 cache/options directory
37 | .vs/
38 | # Uncomment if you have tasks that create the project's static files in wwwroot
39 | #wwwroot/
40 |
41 | # Visual Studio 2017 auto generated files
42 | Generated\ Files/
43 |
44 | # MSTest test Results
45 | [Tt]est[Rr]esult*/
46 | [Bb]uild[Ll]og.*
47 |
48 | # NUnit
49 | *.VisualState.xml
50 | TestResult.xml
51 | nunit-*.xml
52 |
53 | # Build Results of an ATL Project
54 | [Dd]ebugPS/
55 | [Rr]eleasePS/
56 | dlldata.c
57 |
58 | # Benchmark Results
59 | BenchmarkDotNet.Artifacts/
60 |
61 | # .NET Core
62 | project.lock.json
63 | project.fragment.lock.json
64 | artifacts/
65 |
66 | # ASP.NET Scaffolding
67 | ScaffoldingReadMe.txt
68 |
69 | # StyleCop
70 | StyleCopReport.xml
71 |
72 | # Files built by Visual Studio
73 | *_i.c
74 | *_p.c
75 | *_h.h
76 | *.ilk
77 | *.meta
78 | *.obj
79 | *.iobj
80 | *.pch
81 | *.pdb
82 | *.ipdb
83 | *.pgc
84 | *.pgd
85 | *.rsp
86 | *.sbr
87 | *.tlb
88 | *.tli
89 | *.tlh
90 | *.tmp
91 | *.tmp_proj
92 | *_wpftmp.csproj
93 | *.log
94 | *.vspscc
95 | *.vssscc
96 | .builds
97 | *.pidb
98 | *.svclog
99 | *.scc
100 |
101 | # Chutzpah Test files
102 | _Chutzpah*
103 |
104 | # Visual C++ cache files
105 | ipch/
106 | *.aps
107 | *.ncb
108 | *.opendb
109 | *.opensdf
110 | *.sdf
111 | *.cachefile
112 | *.VC.db
113 | *.VC.VC.opendb
114 |
115 | # Visual Studio profiler
116 | *.psess
117 | *.vsp
118 | *.vspx
119 | *.sap
120 |
121 | # Visual Studio Trace Files
122 | *.e2e
123 |
124 | # TFS 2012 Local Workspace
125 | $tf/
126 |
127 | # Guidance Automation Toolkit
128 | *.gpState
129 |
130 | # ReSharper is a .NET coding add-in
131 | _ReSharper*/
132 | *.[Rr]e[Ss]harper
133 | *.DotSettings.user
134 |
135 | # TeamCity is a build add-in
136 | _TeamCity*
137 |
138 | # DotCover is a Code Coverage Tool
139 | *.dotCover
140 |
141 | # AxoCover is a Code Coverage Tool
142 | .axoCover/*
143 | !.axoCover/settings.json
144 |
145 | # Coverlet is a free, cross platform Code Coverage Tool
146 | coverage*.json
147 | coverage*.xml
148 | coverage*.info
149 |
150 | # Visual Studio code coverage results
151 | *.coverage
152 | *.coveragexml
153 |
154 | # NCrunch
155 | _NCrunch_*
156 | .*crunch*.local.xml
157 | nCrunchTemp_*
158 |
159 | # MightyMoose
160 | *.mm.*
161 | AutoTest.Net/
162 |
163 | # Web workbench (sass)
164 | .sass-cache/
165 |
166 | # Installshield output folder
167 | [Ee]xpress/
168 |
169 | # DocProject is a documentation generator add-in
170 | DocProject/buildhelp/
171 | DocProject/Help/*.HxT
172 | DocProject/Help/*.HxC
173 | DocProject/Help/*.hhc
174 | DocProject/Help/*.hhk
175 | DocProject/Help/*.hhp
176 | DocProject/Help/Html2
177 | DocProject/Help/html
178 |
179 | # Click-Once directory
180 | publish/
181 |
182 | # Publish Web Output
183 | *.[Pp]ublish.xml
184 | *.azurePubxml
185 | # Note: Comment the next line if you want to checkin your web deploy settings,
186 | # but database connection strings (with potential passwords) will be unencrypted
187 | *.pubxml
188 | *.publishproj
189 |
190 | # Microsoft Azure Web App publish settings. Comment the next line if you want to
191 | # checkin your Azure Web App publish settings, but sensitive information contained
192 | # in these scripts will be unencrypted
193 | PublishScripts/
194 |
195 | # NuGet Packages
196 | *.nupkg
197 | # NuGet Symbol Packages
198 | *.snupkg
199 | # The packages folder can be ignored because of Package Restore
200 | **/[Pp]ackages/*
201 | # except build/, which is used as an MSBuild target.
202 | !**/[Pp]ackages/build/
203 | # Uncomment if necessary however generally it will be regenerated when needed
204 | #!**/[Pp]ackages/repositories.config
205 | # NuGet v3's project.json files produces more ignorable files
206 | *.nuget.props
207 | *.nuget.targets
208 |
209 | # Microsoft Azure Build Output
210 | csx/
211 | *.build.csdef
212 |
213 | # Microsoft Azure Emulator
214 | ecf/
215 | rcf/
216 |
217 | # Windows Store app package directories and files
218 | AppPackages/
219 | BundleArtifacts/
220 | Package.StoreAssociation.xml
221 | _pkginfo.txt
222 | *.appx
223 | *.appxbundle
224 | *.appxupload
225 |
226 | # Visual Studio cache files
227 | # files ending in .cache can be ignored
228 | *.[Cc]ache
229 | # but keep track of directories ending in .cache
230 | !?*.[Cc]ache/
231 |
232 | # Others
233 | ClientBin/
234 | ~$*
235 | *~
236 | *.dbmdl
237 | *.dbproj.schemaview
238 | *.jfm
239 | *.pfx
240 | *.publishsettings
241 | orleans.codegen.cs
242 |
243 | # Including strong name files can present a security risk
244 | # (https://github.com/github/gitignore/pull/2483#issue-259490424)
245 | #*.snk
246 |
247 | # Since there are multiple workflows, uncomment next line to ignore bower_components
248 | # (https://github.com/github/gitignore/pull/1529#issuecomment-104372622)
249 | #bower_components/
250 |
251 | # RIA/Silverlight projects
252 | Generated_Code/
253 |
254 | # Backup & report files from converting an old project file
255 | # to a newer Visual Studio version. Backup files are not needed,
256 | # because we have git ;-)
257 | _UpgradeReport_Files/
258 | Backup*/
259 | UpgradeLog*.XML
260 | UpgradeLog*.htm
261 | ServiceFabricBackup/
262 | *.rptproj.bak
263 |
264 | # SQL Server files
265 | *.mdf
266 | *.ldf
267 | *.ndf
268 |
269 | # Business Intelligence projects
270 | *.rdl.data
271 | *.bim.layout
272 | *.bim_*.settings
273 | *.rptproj.rsuser
274 | *- [Bb]ackup.rdl
275 | *- [Bb]ackup ([0-9]).rdl
276 | *- [Bb]ackup ([0-9][0-9]).rdl
277 |
278 | # Microsoft Fakes
279 | FakesAssemblies/
280 |
281 | # GhostDoc plugin setting file
282 | *.GhostDoc.xml
283 |
284 | # Node.js Tools for Visual Studio
285 | .ntvs_analysis.dat
286 | node_modules/
287 |
288 | # Visual Studio 6 build log
289 | *.plg
290 |
291 | # Visual Studio 6 workspace options file
292 | *.opt
293 |
294 | # Visual Studio 6 auto-generated workspace file (contains which files were open etc.)
295 | *.vbw
296 |
297 | # Visual Studio LightSwitch build output
298 | **/*.HTMLClient/GeneratedArtifacts
299 | **/*.DesktopClient/GeneratedArtifacts
300 | **/*.DesktopClient/ModelManifest.xml
301 | **/*.Server/GeneratedArtifacts
302 | **/*.Server/ModelManifest.xml
303 | _Pvt_Extensions
304 |
305 | # Paket dependency manager
306 | .paket/paket.exe
307 | paket-files/
308 |
309 | # FAKE - F# Make
310 | .fake/
311 |
312 | # CodeRush personal settings
313 | .cr/personal
314 |
315 | # Python Tools for Visual Studio (PTVS)
316 | __pycache__/
317 | *.pyc
318 |
319 | # Cake - Uncomment if you are using it
320 | # tools/**
321 | # !tools/packages.config
322 |
323 | # Tabs Studio
324 | *.tss
325 |
326 | # Telerik's JustMock configuration file
327 | *.jmconfig
328 |
329 | # BizTalk build output
330 | *.btp.cs
331 | *.btm.cs
332 | *.odx.cs
333 | *.xsd.cs
334 |
335 | # OpenCover UI analysis results
336 | OpenCover/
337 |
338 | # Azure Stream Analytics local run output
339 | ASALocalRun/
340 |
341 | # MSBuild Binary and Structured Log
342 | *.binlog
343 |
344 | # NVidia Nsight GPU debugger configuration file
345 | *.nvuser
346 |
347 | # MFractors (Xamarin productivity tool) working folder
348 | .mfractor/
349 |
350 | # Local History for Visual Studio
351 | .localhistory/
352 |
353 | # BeatPulse healthcheck temp database
354 | healthchecksdb
355 |
356 | # Backup folder for Package Reference Convert tool in Visual Studio 2017
357 | MigrationBackup/
358 |
359 | # Ionide (cross platform F# VS Code tools) working folder
360 | .ionide/
361 |
362 | # Fody - auto-generated XML schema
363 | FodyWeavers.xsd
--------------------------------------------------------------------------------
/vllm/benchmark/ResultsConverter/ResultsConverter.sln:
--------------------------------------------------------------------------------
1 |
2 | Microsoft Visual Studio Solution File, Format Version 12.00
3 | # Visual Studio Version 17
4 | VisualStudioVersion = 17.14.36414.22 d17.14
5 | MinimumVisualStudioVersion = 10.0.40219.1
6 | Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "ResultsConverter", "ResultsConverter\ResultsConverter.csproj", "{F1ADC5F6-4208-4BF3-9612-A30E48364174}"
7 | EndProject
8 | Global
9 | GlobalSection(SolutionConfigurationPlatforms) = preSolution
10 | Debug|Any CPU = Debug|Any CPU
11 | Release|Any CPU = Release|Any CPU
12 | EndGlobalSection
13 | GlobalSection(ProjectConfigurationPlatforms) = postSolution
14 | {F1ADC5F6-4208-4BF3-9612-A30E48364174}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
15 | {F1ADC5F6-4208-4BF3-9612-A30E48364174}.Debug|Any CPU.Build.0 = Debug|Any CPU
16 | {F1ADC5F6-4208-4BF3-9612-A30E48364174}.Release|Any CPU.ActiveCfg = Release|Any CPU
17 | {F1ADC5F6-4208-4BF3-9612-A30E48364174}.Release|Any CPU.Build.0 = Release|Any CPU
18 | EndGlobalSection
19 | GlobalSection(SolutionProperties) = preSolution
20 | HideSolutionNode = FALSE
21 | EndGlobalSection
22 | GlobalSection(ExtensibilityGlobals) = postSolution
23 | SolutionGuid = {41BCD38E-4CD3-453E-9363-9DAE08F9519C}
24 | EndGlobalSection
25 | EndGlobal
26 |
--------------------------------------------------------------------------------
/vllm/benchmark/ResultsConverter/ResultsConverter/MarkdownTableBuilder.cs:
--------------------------------------------------------------------------------
1 | using System.Text;
2 |
3 | // https://github.com/marcolink/MarkdownTable
4 | namespace MarkdownTable
5 | {
6 | public class MarkdownTableBuilder
7 | {
8 | private string[] header = { };
9 | private readonly List