├── .gitignore
├── .gitmodules
├── checkpoint.sh
├── comfyui
│   ├── build-and-push.comfyui.sh
│   ├── comfyui.Dockerfile
│   ├── env.sh
│   ├── preset.rocm-6.3.3.sh
│   ├── preset.rocm-6.4.4.sh
│   └── readme.md
├── docs
│   └── images
│       └── temperatures.png
├── env.sh
├── llama.cpp
│   ├── build-and-push.rocm.sh
│   ├── build-and-push.vulkan.sh
│   ├── env.sh
│   ├── llamacpp-offload-calculator
│   │   ├── .gitattributes
│   │   ├── .gitignore
│   │   ├── ArkProjects.LlamaOffloadCalc.sln
│   │   ├── ArkProjects.LlamaOffloadCalc
│   │   │   ├── ArkProjects.LlamaOffloadCalc.csproj
│   │   │   ├── LLamaDevice.cs
│   │   │   ├── LLamaDeviceType.cs
│   │   │   ├── LLamaGgufMetadataExtractor.cs
│   │   │   ├── LLamaLogsParser.cs
│   │   │   ├── Options
│   │   │   │   ├── LLamaDeviceOptions.cs
│   │   │   │   ├── OffloadCalculationOptions.cs
│   │   │   │   ├── OffloadCalculationOptionsValidator.cs
│   │   │   │   └── TensorsOffloadRuleOptions.cs
│   │   │   ├── Program.cs
│   │   │   ├── Properties
│   │   │   │   └── launchSettings.json
│   │   │   ├── TensorMetadata.cs
│   │   │   ├── appsettings.GLM-4.5-Air-UD-Q6_K_XL.yaml
│   │   │   ├── appsettings.gpt-oss-120b-F16.yaml
│   │   │   └── appsettings.yaml
│   │   ├── GGUFSharp
│   │   │   ├── .gitignore
│   │   │   ├── LICENSE
│   │   │   ├── README.md
│   │   │   ├── SampleFiles
│   │   │   │   ├── FilesList.txt
│   │   │   │   ├── example.gguf
│   │   │   │   └── genTestFile.py
│   │   │   └── src
│   │   │       └── GGUFSharp
│   │   │           ├── GGUFSharp.Test
│   │   │           │   ├── BasicFeatureTest.cs
│   │   │           │   ├── GGUFSharp.Test.csproj
│   │   │           │   └── MSTestSettings.cs
│   │   │           ├── GGUFSharp.sln
│   │   │           └── GGUFSharp
│   │   │               ├── GGUFDataTypeEnum.cs
│   │   │               ├── GGUFFile.cs
│   │   │               ├── GGUFHeader.cs
│   │   │               ├── GGUFMetaItem.cs
│   │   │               ├── GGUFReader.cs
│   │   │               ├── GGUFSharp.csproj
│   │   │               ├── GGUFStreamReader.cs
│   │   │               ├── GGUFTensorInfo.cs
│   │   │               └── GGUFTensorType.cs
│   │   └── readme.md
│   ├── preset.rocm-6.3.3.sh
│   ├── preset.rocm-6.4.4.sh
│   ├── preset.rocm-7.0.0.sh
│   └── readme.md
├── pytorch
│   ├── build-and-push.torch.sh
│   ├── env.sh
│   ├── preset.torch-2.7.1-rocm-6.3.3.sh
│   ├── preset.torch-2.7.1-rocm-6.4.4.sh
│   ├── preset.torch-2.8.0-rocm-6.3.3.sh
│   ├── preset.torch-2.8.0-rocm-6.4.4.sh
│   ├── preset.torch-2.8.0-rocm-7.0.2.sh
│   ├── preset.torch-2.9.0-rocm-7.0.2.sh
│   ├── readme.md
│   ├── submodules
│   │   └── .gitkeep
│   └── torch.Dockerfile
├── readme.md
├── rocm
│   ├── build-and-push.rocm.sh
│   ├── env.sh
│   ├── preset.rocm-6.3.3.sh
│   ├── preset.rocm-6.4.4.sh
│   ├── preset.rocm-7.0.0.sh
│   ├── preset.rocm-7.0.2.sh
│   ├── readme.md
│   ├── rocm.Dockerfile
│   └── submodules
│       └── .gitkeep
└── vllm
    ├── benchmark
    │   ├── ResultsConverter
    │   │   ├── .gitignore
    │   │   ├── ResultsConverter.sln
    │   │   └── ResultsConverter
    │   │       ├── MarkdownTableBuilder.cs
    │   │       ├── MarkdownTableBuilderExtensions.cs
    │   │       ├── Program.cs
    │   │       ├── Properties
    │   │       │   └── launchSettings.json
    │   │       ├── ResultsConverter.csproj
    │   │       └── VllmBenchResult.cs
    │   ├── readme.md
    │   └── results
    │       ├── app
    │       │   └── vllm
    │       │       ├── openai-infqps-concurrency1-gemma-3-27b-it-qat-autoawq-20251006-130816.json
    │       │       ├── openai-infqps-concurrency1-gemma-3-27b-it-qat-autoawq-20251007-162504.json
    │       │       ├── openai-infqps-concurrency1-gemma-3-27b-it-qat-autoawq-20251012-111624.json
    │       │       ├── openai-infqps-concurrency1-gemma-3-27b-it-qat-autoawq-20251013-140107.json
    │       │       ├── openai-infqps-concurrency16-gemma-3-27b-it-qat-autoawq-20251012-201017.json
    │       │       ├── openai-infqps-concurrency2-gemma-3-27b-it-qat-autoawq-20251006-132621.json
    │       │       ├── openai-infqps-concurrency2-gemma-3-27b-it-qat-autoawq-20251007-171239.json
    │       │       ├── openai-infqps-concurrency2-gemma-3-27b-it-qat-autoawq-20251012-112842.json
    │       │       ├── openai-infqps-concurrency2-gemma-3-27b-it-qat-autoawq-20251013-141355.json
    │       │       ├── openai-infqps-concurrency3-gemma-3-27b-it-qat-autoawq-20251006-134724.json
    │       │       ├── openai-infqps-concurrency3-gemma-3-27b-it-qat-autoawq-20251007-173243.json
    │       │       ├── openai-infqps-concurrency3-gemma-3-27b-it-qat-autoawq-20251012-114201.json
    │       │       ├── openai-infqps-concurrency3-gemma-3-27b-it-qat-autoawq-20251013-142754.json
    │       │       ├── openai-infqps-concurrency4-gemma-3-27b-it-qat-autoawq-20251006-140759.json
    │       │       ├── openai-infqps-concurrency4-gemma-3-27b-it-qat-autoawq-20251007-175203.json
    │       │       ├── openai-infqps-concurrency4-gemma-3-27b-it-qat-autoawq-20251012-115501.json
    │       │       ├── openai-infqps-concurrency4-gemma-3-27b-it-qat-autoawq-20251013-144145.json
    │       │       └── openai-infqps-concurrency8-gemma-3-27b-it-qat-autoawq-20251012-121023.json
    │       ├── openai-infqps-concurrency1-gemma-3-27b-it-qat-autoawq-20251005-221837.json
    │       ├── openai-infqps-concurrency1-gemma-3-27b-it-qat-autoawq-20251006-130816.json
    │       ├── openai-infqps-concurrency1-gemma-3-27b-it-qat-autoawq-20251007-162504.json
    │       ├── openai-infqps-concurrency1-gemma-3-27b-it-qat-autoawq-20251012-111624.json
    │       ├── openai-infqps-concurrency1-gemma-3-27b-it-qat-autoawq-20251013-140107.json
    │       ├── openai-infqps-concurrency16-gemma-3-27b-it-qat-autoawq-20251012-201017.json
    │       ├── openai-infqps-concurrency2-gemma-3-27b-it-qat-autoawq-20251005-214604.json
    │       ├── openai-infqps-concurrency2-gemma-3-27b-it-qat-autoawq-20251006-132621.json
    │       ├── openai-infqps-concurrency2-gemma-3-27b-it-qat-autoawq-20251007-171239.json
    │       ├── openai-infqps-concurrency2-gemma-3-27b-it-qat-autoawq-20251012-112842.json
    │       ├── openai-infqps-concurrency2-gemma-3-27b-it-qat-autoawq-20251013-141355.json
    │       ├── openai-infqps-concurrency3-gemma-3-27b-it-qat-autoawq-20251005-212640.json
    │       ├── openai-infqps-concurrency3-gemma-3-27b-it-qat-autoawq-20251006-134724.json
    │       ├── openai-infqps-concurrency3-gemma-3-27b-it-qat-autoawq-20251007-173243.json
    │       ├── openai-infqps-concurrency3-gemma-3-27b-it-qat-autoawq-20251012-114201.json
    │       ├── openai-infqps-concurrency3-gemma-3-27b-it-qat-autoawq-20251013-142754.json
    │       ├── openai-infqps-concurrency4-gemma-3-27b-it-qat-autoawq-20251005-210513.json
    │       ├── openai-infqps-concurrency4-gemma-3-27b-it-qat-autoawq-20251006-140759.json
    │       ├── openai-infqps-concurrency4-gemma-3-27b-it-qat-autoawq-20251007-175203.json
    │       ├── openai-infqps-concurrency4-gemma-3-27b-it-qat-autoawq-20251012-115501.json
    │       ├── openai-infqps-concurrency4-gemma-3-27b-it-qat-autoawq-20251013-144145.json
    │       └── openai-infqps-concurrency8-gemma-3-27b-it-qat-autoawq-20251012-121023.json
    ├── build-and-push.vllm.sh
    ├── env.sh
    ├── preset.0.10.2-rocm-6.4.4.sh
    ├── preset.0.11.0-rocm-6.3.3.sh
    ├── preset.0.8.5-rocm-6.3.3.sh
    ├── readme.md
    ├── submodules
    │   └── .gitkeep
    └── vllm.Dockerfile
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | /**/*.log
2 | 
--------------------------------------------------------------------------------
/.gitmodules:
--------------------------------------------------------------------------------
1 | [submodule "llama.cpp/submodules/llama.cpp"]
2 | 	url = ../../ggml-org/llama.cpp.git
3 | 	path = llama.cpp/submodules/llama.cpp
4 | [submodule "comfyui/submodules/ComfyUI"]
5 | 	path = comfyui/submodules/ComfyUI
6 | 	url = ../../comfyanonymous/ComfyUI.git
7 | 
--------------------------------------------------------------------------------
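Both submodules above use relative URLs, so they resolve against the host this repository was cloned from. A minimal sketch of getting a working tree with both submodules in place (the GitHub clone URL is inferred from the raw links used elsewhere in this repo):

```bash
# Clone the repo and its submodules in one step; the 'url = ../../...' entries
# above resolve to github.com/ggml-org/llama.cpp and github.com/comfyanonymous/ComfyUI.
git clone --recurse-submodules https://github.com/mixa3607/ML-gfx906.git
cd ML-gfx906

# Or, after a plain clone:
git submodule update --init --recursive
```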
Exit" 7 | exit 10 8 | fi 9 | 10 | TAG_NAME=$(git_get_current_tag) 11 | if [ "$TAG_NAME" == "" ]; then 12 | TAG_NAME="$(date +%Y%m%d%H%M%S)" 13 | git tag -a "$TAG_NAME" -m "none" 14 | echo -e "New tag $TAG_NAME" 15 | else 16 | echo "Commit already tagged with $TAG_NAME" 17 | fi 18 | -------------------------------------------------------------------------------- /comfyui/build-and-push.comfyui.sh: -------------------------------------------------------------------------------- 1 | #/bin/bash 2 | set -e 3 | 4 | cd $(dirname $0) 5 | source ../env.sh 6 | 7 | IMAGE_TAGS=( 8 | "$COMFYUI_IMAGE:${COMFYUI_GIT_REF}-torch-${COMFYUI_PYTORCH_VERSION}-rocm-${COMFYUI_ROCM_VERSION}-patch-${REPO_GIT_REF}" 9 | "$COMFYUI_IMAGE:${COMFYUI_GIT_REF}-rocm-${COMFYUI_ROCM_VERSION}-patch-${REPO_GIT_REF}" 10 | "$COMFYUI_IMAGE:${COMFYUI_GIT_REF}-rocm-${COMFYUI_ROCM_VERSION}" 11 | "$COMFYUI_IMAGE:latest-rocm-${COMFYUI_ROCM_VERSION}" 12 | ) 13 | 14 | if docker_image_pushed ${IMAGE_TAGS[0]}; then 15 | echo "${IMAGE_TAGS[0]} already in registry. Skip" 16 | exit 0 17 | fi 18 | 19 | DOCKER_EXTRA_ARGS=() 20 | for (( i=0; i<${#IMAGE_TAGS[@]}; i++ )); do 21 | DOCKER_EXTRA_ARGS+=("-t" "${IMAGE_TAGS[$i]}") 22 | done 23 | 24 | mkdir ./logs || true 25 | docker buildx build ${DOCKER_EXTRA_ARGS[@]} --push \ 26 | --build-arg BASE_PYTORCH_IMAGE=$COMFYUI_TORCH_IMAGE:${COMFYUI_PYTORCH_VERSION}-rocm-${COMFYUI_ROCM_VERSION} \ 27 | --progress=plain --target final -f ./comfyui.Dockerfile --push ./submodules/ComfyUI 2>&1 | tee ./logs/build_$(date +%Y%m%d%H%M%S).log 28 | -------------------------------------------------------------------------------- /comfyui/comfyui.Dockerfile: -------------------------------------------------------------------------------- 1 | ARG BASE_PYTORCH_IMAGE=docker.io/mixa3607/pytorch-gfx906:v2.7.1-rocm-6.3.3 2 | 3 | FROM ${BASE_PYTORCH_IMAGE} AS final 4 | WORKDIR /comfyui 5 | COPY ./requirements.txt ./requirements.txt 6 | RUN sed -i 's|torchaudio||g' requirements.txt && pip install -r requirements.txt 7 | COPY ./ ./ 8 | -------------------------------------------------------------------------------- /comfyui/env.sh: -------------------------------------------------------------------------------- 1 | #/bin/bash 2 | 3 | pushd $(dirname ${BASH_SOURCE[0]}) 4 | 5 | if [ "$COMFYUI_IMAGE" == "" ]; then 6 | COMFYUI_IMAGE=docker.io/mixa3607/comfyui-gfx906 7 | fi 8 | 9 | if [ "$COMFYUI_TORCH_IMAGE" == "" ]; then 10 | COMFYUI_TORCH_IMAGE="docker.io/mixa3607/pytorch-gfx906" 11 | fi 12 | if [ "$COMFYUI_ROCM_VERSION" == "" ]; then 13 | COMFYUI_ROCM_VERSION="6.3.3" 14 | fi 15 | if [ "$COMFYUI_PYTORCH_VERSION" == "" ]; then 16 | COMFYUI_PYTORCH_VERSION="v2.7.1" 17 | fi 18 | 19 | if [ "$COMFYUI_GIT_REF" == "" ]; then 20 | COMFYUI_GIT_REF="$(git_get_current_tag submodules/ComfyUI)" 21 | fi 22 | if [ "$COMFYUI_GIT_REF" == "" ]; then 23 | COMFYUI_GIT_REF="$(git_get_current_sha submodules/ComfyUI)" 24 | fi 25 | 26 | popd 27 | -------------------------------------------------------------------------------- /comfyui/preset.rocm-6.3.3.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | export COMFYUI_ROCM_VERSION="6.3.3" 4 | export COMFYUI_PYTORCH_VERSION="v2.7.1" 5 | -------------------------------------------------------------------------------- /comfyui/preset.rocm-6.4.4.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | export COMFYUI_ROCM_VERSION="6.4.4" 4 | export COMFYUI_PYTORCH_VERSION="v2.7.1" 5 | 
--------------------------------------------------------------------------------
/comfyui/readme.md:
--------------------------------------------------------------------------------
1 | # ComfyUI GFX906
2 | The most powerful and modular diffusion model GUI, API and backend with a graph/nodes interface. https://github.com/comfyanonymous/ComfyUI
3 | 
4 | Recommended image: `docker.io/mixa3607/comfyui-gfx906:latest-rocm-6.4.4`
5 | 
6 | ## Benchmarks
7 | | tag                                                  | rocm  | comfy   | pytorch | preset | batch | exec time (sec) |
8 | |------------------------------------------------------|-------|---------|---------|--------|-------|-----------------|
9 | | v0.3.63-torch-v2.7.1-rocm-6.4.4-patch-20251010004720 | 6.4.4 | v0.3.63 | v2.7.1  | SDXL   | 1     | 33              |
10 | | v0.3.63-torch-v2.7.1-rocm-6.4.4-patch-20251010004720 | 6.4.4 | v0.3.63 | v2.7.1  | SDXL   | 2     | 65              |
11 | | v0.3.63-torch-v2.7.1-rocm-6.4.4-patch-20251010004720 | 6.4.4 | v0.3.63 | v2.7.1  | SD 1.5 | 1     | 3.8             |
12 | | v0.3.63-torch-v2.7.1-rocm-6.4.4-patch-20251010004720 | 6.4.4 | v0.3.63 | v2.7.1  | SD 1.5 | 2     | 7               |
13 | | v0.3.63-torch-v2.7.1-rocm-6.3.3-patch-20251010004720 | 6.3.3 | v0.3.63 | v2.7.1  | SDXL   | 1     | 33              |
14 | | v0.3.63-torch-v2.7.1-rocm-6.3.3-patch-20251010004720 | 6.3.3 | v0.3.63 | v2.7.1  | SDXL   | 2     | 65              |
15 | | v0.3.63-torch-v2.7.1-rocm-6.3.3-patch-20251010004720 | 6.3.3 | v0.3.63 | v2.7.1  | SD 1.5 | 1     | 3.8             |
16 | | v0.3.63-torch-v2.7.1-rocm-6.3.3-patch-20251010004720 | 6.3.3 | v0.3.63 | v2.7.1  | SD 1.5 | 2     | 7               |
17 | 
18 | ## Run
19 | ### Docker
20 | See https://github.com/hartmark/sd-rocm/blob/main/docker-compose.yml
21 | 
22 | ### Kubernetes
23 | Helm chart and samples: [mixa3607 charts](https://github.com/mixa3607/charts)
24 | 
25 | ## Build
26 | See the build vars in `./env.sh`. You may also use the presets `./preset.rocm-*.sh`. Then exec `./build-and-push.comfyui.sh`:
27 | ```bash
28 | $ . preset.rocm-6.4.4.sh
29 | $ ./build-and-push.comfyui.sh
30 | ~/REPOS/mixa3607/llama.cpp-gfx906/rocm ~/REPOS/mixa3607/llama.cpp-gfx906/rocm
31 | ~/REPOS/mixa3607/llama.cpp-gfx906/rocm
32 | ~/REPOS/mixa3607/llama.cpp-gfx906/llama.cpp ~/REPOS/mixa3607/llama.cpp-gfx906/rocm
33 | ~/REPOS/mixa3607/llama.cpp-gfx906/rocm
34 | ~/REPOS/mixa3607/llama.cpp-gfx906/comfyui ~/REPOS/mixa3607/llama.cpp-gfx906/rocm
35 | ~/REPOS/mixa3607/llama.cpp-gfx906/rocm
36 | ~/REPOS/mixa3607/llama.cpp-gfx906/vllm ~/REPOS/mixa3607/llama.cpp-gfx906/rocm
37 | ~/REPOS/mixa3607/llama.cpp-gfx906/rocm
38 | #0 building with "remote" instance using remote driver
39 | #...............
40 | #14 DONE 583.8s
41 | ```
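The Run section above only links to an external compose file; for a quick local test, a `docker run` along these lines should work. This is a sketch, not the author's documented invocation: the device flags are the usual ROCm passthrough set, and the port, volume path, and command are assumptions based on ComfyUI defaults.

```bash
docker run -d --name comfyui \
  --device /dev/kfd \
  --device /dev/dri \
  --group-add video \
  --security-opt seccomp=unconfined \
  -p 8188:8188 \
  -v /opt/comfyui/models:/comfyui/models \
  docker.io/mixa3607/comfyui-gfx906:latest-rocm-6.4.4 \
  python main.py --listen 0.0.0.0
```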
--------------------------------------------------------------------------------
/docs/images/temperatures.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mixa3607/ML-gfx906/d95fce7ed5e14ec9cc7b801c668696194a929cda/docs/images/temperatures.png
--------------------------------------------------------------------------------
/env.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | 
3 | function docker_image_pushed {
4 |   if docker buildx imagetools inspect "$1" > /dev/null 2> /dev/null; then
5 |     return 0
6 |   else
7 |     return 1
8 |   fi
9 | }
10 | 
11 | function git_get_current_tag {
12 |   if [ "$1" != "" ]; then pushd "$1" > /dev/null; fi
13 |   git tag --points-at HEAD | sed 's|+||g'
14 |   if [ "$1" != "" ]; then popd > /dev/null; fi
15 | }
16 | 
17 | function git_get_origin {
18 |   if [ "$1" != "" ]; then pushd "$1" > /dev/null; fi
19 |   git config --get remote.origin.url
20 |   if [ "$1" != "" ]; then popd > /dev/null; fi
21 | }
22 | 
23 | 
24 | function git_get_current_sha {
25 |   if [ "$1" != "" ]; then pushd "$1" > /dev/null; fi
26 |   git rev-parse --short HEAD
27 |   if [ "$1" != "" ]; then popd > /dev/null; fi
28 | }
29 | 
30 | if [ "$REPO_GIT_REF" == "" ]; then
31 |   REPO_GIT_REF="$(git_get_current_tag)"
32 | fi
33 | if [ "$REPO_GIT_REF" == "" ]; then
34 |   REPO_GIT_REF="$(git_get_current_sha)"
35 | fi
36 | 
37 | if [ "$BASE_UBUNTU_REGISTRY" == "" ]; then
38 |   BASE_UBUNTU_REGISTRY=docker.io/library
39 | fi
40 | 
41 | source $(dirname ${BASH_SOURCE[0]})/rocm/env.sh
42 | source $(dirname ${BASH_SOURCE[0]})/llama.cpp/env.sh
43 | source $(dirname ${BASH_SOURCE[0]})/comfyui/env.sh
44 | source $(dirname ${BASH_SOURCE[0]})/vllm/env.sh
45 | source $(dirname ${BASH_SOURCE[0]})/pytorch/env.sh
--------------------------------------------------------------------------------
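These helpers are shared by every build-and-push script in the repo. A short sketch of how they behave when sourced directly:

```bash
source ./env.sh

# Tag pointing at HEAD (with '+' stripped), empty when the commit is untagged:
git_get_current_tag
# Short commit SHA, the fallback used for REPO_GIT_REF:
git_get_current_sha
# The same helpers accept a directory, e.g. a submodule checkout:
git_get_current_tag llama.cpp/submodules/llama.cpp

# Skip-if-published check, exactly as the build scripts use it:
if docker_image_pushed docker.io/mixa3607/llama.cpp-gfx906:full-rocm-7.0.0; then
  echo "already pushed"
fi
```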
Skip" 15 | exit 0 16 | fi 17 | 18 | DOCKER_EXTRA_ARGS=() 19 | for (( i=0; i<${#IMAGE_TAGS[@]}; i++ )); do 20 | DOCKER_EXTRA_ARGS+=("-t" "${IMAGE_TAGS[$i]}") 21 | done 22 | 23 | mkdir ./logs || true 24 | docker buildx build ${DOCKER_EXTRA_ARGS[@]} --push \ 25 | --build-arg BASE_ROCM_DEV_CONTAINER=$PATCHED_ROCM_IMAGE:${LLAMA_ROCM_VERSION}-complete \ 26 | --build-arg ROCM_DOCKER_ARCH=$ROCM_ARCH \ 27 | --build-arg ROCM_VERSION=$LLAMA_ROCM_VERSION \ 28 | --build-arg AMDGPU_VERSION=$LLAMA_ROCM_VERSION \ 29 | --progress=plain --target full -f ./submodules/llama.cpp/.devops/rocm.Dockerfile ./submodules/llama.cpp 2>&1 | tee ./logs/build_$(date +%Y%m%d%H%M%S).log 30 | -------------------------------------------------------------------------------- /llama.cpp/build-and-push.vulkan.sh: -------------------------------------------------------------------------------- 1 | #/bin/bash 2 | set -e 3 | 4 | cd $(dirname $0) 5 | source ../env.sh 6 | 7 | IMAGE_TAGS=( 8 | "$PATCHED_LLAMA_IMAGE:full-${LLAMA_GIT_REF}-vulkan-patch-${REPO_GIT_REF}" 9 | "$PATCHED_LLAMA_IMAGE:full-${LLAMA_GIT_REF}-vulkan" 10 | ) 11 | 12 | if docker_image_pushed ${IMAGE_TAGS[0]}; then 13 | echo "${IMAGE_TAGS[0]} already in registry. Skip" 14 | exit 0 15 | fi 16 | 17 | DOCKER_EXTRA_ARGS=() 18 | for (( i=0; i<${#IMAGE_TAGS[@]}; i++ )); do 19 | DOCKER_EXTRA_ARGS+=("-t" "${IMAGE_TAGS[$i]}") 20 | done 21 | 22 | mkdir ./logs || true 23 | docker buildx build ${DOCKER_EXTRA_ARGS[@]} --push \ 24 | --build-arg UBUNTU_VERSION="24.04" \ 25 | --progress=plain --target full -f ./submodules/llama.cpp/.devops/vulkan.Dockerfile ./submodules/llama.cpp 2>&1 | tee ./logs/build_$(date +%Y%m%d%H%M%S).log 26 | -------------------------------------------------------------------------------- /llama.cpp/env.sh: -------------------------------------------------------------------------------- 1 | #/bin/bash 2 | 3 | pushd $(dirname ${BASH_SOURCE[0]}) 4 | 5 | if [ "$LLAMA_IMAGE" == "" ]; then 6 | LLAMA_IMAGE=docker.io/mixa3607/llama.cpp-gfx906 7 | fi 8 | 9 | # rocm ver 10 | if [ "$LLAMA_ROCM_VERSION" == "" ]; then 11 | LLAMA_ROCM_VERSION=7.0.0 12 | fi 13 | 14 | if [ "$LLAMA_GIT_REF" == "" ]; then 15 | LLAMA_GIT_REF="$(git_get_current_tag submodules/llama.cpp)" 16 | fi 17 | if [ "$LLAMA_GIT_REF" == "" ]; then 18 | LLAMA_GIT_REF="$(git_get_current_sha submodules/llama.cpp)" 19 | fi 20 | 21 | popd 22 | -------------------------------------------------------------------------------- /llama.cpp/llamacpp-offload-calculator/.gitattributes: -------------------------------------------------------------------------------- 1 | ############################################################################### 2 | # Set default behavior to automatically normalize line endings. 3 | ############################################################################### 4 | * text=auto 5 | 6 | ############################################################################### 7 | # Set default behavior for command prompt diff. 8 | # 9 | # This is need for earlier builds of msysgit that does not have it on by 10 | # default for csharp files. 
11 | # Note: This is only used by command line 12 | ############################################################################### 13 | #*.cs diff=csharp 14 | 15 | ############################################################################### 16 | # Set the merge driver for project and solution files 17 | # 18 | # Merging from the command prompt will add diff markers to the files if there 19 | # are conflicts (Merging from VS is not affected by the settings below, in VS 20 | # the diff markers are never inserted). Diff markers may cause the following 21 | # file extensions to fail to load in VS. An alternative would be to treat 22 | # these files as binary and thus will always conflict and require user 23 | # intervention with every merge. To do so, just uncomment the entries below 24 | ############################################################################### 25 | #*.sln merge=binary 26 | #*.csproj merge=binary 27 | #*.vbproj merge=binary 28 | #*.vcxproj merge=binary 29 | #*.vcproj merge=binary 30 | #*.dbproj merge=binary 31 | #*.fsproj merge=binary 32 | #*.lsproj merge=binary 33 | #*.wixproj merge=binary 34 | #*.modelproj merge=binary 35 | #*.sqlproj merge=binary 36 | #*.wwaproj merge=binary 37 | 38 | ############################################################################### 39 | # behavior for image files 40 | # 41 | # image files are treated as binary by default. 42 | ############################################################################### 43 | #*.jpg binary 44 | #*.png binary 45 | #*.gif binary 46 | 47 | ############################################################################### 48 | # diff behavior for common document formats 49 | # 50 | # Convert binary document formats to text before diffing them. This feature 51 | # is only available from the command line. Turn it on by uncommenting the 52 | # entries below. 53 | ############################################################################### 54 | #*.doc diff=astextplain 55 | #*.DOC diff=astextplain 56 | #*.docx diff=astextplain 57 | #*.DOCX diff=astextplain 58 | #*.dot diff=astextplain 59 | #*.DOT diff=astextplain 60 | #*.pdf diff=astextplain 61 | #*.PDF diff=astextplain 62 | #*.rtf diff=astextplain 63 | #*.RTF diff=astextplain 64 | -------------------------------------------------------------------------------- /llama.cpp/llamacpp-offload-calculator/.gitignore: -------------------------------------------------------------------------------- 1 | ## Ignore Visual Studio temporary files, build results, and 2 | ## files generated by popular Visual Studio add-ons. 
3 | ## 4 | ## Get latest from https://github.com/github/gitignore/blob/master/VisualStudio.gitignore 5 | 6 | # User-specific files 7 | *.rsuser 8 | *.suo 9 | *.user 10 | *.userosscache 11 | *.sln.docstates 12 | 13 | # User-specific files (MonoDevelop/Xamarin Studio) 14 | *.userprefs 15 | 16 | # Mono auto generated files 17 | mono_crash.* 18 | 19 | # Build results 20 | [Dd]ebug/ 21 | [Dd]ebugPublic/ 22 | [Rr]elease/ 23 | [Rr]eleases/ 24 | x64/ 25 | x86/ 26 | [Ww][Ii][Nn]32/ 27 | [Aa][Rr][Mm]/ 28 | [Aa][Rr][Mm]64/ 29 | bld/ 30 | [Bb]in/ 31 | [Oo]bj/ 32 | [Oo]ut/ 33 | [Ll]og/ 34 | [Ll]ogs/ 35 | 36 | # Visual Studio 2015/2017 cache/options directory 37 | .vs/ 38 | # Uncomment if you have tasks that create the project's static files in wwwroot 39 | #wwwroot/ 40 | 41 | # Visual Studio 2017 auto generated files 42 | Generated\ Files/ 43 | 44 | # MSTest test Results 45 | [Tt]est[Rr]esult*/ 46 | [Bb]uild[Ll]og.* 47 | 48 | # NUnit 49 | *.VisualState.xml 50 | TestResult.xml 51 | nunit-*.xml 52 | 53 | # Build Results of an ATL Project 54 | [Dd]ebugPS/ 55 | [Rr]eleasePS/ 56 | dlldata.c 57 | 58 | # Benchmark Results 59 | BenchmarkDotNet.Artifacts/ 60 | 61 | # .NET Core 62 | project.lock.json 63 | project.fragment.lock.json 64 | artifacts/ 65 | 66 | # ASP.NET Scaffolding 67 | ScaffoldingReadMe.txt 68 | 69 | # StyleCop 70 | StyleCopReport.xml 71 | 72 | # Files built by Visual Studio 73 | *_i.c 74 | *_p.c 75 | *_h.h 76 | *.ilk 77 | *.meta 78 | *.obj 79 | *.iobj 80 | *.pch 81 | *.pdb 82 | *.ipdb 83 | *.pgc 84 | *.pgd 85 | *.rsp 86 | *.sbr 87 | *.tlb 88 | *.tli 89 | *.tlh 90 | *.tmp 91 | *.tmp_proj 92 | *_wpftmp.csproj 93 | *.log 94 | *.vspscc 95 | *.vssscc 96 | .builds 97 | *.pidb 98 | *.svclog 99 | *.scc 100 | 101 | # Chutzpah Test files 102 | _Chutzpah* 103 | 104 | # Visual C++ cache files 105 | ipch/ 106 | *.aps 107 | *.ncb 108 | *.opendb 109 | *.opensdf 110 | *.sdf 111 | *.cachefile 112 | *.VC.db 113 | *.VC.VC.opendb 114 | 115 | # Visual Studio profiler 116 | *.psess 117 | *.vsp 118 | *.vspx 119 | *.sap 120 | 121 | # Visual Studio Trace Files 122 | *.e2e 123 | 124 | # TFS 2012 Local Workspace 125 | $tf/ 126 | 127 | # Guidance Automation Toolkit 128 | *.gpState 129 | 130 | # ReSharper is a .NET coding add-in 131 | _ReSharper*/ 132 | *.[Rr]e[Ss]harper 133 | *.DotSettings.user 134 | 135 | # TeamCity is a build add-in 136 | _TeamCity* 137 | 138 | # DotCover is a Code Coverage Tool 139 | *.dotCover 140 | 141 | # AxoCover is a Code Coverage Tool 142 | .axoCover/* 143 | !.axoCover/settings.json 144 | 145 | # Coverlet is a free, cross platform Code Coverage Tool 146 | coverage*.json 147 | coverage*.xml 148 | coverage*.info 149 | 150 | # Visual Studio code coverage results 151 | *.coverage 152 | *.coveragexml 153 | 154 | # NCrunch 155 | _NCrunch_* 156 | .*crunch*.local.xml 157 | nCrunchTemp_* 158 | 159 | # MightyMoose 160 | *.mm.* 161 | AutoTest.Net/ 162 | 163 | # Web workbench (sass) 164 | .sass-cache/ 165 | 166 | # Installshield output folder 167 | [Ee]xpress/ 168 | 169 | # DocProject is a documentation generator add-in 170 | DocProject/buildhelp/ 171 | DocProject/Help/*.HxT 172 | DocProject/Help/*.HxC 173 | DocProject/Help/*.hhc 174 | DocProject/Help/*.hhk 175 | DocProject/Help/*.hhp 176 | DocProject/Help/Html2 177 | DocProject/Help/html 178 | 179 | # Click-Once directory 180 | publish/ 181 | 182 | # Publish Web Output 183 | *.[Pp]ublish.xml 184 | *.azurePubxml 185 | # Note: Comment the next line if you want to checkin your web deploy settings, 186 | # but database connection strings (with potential 
passwords) will be unencrypted 187 | *.pubxml 188 | *.publishproj 189 | 190 | # Microsoft Azure Web App publish settings. Comment the next line if you want to 191 | # checkin your Azure Web App publish settings, but sensitive information contained 192 | # in these scripts will be unencrypted 193 | PublishScripts/ 194 | 195 | # NuGet Packages 196 | *.nupkg 197 | # NuGet Symbol Packages 198 | *.snupkg 199 | # The packages folder can be ignored because of Package Restore 200 | **/[Pp]ackages/* 201 | # except build/, which is used as an MSBuild target. 202 | !**/[Pp]ackages/build/ 203 | # Uncomment if necessary however generally it will be regenerated when needed 204 | #!**/[Pp]ackages/repositories.config 205 | # NuGet v3's project.json files produces more ignorable files 206 | *.nuget.props 207 | *.nuget.targets 208 | 209 | # Microsoft Azure Build Output 210 | csx/ 211 | *.build.csdef 212 | 213 | # Microsoft Azure Emulator 214 | ecf/ 215 | rcf/ 216 | 217 | # Windows Store app package directories and files 218 | AppPackages/ 219 | BundleArtifacts/ 220 | Package.StoreAssociation.xml 221 | _pkginfo.txt 222 | *.appx 223 | *.appxbundle 224 | *.appxupload 225 | 226 | # Visual Studio cache files 227 | # files ending in .cache can be ignored 228 | *.[Cc]ache 229 | # but keep track of directories ending in .cache 230 | !?*.[Cc]ache/ 231 | 232 | # Others 233 | ClientBin/ 234 | ~$* 235 | *~ 236 | *.dbmdl 237 | *.dbproj.schemaview 238 | *.jfm 239 | *.pfx 240 | *.publishsettings 241 | orleans.codegen.cs 242 | 243 | # Including strong name files can present a security risk 244 | # (https://github.com/github/gitignore/pull/2483#issue-259490424) 245 | #*.snk 246 | 247 | # Since there are multiple workflows, uncomment next line to ignore bower_components 248 | # (https://github.com/github/gitignore/pull/1529#issuecomment-104372622) 249 | #bower_components/ 250 | 251 | # RIA/Silverlight projects 252 | Generated_Code/ 253 | 254 | # Backup & report files from converting an old project file 255 | # to a newer Visual Studio version. Backup files are not needed, 256 | # because we have git ;-) 257 | _UpgradeReport_Files/ 258 | Backup*/ 259 | UpgradeLog*.XML 260 | UpgradeLog*.htm 261 | ServiceFabricBackup/ 262 | *.rptproj.bak 263 | 264 | # SQL Server files 265 | *.mdf 266 | *.ldf 267 | *.ndf 268 | 269 | # Business Intelligence projects 270 | *.rdl.data 271 | *.bim.layout 272 | *.bim_*.settings 273 | *.rptproj.rsuser 274 | *- [Bb]ackup.rdl 275 | *- [Bb]ackup ([0-9]).rdl 276 | *- [Bb]ackup ([0-9][0-9]).rdl 277 | 278 | # Microsoft Fakes 279 | FakesAssemblies/ 280 | 281 | # GhostDoc plugin setting file 282 | *.GhostDoc.xml 283 | 284 | # Node.js Tools for Visual Studio 285 | .ntvs_analysis.dat 286 | node_modules/ 287 | 288 | # Visual Studio 6 build log 289 | *.plg 290 | 291 | # Visual Studio 6 workspace options file 292 | *.opt 293 | 294 | # Visual Studio 6 auto-generated workspace file (contains which files were open etc.) 
295 | *.vbw 296 | 297 | # Visual Studio LightSwitch build output 298 | **/*.HTMLClient/GeneratedArtifacts 299 | **/*.DesktopClient/GeneratedArtifacts 300 | **/*.DesktopClient/ModelManifest.xml 301 | **/*.Server/GeneratedArtifacts 302 | **/*.Server/ModelManifest.xml 303 | _Pvt_Extensions 304 | 305 | # Paket dependency manager 306 | .paket/paket.exe 307 | paket-files/ 308 | 309 | # FAKE - F# Make 310 | .fake/ 311 | 312 | # CodeRush personal settings 313 | .cr/personal 314 | 315 | # Python Tools for Visual Studio (PTVS) 316 | __pycache__/ 317 | *.pyc 318 | 319 | # Cake - Uncomment if you are using it 320 | # tools/** 321 | # !tools/packages.config 322 | 323 | # Tabs Studio 324 | *.tss 325 | 326 | # Telerik's JustMock configuration file 327 | *.jmconfig 328 | 329 | # BizTalk build output 330 | *.btp.cs 331 | *.btm.cs 332 | *.odx.cs 333 | *.xsd.cs 334 | 335 | # OpenCover UI analysis results 336 | OpenCover/ 337 | 338 | # Azure Stream Analytics local run output 339 | ASALocalRun/ 340 | 341 | # MSBuild Binary and Structured Log 342 | *.binlog 343 | 344 | # NVidia Nsight GPU debugger configuration file 345 | *.nvuser 346 | 347 | # MFractors (Xamarin productivity tool) working folder 348 | .mfractor/ 349 | 350 | # Local History for Visual Studio 351 | .localhistory/ 352 | 353 | # BeatPulse healthcheck temp database 354 | healthchecksdb 355 | 356 | # Backup folder for Package Reference Convert tool in Visual Studio 2017 357 | MigrationBackup/ 358 | 359 | # Ionide (cross platform F# VS Code tools) working folder 360 | .ionide/ 361 | 362 | # Fody - auto-generated XML schema 363 | FodyWeavers.xsd -------------------------------------------------------------------------------- /llama.cpp/llamacpp-offload-calculator/ArkProjects.LlamaOffloadCalc.sln: -------------------------------------------------------------------------------- 1 |  2 | Microsoft Visual Studio Solution File, Format Version 12.00 3 | # Visual Studio Version 17 4 | VisualStudioVersion = 17.13.35931.197 d17.13 5 | MinimumVisualStudioVersion = 10.0.40219.1 6 | Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "ArkProjects.LlamaOffloadCalc", "ArkProjects.LlamaOffloadCalc\ArkProjects.LlamaOffloadCalc.csproj", "{AB281ECC-61B1-4575-B34D-F14DEB3814FD}" 7 | EndProject 8 | Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "GGUFSharp", "GGUFSharp\src\GGUFSharp\GGUFSharp\GGUFSharp.csproj", "{A3A53FDD-DA1B-68F2-CAB6-5800C258B19E}" 9 | EndProject 10 | Global 11 | GlobalSection(SolutionConfigurationPlatforms) = preSolution 12 | Debug|Any CPU = Debug|Any CPU 13 | Release|Any CPU = Release|Any CPU 14 | EndGlobalSection 15 | GlobalSection(ProjectConfigurationPlatforms) = postSolution 16 | {AB281ECC-61B1-4575-B34D-F14DEB3814FD}.Debug|Any CPU.ActiveCfg = Debug|Any CPU 17 | {AB281ECC-61B1-4575-B34D-F14DEB3814FD}.Debug|Any CPU.Build.0 = Debug|Any CPU 18 | {AB281ECC-61B1-4575-B34D-F14DEB3814FD}.Release|Any CPU.ActiveCfg = Release|Any CPU 19 | {AB281ECC-61B1-4575-B34D-F14DEB3814FD}.Release|Any CPU.Build.0 = Release|Any CPU 20 | {A3A53FDD-DA1B-68F2-CAB6-5800C258B19E}.Debug|Any CPU.ActiveCfg = Debug|Any CPU 21 | {A3A53FDD-DA1B-68F2-CAB6-5800C258B19E}.Debug|Any CPU.Build.0 = Debug|Any CPU 22 | {A3A53FDD-DA1B-68F2-CAB6-5800C258B19E}.Release|Any CPU.ActiveCfg = Release|Any CPU 23 | {A3A53FDD-DA1B-68F2-CAB6-5800C258B19E}.Release|Any CPU.Build.0 = Release|Any CPU 24 | EndGlobalSection 25 | GlobalSection(SolutionProperties) = preSolution 26 | HideSolutionNode = FALSE 27 | EndGlobalSection 28 | GlobalSection(ExtensibilityGlobals) = postSolution 29 | SolutionGuid = 
{C06FD4ED-87AF-4232-9D9E-D5D6EA618808}
30 | 	EndGlobalSection
31 | EndGlobal
32 | 
--------------------------------------------------------------------------------
/llama.cpp/llamacpp-offload-calculator/ArkProjects.LlamaOffloadCalc/ArkProjects.LlamaOffloadCalc.csproj:
--------------------------------------------------------------------------------
1 | 
2 | 
3 | 
4 | Exe
5 | net8.0
6 | enable
7 | enable
8 | 
9 | 
10 | 
11 | 
12 | 
13 | 
14 | 
15 | 
16 | 
17 | 
18 | 
19 | 
20 | 
21 | 
22 | 
23 | 
24 | 
25 | 
26 | PreserveNewest
27 | 
28 | 
29 | 
30 | 
31 | 
32 | PreserveNewest
33 | 
34 | 
35 | 
36 | 
37 | 
--------------------------------------------------------------------------------
/llama.cpp/llamacpp-offload-calculator/ArkProjects.LlamaOffloadCalc/LLamaDevice.cs:
--------------------------------------------------------------------------------
1 | namespace ArkProjects.LlmCalc;
2 | 
3 | public class LLamaDevice
4 | {
5 |     public required LLamaDeviceType Type { get; set; }
6 |     public required string Name { get; set; }
7 |     public string PciBus { get; set; } = "";
8 | 
9 |     public required long TotalSize { get; set; }
10 |     public long ReservedMemory { get; set; }
11 | 
12 |     public List<TensorMetadata> Tensors { get; set; } = [];
13 |     public List<int> Layers { get; set; } = [];
14 |     public double LayersPortion { get; set; } = 0;
15 | 
16 |     public long GetUsedSpace() => ReservedMemory + Tensors.Aggregate(0L, (current, tensor) => current + tensor.Size);
17 | 
18 |     public long GetFreeSpace() => TotalSize - GetUsedSpace();
19 | }
--------------------------------------------------------------------------------
/llama.cpp/llamacpp-offload-calculator/ArkProjects.LlamaOffloadCalc/LLamaDeviceType.cs:
--------------------------------------------------------------------------------
1 | namespace ArkProjects.LlmCalc;
2 | 
3 | public enum LLamaDeviceType
4 | {
5 |     Unknown,
6 |     GPU,
7 |     CPU,
8 | }
--------------------------------------------------------------------------------
/llama.cpp/llamacpp-offload-calculator/ArkProjects.LlamaOffloadCalc/LLamaGgufMetadataExtractor.cs:
--------------------------------------------------------------------------------
1 | using System.Text.RegularExpressions;
2 | using GGUFSharp;
3 | 
4 | namespace ArkProjects.LlmCalc;
5 | 
6 | public class LLamaGgufMetadataExtractor
7 | {
8 |     private readonly string _ggufModelPath;
9 |     private readonly Regex _nameMatchRegex = new Regex(@"(?<name>^.+)-(?<part>\d{5})-of-(?<total>\d{5}).gguf");
10 | 
11 |     public LLamaGgufMetadataExtractor(string ggufModelPath)
12 |     {
13 |         _ggufModelPath = ggufModelPath;
14 |     }
15 | 
16 |     public List<TensorMetadata> ExtractMetadata()
17 |     {
18 |         var reader = new GGUFReader();
19 |         var tensorInfos = new List<TensorMetadata>();
20 |         var fileName = Path.GetFileName(_ggufModelPath);
21 |         var match = _nameMatchRegex.Match(fileName);
22 |         if (!match.Success)
23 |         {
24 |             var file = _ggufModelPath;
25 |             Console.WriteLine($"Reading {file}");
26 |             var f = reader.Read(file);
27 |             tensorInfos.AddRange(f.TensorInfos.Select(t => new TensorMetadata(t)));
28 |         }
29 |         else
30 |         {
31 |             var totalParts = int.Parse(match.Groups["total"].Value);
32 |             var name = match.Groups["name"].Value;
33 | 
34 |             for (int i = 1; i <= totalParts; i++)
35 |             {
36 |                 var file = Path.Combine(Path.GetDirectoryName(_ggufModelPath)!,
37 |                     $"{name}-{i:D5}-of-{totalParts:D5}.gguf");
38 |                 Console.WriteLine($"Reading {file}");
39 |                 var f = reader.Read(file);
40 |                 tensorInfos.AddRange(f.TensorInfos.Select(t => new TensorMetadata(t)));
41 |             }
42 |         }
43 | 
44 |         return tensorInfos;
45 |     }
46 | }
47 | 
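The extractor walks every part of a sharded GGUF (`NAME-00001-of-00003.gguf` style) starting from any one part. To see it in action, the calculator can be run against one of the bundled profiles; `-e NAME` layers `appsettings.NAME.yaml` on top of `appsettings.yaml` (see `GetOptions` in `Program.cs` below). A sketch:

```bash
cd llama.cpp/llamacpp-offload-calculator/ArkProjects.LlamaOffloadCalc

# '--' separates dotnet-run's own options from the app's arguments.
dotnet run -- -e gpt-oss-120b-F16
```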
--------------------------------------------------------------------------------
/llama.cpp/llamacpp-offload-calculator/ArkProjects.LlamaOffloadCalc/LLamaLogsParser.cs:
--------------------------------------------------------------------------------
1 | using System.Text.RegularExpressions;
2 | 
3 | namespace ArkProjects.LlmCalc;
4 | 
5 | public class LLamaLogsParser
6 | {
7 |     private readonly string _filePath;
8 | 
9 |     public LLamaLogsParser(string filePath)
10 |     {
11 |         _filePath = filePath;
12 |     }
13 | 
14 |     public Dictionary<string, List<int>> ExtractAssignedLayers()
15 |     {
16 |         var regex = new Regex(@"load_tensors: layer +(?<layer>\d+) assigned to device (?<device>\S+), ");
17 |         var result = new Dictionary<string, List<int>>();
18 | 
19 |         var start = -1;
20 |         var lines = File.ReadAllLines(_filePath);
21 |         for (var i = 0; i < lines.Length; i++)
22 |         {
23 |             var line = lines[i];
24 |             var match = regex.Match(line);
25 |             if (start < 0 && match.Success)
26 |             {
27 |                 start = i;
28 |             }
29 | 
30 |             if (match.Success)
31 |             {
32 |                 result.TryAdd(match.Groups["device"].Value, new List<int>());
33 |                 result[match.Groups["device"].Value].Add(int.Parse(match.Groups["layer"].Value));
34 |             }
35 | 
36 | 
37 |             if (start >= 0 && !match.Success)
38 |             {
39 |                 break;
40 |             }
41 |         }
42 | 
43 |         return result;
44 |     }
45 | 
46 |     public Dictionary<string, List<string>> ExtractAssignedTensors()
47 |     {
48 |         var regex = new Regex(@"tensor (?<tensor>\S+) \(.+\) buffer type overridden to (?<device>\S+)");
49 |         var result = new Dictionary<string, List<string>>();
50 | 
51 |         var start = -1;
52 |         var lines = File.ReadAllLines(_filePath);
53 |         for (var i = 0; i < lines.Length; i++)
54 |         {
55 |             var line = lines[i];
56 |             var match = regex.Match(line);
57 |             if (start < 0 && match.Success)
58 |             {
59 |                 start = i;
60 |             }
61 | 
62 |             if (match.Success)
63 |             {
64 |                 result.TryAdd(match.Groups["device"].Value, new List<string>());
65 |                 result[match.Groups["device"].Value].Add(match.Groups["tensor"].Value);
66 |             }
67 | 
68 | 
69 |             if (start >= 0 && !match.Success)
70 |             {
71 |                 break;
72 |             }
73 |         }
74 | 
75 |         return result;
76 |     }
77 | }
--------------------------------------------------------------------------------
/llama.cpp/llamacpp-offload-calculator/ArkProjects.LlamaOffloadCalc/Options/LLamaDeviceOptions.cs:
--------------------------------------------------------------------------------
1 | namespace ArkProjects.LlmCalc.Options;
2 | 
3 | public class LLamaDeviceOptions
4 | {
5 |     public required LLamaDeviceType Type { get; set; }
6 |     public string PciBus { get; set; } = "";
7 | 
8 |     public required long TotalSizeMb { get; set; }
9 |     public long ReservedMemoryMb { get; set; }
10 | 
11 |     public double LayersPortion { get; set; } = 0;
12 |     public int Id { get; set; }
13 | }
--------------------------------------------------------------------------------
/llama.cpp/llamacpp-offload-calculator/ArkProjects.LlamaOffloadCalc/Options/OffloadCalculationOptions.cs:
--------------------------------------------------------------------------------
1 | namespace ArkProjects.LlmCalc.Options;
2 | 
3 | public class OffloadCalculationOptions
4 | {
5 |     public bool PrintTensorsSize { get; set; } = false;
6 |     public bool PrintHelmChartConfig { get; set; } = false;
7 |     public bool PrintCmdConfig { get; set; } = false;
8 |     public required string GgufFile { get; set; }
9 |     public required Dictionary<string, LLamaDeviceOptions> Devices { get; set; }
10 |     public required Dictionary<string, TensorsOffloadRuleOptions> OffloadRules { get; set; }
11 | }
--------------------------------------------------------------------------------
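These option classes bind straight from the YAML configs shown further below. A minimal custom profile might look like the following sketch; the model path and sizes are placeholders, and the validator that follows requires at least one GPU, exactly one CPU, and at least one offload rule:

```bash
# Write a local profile next to the binary and select it with '-- -e local'.
cat > appsettings.local.yaml <<'EOF'
GgufFile: "/models/my-model-00001-of-00003.gguf"
Devices:
  ROCm0: { Id: 1, Type: GPU, TotalSizeMb: 32768, ReservedMemoryMb: 1024 }
  CPU:   { Id: 2, Type: CPU, TotalSizeMb: 131072 }
OffloadRules:
  ffn_up_exps: { Id: 1, Regex: '^blk\.\d+\.ffn_up_exps\.weight', Priority: 10 }
EOF
dotnet run -- -e local
```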
/llama.cpp/llamacpp-offload-calculator/ArkProjects.LlamaOffloadCalc/Options/OffloadCalculationOptionsValidator.cs:
--------------------------------------------------------------------------------
1 | using FluentValidation;
2 | 
3 | namespace ArkProjects.LlmCalc.Options;
4 | 
5 | public class OffloadCalculationOptionsValidator : AbstractValidator<OffloadCalculationOptions>
6 | {
7 |     public OffloadCalculationOptionsValidator()
8 |     {
9 |         RuleFor(x => x.GgufFile)
10 |             .NotEmpty()
11 |             .Must(x => File.Exists(x)).WithMessage("gguf file does not exist");
12 | 
13 |         RuleFor(x => x.Devices)
14 |             .Must(x => x.GroupBy(y => y.Value.Id).All(y => y.Count() == 1))
15 |             .WithMessage("Each device must have a unique id");
16 |         RuleFor(x => x.OffloadRules)
17 |             .Must(x => x.GroupBy(y => y.Value.Id).All(y => y.Count() == 1))
18 |             .WithMessage("Each offload rule must have a unique id");
19 | 
20 |         RuleFor(x => x.Devices)
21 |             .Must(x => x.Count(d => d.Value.Type == LLamaDeviceType.Unknown) == 0)
22 |             .WithMessage("Type must be set for each device");
23 |         RuleFor(x => x.Devices)
24 |             .Must(x => x.Count(d => d.Value.Type == LLamaDeviceType.GPU) >= 1)
25 |             .WithMessage("1 or more GPUs must be defined");
26 |         RuleFor(x => x.Devices)
27 |             .Must(x => x.Count(d => d.Value.Type == LLamaDeviceType.CPU) == 1)
28 |             .WithMessage("Exactly one CPU device must be defined");
29 |         RuleFor(x => x.OffloadRules)
30 |             .Must(x => x.Count > 0)
31 |             .WithMessage("1 or more offload rules must be defined");
32 |     }
33 | }
34 | 
--------------------------------------------------------------------------------
/llama.cpp/llamacpp-offload-calculator/ArkProjects.LlamaOffloadCalc/Options/TensorsOffloadRuleOptions.cs:
--------------------------------------------------------------------------------
1 | namespace ArkProjects.LlmCalc.Options;
2 | 
3 | public class TensorsOffloadRuleOptions
4 | {
5 |     public required string Regex { get; set; }
6 |     public int Id { get; set; }
7 |     public int Priority { get; set; }
8 | }
--------------------------------------------------------------------------------
/llama.cpp/llamacpp-offload-calculator/ArkProjects.LlamaOffloadCalc/Program.cs:
--------------------------------------------------------------------------------
1 | using ArkProjects.LlmCalc;
2 | using ArkProjects.LlmCalc.Options;
3 | using FluentValidation;
4 | using Microsoft.Extensions.Configuration;
5 | using System.Text;
6 | using System.Text.RegularExpressions;
7 | 
8 | 
9 | var options = GetOptions(args);
10 | new OffloadCalculationOptionsValidator().ValidateAndThrow(options);
11 | 
12 | var tensorsOffloadRules = options.OffloadRules
13 |     .Select(x => (rule: x.Value, name: x.Key, regex: new Regex(x.Value.Regex), tensors: new List<TensorMetadata>()))
14 |     .OrderBy(x => x.rule.Id)
15 |     .ToArray();
16 | 
17 | var devices = options.Devices
18 |     .OrderBy(x => x.Value.Id)
19 |     .Select(x => new LLamaDevice()
20 |     {
21 |         Name = x.Key,
22 |         TotalSize = x.Value.TotalSizeMb * 1024 * 1024,
23 |         ReservedMemory = x.Value.ReservedMemoryMb * 1024 * 1024,
24 |         Type = x.Value.Type,
25 |         LayersPortion = x.Value.LayersPortion,
26 |         PciBus = x.Value.PciBus,
27 |         Layers = new List<int>(),
28 |         Tensors = new List<TensorMetadata>()
29 |     })
30 |     .ToList();
31 | {
32 |     var gpus = devices.Where(x => x.Type == LLamaDeviceType.GPU).ToList();
33 |     if (gpus.Sum(x => x.LayersPortion) == 0)
34 |     {
35 |         var gpuMem = gpus.Sum(x => x.TotalSize);
36 |         foreach (var gpu in gpus)
37 |         {
38 |             gpu.LayersPortion = (double)gpu.TotalSize / gpuMem;
39 |         }
40 |     }
41 | }
42 | 
43 | var tensorInfos = new LLamaGgufMetadataExtractor(options.GgufFile)
44 |     .ExtractMetadata()
45 |     .Where(x => x.BlkId != -1)
46 |     .OrderBy(x => x.BlkId)
47 |     .ThenBy(x => x.Name)
48 |     .ToList();
49 | 
50 | // split layers
51 | var assignedLayers = new Dictionary<string, List<int>>();
52 | {
53 |     var layersCount = tensorInfos.Select(x => x.BlkId).Distinct().Count();
54 |     var layerIds = tensorInfos.Select(x => x.BlkId).Distinct().OrderBy(x => x).ToList();
55 |     var s = devices.Where(x => x.LayersPortion > 0).Sum(x => x.LayersPortion);
56 |     foreach (var device in devices.OrderBy(x => x.LayersPortion))
57 |     {
58 |         if (device.LayersPortion <= 0)
59 |             continue;
60 |         var c = (int)(layersCount / s * device.LayersPortion);
61 |         assignedLayers[device.Name] = layerIds.Take(c).ToList();
62 |         layerIds = layerIds.Skip(c).ToList();
63 |     }
64 | 
65 |     if (layerIds.Count > 0)
66 |     {
67 |         assignedLayers[devices.GroupBy(x => x.LayersPortion).MaxBy(x => x.Key)!.Last().Name].AddRange(layerIds);
68 |     }
69 | }
70 | 
71 | // split tensors
72 | foreach (var info in tensorInfos)
73 | {
74 |     tensorsOffloadRules.FirstOrDefault(x => x.regex.IsMatch(info.Name)).tensors?.Add(info);
75 | }
76 | 
77 | // apply layers
78 | {
79 |     foreach (var assignedLayer in assignedLayers)
80 |     {
81 |         var device = devices.First(x => x.Name == assignedLayer.Key);
82 |         device.Layers.AddRange(assignedLayer.Value);
83 |         device.Tensors.AddRange(tensorInfos.Where(x => assignedLayer.Value.Contains(x.BlkId)));
84 |     }
85 | 
86 |     if (!devices
87 |             .SelectMany(x => x.Tensors)
88 |             .OrderBy(x => x.Name)
89 |             .SequenceEqual(tensorInfos.OrderBy(x => x.Name))
90 |        )
91 |     {
92 |         throw new Exception("Layer assignment mismatch: tensors were lost or duplicated across devices");
93 |     }
94 | }
95 | 
96 | // offload tensors
97 | foreach (var device in devices.Where(x => x.Type == LLamaDeviceType.GPU))
98 | {
99 |     var dst = devices.First(x => x.Type == LLamaDeviceType.CPU);
100 |     while (device.GetFreeSpace() < 0)
101 |     {
102 |         var t = tensorsOffloadRules
103 |             .SelectMany(x => x.tensors.Select(y => (x.rule.Priority, y)))
104 |             .OrderByDescending(x => x.Priority)
105 |             .ThenBy(x => x.y.BlkId)
106 |             .ThenBy(x => x.y.Name)
107 |             .Select(x => x.y)
108 |             .First(x => device.Tensors.Contains(x));
109 |         Console.WriteLine($"Move {t.Name,-25} ({t.Size / 1024 / 1024} Mb) from {device.Name} to {dst.Name}");
110 |         device.Tensors.Remove(t);
111 |         dst.Tensors.Add(t);
112 |     }
113 | }
114 | 
115 | 
116 | if (options.PrintTensorsSize)
117 |     PrintTensorsSize(tensorInfos);
118 | PrintDevicesUtilization(devices);
119 | PrintTensorsOffloadResult();
120 | if (options.PrintHelmChartConfig)
121 |     PrintHelmChartConfig(devices);
122 | if (options.PrintCmdConfig)
123 |     PrintCmdConfig(devices);
124 | 
125 | 
126 | return;
127 | 
128 | static void PrintDevicesUtilization(IEnumerable<LLamaDevice> devices)
129 | {
130 |     Console.WriteLine("======= Device memory usage");
131 |     foreach (var device in devices)
132 |     {
133 |         Console.WriteLine($"{device.Name,-10} " +
134 |                           $"{device.GetUsedSpace() / 1024 / 1024} Mb of {device.TotalSize / 1024 / 1024} Mb " +
135 |                           $"({device.Tensors.Aggregate(0L, (current, tensor) => current + tensor.Size) / 1024 / 1024})");
136 |     }
137 | 
138 |     Console.WriteLine();
139 | }
140 | 
141 | static void PrintHelmChartConfig(IReadOnlyList<LLamaDevice> devices)
142 | {
143 |     Console.WriteLine("======= Helm chart config");
144 |     var sb = new StringBuilder();
145 |     sb.Append("extraEnvVars:\n");
146 |     sb.Append($"  - name: LLAMA_ARG_MAIN_GPU\n" +
147 |               $"    value: '0'\n");
148 |     sb.Append($"  - name: LLAMA_ARG_TENSOR_SPLIT\n" +
149 |               $"    value: '{string.Join(',', devices.Select(x => x.Layers.Count))}'\n");
150 |     sb.Append("\n");
151 | 
152 |     sb.Append("modelTensorsOverride:\n");
153 |     foreach (var device in devices.Where(x => x.Type != LLamaDeviceType.GPU && x.Tensors.Count > 0))
154 |     {
155 |         sb.Append($"  - name: {device.Name}\n" +
156 | $" tensors:\n"); 157 | foreach (var tensor in device.Tensors) 158 | { 159 | sb.Append($" - {tensor.Name}\n"); 160 | } 161 | } 162 | 163 | Console.WriteLine(sb); 164 | } 165 | 166 | static void PrintCmdConfig(IReadOnlyList devices) 167 | { 168 | Console.WriteLine("======= CMD config"); 169 | var sb = new StringBuilder(); 170 | sb.Append("--main-gpu 0 "); 171 | sb.Append($"--tensor-split \"{string.Join(',', devices.Select(x => x.Layers.Count))}\" "); 172 | devices 173 | .Where(x => x.Type != LLamaDeviceType.GPU && x.Tensors.Count > 0) 174 | .Select(x => $"--override-tensor \"({string.Join('|', x.Tensors.Select(t => t.Name))})={x.Name}\" ") 175 | .ToList() 176 | .ForEach(x => sb.Append(x)); 177 | Console.WriteLine(sb); 178 | Console.WriteLine(); 179 | } 180 | 181 | static void PrintTensorsSize(IEnumerable tensorInfos) 182 | { 183 | Console.WriteLine("======= Tensors size"); 184 | foreach (var tensorInfo in tensorInfos.OrderBy(x => x.Size)) 185 | { 186 | Console.WriteLine($"{tensorInfo.Name,-30} {tensorInfo.Size / 1024 / 1024} Mb"); 187 | } 188 | } 189 | 190 | void PrintTensorsOffloadResult() 191 | { 192 | Console.WriteLine("======= Tensors offload result"); 193 | foreach (var t in tensorsOffloadRules) 194 | { 195 | var offloadByDevice = devices 196 | .Select(x => (x.Name, x.Tensors.Count(y => t.tensors.Contains(y)))) 197 | .ToList(); 198 | Console.WriteLine( 199 | $"Offload {t.name,-24} ({t.rule.Priority}) {(t.tensors.Count - offloadByDevice.Sum(x => x.Item2)).ToString(),-2} " + 200 | $"({string.Join(", ", offloadByDevice.Select(x => $"{x.Name} = {x.Item2.ToString(),-2}"))}) " + 201 | $"of {t.tensors.Count}"); 202 | } 203 | 204 | Console.WriteLine(); 205 | } 206 | 207 | static OffloadCalculationOptions GetOptions(string[] args) 208 | { 209 | var mapping = new Dictionary() 210 | { 211 | { "-e", "environment" } 212 | }; 213 | string? env; 214 | // stage 0 215 | { 216 | var cfgBuilder = new ConfigurationBuilder() 217 | .AddJsonFile("appsettings.json", true) 218 | .AddYamlFile("appsettings.yaml", true) 219 | .AddYamlFile("appsettings.yml", true) 220 | .AddEnvironmentVariables() 221 | .AddCommandLine(args, mapping); 222 | 223 | var cfgRoot = cfgBuilder.Build(); 224 | env = cfgRoot["environment"] ?? 
225 |     }
226 | 
227 |     // stage 1
228 |     {
229 |         var cfgBuilder = new ConfigurationBuilder()
230 |             .AddJsonFile("appsettings.json", true)
231 |             .AddYamlFile("appsettings.yaml", true)
232 |             .AddYamlFile("appsettings.yml", true);
233 | 
234 |         if (!string.IsNullOrWhiteSpace(env))
235 |         {
236 |             cfgBuilder
237 |                 .AddJsonFile($"appsettings.{env}.json", true)
238 |                 .AddYamlFile($"appsettings.{env}.yaml", true)
239 |                 .AddYamlFile($"appsettings.{env}.yml", true);
240 |         }
241 | 
242 |         cfgBuilder
243 |             .AddEnvironmentVariables()
244 |             .AddCommandLine(args, mapping);
245 | 
246 |         var cfgRoot = cfgBuilder.Build();
247 |         return cfgRoot.Get<OffloadCalculationOptions>()!;
248 |     }
249 | }
250 | 
--------------------------------------------------------------------------------
/llama.cpp/llamacpp-offload-calculator/ArkProjects.LlamaOffloadCalc/Properties/launchSettings.json:
--------------------------------------------------------------------------------
1 | {
2 |   "profiles": {
3 |     "GLM-4.5-Air-UD-Q6_K_XL": {
4 |       "commandName": "Project",
5 |       "commandLineArgs": "-e GLM-4.5-Air-UD-Q6_K_XL"
6 |     },
7 |     "gpt-oss-120b-F16": {
8 |       "commandName": "Project",
9 |       "commandLineArgs": "-e gpt-oss-120b-F16"
10 |     }
11 |   }
12 | }
--------------------------------------------------------------------------------
/llama.cpp/llamacpp-offload-calculator/ArkProjects.LlamaOffloadCalc/TensorMetadata.cs:
--------------------------------------------------------------------------------
1 | using GGUFSharp;
2 | 
3 | namespace ArkProjects.LlmCalc;
4 | 
5 | public class TensorMetadata
6 | {
7 |     public TensorMetadata(GGUFTensorInfo tensorInfo)
8 |     {
9 |         TensorInfo = tensorInfo;
10 |         if (Name.StartsWith("blk"))
11 |             BlkId = int.Parse(Name.Split(".").Skip(1).First());
12 |     }
13 | 
14 |     public GGUFTensorInfo TensorInfo { get; }
15 |     public string Name => TensorInfo.Name;
16 |     public long Size => (long)TensorInfo.Size;
17 |     public int BlkId { get; } = -1;
18 | }
--------------------------------------------------------------------------------
/llama.cpp/llamacpp-offload-calculator/ArkProjects.LlamaOffloadCalc/appsettings.GLM-4.5-Air-UD-Q6_K_XL.yaml:
--------------------------------------------------------------------------------
1 | GgufFile: "\
2 |   \\\\TRUENAS/trash3/kube-volumes/pvc-38077fbf-f7fa-46d8-9ad0-17c2ba5bf869/hub\
3 |   /models--unsloth--GLM-4.5-Air-GGUF/snapshots/a5133889a6e29d42a1e71784b2ae8514fb28156f\
4 |   /UD-Q6_K_XL/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003.gguf"
5 | 
6 | Devices:
7 |   ROCm0:
8 |     ReservedMemoryMb: 1024
9 |   ROCm1:
10 |     ReservedMemoryMb: 3584
--------------------------------------------------------------------------------
/llama.cpp/llamacpp-offload-calculator/ArkProjects.LlamaOffloadCalc/appsettings.gpt-oss-120b-F16.yaml:
--------------------------------------------------------------------------------
1 | GgufFile: "\
2 |   \\\\TRUENAS/trash3/kube-volumes/pvc-38077fbf-f7fa-46d8-9ad0-17c2ba5bf869/hub\
3 |   /models--unsloth--gpt-oss-120b-GGUF/snapshots/91daeef64d6b1e1078ad1d007f9efa98526d7bf1\
4 |   /gpt-oss-120b-F16.gguf"
5 | 
6 | Devices:
7 |   ROCm0:
8 |     ReservedMemoryMb: 9300
9 |   ROCm1:
10 |     ReservedMemoryMb: 8500
--------------------------------------------------------------------------------
/llama.cpp/llamacpp-offload-calculator/ArkProjects.LlamaOffloadCalc/appsettings.yaml:
--------------------------------------------------------------------------------
1 | PrintCmdConfig: true
2 | PrintHelmChartConfig: true
3 | PrintTensorsSize: false
4 | 
5 | # list from 'llama-server --list-devices' + CPU
6 | Devices:
7 |   ROCm0:
8 |     Id: 1
9 |     Type: GPU
10 |     TotalSizeMb: 
32768 11 | PciBus: 0000:01:00.0 12 | ROCm1: 13 | Id: 2 14 | Type: GPU 15 | TotalSizeMb: 32768 16 | PciBus: 0000:02:00.0 17 | CPU: 18 | Id: 3 19 | Type: CPU 20 | TotalSizeMb: 131072 21 | ReservedMemory: 0 22 | 23 | # offloading rules 24 | OffloadRules: 25 | ffn_gate_exps: 26 | Id: 1 27 | Regex: '^blk\.\d+\.ffn_gate_exps.weight' 28 | Priority: 10 29 | ffn_up_exps: 30 | Id: 2 31 | Regex: '^blk\.\d+\.ffn_up_exps.weight' 32 | Priority: 20 33 | ffn_down_exps: 34 | Id: 3 35 | Regex: '^blk\.\d+\.ffn_down_exps.weight' 36 | Priority: 20 -------------------------------------------------------------------------------- /llama.cpp/llamacpp-offload-calculator/GGUFSharp/.gitignore: -------------------------------------------------------------------------------- 1 | ## Ignore Visual Studio temporary files, build results, and 2 | ## files generated by popular Visual Studio add-ons. 3 | ## 4 | ## Get latest from https://github.com/github/gitignore/blob/main/VisualStudio.gitignore 5 | 6 | # User-specific files 7 | *.rsuser 8 | *.suo 9 | *.user 10 | *.userosscache 11 | *.sln.docstates 12 | 13 | # User-specific files (MonoDevelop/Xamarin Studio) 14 | *.userprefs 15 | 16 | # Mono auto generated files 17 | mono_crash.* 18 | 19 | # Build results 20 | [Dd]ebug/ 21 | [Dd]ebugPublic/ 22 | [Rr]elease/ 23 | [Rr]eleases/ 24 | x64/ 25 | x86/ 26 | [Ww][Ii][Nn]32/ 27 | [Aa][Rr][Mm]/ 28 | [Aa][Rr][Mm]64/ 29 | bld/ 30 | [Bb]in/ 31 | [Oo]bj/ 32 | [Ll]og/ 33 | [Ll]ogs/ 34 | 35 | # Visual Studio 2015/2017 cache/options directory 36 | .vs/ 37 | # Uncomment if you have tasks that create the project's static files in wwwroot 38 | #wwwroot/ 39 | 40 | # Visual Studio 2017 auto generated files 41 | Generated\ Files/ 42 | 43 | # MSTest test Results 44 | [Tt]est[Rr]esult*/ 45 | [Bb]uild[Ll]og.* 46 | 47 | # NUnit 48 | *.VisualState.xml 49 | TestResult.xml 50 | nunit-*.xml 51 | 52 | # Build Results of an ATL Project 53 | [Dd]ebugPS/ 54 | [Rr]eleasePS/ 55 | dlldata.c 56 | 57 | # Benchmark Results 58 | BenchmarkDotNet.Artifacts/ 59 | 60 | # .NET Core 61 | project.lock.json 62 | project.fragment.lock.json 63 | artifacts/ 64 | 65 | # ASP.NET Scaffolding 66 | ScaffoldingReadMe.txt 67 | 68 | # StyleCop 69 | StyleCopReport.xml 70 | 71 | # Files built by Visual Studio 72 | *_i.c 73 | *_p.c 74 | *_h.h 75 | *.ilk 76 | *.meta 77 | *.obj 78 | *.iobj 79 | *.pch 80 | *.pdb 81 | *.ipdb 82 | *.pgc 83 | *.pgd 84 | *.rsp 85 | *.sbr 86 | *.tlb 87 | *.tli 88 | *.tlh 89 | *.tmp 90 | *.tmp_proj 91 | *_wpftmp.csproj 92 | *.log 93 | *.tlog 94 | *.vspscc 95 | *.vssscc 96 | .builds 97 | *.pidb 98 | *.svclog 99 | *.scc 100 | 101 | # Chutzpah Test files 102 | _Chutzpah* 103 | 104 | # Visual C++ cache files 105 | ipch/ 106 | *.aps 107 | *.ncb 108 | *.opendb 109 | *.opensdf 110 | *.sdf 111 | *.cachefile 112 | *.VC.db 113 | *.VC.VC.opendb 114 | 115 | # Visual Studio profiler 116 | *.psess 117 | *.vsp 118 | *.vspx 119 | *.sap 120 | 121 | # Visual Studio Trace Files 122 | *.e2e 123 | 124 | # TFS 2012 Local Workspace 125 | $tf/ 126 | 127 | # Guidance Automation Toolkit 128 | *.gpState 129 | 130 | # ReSharper is a .NET coding add-in 131 | _ReSharper*/ 132 | *.[Rr]e[Ss]harper 133 | *.DotSettings.user 134 | 135 | # TeamCity is a build add-in 136 | _TeamCity* 137 | 138 | # DotCover is a Code Coverage Tool 139 | *.dotCover 140 | 141 | # AxoCover is a Code Coverage Tool 142 | .axoCover/* 143 | !.axoCover/settings.json 144 | 145 | # Coverlet is a free, cross platform Code Coverage Tool 146 | coverage*.json 147 | coverage*.xml 148 | coverage*.info 149 | 150 | # Visual Studio 
code coverage results 151 | *.coverage 152 | *.coveragexml 153 | 154 | # NCrunch 155 | _NCrunch_* 156 | .*crunch*.local.xml 157 | nCrunchTemp_* 158 | 159 | # MightyMoose 160 | *.mm.* 161 | AutoTest.Net/ 162 | 163 | # Web workbench (sass) 164 | .sass-cache/ 165 | 166 | # Installshield output folder 167 | [Ee]xpress/ 168 | 169 | # DocProject is a documentation generator add-in 170 | DocProject/buildhelp/ 171 | DocProject/Help/*.HxT 172 | DocProject/Help/*.HxC 173 | DocProject/Help/*.hhc 174 | DocProject/Help/*.hhk 175 | DocProject/Help/*.hhp 176 | DocProject/Help/Html2 177 | DocProject/Help/html 178 | 179 | # Click-Once directory 180 | publish/ 181 | 182 | # Publish Web Output 183 | *.[Pp]ublish.xml 184 | *.azurePubxml 185 | # Note: Comment the next line if you want to checkin your web deploy settings, 186 | # but database connection strings (with potential passwords) will be unencrypted 187 | *.pubxml 188 | *.publishproj 189 | 190 | # Microsoft Azure Web App publish settings. Comment the next line if you want to 191 | # checkin your Azure Web App publish settings, but sensitive information contained 192 | # in these scripts will be unencrypted 193 | PublishScripts/ 194 | 195 | # NuGet Packages 196 | *.nupkg 197 | # NuGet Symbol Packages 198 | *.snupkg 199 | # The packages folder can be ignored because of Package Restore 200 | **/[Pp]ackages/* 201 | # except build/, which is used as an MSBuild target. 202 | !**/[Pp]ackages/build/ 203 | # Uncomment if necessary however generally it will be regenerated when needed 204 | #!**/[Pp]ackages/repositories.config 205 | # NuGet v3's project.json files produces more ignorable files 206 | *.nuget.props 207 | *.nuget.targets 208 | 209 | # Microsoft Azure Build Output 210 | csx/ 211 | *.build.csdef 212 | 213 | # Microsoft Azure Emulator 214 | ecf/ 215 | rcf/ 216 | 217 | # Windows Store app package directories and files 218 | AppPackages/ 219 | BundleArtifacts/ 220 | Package.StoreAssociation.xml 221 | _pkginfo.txt 222 | *.appx 223 | *.appxbundle 224 | *.appxupload 225 | 226 | # Visual Studio cache files 227 | # files ending in .cache can be ignored 228 | *.[Cc]ache 229 | # but keep track of directories ending in .cache 230 | !?*.[Cc]ache/ 231 | 232 | # Others 233 | ClientBin/ 234 | ~$* 235 | *~ 236 | *.dbmdl 237 | *.dbproj.schemaview 238 | *.jfm 239 | *.pfx 240 | *.publishsettings 241 | orleans.codegen.cs 242 | 243 | # Including strong name files can present a security risk 244 | # (https://github.com/github/gitignore/pull/2483#issue-259490424) 245 | #*.snk 246 | 247 | # Since there are multiple workflows, uncomment next line to ignore bower_components 248 | # (https://github.com/github/gitignore/pull/1529#issuecomment-104372622) 249 | #bower_components/ 250 | 251 | # RIA/Silverlight projects 252 | Generated_Code/ 253 | 254 | # Backup & report files from converting an old project file 255 | # to a newer Visual Studio version. 
Backup files are not needed, 256 | # because we have git ;-) 257 | _UpgradeReport_Files/ 258 | Backup*/ 259 | UpgradeLog*.XML 260 | UpgradeLog*.htm 261 | ServiceFabricBackup/ 262 | *.rptproj.bak 263 | 264 | # SQL Server files 265 | *.mdf 266 | *.ldf 267 | *.ndf 268 | 269 | # Business Intelligence projects 270 | *.rdl.data 271 | *.bim.layout 272 | *.bim_*.settings 273 | *.rptproj.rsuser 274 | *- [Bb]ackup.rdl 275 | *- [Bb]ackup ([0-9]).rdl 276 | *- [Bb]ackup ([0-9][0-9]).rdl 277 | 278 | # Microsoft Fakes 279 | FakesAssemblies/ 280 | 281 | # GhostDoc plugin setting file 282 | *.GhostDoc.xml 283 | 284 | # Node.js Tools for Visual Studio 285 | .ntvs_analysis.dat 286 | node_modules/ 287 | 288 | # Visual Studio 6 build log 289 | *.plg 290 | 291 | # Visual Studio 6 workspace options file 292 | *.opt 293 | 294 | # Visual Studio 6 auto-generated workspace file (contains which files were open etc.) 295 | *.vbw 296 | 297 | # Visual Studio 6 auto-generated project file (contains which files were open etc.) 298 | *.vbp 299 | 300 | # Visual Studio 6 workspace and project file (working project files containing files to include in project) 301 | *.dsw 302 | *.dsp 303 | 304 | # Visual Studio 6 technical files 305 | *.ncb 306 | *.aps 307 | 308 | # Visual Studio LightSwitch build output 309 | **/*.HTMLClient/GeneratedArtifacts 310 | **/*.DesktopClient/GeneratedArtifacts 311 | **/*.DesktopClient/ModelManifest.xml 312 | **/*.Server/GeneratedArtifacts 313 | **/*.Server/ModelManifest.xml 314 | _Pvt_Extensions 315 | 316 | # Paket dependency manager 317 | .paket/paket.exe 318 | paket-files/ 319 | 320 | # FAKE - F# Make 321 | .fake/ 322 | 323 | # CodeRush personal settings 324 | .cr/personal 325 | 326 | # Python Tools for Visual Studio (PTVS) 327 | __pycache__/ 328 | *.pyc 329 | 330 | # Cake - Uncomment if you are using it 331 | # tools/** 332 | # !tools/packages.config 333 | 334 | # Tabs Studio 335 | *.tss 336 | 337 | # Telerik's JustMock configuration file 338 | *.jmconfig 339 | 340 | # BizTalk build output 341 | *.btp.cs 342 | *.btm.cs 343 | *.odx.cs 344 | *.xsd.cs 345 | 346 | # OpenCover UI analysis results 347 | OpenCover/ 348 | 349 | # Azure Stream Analytics local run output 350 | ASALocalRun/ 351 | 352 | # MSBuild Binary and Structured Log 353 | *.binlog 354 | 355 | # NVidia Nsight GPU debugger configuration file 356 | *.nvuser 357 | 358 | # MFractors (Xamarin productivity tool) working folder 359 | .mfractor/ 360 | 361 | # Local History for Visual Studio 362 | .localhistory/ 363 | 364 | # Visual Studio History (VSHistory) files 365 | .vshistory/ 366 | 367 | # BeatPulse healthcheck temp database 368 | healthchecksdb 369 | 370 | # Backup folder for Package Reference Convert tool in Visual Studio 2017 371 | MigrationBackup/ 372 | 373 | # Ionide (cross platform F# VS Code tools) working folder 374 | .ionide/ 375 | 376 | # Fody - auto-generated XML schema 377 | FodyWeavers.xsd 378 | 379 | # VS Code files for those working on multiple tools 380 | .vscode/* 381 | !.vscode/settings.json 382 | !.vscode/tasks.json 383 | !.vscode/launch.json 384 | !.vscode/extensions.json 385 | *.code-workspace 386 | 387 | # Local History for Visual Studio Code 388 | .history/ 389 | 390 | # Windows Installer files from build outputs 391 | *.cab 392 | *.msi 393 | *.msix 394 | *.msm 395 | *.msp 396 | 397 | # JetBrains Rider 398 | *.sln.iml 399 | -------------------------------------------------------------------------------- /llama.cpp/llamacpp-offload-calculator/GGUFSharp/LICENSE: 
-------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2024 Clock Set Bird 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /llama.cpp/llamacpp-offload-calculator/GGUFSharp/README.md: -------------------------------------------------------------------------------- 1 | # GGUFSharp 2 | A library for reading and writing GGUF files 3 | -------------------------------------------------------------------------------- /llama.cpp/llamacpp-offload-calculator/GGUFSharp/SampleFiles/FilesList.txt: -------------------------------------------------------------------------------- 1 | bartowski/Phi-3.5-mini-instruct-GGUF/Phi-3.5-mini-instruct-IQ2_M.gguf -------------------------------------------------------------------------------- /llama.cpp/llamacpp-offload-calculator/GGUFSharp/SampleFiles/example.gguf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mixa3607/ML-gfx906/d95fce7ed5e14ec9cc7b801c668696194a929cda/llama.cpp/llamacpp-offload-calculator/GGUFSharp/SampleFiles/example.gguf -------------------------------------------------------------------------------- /llama.cpp/llamacpp-offload-calculator/GGUFSharp/SampleFiles/genTestFile.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | import sys 3 | from pathlib import Path 4 | 5 | import numpy as np 6 | 7 | from gguf import GGUFWriter # noqa: E402 8 | 9 | 10 | # Example usage: 11 | def writer_example() -> None: 12 | # Example usage with a file 13 | gguf_writer = GGUFWriter("example.gguf", "llama") 14 | 15 | gguf_writer.add_block_count(12) 16 | gguf_writer.add_uint32("answer", 42) # Write a 32-bit integer 17 | gguf_writer.add_float32("answer_in_float", 42.0) # Write a 32-bit float 18 | gguf_writer.add_custom_alignment(64) 19 | 20 | tensor1 = np.ones((32,), dtype=np.float32) * 100.0 21 | tensor2 = np.ones((64,), dtype=np.float32) * 101.0 22 | tensor3 = np.ones((96,), dtype=np.float32) * 102.0 23 | 24 | gguf_writer.add_tensor("tensor1", tensor1) 25 | gguf_writer.add_tensor("tensor2", tensor2) 26 | gguf_writer.add_tensor("tensor3", tensor3) 27 | 28 | gguf_writer.write_header_to_file() 29 | gguf_writer.write_kv_data_to_file() 30 | gguf_writer.write_tensors_to_file() 31 | 32 | gguf_writer.close() 33 | 34 | 35 | if __name__ == '__main__': 36 |
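    # descriptive note (added comment, not in upstream gguf example): this regenerates
    # SampleFiles/example.gguf; GGUFSharp.Test/BasicFeatureTest.cs asserts exactly the
    # metadata keys (answer, answer_in_float) and tensor values (tensor1 == 100.0) above.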
writer_example() 37 | -------------------------------------------------------------------------------- /llama.cpp/llamacpp-offload-calculator/GGUFSharp/src/GGUFSharp/GGUFSharp.Test/BasicFeatureTest.cs: -------------------------------------------------------------------------------- 1 | using System.Diagnostics; 2 | using System.Linq; 3 | using System.Runtime.InteropServices; 4 | 5 | namespace GGUFSharp.Test 6 | { 7 | [TestClass] 8 | [DoNotParallelize] 9 | public sealed class BasicFeatureTest 10 | { 11 | private string GGUFFilePath = @"example.gguf"; 12 | private string[] example_meta = 13 | { 14 | "general.architecture:llama", 15 | "llama.block_count:GGUF_METADATA_VALUE_TYPE_UINT32", 16 | "answer:GGUF_METADATA_VALUE_TYPE_UINT32", 17 | "answer_in_float:GGUF_METADATA_VALUE_TYPE_FLOAT32", 18 | "general.alignment:GGUF_METADATA_VALUE_TYPE_UINT32" 19 | }; 20 | private string[] example_tensorInfo = 21 | { 22 | "tensor1", 23 | "tensor2", 24 | "tensor3" 25 | }; 26 | [TestMethod] 27 | public void ReadBasicInfo() 28 | { 29 | GGUFReader reader = new GGUFReader(); 30 | var f = reader.Read(GGUFFilePath); 31 | Assert.IsTrue(f.MetaItems.Select(x=>x.ToString()).SequenceEqual(example_meta)); 32 | Assert.IsTrue(f.TensorInfos.Select(x => x.Name).SequenceEqual(example_tensorInfo)); 33 | } 34 | [TestMethod] 35 | public void ReadTensorData() 36 | { 37 | GGUFReader reader = new GGUFReader(); 38 | var f=reader.Read(GGUFFilePath); 39 | using var t1=reader.ReadTensorData(f,f.TensorInfos.FirstOrDefault()); 40 | var data = t1.Memory.Slice(0,(int)f.TensorInfos.First().Size); 41 | var dataF=MemoryMarshal.Cast<byte, float>(data.Span); 42 | foreach (var item in dataF) 43 | { 44 | Assert.AreEqual(100, item); 45 | } 46 | } 47 | 48 | } 49 | } 50 | -------------------------------------------------------------------------------- /llama.cpp/llamacpp-offload-calculator/GGUFSharp/src/GGUFSharp/GGUFSharp.Test/GGUFSharp.Test.csproj: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | net8.0 5 | latest 6 | enable 7 | enable 8 | 9 | 10 | 11 | 12 | PreserveNewest 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | -------------------------------------------------------------------------------- /llama.cpp/llamacpp-offload-calculator/GGUFSharp/src/GGUFSharp/GGUFSharp.Test/MSTestSettings.cs: -------------------------------------------------------------------------------- 1 | [assembly: Parallelize(Scope = ExecutionScope.MethodLevel)] 2 | -------------------------------------------------------------------------------- /llama.cpp/llamacpp-offload-calculator/GGUFSharp/src/GGUFSharp/GGUFSharp.sln: -------------------------------------------------------------------------------- 1 |  2 | Microsoft Visual Studio Solution File, Format Version 12.00 3 | # Visual Studio Version 17 4 | VisualStudioVersion = 17.12.35506.116 5 | MinimumVisualStudioVersion = 10.0.40219.1 6 | Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "GGUFSharp", "GGUFSharp\GGUFSharp.csproj", "{3A5E61DA-70F6-4132-BCA3-11AB3AFA4281}" 7 | EndProject 8 | Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "GGUFSharp.Test", "GGUFSharp.Test\GGUFSharp.Test.csproj", "{31135C56-06FB-42A0-B098-2A166A437A8D}" 9 | EndProject 10 | Global 11 | GlobalSection(SolutionConfigurationPlatforms) = preSolution 12 | Debug|Any CPU = Debug|Any CPU 13 | Release|Any CPU = Release|Any CPU 14 | EndGlobalSection 15 | GlobalSection(ProjectConfigurationPlatforms) = postSolution 16 | {3A5E61DA-70F6-4132-BCA3-11AB3AFA4281}.Debug|Any
CPU.ActiveCfg = Debug|Any CPU 17 | {3A5E61DA-70F6-4132-BCA3-11AB3AFA4281}.Debug|Any CPU.Build.0 = Debug|Any CPU 18 | {3A5E61DA-70F6-4132-BCA3-11AB3AFA4281}.Release|Any CPU.ActiveCfg = Release|Any CPU 19 | {3A5E61DA-70F6-4132-BCA3-11AB3AFA4281}.Release|Any CPU.Build.0 = Release|Any CPU 20 | {31135C56-06FB-42A0-B098-2A166A437A8D}.Debug|Any CPU.ActiveCfg = Debug|Any CPU 21 | {31135C56-06FB-42A0-B098-2A166A437A8D}.Debug|Any CPU.Build.0 = Debug|Any CPU 22 | {31135C56-06FB-42A0-B098-2A166A437A8D}.Release|Any CPU.ActiveCfg = Release|Any CPU 23 | {31135C56-06FB-42A0-B098-2A166A437A8D}.Release|Any CPU.Build.0 = Release|Any CPU 24 | EndGlobalSection 25 | GlobalSection(SolutionProperties) = preSolution 26 | HideSolutionNode = FALSE 27 | EndGlobalSection 28 | EndGlobal 29 | -------------------------------------------------------------------------------- /llama.cpp/llamacpp-offload-calculator/GGUFSharp/src/GGUFSharp/GGUFSharp/GGUFDataTypeEnum.cs: -------------------------------------------------------------------------------- 1 | using System; 2 | using System.Collections.Generic; 3 | using System.Text; 4 | 5 | namespace GGUFSharp 6 | { 7 | public enum GGUFDataTypeEnum : uint 8 | { 9 | // The value is an 8-bit unsigned integer. 10 | GGUF_METADATA_VALUE_TYPE_UINT8 = 0, 11 | // The value is an 8-bit signed integer. 12 | GGUF_METADATA_VALUE_TYPE_INT8 = 1, 13 | // The value is a 16-bit unsigned little-endian integer. 14 | GGUF_METADATA_VALUE_TYPE_UINT16 = 2, 15 | // The value is a 16-bit signed little-endian integer. 16 | GGUF_METADATA_VALUE_TYPE_INT16 = 3, 17 | // The value is a 32-bit unsigned little-endian integer. 18 | GGUF_METADATA_VALUE_TYPE_UINT32 = 4, 19 | // The value is a 32-bit signed little-endian integer. 20 | GGUF_METADATA_VALUE_TYPE_INT32 = 5, 21 | // The value is a 32-bit IEEE754 floating point number. 22 | GGUF_METADATA_VALUE_TYPE_FLOAT32 = 6, 23 | // The value is a boolean. 24 | // 1-byte value where 0 is false and 1 is true. 25 | // Anything else is invalid, and should be treated as either the model being invalid or the reader being buggy. 26 | GGUF_METADATA_VALUE_TYPE_BOOL = 7, 27 | // The value is a UTF-8 non-null-terminated string, with length prepended. 28 | GGUF_METADATA_VALUE_TYPE_STRING = 8, 29 | // The value is an array of other values, with the length and type prepended. 30 | /// 31 | // Arrays can be nested, and the length of the array is the number of elements in the array, not the number of bytes. 32 | GGUF_METADATA_VALUE_TYPE_ARRAY = 9, 33 | // The value is a 64-bit unsigned little-endian integer. 34 | GGUF_METADATA_VALUE_TYPE_UINT64 = 10, 35 | // The value is a 64-bit signed little-endian integer. 36 | GGUF_METADATA_VALUE_TYPE_INT64 = 11, 37 | // The value is a 64-bit IEEE754 floating point number.
38 | GGUF_METADATA_VALUE_TYPE_FLOAT64 = 12, 39 | } 40 | public static class GGUFDataTypeEnumHelper 41 | { 42 | public static int GetDataTypeSize(this GGUFDataTypeEnum dataType) => dataType switch 43 | { 44 | GGUFDataTypeEnum.GGUF_METADATA_VALUE_TYPE_UINT8 => 1, 45 | GGUFDataTypeEnum.GGUF_METADATA_VALUE_TYPE_INT8 => 1, 46 | GGUFDataTypeEnum.GGUF_METADATA_VALUE_TYPE_UINT16 => 2, 47 | GGUFDataTypeEnum.GGUF_METADATA_VALUE_TYPE_INT16 => 2, 48 | GGUFDataTypeEnum.GGUF_METADATA_VALUE_TYPE_UINT32 => 4, 49 | GGUFDataTypeEnum.GGUF_METADATA_VALUE_TYPE_INT32 => 4, 50 | GGUFDataTypeEnum.GGUF_METADATA_VALUE_TYPE_FLOAT32 => 4, 51 | GGUFDataTypeEnum.GGUF_METADATA_VALUE_TYPE_BOOL => 1, 52 | GGUFDataTypeEnum.GGUF_METADATA_VALUE_TYPE_STRING => -1, 53 | GGUFDataTypeEnum.GGUF_METADATA_VALUE_TYPE_ARRAY => -1, 54 | GGUFDataTypeEnum.GGUF_METADATA_VALUE_TYPE_UINT64 => 8, 55 | GGUFDataTypeEnum.GGUF_METADATA_VALUE_TYPE_INT64 => 8, 56 | GGUFDataTypeEnum.GGUF_METADATA_VALUE_TYPE_FLOAT64 => 8 57 | }; 58 | } 59 | 60 | 61 | } 62 | -------------------------------------------------------------------------------- /llama.cpp/llamacpp-offload-calculator/GGUFSharp/src/GGUFSharp/GGUFSharp/GGUFFile.cs: -------------------------------------------------------------------------------- 1 | using System; 2 | using System.Collections.Generic; 3 | using System.Text; 4 | 5 | namespace GGUFSharp 6 | { 7 | public class GGUFFile 8 | { 9 | public string FilePath { get; set; } 10 | public uint Version { get; set; } 11 | 12 | public ulong DataStartOffset { get; set; } 13 | public List<GGUFTensorInfo> TensorInfos { get; set; } 14 | public List<GGUFMetaItem> MetaItems { get; set; } 15 | } 16 | } 17 | -------------------------------------------------------------------------------- /llama.cpp/llamacpp-offload-calculator/GGUFSharp/src/GGUFSharp/GGUFSharp/GGUFHeader.cs: -------------------------------------------------------------------------------- 1 | using System; 2 | using System.Collections.Generic; 3 | using System.Runtime.InteropServices; 4 | using System.Text; 5 | 6 | namespace GGUFSharp 7 | { 8 | //[StructLayout(LayoutKind.Explicit)] 9 | public class GGUFHeader 10 | { 11 | //[FieldOffset(0)] 12 | public uint MagicCode; 13 | //[FieldOffset(4)] 14 | public uint Version; 15 | 16 | //[FieldOffset(8)] 17 | public ulong TensorCount; 18 | 19 | //[FieldOffset(24)] 20 | public ulong MetaKVCount; 21 | } 22 | } 23 | -------------------------------------------------------------------------------- /llama.cpp/llamacpp-offload-calculator/GGUFSharp/src/GGUFSharp/GGUFSharp/GGUFMetaItem.cs: -------------------------------------------------------------------------------- 1 | using System; 2 | using System.Collections.Generic; 3 | using System.Linq; 4 | using System.Text; 5 | 6 | namespace GGUFSharp 7 | { 8 | public class GGUFMetaItem 9 | { 10 | public GGUFDataTypeEnum DataType { get; set; } 11 | public GGUFDataTypeEnum?
ArrayElementType { get; set; } 12 | public string Name { get; set; } 13 | public byte[] RawData { get; set; } 14 | public string[] ArrayStrings { get; set; } 15 | public override string ToString() 16 | { 17 | StringBuilder sb = new StringBuilder($"{Name}:"); 18 | switch(DataType) 19 | { 20 | case GGUFDataTypeEnum.GGUF_METADATA_VALUE_TYPE_STRING: 21 | sb.Append(Encoding.UTF8.GetString(RawData)); 22 | break; 23 | case GGUFDataTypeEnum.GGUF_METADATA_VALUE_TYPE_ARRAY: 24 | if (ArrayElementType==GGUFDataTypeEnum.GGUF_METADATA_VALUE_TYPE_STRING) 25 | { 26 | if (ArrayStrings.Length>10) 27 | { 28 | sb.Append($"{string.Join(", ", ArrayStrings.Take(10))}..."); 29 | } 30 | else 31 | { 32 | sb.Append(string.Join(", ", ArrayStrings)); 33 | } 34 | } 35 | else 36 | { 37 | sb.Append($"[{Enum.GetName(typeof(GGUFDataTypeEnum), ArrayElementType)}]"); 38 | } 39 | break; 40 | default: 41 | sb.Append(Enum.GetName(typeof(GGUFDataTypeEnum), DataType)); 42 | break; 43 | }; 44 | return sb.ToString(); 45 | } 46 | } 47 | } 48 | -------------------------------------------------------------------------------- /llama.cpp/llamacpp-offload-calculator/GGUFSharp/src/GGUFSharp/GGUFSharp/GGUFReader.cs: -------------------------------------------------------------------------------- 1 | using System; 2 | using System.Buffers; 3 | using System.Collections.Generic; 4 | using System.Diagnostics; 5 | using System.IO; 6 | using System.IO.MemoryMappedFiles; 7 | using System.Linq; 8 | using System.Runtime.InteropServices; 9 | using System.Text; 10 | 11 | namespace GGUFSharp 12 | { 13 | public class GGUFReader 14 | { 15 | public GGUFFile Read(string filePath) 16 | { 17 | using var fs = MemoryMappedFile.CreateFromFile(filePath); 18 | using var s = fs.CreateViewStream(0, 0, MemoryMappedFileAccess.Read); 19 | var header = readHeader(s); 20 | //using var meta = fs.CreateViewStream(24, 100*1024 * 1024, MemoryMappedFileAccess.Read); 21 | var d = readMetaData(s, header.MetaKVCount).ToList(); 22 | 23 | //foreach (var item in d) 24 | //{ 25 | // Debug.WriteLine($"{item.Name}, {item.ToString()}"); 26 | //} 27 | 28 | var t = readTensorData(s, header.TensorCount).ToList(); 29 | ulong alignment = 32;//TODO: read align from header 30 | 31 | 32 | ulong startOffset = (ulong)s.Position +(alignment-((ulong)s.Position % alignment))% alignment; 33 | var sortedItems = t.OrderBy(x => x.Offset).ToList(); 34 | for (var i = 0; i < sortedItems.Count - 1; i++) 35 | { 36 | sortedItems[i].Size = sortedItems[i + 1].Offset - sortedItems[i].Offset; 37 | } 38 | var last = sortedItems.Last(); 39 | last.Size = (ulong)new FileInfo(filePath).Length - last.Offset-startOffset; 40 | 41 | 42 | //foreach (var item in t) 43 | //{ 44 | // Debug.WriteLine($"[Tensor]{item.Name},{item.DimensionCount},{item.TensorType.ToString()},{item.Offset}"); 45 | //} 46 | return new GGUFFile() 47 | { 48 | FilePath = filePath, 49 | MetaItems = d, 50 | TensorInfos = sortedItems, 51 | Version = header.Version, 52 | DataStartOffset = startOffset, 53 | }; 54 | 55 | } 56 | 57 | public IMemoryOwner<byte> ReadTensorData(GGUFFile file,GGUFTensorInfo tensor) 58 | { 59 | using var fs = MemoryMappedFile.CreateFromFile(file.FilePath); 60 | using var s = fs.CreateViewStream((long)(file.DataStartOffset+tensor.Offset), (long)tensor.Size, MemoryMappedFileAccess.Read); 61 | if (tensor.Size>int.MaxValue) 62 | { 63 | throw new NotSupportedException("Not supported for now; tensor size should not be larger than int32.MaxValue"); 64 | } 65 | var om = MemoryPool<byte>.Shared.Rent((int)tensor.Size); 66 | //BinaryReader br=new
BinaryReader(s); 67 | s.Read(om.Memory.Span); 68 | return om; 69 | } 70 | 71 | 72 | private GGUFHeader readHeader(Stream header) 73 | { 74 | using BinaryReader br = new BinaryReader(header, Encoding.UTF8, true); 75 | GGUFHeader result = new GGUFHeader(); 76 | result.MagicCode = br.ReadUInt32(); 77 | if (result.MagicCode != 0x46554747) // "GGUF" in little-endian byte order 78 | { 79 | throw new InvalidOperationException("Invalid magic code"); 80 | } 81 | result.Version = br.ReadUInt32(); 82 | result.TensorCount = br.ReadUInt64(); 83 | result.MetaKVCount = br.ReadUInt64(); 84 | return result; 85 | } 86 | 87 | private IEnumerable<GGUFMetaItem> readMetaData(Stream meta, ulong MetaCount) 88 | { 89 | using BinaryReader br = new BinaryReader(meta, Encoding.UTF8, true); 90 | for (ulong i = 0; i < MetaCount; i++) 91 | { 92 | 93 | GGUFMetaItem result = new GGUFMetaItem(); 94 | result.Name = readString(br); 95 | result.DataType = (GGUFDataTypeEnum)br.ReadUInt32(); 96 | int size; 97 | switch (result.DataType) 98 | { 99 | case GGUFDataTypeEnum.GGUF_METADATA_VALUE_TYPE_STRING: 100 | size = (int)br.ReadUInt64(); 101 | break; 102 | case GGUFDataTypeEnum.GGUF_METADATA_VALUE_TYPE_ARRAY: 103 | GGUFDataTypeEnum elementType = (GGUFDataTypeEnum)br.ReadUInt32(); 104 | 105 | if (elementType == GGUFDataTypeEnum.GGUF_METADATA_VALUE_TYPE_ARRAY) 106 | { 107 | throw new NotSupportedException("Nested array is not supported"); 108 | } 109 | ulong elementCount = br.ReadUInt64(); 110 | if (elementType == GGUFDataTypeEnum.GGUF_METADATA_VALUE_TYPE_STRING) 111 | { 112 | result.ArrayStrings = new string[elementCount]; 113 | result.ArrayElementType = GGUFDataTypeEnum.GGUF_METADATA_VALUE_TYPE_STRING; 114 | for (ulong j = 0; j < elementCount; j++) 115 | { 116 | result.ArrayStrings[j] = readString(br); 117 | } 118 | size = 0; 119 | } 120 | else 121 | { 122 | result.ArrayElementType = elementType; 123 | size = (elementType.GetDataTypeSize() * (int)elementCount); 124 | } 125 | 126 | break; 127 | default: 128 | size = result.DataType.GetDataTypeSize(); 129 | break; 130 | } 131 | if (size > 0) 132 | { 133 | result.RawData = br.ReadBytes(size); 134 | } 135 | 136 | 137 | yield return result; 138 | } 139 | 140 | } 141 | private IEnumerable<GGUFTensorInfo> readTensorData(Stream stream, ulong tensorCount) 142 | { 143 | using BinaryReader br = new BinaryReader(stream, Encoding.UTF8, true); 144 | for (ulong i = 0; i < tensorCount; i++) 145 | { 146 | GGUFTensorInfo result = new GGUFTensorInfo(); 147 | result.Name = readString(br); 148 | result.DimensionCount = br.ReadUInt32(); 149 | result.Dimensions = readArray<UInt64>(br, result.DimensionCount).ToArray(); 150 | result.TensorType = (GGUFTensorType)br.ReadUInt32(); 151 | result.Offset = br.ReadUInt64(); 152 | yield return result; 153 | } 154 | } 155 | 156 | private string readString(BinaryReader reader) 157 | { 158 | var l = reader.ReadUInt64(); 159 | var x = reader.ReadBytes((int)l); 160 | return System.Text.Encoding.UTF8.GetString(x); 161 | } 162 | 163 | private Span<T> readArray<T>(BinaryReader reader, UInt64 elementCount = 0) where T : struct 164 | { 165 | if (elementCount == 0) 166 | { 167 | elementCount = reader.ReadUInt64(); 168 | } 169 | int length = Marshal.SizeOf<T>() * (int)elementCount; 170 | byte[] buffer = new byte[length]; 171 | reader.Read(buffer, 0, length); 172 | return MemoryMarshal.Cast<byte, T>(buffer); 173 | } 174 | } 175 | } 176 | -------------------------------------------------------------------------------- /llama.cpp/llamacpp-offload-calculator/GGUFSharp/src/GGUFSharp/GGUFSharp/GGUFSharp.csproj:
-------------------------------------------------------------------------------- 1 |  2 | 3 | 4 | netstandard2.1 5 | enable 6 | 7 | 8 | 9 | -------------------------------------------------------------------------------- /llama.cpp/llamacpp-offload-calculator/GGUFSharp/src/GGUFSharp/GGUFSharp/GGUFStreamReader.cs: -------------------------------------------------------------------------------- 1 | using System; 2 | using System.Collections.Generic; 3 | using System.IO; 4 | using System.Text; 5 | 6 | namespace GGUFSharp 7 | { 8 | internal class GGUFStreamReader : BinaryReader 9 | { 10 | public GGUFStreamReader(Stream stream) : base(stream) 11 | { 12 | } 13 | 14 | } 15 | } 16 | -------------------------------------------------------------------------------- /llama.cpp/llamacpp-offload-calculator/GGUFSharp/src/GGUFSharp/GGUFSharp/GGUFTensorInfo.cs: -------------------------------------------------------------------------------- 1 | using System; 2 | using System.Collections.Generic; 3 | using System.Linq; 4 | using System.Text; 5 | 6 | namespace GGUFSharp 7 | { 8 | public class GGUFTensorInfo 9 | { 10 | public string Name { get; set; } 11 | public UInt32 DimensionCount { get; set; } 12 | public UInt64[] Dimensions { get; set; } 13 | public GGUFTensorType TensorType { get; set; } 14 | public UInt64 Offset { get; set; } 15 | public UInt64 Size { get; set; } 16 | 17 | 18 | } 19 | } 20 | -------------------------------------------------------------------------------- /llama.cpp/llamacpp-offload-calculator/GGUFSharp/src/GGUFSharp/GGUFSharp/GGUFTensorType.cs: -------------------------------------------------------------------------------- 1 | using System; 2 | using System.Collections.Generic; 3 | using System.Text; 4 | 5 | namespace GGUFSharp 6 | { 7 | public enum GGUFTensorType:UInt32 8 | { 9 | GGML_TYPE_F32 = 0, 10 | GGML_TYPE_F16 = 1, 11 | GGML_TYPE_Q4_0 = 2, 12 | GGML_TYPE_Q4_1 = 3, 13 | // GGML_TYPE_Q4_2 = 4, support has been removed 14 | // GGML_TYPE_Q4_3 = 5, support has been removed 15 | GGML_TYPE_Q5_0 = 6, 16 | GGML_TYPE_Q5_1 = 7, 17 | GGML_TYPE_Q8_0 = 8, 18 | GGML_TYPE_Q8_1 = 9, 19 | GGML_TYPE_Q2_K = 10, 20 | GGML_TYPE_Q3_K = 11, 21 | GGML_TYPE_Q4_K = 12, 22 | GGML_TYPE_Q5_K = 13, 23 | GGML_TYPE_Q6_K = 14, 24 | GGML_TYPE_Q8_K = 15, 25 | GGML_TYPE_IQ2_XXS = 16, 26 | GGML_TYPE_IQ2_XS = 17, 27 | GGML_TYPE_IQ3_XXS = 18, 28 | GGML_TYPE_IQ1_S = 19, 29 | GGML_TYPE_IQ4_NL = 20, 30 | GGML_TYPE_IQ3_S = 21, 31 | GGML_TYPE_IQ2_S = 22, 32 | GGML_TYPE_IQ4_XS = 23, 33 | GGML_TYPE_I8 = 24, 34 | GGML_TYPE_I16 = 25, 35 | GGML_TYPE_I32 = 26, 36 | GGML_TYPE_I64 = 27, 37 | GGML_TYPE_F64 = 28, 38 | GGML_TYPE_IQ1_M = 29, 39 | GGML_TYPE_COUNT, 40 | } 41 | } 42 | -------------------------------------------------------------------------------- /llama.cpp/llamacpp-offload-calculator/readme.md: -------------------------------------------------------------------------------- 1 | # llama.cpp tensors offload calculator 2 | 3 | ## Conf 4 | 5 | Supported config formats: 6 | - json 7 | - yaml 8 | - env variables 9 | - cmd args 10 | 11 | Example: 12 | ### Base config `appsettings.yaml` 13 | ```yaml 14 | # print -ot config for running from the command line 15 | PrintCmdConfig: true 16 | # print yaml for https://github.com/mixa3607/charts/tree/master/charts/llamacpp helm chart 17 | PrintHelmCharConfig: true 18 | # print size per tensor 19 | PrintTensorsSize: false 20 | 21 | # list from 'llama-server --list-devices' + CPU 22 | Devices: # 1+ gpu and 1 cpu 23 | ROCm0: # name from llama.cpp output 24 | Id: 1 # used for ordering 25 |
Type: GPU # GPU/CPU 26 | TotalSizeMb: 32768 # memory megabytes 27 | PciBus: 0000:01:00.0 # not used 28 | ROCm1: 29 | Id: 2 30 | Type: GPU 31 | TotalSizeMb: 32768 32 | PciBus: 0000:02:00.0 33 | CPU: 34 | Id: 3 35 | Type: CPU 36 | TotalSizeMb: 131072 37 | ReservedMemory: 0 38 | 39 | # offloading rules 40 | OffloadRules: 41 | ffn_gate_exps: # name 42 | Id: 1 # used for ordering 43 | Regex: '^blk\.\d+\.ffn_gate_exps.weight' # regex 44 | Priority: 10 # lower priority will be offloaded earlier 45 | ffn_up_exps: 46 | Id: 2 47 | Regex: '^blk\.\d+\.ffn_up_exps.weight' 48 | Priority: 20 49 | ffn_down_exps: 50 | Id: 3 51 | Regex: '^blk\.\d+\.ffn_down_exps.weight' 52 | Priority: 20 53 | ``` 54 | 55 | ### Per model config `appsettings.noname-model.yaml` 56 | ```yaml 57 | GgufFile: "/path/to/noname.gguf" 58 | 59 | Devices: 60 | ROCm0: 61 | ReservedMemoryMb: 10240 # reserved memory for cache, ctx, etc 62 | #LayersPortion: 50 # layers percentage can be set manually 63 | ROCm1: 64 | ReservedMemoryMb: 10240 65 | #LayersPortion: 50 66 | ``` 67 | 68 | For the first run, specify an increased ReservedMemoryMb with which llama.cpp is guaranteed to work. 69 | ```shell 70 | $ cd ArkProjects.LlamaOffloadCalc 71 | $ dotnet run -- -e noname-model 72 | Reading /path/to/noname.gguf 73 | Move blk.0.ffn_down_exps.weight (537 Mb) from ROCm0 to CPU 74 | Move blk.0.ffn_up_exps.weight (537 Mb) from ROCm0 to CPU 75 | Move blk.1.ffn_down_exps.weight (537 Mb) from ROCm0 to CPU 76 | Move blk.1.ffn_up_exps.weight (537 Mb) from ROCm0 to CPU 77 | Move blk.2.ffn_down_exps.weight (537 Mb) from ROCm0 to CPU 78 | Move blk.2.ffn_up_exps.weight (537 Mb) from ROCm0 to CPU 79 | Move blk.3.ffn_down_exps.weight (537 Mb) from ROCm0 to CPU 80 | Move blk.3.ffn_up_exps.weight (537 Mb) from ROCm0 to CPU 81 | Move blk.4.ffn_down_exps.weight (537 Mb) from ROCm0 to CPU 82 | Move blk.4.ffn_up_exps.weight (537 Mb) from ROCm0 to CPU 83 | Move blk.5.ffn_down_exps.weight (537 Mb) from ROCm0 to CPU 84 | Move blk.5.ffn_up_exps.weight (537 Mb) from ROCm0 to CPU 85 | Move blk.6.ffn_down_exps.weight (537 Mb) from ROCm0 to CPU 86 | Move blk.6.ffn_up_exps.weight (537 Mb) from ROCm0 to CPU 87 | Move blk.7.ffn_down_exps.weight (537 Mb) from ROCm0 to CPU 88 | Move blk.18.ffn_down_exps.weight (537 Mb) from ROCm1 to CPU 89 | Move blk.18.ffn_up_exps.weight (537 Mb) from ROCm1 to CPU 90 | Move blk.19.ffn_down_exps.weight (537 Mb) from ROCm1 to CPU 91 | Move blk.19.ffn_up_exps.weight (537 Mb) from ROCm1 to CPU 92 | Move blk.20.ffn_down_exps.weight (537 Mb) from ROCm1 to CPU 93 | Move blk.20.ffn_up_exps.weight (537 Mb) from ROCm1 to CPU 94 | Move blk.21.ffn_down_exps.weight (537 Mb) from ROCm1 to CPU 95 | Move blk.21.ffn_up_exps.weight (537 Mb) from ROCm1 to CPU 96 | Move blk.22.ffn_down_exps.weight (537 Mb) from ROCm1 to CPU 97 | Move blk.22.ffn_up_exps.weight (537 Mb) from ROCm1 to CPU 98 | Move blk.23.ffn_down_exps.weight (537 Mb) from ROCm1 to CPU 99 | Move blk.23.ffn_up_exps.weight (537 Mb) from ROCm1 to CPU 100 | Move blk.24.ffn_down_exps.weight (537 Mb) from ROCm1 to CPU 101 | Move blk.24.ffn_up_exps.weight (537 Mb) from ROCm1 to CPU 102 | Move blk.25.ffn_down_exps.weight (537 Mb) from ROCm1 to CPU 103 | ======= Device memory usage 104 | ROCm0 32231 Mb of 32768 Mb (21991) 105 | ROCm1 32231 Mb of 32768 Mb (21991) 106 | CPU 16136 Mb of 131072 Mb (16136) 107 | 108 | ======= Tensors offload result 109 | Offload ffn_gate_exps (10) 0 (ROCm0 = 18, ROCm1 = 18, CPU = 0 ) of 36 110 | Offload ffn_up_exps (20) 0 (ROCm0 = 11, ROCm1 = 11, CPU = 14) of 36 111 |
Offload ffn_down_exps (20) 0 (ROCm0 = 10, ROCm1 = 10, CPU = 16) of 36 112 | 113 | ======= Helm chart config 114 | extraEnvVars: 115 | - name: LLAMA_ARG_MAIN_GPU 116 | value: '0' 117 | - name: LLAMA_ARG_TENSOR_SPLIT 118 | value: '18,18,0' 119 | 120 | modelTensorsOverride: 121 | - name: CPU 122 | tensors: 123 | - blk.0.ffn_down_exps.weight 124 | - blk.0.ffn_up_exps.weight 125 | - blk.1.ffn_down_exps.weight 126 | - blk.1.ffn_up_exps.weight 127 | - blk.2.ffn_down_exps.weight 128 | - blk.2.ffn_up_exps.weight 129 | - blk.3.ffn_down_exps.weight 130 | - blk.3.ffn_up_exps.weight 131 | - blk.4.ffn_down_exps.weight 132 | - blk.4.ffn_up_exps.weight 133 | - blk.5.ffn_down_exps.weight 134 | - blk.5.ffn_up_exps.weight 135 | - blk.6.ffn_down_exps.weight 136 | - blk.6.ffn_up_exps.weight 137 | - blk.7.ffn_down_exps.weight 138 | - blk.18.ffn_down_exps.weight 139 | - blk.18.ffn_up_exps.weight 140 | - blk.19.ffn_down_exps.weight 141 | - blk.19.ffn_up_exps.weight 142 | - blk.20.ffn_down_exps.weight 143 | - blk.20.ffn_up_exps.weight 144 | - blk.21.ffn_down_exps.weight 145 | - blk.21.ffn_up_exps.weight 146 | - blk.22.ffn_down_exps.weight 147 | - blk.22.ffn_up_exps.weight 148 | - blk.23.ffn_down_exps.weight 149 | - blk.23.ffn_up_exps.weight 150 | - blk.24.ffn_down_exps.weight 151 | - blk.24.ffn_up_exps.weight 152 | - blk.25.ffn_down_exps.weight 153 | 154 | ======= CMD config 155 | --main-gpu 0 --tensor-split "18,18,0" --override-tensor "(blk.0.ffn_down_exps.weight|blk.0.ffn_up_exps.weight|blk.1.ffn_down_exps.weight|blk.1.ffn_up_exps.weight|blk.2.ffn_down_exps.weight|blk.2.ffn_up_exps.weight|blk.3.ffn_down_exps.weight|blk.3.ffn_up_exps.weight|blk.4.ffn_down_exps.weight|blk.4.ffn_up_exps.weight|blk.5.ffn_down_exps.weight|blk.5.ffn_up_exps.weight|blk.6.ffn_down_exps.weight|blk.6.ffn_up_exps.weight|blk.7.ffn_down_exps.weight|blk.18.ffn_down_exps.weight|blk.18.ffn_up_exps.weight|blk.19.ffn_down_exps.weight|blk.19.ffn_up_exps.weight|blk.20.ffn_down_exps.weight|blk.20.ffn_up_exps.weight|blk.21.ffn_down_exps.weight|blk.21.ffn_up_exps.weight|blk.22.ffn_down_exps.weight|blk.22.ffn_up_exps.weight|blk.23.ffn_down_exps.weight|blk.23.ffn_up_exps.weight|blk.24.ffn_down_exps.weight|blk.24.ffn_up_exps.weight|blk.25.ffn_down_exps.weight)=CPU" 156 | ``` 157 | 158 | ``` 159 | ┌┌┤ Memory Usage ├──────────────────────────────────────────────────────────┐ │ ┌┤ Memory Usage ├──────────────────────────────────────────────────────────┐ 160 | ││ VRAM: [ 30393 / 32752 MiB ] GTT: [ 14 / 48256 MiB ] │ │ │ VRAM: [ 31079 / 32752 MiB ] GTT: [ 14 / 48256 MiB ] │ 161 | └└──────────────────────────────────────────────────────────────────────────┘ │ └──────────────────────────────────────────────────────────────────────────┘ 162 | ``` 163 | 164 | After the stress test, you can reduce ReservedMemoryMb by the amount of free memory from the first run. 
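As a rough rule (an estimate derived from the numbers above, not an exact formula), subtract the free VRAM observed under load from the previous reservation and keep some safety margin:

```
# first run: ReservedMemoryMb = 10240, ROCm1 peaked at 31079 / 32752 MiB VRAM
# free under load  ≈ 32752 - 31079 ≈ 1673 Mb
# new ReservedMemoryMb ≈ 10240 - 1673 ≈ 8500 (rounded for margin)
```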
165 | 166 | ```yaml 167 | GgufFile: "/path/to/noname.gguf" 168 | Devices: 169 | ROCm0: 170 | ReservedMemoryMb: 9300 171 | ROCm1: 172 | ReservedMemoryMb: 8500 173 | ``` 174 | 175 | ```shell 176 | $ cd ArkProjects.LlamaOffloadCalc 177 | $ dotnet run -- -e noname-model 178 | Reading /path/to/noname.gguf 179 | Move blk.0.ffn_down_exps.weight (537 Mb) from ROCm0 to CPU 180 | Move blk.0.ffn_up_exps.weight (537 Mb) from ROCm0 to CPU 181 | Move blk.1.ffn_down_exps.weight (537 Mb) from ROCm0 to CPU 182 | Move blk.1.ffn_up_exps.weight (537 Mb) from ROCm0 to CPU 183 | Move blk.2.ffn_down_exps.weight (537 Mb) from ROCm0 to CPU 184 | Move blk.2.ffn_up_exps.weight (537 Mb) from ROCm0 to CPU 185 | Move blk.3.ffn_down_exps.weight (537 Mb) from ROCm0 to CPU 186 | Move blk.3.ffn_up_exps.weight (537 Mb) from ROCm0 to CPU 187 | Move blk.4.ffn_down_exps.weight (537 Mb) from ROCm0 to CPU 188 | Move blk.4.ffn_up_exps.weight (537 Mb) from ROCm0 to CPU 189 | Move blk.5.ffn_down_exps.weight (537 Mb) from ROCm0 to CPU 190 | Move blk.5.ffn_up_exps.weight (537 Mb) from ROCm0 to CPU 191 | Move blk.6.ffn_down_exps.weight (537 Mb) from ROCm0 to CPU 192 | Move blk.18.ffn_down_exps.weight (537 Mb) from ROCm1 to CPU 193 | Move blk.18.ffn_up_exps.weight (537 Mb) from ROCm1 to CPU 194 | Move blk.19.ffn_down_exps.weight (537 Mb) from ROCm1 to CPU 195 | Move blk.19.ffn_up_exps.weight (537 Mb) from ROCm1 to CPU 196 | Move blk.20.ffn_down_exps.weight (537 Mb) from ROCm1 to CPU 197 | Move blk.20.ffn_up_exps.weight (537 Mb) from ROCm1 to CPU 198 | Move blk.21.ffn_down_exps.weight (537 Mb) from ROCm1 to CPU 199 | Move blk.21.ffn_up_exps.weight (537 Mb) from ROCm1 to CPU 200 | Move blk.22.ffn_down_exps.weight (537 Mb) from ROCm1 to CPU 201 | Move blk.22.ffn_up_exps.weight (537 Mb) from ROCm1 to CPU 202 | Move blk.23.ffn_down_exps.weight (537 Mb) from ROCm1 to CPU 203 | ======= Device memory usage 204 | ROCm0 32366 Mb of 32768 Mb (23066) 205 | ROCm1 32642 Mb of 32768 Mb (24142) 206 | CPU 12909 Mb of 131072 Mb (12909) 207 | 208 | ======= Tensors offload result 209 | Offload ffn_gate_exps (10) 0 (ROCm0 = 18, ROCm1 = 18, CPU = 0 ) of 36 210 | Offload ffn_up_exps (20) 0 (ROCm0 = 12, ROCm1 = 13, CPU = 11) of 36 211 | Offload ffn_down_exps (20) 0 (ROCm0 = 11, ROCm1 = 12, CPU = 13) of 36 212 | 213 | ======= Helm chart config 214 | extraEnvVars: 215 | - name: LLAMA_ARG_MAIN_GPU 216 | value: '0' 217 | - name: LLAMA_ARG_TENSOR_SPLIT 218 | value: '18,18,0' 219 | 220 | modelTensorsOverride: 221 | - name: CPU 222 | tensors: 223 | - blk.0.ffn_down_exps.weight 224 | - blk.0.ffn_up_exps.weight 225 | - blk.1.ffn_down_exps.weight 226 | - blk.1.ffn_up_exps.weight 227 | - blk.2.ffn_down_exps.weight 228 | - blk.2.ffn_up_exps.weight 229 | - blk.3.ffn_down_exps.weight 230 | - blk.3.ffn_up_exps.weight 231 | - blk.4.ffn_down_exps.weight 232 | - blk.4.ffn_up_exps.weight 233 | - blk.5.ffn_down_exps.weight 234 | - blk.5.ffn_up_exps.weight 235 | - blk.6.ffn_down_exps.weight 236 | - blk.18.ffn_down_exps.weight 237 | - blk.18.ffn_up_exps.weight 238 | - blk.19.ffn_down_exps.weight 239 | - blk.19.ffn_up_exps.weight 240 | - blk.20.ffn_down_exps.weight 241 | - blk.20.ffn_up_exps.weight 242 | - blk.21.ffn_down_exps.weight 243 | - blk.21.ffn_up_exps.weight 244 | - blk.22.ffn_down_exps.weight 245 | - blk.22.ffn_up_exps.weight 246 | - blk.23.ffn_down_exps.weight 247 | 248 | ======= CMD config 249 | --main-gpu 0 --tensor-split "18,18,0" --override-tensor 
"(blk.0.ffn_down_exps.weight|blk.0.ffn_up_exps.weight|blk.1.ffn_down_exps.weight|blk.1.ffn_up_exps.weight|blk.2.ffn_down_exps.weight|blk.2.ffn_up_exps.weight|blk.3.ffn_down_exps.weight|blk.3.ffn_up_exps.weight|blk.4.ffn_down_exps.weight|blk.4.ffn_up_exps.weight|blk.5.ffn_down_exps.weight|blk.5.ffn_up_exps.weight|blk.6.ffn_down_exps.weight|blk.18.ffn_down_exps.weight|blk.18.ffn_up_exps.weight|blk.19.ffn_down_exps.weight|blk.19.ffn_up_exps.weight|blk.20.ffn_down_exps.weight|blk.20.ffn_up_exps.weight|blk.21.ffn_down_exps.weight|blk.21.ffn_up_exps.weight|blk.22.ffn_down_exps.weight|blk.22.ffn_up_exps.weight|blk.23.ffn_down_exps.weight)=CPU" 250 | ``` 251 | 252 | ``` 253 | ┌┌┤ Memory Usage ├──────────────────────────────────────────────────────────┐ │ ┌┤ Memory Usage ├──────────────────────────────────────────────────────────┐ 254 | ││ VRAM: [ 32545 / 32752 MiB ] GTT: [ 14 / 48256 MiB ] │ │ │ VRAM: [ 32155 / 32752 MiB ] GTT: [ 14 / 48256 MiB ] │ 255 | └└──────────────────────────────────────────────────────────────────────────┘ │ └──────────────────────────────────────────────────────────────────────────┘ 256 | 257 | prompt eval time = 57715.98 ms / 8777 tokens ( 6.58 ms per token, 152.07 tokens per second) 258 | eval time = 66072.62 ms / 878 tokens ( 75.25 ms per token, 13.29 tokens per second) 259 | total time = 123788.60 ms / 9655 tokens 260 | ``` 261 | -------------------------------------------------------------------------------- /llama.cpp/preset.rocm-6.3.3.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | export LLAMA_ROCM_VERSION="6.3.3" 4 | -------------------------------------------------------------------------------- /llama.cpp/preset.rocm-6.4.4.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | export LLAMA_ROCM_VERSION="6.4.4" 4 | -------------------------------------------------------------------------------- /llama.cpp/preset.rocm-7.0.0.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | export LLAMA_ROCM_VERSION="7.0.0" 4 | -------------------------------------------------------------------------------- /llama.cpp/readme.md: -------------------------------------------------------------------------------- 1 | # llama.cpp GFX906 2 | LLM inference in C/C++ https://github.com/ggml-org/llama.cpp 3 | 4 | Recommend use `docker.io/mixa3607/llama.cpp-gfx906:7.0.0-complete` 5 | 6 | Also see [llamacpp-offload-calculator](./llamacpp-offload-calculator/readme.md) 7 | 8 | ## Benchmarks 9 | ```shell 10 | export PATH="/app:$PATH" 11 | export LD_LIBRARY_PATH="/app:$LD_LIBRARY_PATH" 12 | 13 | MODEL=/root/.cache/huggingface/hub/models--ggml-org--gemma-3n-E4B-it-GGUF/snapshots/ee0f0cb58a4b9d5b48dd55b576db22eeeeecdd7e/gemma-3n-E4B-it-Q8_0.gguf 14 | MODEL=/root/.cache/huggingface/hub/models--unsloth--gemma-3-12b-it-GGUF/snapshots/a5592d885c8a933e824f80d2eeda84db95ad2712/gemma-3-12b-it-Q8_0.gguf 15 | MODEL=/root/.cache/huggingface/hub/models--bartowski--Qwen_Qwen3-14B-GGUF/snapshots/bd080f768a6401c2d5a7fa53a2e50cd8218a9ce2/Qwen_Qwen3-14B-Q4_K_S.gguf 16 | MODEL=/root/.cache/huggingface/hub/models--bartowski--Qwen_Qwen3-14B-GGUF/snapshots/bd080f768a6401c2d5a7fa53a2e50cd8218a9ce2/Qwen_Qwen3-14B-Q4_0.gguf 17 | MODEL=/root/.cache/huggingface/hub/models--bartowski--Qwen_Qwen3-14B-GGUF/snapshots/bd080f768a6401c2d5a7fa53a2e50cd8218a9ce2/Qwen_Qwen3-14B-bf16.gguf 18 | 
MODEL=/root/.cache/huggingface/hub/models--ggml-org--gemma-3-27b-it-GGUF/snapshots/f94c25afed0072339c5fa3b705a7b4222afe5f62/gemma-3-27b-it-f16-00001-of-00002.gguf 19 | 20 | llama-bench --model $MODEL -t 16 --flash-attn 0 21 | ``` 22 | 23 | ``` 24 | ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no 25 | ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no 26 | ggml_cuda_init: found 2 ROCm devices: 27 | Device 0: AMD Radeon Graphics, gfx906:sramecc+:xnack- (0x906), VMM: no, Wave Size: 64 28 | Device 1: AMD Radeon Graphics, gfx906:sramecc+:xnack- (0x906), VMM: no, Wave Size: 64 29 | load_backend: loaded ROCm backend from /app/libggml-hip.so 30 | load_backend: loaded CPU backend from /app/libggml-cpu-haswell.so 31 | ``` 32 | 33 | | rocm | llama.cpp | model | size | params | backend | ngl | test | t/s | 34 | | ----- | --------- | ------------------------------------ | ---------: | ---------: | ---------- | --: | --------------: | -------------------: | 35 | | 6.3.4 | 982e3472 | gemma3n E4B Q8_0 | 6.84 GiB | 6.87 B | ROCm | 99 | pp512 | 483.29 ± 0.68 | 36 | | 6.3.4 | 982e3472 | gemma3n E4B Q8_0 | 6.84 GiB | 6.87 B | ROCm | 99 | tg128 | 33.48 ± 0.43 | 37 | | 6.3.4 | 982e3472 | gemma3 12B Q8_0 | 11.64 GiB | 11.77 B | ROCm | 99 | pp512 | 246.66 ± 0.07 | 38 | | 6.3.4 | 982e3472 | gemma3 12B Q8_0 | 11.64 GiB | 11.77 B | ROCm | 99 | tg128 | 28.41 ± 0.12 | 39 | | 6.3.4 | 982e3472 | qwen3 14B Q4_K - Small | 7.98 GiB | 14.77 B | ROCm | 99 | pp512 | 242.34 ± 0.15 | 40 | | 6.3.4 | 982e3472 | qwen3 14B Q4_K - Small | 7.98 GiB | 14.77 B | ROCm | 99 | tg128 | 35.87 ± 0.15 | 41 | | 6.3.4 | 982e3472 | qwen3 14B Q4_0 | 7.95 GiB | 14.77 B | ROCm | 99 | pp512 | 574.13 ± 0.28 | 42 | | 6.3.4 | 982e3472 | qwen3 14B Q4_0 | 7.95 GiB | 14.77 B | ROCm | 99 | tg128 | 39.02 ± 0.23 | 43 | | 6.3.4 | 982e3472 | qwen3 14B BF16 | 27.51 GiB | 14.77 B | ROCm | 99 | pp512 | 118.01 ± 0.24 | 44 | | 6.3.4 | 982e3472 | qwen3 14B BF16 | 27.51 GiB | 14.77 B | ROCm | 99 | tg128 | 19.33 ± 0.08 | 45 | | 6.3.4 | 982e3472 | gemma3 27B F16 | 50.31 GiB | 27.01 B | ROCm | 99 | pp512 | 236.51 ± 0.14 | 46 | | 6.3.4 | 982e3472 | gemma3 27B F16 | 50.31 GiB | 27.01 B | ROCm | 99 | tg128 | 10.37 ± 0.04 | 47 | | 6.3.4 | 982e3472 | llama4 17Bx16E (Scout) Q3_K - Medium | 48.19 GiB | 107.77 B | ROCm | 99 | pp512 | 160.50 ± 0.81 | 48 | | 6.3.4 | 982e3472 | llama4 17Bx16E (Scout) Q3_K - Medium | 48.19 GiB | 107.77 B | ROCm | 99 | tg128 | 22.75 ± 0.07 | 49 | | 6.4.1 | 982e3472 | gemma3n E4B Q8_0 | 6.84 GiB | 6.87 B | ROCm | 99 | pp512 | 606.83 ± 0.97 | 50 | | 6.4.1 | 982e3472 | gemma3n E4B Q8_0 | 6.84 GiB | 6.87 B | ROCm | 99 | tg128 | 33.36 ± 0.23 | 51 | | 6.4.1 | 982e3472 | gemma3 12B Q8_0 | 11.64 GiB | 11.77 B | ROCm | 99 | pp512 | 329.70 ± 0.30 | 52 | | 6.4.1 | 982e3472 | gemma3 12B Q8_0 | 11.64 GiB | 11.77 B | ROCm | 99 | tg128 | 28.58 ± 0.15 | 53 | | 6.4.1 | 982e3472 | qwen3 14B Q4_K - Small | 7.98 GiB | 14.77 B | ROCm | 99 | pp512 | 286.58 ± 0.15 | 54 | | 6.4.1 | 982e3472 | qwen3 14B Q4_K - Small | 7.98 GiB | 14.77 B | ROCm | 99 | tg128 | 36.48 ± 0.11 | 55 | | 6.4.1 | 982e3472 | qwen3 14B Q4_0 | 7.95 GiB | 14.77 B | ROCm | 99 | pp512 | 570.15 ± 0.23 | 56 | | 6.4.1 | 982e3472 | qwen3 14B Q4_0 | 7.95 GiB | 14.77 B | ROCm | 99 | tg128 | 38.94 ± 0.16 | 57 | | 6.4.1 | 982e3472 | qwen3 14B BF16 | 27.51 GiB | 14.77 B | ROCm | 99 | pp512 | 119.03 ± 0.31 | 58 | | 6.4.1 | 982e3472 | qwen3 14B BF16 | 27.51 GiB | 14.77 B | ROCm | 99 | tg128 | 19.46 ± 0.10 | 59 | | 6.4.1 | 982e3472 | gemma3 27B F16 | 50.31 GiB | 27.01 B | ROCm | 99 | pp512 | 238.38 ± 0.26 | 60 | | 6.4.1 | 982e3472 | 
gemma3 27B F16 | 50.31 GiB | 27.01 B | ROCm | 99 | tg128 | 10.41 ± 0.03 | 61 | | 6.4.1 | 982e3472 | llama4 17Bx16E (Scout) Q3_K - Medium | 48.19 GiB | 107.77 B | ROCm | 99 | pp512 | 190.52 ± 0.84 | 62 | | 6.4.1 | 982e3472 | llama4 17Bx16E (Scout) Q3_K - Medium | 48.19 GiB | 107.77 B | ROCm | 99 | tg128 | 22.96 ± 0.10 | 63 | 64 | 65 | ## Run 66 | ### Docker 67 | See https://github.com/ggml-org/llama.cpp/blob/master/docs/docker.md + https://github.com/ROCm/vllm/blob/main/docs/deployment/docker.md 68 | 69 | ### Kubernetes 70 | Helm chart and samples: [mixa3607 charts](https://github.com/mixa3607/charts) 71 | 72 | ## Build 73 | See build vars in `./env.sh`. You may also use the presets `./preset.rocm-*.sh`. Exec `./build-and-push.rocm.sh`: 74 | ```bash 75 | $ . preset.rocm-7.0.0.sh 76 | $ ./build-and-push.rocm.sh 77 | ~/REPOS/mixa3607/llama.cpp-gfx906/rocm ~/REPOS/mixa3607/llama.cpp-gfx906/rocm 78 | ~/REPOS/mixa3607/llama.cpp-gfx906/rocm 79 | ~/REPOS/mixa3607/llama.cpp-gfx906/llama.cpp ~/REPOS/mixa3607/llama.cpp-gfx906/rocm 80 | ~/REPOS/mixa3607/llama.cpp-gfx906/rocm 81 | ~/REPOS/mixa3607/llama.cpp-gfx906/comfyui ~/REPOS/mixa3607/llama.cpp-gfx906/rocm 82 | ~/REPOS/mixa3607/llama.cpp-gfx906/rocm 83 | ~/REPOS/mixa3607/llama.cpp-gfx906/vllm ~/REPOS/mixa3607/llama.cpp-gfx906/rocm 84 | ~/REPOS/mixa3607/llama.cpp-gfx906/rocm 85 | #0 building with "remote" instance using remote driver 86 | #............... 87 | #14 DONE 583.8s 88 | ``` 89 | -------------------------------------------------------------------------------- /pytorch/build-and-push.torch.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | set -e 3 | 4 | cd $(dirname $0) 5 | source ../env.sh 6 | 7 | IMAGE_TAGS=( 8 | "$TORCH_IMAGE:${TORCH_VERSION}-rocm-${TORCH_ROCM_VERSION}-${REPO_GIT_REF}" 9 | "$TORCH_IMAGE:${TORCH_VERSION}-rocm-${TORCH_ROCM_VERSION}" 10 | ) 11 | 12 | if docker_image_pushed ${IMAGE_TAGS[0]}; then 13 | echo "${IMAGE_TAGS[0]} already in registry.
Skip" 14 | exit 0 15 | fi 16 | 17 | DOCKER_EXTRA_ARGS=() 18 | for (( i=0; i<${#IMAGE_TAGS[@]}; i++ )); do 19 | DOCKER_EXTRA_ARGS+=("-t" "${IMAGE_TAGS[$i]}") 20 | done 21 | 22 | mkdir ./logs || true 23 | docker buildx build ${DOCKER_EXTRA_ARGS[@]} --push \ 24 | --build-arg BASE_ROCM_IMAGE="${PATCHED_ROCM_IMAGE}:${TORCH_ROCM_VERSION}-complete" \ 25 | --build-arg ROCM_ARCH="${ROCM_ARCH}" \ 26 | --build-arg PYTORCH_BRANCH="$TORCH_VERSION" \ 27 | --build-arg PYTORCH_VISION_BRANCH="$TORCH_VISION_VERSION" \ 28 | --target final -f ./torch.Dockerfile --progress=plain ./submodules 2>&1 | tee ./logs/build_$(date +%Y%m%d%H%M%S).log 29 | -------------------------------------------------------------------------------- /pytorch/env.sh: -------------------------------------------------------------------------------- 1 | #/bin/bash 2 | 3 | pushd $(dirname ${BASH_SOURCE[0]}) 4 | 5 | # rocm version 6 | if [ "$TORCH_ROCM_VERSION" == "" ]; then TORCH_ROCM_VERSION=6.3.3; fi 7 | # torch git checkpoint 8 | if [ "$TORCH_VERSION" == "" ]; then TORCH_VERSION="v2.7.1"; fi 9 | 10 | # destination image 11 | if [ "$TORCH_IMAGE" == "" ]; then 12 | TORCH_IMAGE=docker.io/mixa3607/pytorch-gfx906 13 | #TORCH_IMAGE=registry.arkprojects.space/apps/pytorch-gfx906 14 | fi 15 | 16 | popd 17 | -------------------------------------------------------------------------------- /pytorch/preset.torch-2.7.1-rocm-6.3.3.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | export TORCH_ROCM_VERSION="6.3.3" 4 | export TORCH_VERSION="v2.7.1" 5 | export TORCH_VISION_VERSION="v0.21.0" 6 | -------------------------------------------------------------------------------- /pytorch/preset.torch-2.7.1-rocm-6.4.4.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | export TORCH_ROCM_VERSION="6.4.4" 4 | export TORCH_VERSION="v2.7.1" 5 | export TORCH_VISION_VERSION="v0.21.0" 6 | -------------------------------------------------------------------------------- /pytorch/preset.torch-2.8.0-rocm-6.3.3.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | export TORCH_ROCM_VERSION="6.3.3" 4 | export TORCH_VERSION="v2.8.0" 5 | export TORCH_VISION_VERSION="v0.23.0" 6 | -------------------------------------------------------------------------------- /pytorch/preset.torch-2.8.0-rocm-6.4.4.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | export TORCH_ROCM_VERSION="6.4.4" 4 | export TORCH_VERSION="v2.8.0" 5 | export TORCH_VISION_VERSION="v0.23.0" 6 | -------------------------------------------------------------------------------- /pytorch/preset.torch-2.8.0-rocm-7.0.2.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | export TORCH_ROCM_VERSION="7.0.2" 4 | export TORCH_VERSION="v2.8.0" 5 | -------------------------------------------------------------------------------- /pytorch/preset.torch-2.9.0-rocm-7.0.2.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | export TORCH_ROCM_VERSION="7.0.2" 4 | export TORCH_VERSION="v2.9.0" 5 | -------------------------------------------------------------------------------- /pytorch/readme.md: -------------------------------------------------------------------------------- 1 | # PyTorch GFX906 2 | Tensors and Dynamic neural networks in Python with strong GPU acceleration. 
3 | 4 | Packages: 5 | - torch 6 | - torchvision 7 | - torchaudio 8 | 9 | Recommended image: `docker.io/mixa3607/pytorch-gfx906:(v2.7.1|v2.8.0)-rocm-6.3.3` 10 | 11 | ## Build 12 | See build vars in `./env.sh`. You may also use the presets `./preset.*.sh`. Exec `./build-and-push.torch.sh`: 13 | ```bash 14 | $ . preset.torch-2.7.1-rocm-6.3.3.sh 15 | $ ./build-and-push.torch.sh 16 | ~/REPOS/mixa3607/llama.cpp-gfx906/rocm ~/REPOS/mixa3607/llama.cpp-gfx906/rocm 17 | ~/REPOS/mixa3607/llama.cpp-gfx906/rocm 18 | ~/REPOS/mixa3607/llama.cpp-gfx906/llama.cpp ~/REPOS/mixa3607/llama.cpp-gfx906/rocm 19 | ~/REPOS/mixa3607/llama.cpp-gfx906/rocm 20 | ~/REPOS/mixa3607/llama.cpp-gfx906/comfyui ~/REPOS/mixa3607/llama.cpp-gfx906/rocm 21 | ~/REPOS/mixa3607/llama.cpp-gfx906/rocm 22 | ~/REPOS/mixa3607/llama.cpp-gfx906/vllm ~/REPOS/mixa3607/llama.cpp-gfx906/rocm 23 | ~/REPOS/mixa3607/llama.cpp-gfx906/rocm 24 | #0 building with "remote" instance using remote driver 25 | 26 | #1 [internal] load build definition from rocm.Dockerfile 27 | #1 transferring dockerfile: 4.95kB done 28 | #1 DONE 0.0s 29 | 30 | #2 [auth] dockerio-proxy/rocm/dev-ubuntu-24.04:pull rocm/dev-ubuntu-24.04:pull token for registry.arkprojects.space 31 | #2 DONE 0.0s 32 | 33 | #3 [internal] load metadata for docker.io/rocm/dev-ubuntu-24.04:7.0-complete 34 | #3 DONE 1.8s 35 | 36 | #4 [internal] load .dockerignore 37 | #4 transferring context: 2B done 38 | #............... 39 | #24 exporting to image 40 | #24 pushing layers 6.5s done 41 | #24 pushing manifest for docker.io/mixa3607/rocm-gfx906:7.0.0-20251005035204-complete@sha256:00532f62462e80d51e48b021afb7875af53164455c84dc28b24eb29d39aa0005 42 | #24 pushing manifest for docker.io/mixa3607/rocm-gfx906:7.0.0-20251005035204-complete@sha256:00532f62462e80d51e48b021afb7875af53164455c84dc28b24eb29d39aa0005 3.3s done 43 | #24 pushing layers 2.0s done 44 | #24 pushing manifest for docker.io/mixa3607/rocm-gfx906:7.0.0-complete@sha256:00532f62462e80d51e48b021afb7875af53164455c84dc28b24eb29d39aa0005 45 | #24 pushing manifest for docker.io/mixa3607/rocm-gfx906:7.0.0-complete@sha256:00532f62462e80d51e48b021afb7875af53164455c84dc28b24eb29d39aa0005 2.2s done 46 | #24 DONE 17.6s 47 | ``` 48 | -------------------------------------------------------------------------------- /pytorch/submodules/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mixa3607/ML-gfx906/d95fce7ed5e14ec9cc7b801c668696194a929cda/pytorch/submodules/.gitkeep -------------------------------------------------------------------------------- /pytorch/torch.Dockerfile: -------------------------------------------------------------------------------- 1 | ARG BASE_ROCM_IMAGE="docker.io/mixa3607/vllm-gfx906:latest" 2 | ARG ROCM_ARCH="gfx906" 3 | ARG PYTORCH_REPO="https://github.com/pytorch/pytorch.git" 4 | ARG PYTORCH_BRANCH="v2.7.1" 5 | ARG PYTORCH_VISION_REPO="https://github.com/pytorch/vision.git" 6 | ARG PYTORCH_VISION_BRANCH="" 7 | ARG PYTORCH_AUDIO_REPO="https://github.com/pytorch/audio.git" 8 | ARG PYTORCH_AUDIO_BRANCH="" 9 | 10 | ############# Base image ############# 11 | FROM ${BASE_ROCM_IMAGE} AS rocm_base 12 | # Install basic utilities and Python 3.12 13 | RUN apt-get update && apt-get install -y software-properties-common git python3-pip && \ 14 | add-apt-repository ppa:deadsnakes/ppa && \ 15 | apt-get update -y && \ 16 | apt-get install -y python3.12 python3.12-dev python3.12-venv \ 17 | python3.12-lib2to3 python-is-python3 python3.12-full && \ 18 | update-alternatives
--install /usr/bin/python3 python3 /usr/bin/python3.12 1 && \ 19 | update-alternatives --set python3 /usr/bin/python3.12 && \ 20 | ln -sf /usr/bin/python3.12-config /usr/bin/python3-config && \ 21 | python3 -m pip config set global.break-system-packages true && \ 22 | pip install amdsmi==$(cat /opt/ROCM_VERSION_FULL) && \ 23 | true 24 | 25 | # Set environment variables 26 | ARG ROCM_ARCH 27 | ENV ROCM_ARCH=$ROCM_ARCH 28 | ENV PYTORCH_ROCM_ARCH=$ROCM_ARCH 29 | ENV PATH=/opt/rocm/llvm/bin:$PATH 30 | ENV ROCM_PATH=/opt/rocm 31 | ENV LD_LIBRARY_PATH=/opt/rocm/lib:/usr/local/lib: 32 | 33 | ############# Build torch ############# 34 | FROM rocm_base AS build_torch 35 | RUN pip install setuptools wheel packaging cmake ninja setuptools_scm jinja2 36 | 37 | WORKDIR /build/pytorch 38 | ARG PYTORCH_REPO 39 | ARG PYTORCH_BRANCH 40 | RUN git clone --depth 1 --recurse-submodules --shallow-submodules --jobs 4 --branch "${PYTORCH_BRANCH}" "${PYTORCH_REPO}" . 41 | RUN pip install -r requirements.txt 42 | RUN sed -i 's|7e29c325d5bd33ba896ddb106f5d4fc7d715274dca7fe937f724fffa82017838|1e9b3dddf0c7fc07131c6f0f5266129e83ce2331f459fa2be8c63f4ae91b0f5b|g' cmake/External/aotriton.cmake && \ 43 | python3 tools/amd_build/build_amd.py && \ 44 | CMAKE_PREFIX_PATH=$(python3 -c 'import sys; print(sys.prefix)') python3 setup.py bdist_wheel --dist-dir=/dist && \ 45 | pip install /dist/*.whl 46 | 47 | ############# Build vision ############# 48 | FROM build_torch AS build_vision 49 | WORKDIR /build/vision 50 | ARG PYTORCH_VISION_REPO 51 | ARG PYTORCH_VISION_BRANCH 52 | RUN if [ "${PYTORCH_VISION_BRANCH}" = "" ]; then \ 53 | git clone --depth 1 --recurse-submodules --shallow-submodules --jobs 4 "${PYTORCH_VISION_REPO}" . && \ 54 | git fetch --depth=1 origin "$(cat /build/pytorch/.github/ci_commit_pins/vision.txt)" && \ 55 | git checkout "$(cat /build/pytorch/.github/ci_commit_pins/vision.txt)" && \ 56 | git reset --hard FETCH_HEAD; \ 57 | else \ 58 | git clone --depth 1 --recurse-submodules --shallow-submodules --jobs 4 --branch "${PYTORCH_VISION_BRANCH}" "${PYTORCH_VISION_REPO}" . ; \ 59 | fi 60 | RUN python3 setup.py bdist_wheel --dist-dir=/dist && \ 61 | pip install /dist/*.whl 62 | 63 | ############# Build audio ############# 64 | FROM build_torch AS build_audio 65 | WORKDIR /build/audio 66 | ARG PYTORCH_AUDIO_REPO 67 | ARG PYTORCH_AUDIO_BRANCH 68 | RUN if [ "${PYTORCH_AUDIO_BRANCH}" = "" ]; then \ 69 | git clone --depth 1 --recurse-submodules --shallow-submodules --jobs 4 "${PYTORCH_AUDIO_REPO}" . && \ 70 | git fetch --depth=1 origin "$(cat /build/pytorch/.github/ci_commit_pins/audio.txt)" && \ 71 | git checkout "$(cat /build/pytorch/.github/ci_commit_pins/audio.txt)" && \ 72 | git reset --hard FETCH_HEAD; \ 73 | else \ 74 | git clone --depth 1 --recurse-submodules --shallow-submodules --jobs 4 --branch "${PYTORCH_AUDIO_BRANCH}" "${PYTORCH_AUDIO_REPO}" . 
; \ 75 | fi 76 | RUN python3 setup.py bdist_wheel --dist-dir=/dist && \ 77 | pip install /dist/*.whl 78 | 79 | ############# Install all ############# 80 | FROM rocm_base AS final 81 | RUN --mount=type=bind,from=build_torch,src=/dist/,target=/dist_torch \ 82 | --mount=type=bind,from=build_vision,src=/dist/,target=/dist_vision \ 83 | --mount=type=bind,from=build_audio,src=/dist/,target=/dist_audio \ 84 | pip install /dist_torch/*.whl /dist_vision/torchvision-*.whl /dist_audio/torchaudio-*.whl && \ 85 | true 86 | 87 | CMD ["/bin/bash"] 88 | -------------------------------------------------------------------------------- /readme.md: -------------------------------------------------------------------------------- 1 | # ML software for deprecated GFX906 arch 2 | 3 | ## Prebuilt images 4 | ### Images 5 | | Name | Source | Status | Docs | 6 | | ---- | ------ | ------ | ---- | 7 | | ROCm | [ROCm](https://github.com/ROCm/ROCm), [rocBLAS](https://github.com/ROCm/rocBLAS) | OK | [readme](./rocm/readme.md) | 8 | | llama.cpp | [llama.cpp](https://github.com/ggml-org/llama.cpp) | OK | [readme](./llama.cpp/readme.md) | 9 | | ComfyUI | [ComfyUI](https://github.com/comfyanonymous/ComfyUI) | OK | [readme](./comfyui/readme.md) | 10 | | VLLM | [VLLM](https://github.com/nlzy/vllm-gfx906), [triton](https://github.com/nlzy/triton-gfx906) | OK | [readme](./vllm/readme.md) | 11 | 12 | ### Deps graph 13 | ```mermaid 14 | flowchart TD 15 | rocm-src[docker.io/rocm/dev-ubuntu-24.04] --> rocm[docker.io/mixa3607/rocm-gfx906] 16 | rocm --> llama[docker.io/mixa3607/llama.cpp-gfx906] 17 | rocm --> torch[docker.io/mixa3607/pytorch-gfx906] 18 | torch --> comfyui[docker.io/mixa3607/comfyui-gfx906] 19 | torch --> vllm[docker.io/mixa3607/vllm-gfx906] 20 | ``` 21 | 22 | ## Perf tuning 23 | Changing smcPPTable/TdcLimitGfx 350 => 150 reduced the hotspot temperature by roughly 10 degrees with almost no drop in vllm performance ([table in vllm](./vllm/readme.md#benchmarks)). 24 | 25 | ```console 26 | $ upp -p /sys/class/drm/card${GPU_ID}/device/pp_table set --write smcPPTable/TdcLimitGfx=150 27 | Changing smcPPTable.TdcLimitGfx of type H from 330 to 150 at 0x1fe 28 | Committing changes to '/sys/class/drm/card1/device/pp_table'. 29 | ``` 30 | ![temperatures](./docs/images/temperatures.png) 31 | 32 | ## Environment 33 | All software was tested on a Lenovo RD450X with 256G of memory and 2x MI50 32G (x16 + x8). For GPU cooling, the [AMD Instinct MI50 blower fan adapter (thingiverse)](https://www.thingiverse.com/thing:7153218) is used.
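To confirm the blower adapters keep up under load, temperatures and power draw can be watched from inside any of the images (this relies on `rocm-smi`, which ships with the ROCm base images):

```shell
watch -n1 rocm-smi --showtemp --showpower --showuse
```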
40 | 41 | ## RVS 42 | ```shell 43 | cd /opt/rocm-6.4.1/bin 44 | apt update 45 | apt install -y rocm-validation-suite 46 | echo 'actions: 47 | - name: gst-581Tflops-4K4K8K-rand-bf16 48 | device: all 49 | module: gst 50 | log_interval: 3000 51 | ramp_interval: 5000 52 | duration: 15000 53 | hot_calls: 1000 54 | copy_matrix: false 55 | target_stress: 581000 56 | matrix_size_a: 4864 57 | matrix_size_b: 4096 58 | matrix_size_c: 8192 59 | matrix_init: rand 60 | data_type: bf16_r 61 | lda: 8320 62 | ldb: 8320 63 | ldc: 4992 64 | ldd: 4992 65 | transa: 1 66 | transb: 0 67 | alpha: 1 68 | beta: 0' > ~/gst-581Tflops-4K4K8K-rand-bf16.conf 69 | ./rvs -c ~/gst-581Tflops-4K4K8K-rand-bf16.conf 70 | ``` 71 | 72 | 73 | 74 | -------------------------------------------------------------------------------- /rocm/build-and-push.rocm.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | set -e 3 | 4 | cd $(dirname $0) 5 | source ../env.sh 6 | 7 | IMAGE_TAGS=( 8 | "$PATCHED_ROCM_IMAGE:${ROCM_VERSION}-${REPO_GIT_REF}-complete" 9 | "$PATCHED_ROCM_IMAGE:${ROCM_VERSION}-complete" 10 | ) 11 | 12 | if docker_image_pushed "${IMAGE_TAGS[0]}"; then 13 | echo "${IMAGE_TAGS[0]} already in registry. Skip" 14 | exit 0 15 | fi 16 | 17 | DOCKER_EXTRA_ARGS=() 18 | for (( i=0; i<${#IMAGE_TAGS[@]}; i++ )); do 19 | DOCKER_EXTRA_ARGS+=("-t" "${IMAGE_TAGS[$i]}") 20 | done 21 | 22 | mkdir -p ./logs 23 | docker buildx build ${DOCKER_EXTRA_ARGS[@]} --push \ 24 | --build-arg BASE_ROCM_IMAGE="${BASE_ROCM_IMAGE}:${ROCM_IMAGE_VER}-complete" \ 25 | --build-arg ROCM_ARCH="${ROCM_ARCH}" \ 26 | --target final -f ./rocm.Dockerfile --progress=plain ./submodules 2>&1 | tee ./logs/build_$(date +%Y%m%d%H%M%S).log
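27 | 28 | # Note: docker_image_pushed is a helper defined in the repo-root env.sh sourced above (not shown here). 29 | # A minimal sketch of such a check, assuming a public registry, could be: 30 | #   docker_image_pushed() { docker manifest inspect "$1" >/dev/null 2>&1; }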
-------------------------------------------------------------------------------- /rocm/env.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | pushd $(dirname ${BASH_SOURCE[0]}) 4 | 5 | # value from tag https://hub.docker.com/r/rocm/dev-ubuntu-24.04/tags e.g. 7.0/6.4.4 6 | if [ "$ROCM_VERSION" == "" ]; then 7 | ROCM_VERSION=6.3.3 8 | fi 9 | if [ "$ROCM_IMAGE_VER" == "" ]; then 10 | ROCM_IMAGE_VER=6.3.3 11 | fi 12 | 13 | # target arch 14 | if [ "$ROCM_ARCH" == "" ]; then 15 | ROCM_ARCH=gfx906 16 | fi 17 | 18 | # source image 19 | if [ "$BASE_ROCM_IMAGE" == "" ]; then 20 | BASE_ROCM_IMAGE=docker.io/rocm/dev-ubuntu-24.04 21 | fi 22 | 23 | # destination image 24 | if [ "$PATCHED_ROCM_IMAGE" == "" ]; then 25 | PATCHED_ROCM_IMAGE=docker.io/mixa3607/rocm-gfx906 26 | #PATCHED_ROCM_IMAGE=registry.arkprojects.space/apps/rocm-gfx906 27 | fi 28 | 29 | popd 30 | -------------------------------------------------------------------------------- /rocm/preset.rocm-6.3.3.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | export ROCM_VERSION="6.3.3" 4 | export ROCM_IMAGE_VER="6.3.3" 5 | -------------------------------------------------------------------------------- /rocm/preset.rocm-6.4.4.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | export ROCM_VERSION="6.4.4" 4 | export ROCM_IMAGE_VER="6.4.4" 5 | -------------------------------------------------------------------------------- /rocm/preset.rocm-7.0.0.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | export ROCM_VERSION="7.0.0" 4 | export ROCM_IMAGE_VER="7.0" 5 | -------------------------------------------------------------------------------- /rocm/preset.rocm-7.0.2.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | export ROCM_VERSION="7.0.2" 4 | export ROCM_IMAGE_VER="7.0.2" 5 | -------------------------------------------------------------------------------- /rocm/readme.md: -------------------------------------------------------------------------------- 1 | # ROCm GFX906 2 | Open software stack that includes programming models, tools, compilers, libraries, and runtimes for AI and HPC solution development on AMD GPUs. 3 | gfx906 support was dropped in ROCm 6.4+, but the affected libraries can still be built manually. 4 | 5 | At the moment the following components are rebuilt: 6 | - rccl 7 | - rocblas+tensile 8 | 9 | The recommended image is `docker.io/mixa3607/rocm-gfx906:6.4.4-complete`. 10 | 11 | ## Run 12 | ### Docker 13 | TODO 14 | 15 | ### Kubernetes 16 | ```yaml 17 | apiVersion: apps/v1 18 | kind: Deployment 19 | metadata: 20 | name: rocmdev 21 | namespace: ns-vllm 22 | labels: 23 | app: rocmdev 24 | spec: 25 | strategy: 26 | type: Recreate 27 | replicas: 1 28 | selector: 29 | matchLabels: 30 | app: rocmdev 31 | template: 32 | metadata: 33 | labels: 34 | app: rocmdev 35 | spec: 36 | containers: 37 | - name: rocmdev 38 | image: docker.io/mixa3607/rocm-gfx906:7.0.0-20251005035204-complete 39 | imagePullPolicy: Always 40 | securityContext: 41 | privileged: true 42 | runAsNonRoot: false 43 | runAsGroup: 0 44 | runAsUser: 0 45 | command: [ "/bin/bash", "-c" ] 46 | args: 47 | - "apt install tmux wget -y; wget https://gist.githubusercontent.com/mixa3607/1e6d3ee7d87b018484cf80c7928b4c33/raw/.tmux.conf -O ~/.tmux.conf; while true; do sleep 1s; done;" 48 | #- sleep inf 49 | ``` 50 | 51 | ## Build 52 | See the build variables in `./env.sh`. You may also use the presets `./preset.rocm-*.sh`. Then exec `./build-and-push.rocm.sh`: 53 | ```bash 54 | $ . 
preset.rocm-7.0.0.sh 55 | $ ./build-and-push.rocm.sh 56 | ~/REPOS/mixa3607/llama.cpp-gfx906/rocm ~/REPOS/mixa3607/llama.cpp-gfx906/rocm 57 | ~/REPOS/mixa3607/llama.cpp-gfx906/rocm 58 | ~/REPOS/mixa3607/llama.cpp-gfx906/llama.cpp ~/REPOS/mixa3607/llama.cpp-gfx906/rocm 59 | ~/REPOS/mixa3607/llama.cpp-gfx906/rocm 60 | ~/REPOS/mixa3607/llama.cpp-gfx906/comfyui ~/REPOS/mixa3607/llama.cpp-gfx906/rocm 61 | ~/REPOS/mixa3607/llama.cpp-gfx906/rocm 62 | ~/REPOS/mixa3607/llama.cpp-gfx906/vllm ~/REPOS/mixa3607/llama.cpp-gfx906/rocm 63 | ~/REPOS/mixa3607/llama.cpp-gfx906/rocm 64 | #0 building with "remote" instance using remote driver 65 | 66 | #1 [internal] load build definition from rocm.Dockerfile 67 | #1 transferring dockerfile: 4.95kB done 68 | #1 DONE 0.0s 69 | 70 | #2 [auth] dockerio-proxy/rocm/dev-ubuntu-24.04:pull rocm/dev-ubuntu-24.04:pull token for registry.arkprojects.space 71 | #2 DONE 0.0s 72 | 73 | #3 [internal] load metadata for docker.io/rocm/dev-ubuntu-24.04:7.0-complete 74 | #3 DONE 1.8s 75 | 76 | #4 [internal] load .dockerignore 77 | #4 transferring context: 2B done 78 | #............... 79 | #24 exporting to image 80 | #24 pushing layers 6.5s done 81 | #24 pushing manifest for docker.io/mixa3607/rocm-gfx906:7.0.0-20251005035204-complete@sha256:00532f62462e80d51e48b021afb7875af53164455c84dc28b24eb29d39aa0005 82 | #24 pushing manifest for docker.io/mixa3607/rocm-gfx906:7.0.0-20251005035204-complete@sha256:00532f62462e80d51e48b021afb7875af53164455c84dc28b24eb29d39aa0005 3.3s done 83 | #24 pushing layers 2.0s done 84 | #24 pushing manifest for docker.io/mixa3607/rocm-gfx906:7.0.0-complete@sha256:00532f62462e80d51e48b021afb7875af53164455c84dc28b24eb29d39aa0005 85 | #24 pushing manifest for docker.io/mixa3607/rocm-gfx906:7.0.0-complete@sha256:00532f62462e80d51e48b021afb7875af53164455c84dc28b24eb29d39aa0005 2.2s done 86 | #24 DONE 17.6s 87 | ``` 88 | -------------------------------------------------------------------------------- /rocm/rocm.Dockerfile: -------------------------------------------------------------------------------- 1 | ARG ROCM_ARCH="gfx906" 2 | ARG BASE_ROCM_IMAGE="rocm/dev-ubuntu-24.04:6.4.4-complete" 3 | ARG ROCBLAS_REPO="https://github.com/ROCm/rocBLAS" 4 | ARG TENSILE_REPO="https://github.com/ROCm/Tensile" 5 | ARG RCCL_REPO="https://github.com/ROCm/rccl" 6 | 7 | ############# Base image ############# 8 | FROM ${BASE_ROCM_IMAGE} AS rocm_base 9 | # ROCm ver 10 | RUN ROCM_VERSION_MAJOR=$(ls /opt/ | sed -nE 's|rocm-([0-9]+)\.([0-9]+)\.([0-9]+)|\1|1p') && \ 11 | ROCM_VERSION_MINOR=$(ls /opt/ | sed -nE 's|rocm-([0-9]+)\.([0-9]+)\.([0-9]+)|\2|1p') && \ 12 | ROCM_VERSION_PATCH=$(ls /opt/ | sed -nE 's|rocm-([0-9]+)\.([0-9]+)\.([0-9]+)|\3|1p') && \ 13 | echo "$ROCM_VERSION_MAJOR" > /opt/ROCM_VERSION_MAJOR && \ 14 | echo "$ROCM_VERSION_MINOR" > /opt/ROCM_VERSION_MINOR && \ 15 | echo "$ROCM_VERSION_PATCH" > /opt/ROCM_VERSION_PATCH && \ 16 | echo "$ROCM_VERSION_MAJOR.$ROCM_VERSION_MINOR" > /opt/ROCM_VERSION && \ 17 | echo "$ROCM_VERSION_MAJOR.$ROCM_VERSION_MINOR.$ROCM_VERSION_PATCH" > /opt/ROCM_VERSION_FULL && \ 18 | echo "Detected rocm version is $(cat /opt/ROCM_VERSION_FULL)" && \ 19 | true 20 | 21 | ############# Build base ############# 22 | FROM rocm_base AS build_base 23 | RUN apt-get update && apt-get install -y git cmake libfmt-dev 24 | WORKDIR /rebuild-deps 25 | 26 | ############# Build rocBLAS ############# 27 | FROM build_base AS build_rocblas 28 | ARG ROCBLAS_REPO 29 | ARG TENSILE_REPO 30 | RUN git clone --depth 1 --branch rocm-$(cat /opt/ROCM_VERSION_FULL) 
${ROCBLAS_REPO} rocBLAS && \ 31 | git clone --depth 1 --branch rocm-$(cat /opt/ROCM_VERSION_FULL) ${TENSILE_REPO} Tensile && \ 32 | true 33 | 34 | WORKDIR /rebuild-deps/rocBLAS 35 | ARG ROCM_ARCH 36 | ENV PACKAGE_NAME=rocblas 37 | RUN dpkg -s ${PACKAGE_NAME} 38 | RUN ./install.sh --dependencies --rmake_invoked 39 | RUN export INSTALLED_PACKAGE_VERSION=$(dpkg -s ${PACKAGE_NAME} | sed -nE 's|^ *Version: (.+)$|\1|p') && \ 40 | echo "Installed package version is \"$INSTALLED_PACKAGE_VERSION\"" && \ 41 | export ROCM_LIBPATCH_VERSION=$(echo "$INSTALLED_PACKAGE_VERSION" | sed -E 's|^([0-9]+)\.([0-9]+)\.([0-9]+)\.([0-9]+)-(.*)|\4|1') && \ 42 | echo "Set ROCM_LIBPATCH_VERSION to \"$ROCM_LIBPATCH_VERSION\"" && \ 43 | export CPACK_DEBIAN_PACKAGE_RELEASE=$(echo "$INSTALLED_PACKAGE_VERSION" | sed -E 's|^([0-9]+)\.([0-9]+)\.([0-9]+)\.([0-9]+)-(.*)|\5|1') && \ 44 | echo "Set CPACK_DEBIAN_PACKAGE_RELEASE to \"$CPACK_DEBIAN_PACKAGE_RELEASE\"" && \ 45 | python3 ./rmake.py \ 46 | --install_invoked \ 47 | --build_dir=$(realpath ./build) \ 48 | --src_path=$(realpath .) \ 49 | --architecture ${ROCM_ARCH} \ 50 | --test_local_path=$(realpath ../Tensile) && \ 51 | cd ./build/release && \ 52 | make package && \ 53 | mkdir -p /dist && cp *.deb /dist && \ 54 | true 55 | RUN cd ./build/release && \ 56 | export INSTALLED_PACKAGE_VERSION=$(dpkg -s ${PACKAGE_NAME} | sed -nE 's|^ *Version: (.+)$|\1|p') && \ 57 | export BUILT_PACKAGE_VERSION=$(dpkg -I /dist/${PACKAGE_NAME}_*.deb | sed -nE 's|^ *Version: (.+)$|\1|p') && \ 58 | if [ "$BUILT_PACKAGE_VERSION" != "$INSTALLED_PACKAGE_VERSION" ]; then echo "ERR: Built version is $BUILT_PACKAGE_VERSION but expected $INSTALLED_PACKAGE_VERSION"; exit 10; fi && \ 59 | true 60 | 61 | ############# Build rccl ############# 62 | FROM build_base AS build_rccl 63 | ARG RCCL_REPO 64 | RUN git clone --depth 1 --branch rocm-$(cat /opt/ROCM_VERSION_FULL) ${RCCL_REPO} rccl && \ 65 | true 66 | 67 | WORKDIR /rebuild-deps/rccl 68 | ARG ROCM_ARCH 69 | ENV PACKAGE_NAME=rccl 70 | RUN dpkg -s ${PACKAGE_NAME} 71 | RUN export INSTALLED_PACKAGE_VERSION=$(dpkg -s ${PACKAGE_NAME} | sed -nE 's|^ *Version: (.+)$|\1|p') && \ 72 | echo "Installed package version is \"$INSTALLED_PACKAGE_VERSION\"" && \ 73 | export ROCM_LIBPATCH_VERSION=$(echo "$INSTALLED_PACKAGE_VERSION" | sed -E 's|^([0-9]+)\.([0-9]+)\.([0-9]+)\.([0-9]+)-(.*)|\4|1') && \ 74 | echo "Set ROCM_LIBPATCH_VERSION to \"$ROCM_LIBPATCH_VERSION\"" && \ 75 | export CPACK_DEBIAN_PACKAGE_RELEASE=$(echo "$INSTALLED_PACKAGE_VERSION" | sed -E 's|^([0-9]+)\.([0-9]+)\.([0-9]+)\.([0-9]+)-(.*)|\5|1') && \ 76 | echo "Set CPACK_DEBIAN_PACKAGE_RELEASE to \"$CPACK_DEBIAN_PACKAGE_RELEASE\"" && \ 77 | ./install.sh --package_build --amdgpu_targets ${ROCM_ARCH} && \ 78 | mkdir -p /dist && cp ./build/release/*.deb /dist && \ 79 | true 80 | RUN cd ./build/release && \ 81 | export INSTALLED_PACKAGE_VERSION=$(dpkg -s ${PACKAGE_NAME} | sed -nE 's|^ *Version: (.+)$|\1|p') && \ 82 | export BUILT_PACKAGE_VERSION=$(dpkg -I /dist/${PACKAGE_NAME}_*.deb | sed -nE 's|^ *Version: (.+)$|\1|p') && \ 83 | if [ "$BUILT_PACKAGE_VERSION" != "$INSTALLED_PACKAGE_VERSION" ]; then echo "ERR: Built version is $BUILT_PACKAGE_VERSION but expected $INSTALLED_PACKAGE_VERSION"; exit 10; fi && \ 84 | true 85 | 86 | ############# Patched image ############# 87 | FROM rocm_base AS final 88 | RUN apt-get update && apt-get install -y libfmt-dev 89 | # Install rocblas 90 | RUN --mount=type=bind,from=build_rocblas,src=/dist/,target=/dist \ 91 | dpkg -i /dist/*.deb 92 | # Install rccl 93 | 
RUN --mount=type=bind,from=build_rccl,src=/dist/,target=/dist \ 94 | dpkg -i /dist/*.deb 95 | 96 | # Validate apt deps state 97 | RUN apt-get install 98 | -------------------------------------------------------------------------------- /rocm/submodules/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mixa3607/ML-gfx906/d95fce7ed5e14ec9cc7b801c668696194a929cda/rocm/submodules/.gitkeep -------------------------------------------------------------------------------- /vllm/benchmark/ResultsConverter/.gitignore: -------------------------------------------------------------------------------- 1 | ## Ignore Visual Studio temporary files, build results, and 2 | ## files generated by popular Visual Studio add-ons. 3 | ## 4 | ## Get latest from https://github.com/github/gitignore/blob/master/VisualStudio.gitignore 5 | 6 | # User-specific files 7 | *.rsuser 8 | *.suo 9 | *.user 10 | *.userosscache 11 | *.sln.docstates 12 | 13 | # User-specific files (MonoDevelop/Xamarin Studio) 14 | *.userprefs 15 | 16 | # Mono auto generated files 17 | mono_crash.* 18 | 19 | # Build results 20 | [Dd]ebug/ 21 | [Dd]ebugPublic/ 22 | [Rr]elease/ 23 | [Rr]eleases/ 24 | x64/ 25 | x86/ 26 | [Ww][Ii][Nn]32/ 27 | [Aa][Rr][Mm]/ 28 | [Aa][Rr][Mm]64/ 29 | bld/ 30 | [Bb]in/ 31 | [Oo]bj/ 32 | [Oo]ut/ 33 | [Ll]og/ 34 | [Ll]ogs/ 35 | 36 | # Visual Studio 2015/2017 cache/options directory 37 | .vs/ 38 | # Uncomment if you have tasks that create the project's static files in wwwroot 39 | #wwwroot/ 40 | 41 | # Visual Studio 2017 auto generated files 42 | Generated\ Files/ 43 | 44 | # MSTest test Results 45 | [Tt]est[Rr]esult*/ 46 | [Bb]uild[Ll]og.* 47 | 48 | # NUnit 49 | *.VisualState.xml 50 | TestResult.xml 51 | nunit-*.xml 52 | 53 | # Build Results of an ATL Project 54 | [Dd]ebugPS/ 55 | [Rr]eleasePS/ 56 | dlldata.c 57 | 58 | # Benchmark Results 59 | BenchmarkDotNet.Artifacts/ 60 | 61 | # .NET Core 62 | project.lock.json 63 | project.fragment.lock.json 64 | artifacts/ 65 | 66 | # ASP.NET Scaffolding 67 | ScaffoldingReadMe.txt 68 | 69 | # StyleCop 70 | StyleCopReport.xml 71 | 72 | # Files built by Visual Studio 73 | *_i.c 74 | *_p.c 75 | *_h.h 76 | *.ilk 77 | *.meta 78 | *.obj 79 | *.iobj 80 | *.pch 81 | *.pdb 82 | *.ipdb 83 | *.pgc 84 | *.pgd 85 | *.rsp 86 | *.sbr 87 | *.tlb 88 | *.tli 89 | *.tlh 90 | *.tmp 91 | *.tmp_proj 92 | *_wpftmp.csproj 93 | *.log 94 | *.vspscc 95 | *.vssscc 96 | .builds 97 | *.pidb 98 | *.svclog 99 | *.scc 100 | 101 | # Chutzpah Test files 102 | _Chutzpah* 103 | 104 | # Visual C++ cache files 105 | ipch/ 106 | *.aps 107 | *.ncb 108 | *.opendb 109 | *.opensdf 110 | *.sdf 111 | *.cachefile 112 | *.VC.db 113 | *.VC.VC.opendb 114 | 115 | # Visual Studio profiler 116 | *.psess 117 | *.vsp 118 | *.vspx 119 | *.sap 120 | 121 | # Visual Studio Trace Files 122 | *.e2e 123 | 124 | # TFS 2012 Local Workspace 125 | $tf/ 126 | 127 | # Guidance Automation Toolkit 128 | *.gpState 129 | 130 | # ReSharper is a .NET coding add-in 131 | _ReSharper*/ 132 | *.[Rr]e[Ss]harper 133 | *.DotSettings.user 134 | 135 | # TeamCity is a build add-in 136 | _TeamCity* 137 | 138 | # DotCover is a Code Coverage Tool 139 | *.dotCover 140 | 141 | # AxoCover is a Code Coverage Tool 142 | .axoCover/* 143 | !.axoCover/settings.json 144 | 145 | # Coverlet is a free, cross platform Code Coverage Tool 146 | coverage*.json 147 | coverage*.xml 148 | coverage*.info 149 | 150 | # Visual Studio code coverage results 151 | *.coverage 152 | *.coveragexml 153 | 154 | # NCrunch 155 | 
_NCrunch_* 156 | .*crunch*.local.xml 157 | nCrunchTemp_* 158 | 159 | # MightyMoose 160 | *.mm.* 161 | AutoTest.Net/ 162 | 163 | # Web workbench (sass) 164 | .sass-cache/ 165 | 166 | # Installshield output folder 167 | [Ee]xpress/ 168 | 169 | # DocProject is a documentation generator add-in 170 | DocProject/buildhelp/ 171 | DocProject/Help/*.HxT 172 | DocProject/Help/*.HxC 173 | DocProject/Help/*.hhc 174 | DocProject/Help/*.hhk 175 | DocProject/Help/*.hhp 176 | DocProject/Help/Html2 177 | DocProject/Help/html 178 | 179 | # Click-Once directory 180 | publish/ 181 | 182 | # Publish Web Output 183 | *.[Pp]ublish.xml 184 | *.azurePubxml 185 | # Note: Comment the next line if you want to checkin your web deploy settings, 186 | # but database connection strings (with potential passwords) will be unencrypted 187 | *.pubxml 188 | *.publishproj 189 | 190 | # Microsoft Azure Web App publish settings. Comment the next line if you want to 191 | # checkin your Azure Web App publish settings, but sensitive information contained 192 | # in these scripts will be unencrypted 193 | PublishScripts/ 194 | 195 | # NuGet Packages 196 | *.nupkg 197 | # NuGet Symbol Packages 198 | *.snupkg 199 | # The packages folder can be ignored because of Package Restore 200 | **/[Pp]ackages/* 201 | # except build/, which is used as an MSBuild target. 202 | !**/[Pp]ackages/build/ 203 | # Uncomment if necessary however generally it will be regenerated when needed 204 | #!**/[Pp]ackages/repositories.config 205 | # NuGet v3's project.json files produces more ignorable files 206 | *.nuget.props 207 | *.nuget.targets 208 | 209 | # Microsoft Azure Build Output 210 | csx/ 211 | *.build.csdef 212 | 213 | # Microsoft Azure Emulator 214 | ecf/ 215 | rcf/ 216 | 217 | # Windows Store app package directories and files 218 | AppPackages/ 219 | BundleArtifacts/ 220 | Package.StoreAssociation.xml 221 | _pkginfo.txt 222 | *.appx 223 | *.appxbundle 224 | *.appxupload 225 | 226 | # Visual Studio cache files 227 | # files ending in .cache can be ignored 228 | *.[Cc]ache 229 | # but keep track of directories ending in .cache 230 | !?*.[Cc]ache/ 231 | 232 | # Others 233 | ClientBin/ 234 | ~$* 235 | *~ 236 | *.dbmdl 237 | *.dbproj.schemaview 238 | *.jfm 239 | *.pfx 240 | *.publishsettings 241 | orleans.codegen.cs 242 | 243 | # Including strong name files can present a security risk 244 | # (https://github.com/github/gitignore/pull/2483#issue-259490424) 245 | #*.snk 246 | 247 | # Since there are multiple workflows, uncomment next line to ignore bower_components 248 | # (https://github.com/github/gitignore/pull/1529#issuecomment-104372622) 249 | #bower_components/ 250 | 251 | # RIA/Silverlight projects 252 | Generated_Code/ 253 | 254 | # Backup & report files from converting an old project file 255 | # to a newer Visual Studio version. 
Backup files are not needed, 256 | # because we have git ;-) 257 | _UpgradeReport_Files/ 258 | Backup*/ 259 | UpgradeLog*.XML 260 | UpgradeLog*.htm 261 | ServiceFabricBackup/ 262 | *.rptproj.bak 263 | 264 | # SQL Server files 265 | *.mdf 266 | *.ldf 267 | *.ndf 268 | 269 | # Business Intelligence projects 270 | *.rdl.data 271 | *.bim.layout 272 | *.bim_*.settings 273 | *.rptproj.rsuser 274 | *- [Bb]ackup.rdl 275 | *- [Bb]ackup ([0-9]).rdl 276 | *- [Bb]ackup ([0-9][0-9]).rdl 277 | 278 | # Microsoft Fakes 279 | FakesAssemblies/ 280 | 281 | # GhostDoc plugin setting file 282 | *.GhostDoc.xml 283 | 284 | # Node.js Tools for Visual Studio 285 | .ntvs_analysis.dat 286 | node_modules/ 287 | 288 | # Visual Studio 6 build log 289 | *.plg 290 | 291 | # Visual Studio 6 workspace options file 292 | *.opt 293 | 294 | # Visual Studio 6 auto-generated workspace file (contains which files were open etc.) 295 | *.vbw 296 | 297 | # Visual Studio LightSwitch build output 298 | **/*.HTMLClient/GeneratedArtifacts 299 | **/*.DesktopClient/GeneratedArtifacts 300 | **/*.DesktopClient/ModelManifest.xml 301 | **/*.Server/GeneratedArtifacts 302 | **/*.Server/ModelManifest.xml 303 | _Pvt_Extensions 304 | 305 | # Paket dependency manager 306 | .paket/paket.exe 307 | paket-files/ 308 | 309 | # FAKE - F# Make 310 | .fake/ 311 | 312 | # CodeRush personal settings 313 | .cr/personal 314 | 315 | # Python Tools for Visual Studio (PTVS) 316 | __pycache__/ 317 | *.pyc 318 | 319 | # Cake - Uncomment if you are using it 320 | # tools/** 321 | # !tools/packages.config 322 | 323 | # Tabs Studio 324 | *.tss 325 | 326 | # Telerik's JustMock configuration file 327 | *.jmconfig 328 | 329 | # BizTalk build output 330 | *.btp.cs 331 | *.btm.cs 332 | *.odx.cs 333 | *.xsd.cs 334 | 335 | # OpenCover UI analysis results 336 | OpenCover/ 337 | 338 | # Azure Stream Analytics local run output 339 | ASALocalRun/ 340 | 341 | # MSBuild Binary and Structured Log 342 | *.binlog 343 | 344 | # NVidia Nsight GPU debugger configuration file 345 | *.nvuser 346 | 347 | # MFractors (Xamarin productivity tool) working folder 348 | .mfractor/ 349 | 350 | # Local History for Visual Studio 351 | .localhistory/ 352 | 353 | # BeatPulse healthcheck temp database 354 | healthchecksdb 355 | 356 | # Backup folder for Package Reference Convert tool in Visual Studio 2017 357 | MigrationBackup/ 358 | 359 | # Ionide (cross platform F# VS Code tools) working folder 360 | .ionide/ 361 | 362 | # Fody - auto-generated XML schema 363 | FodyWeavers.xsd -------------------------------------------------------------------------------- /vllm/benchmark/ResultsConverter/ResultsConverter.sln: -------------------------------------------------------------------------------- 1 |  2 | Microsoft Visual Studio Solution File, Format Version 12.00 3 | # Visual Studio Version 17 4 | VisualStudioVersion = 17.14.36414.22 d17.14 5 | MinimumVisualStudioVersion = 10.0.40219.1 6 | Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "ResultsConverter", "ResultsConverter\ResultsConverter.csproj", "{F1ADC5F6-4208-4BF3-9612-A30E48364174}" 7 | EndProject 8 | Global 9 | GlobalSection(SolutionConfigurationPlatforms) = preSolution 10 | Debug|Any CPU = Debug|Any CPU 11 | Release|Any CPU = Release|Any CPU 12 | EndGlobalSection 13 | GlobalSection(ProjectConfigurationPlatforms) = postSolution 14 | {F1ADC5F6-4208-4BF3-9612-A30E48364174}.Debug|Any CPU.ActiveCfg = Debug|Any CPU 15 | {F1ADC5F6-4208-4BF3-9612-A30E48364174}.Debug|Any CPU.Build.0 = Debug|Any CPU 16 | 
{F1ADC5F6-4208-4BF3-9612-A30E48364174}.Release|Any CPU.ActiveCfg = Release|Any CPU 17 | {F1ADC5F6-4208-4BF3-9612-A30E48364174}.Release|Any CPU.Build.0 = Release|Any CPU 18 | EndGlobalSection 19 | GlobalSection(SolutionProperties) = preSolution 20 | HideSolutionNode = FALSE 21 | EndGlobalSection 22 | GlobalSection(ExtensibilityGlobals) = postSolution 23 | SolutionGuid = {41BCD38E-4CD3-453E-9363-9DAE08F9519C} 24 | EndGlobalSection 25 | EndGlobal 26 | -------------------------------------------------------------------------------- /vllm/benchmark/ResultsConverter/ResultsConverter/MarkdownTableBuilder.cs: -------------------------------------------------------------------------------- 1 | using System.Text; 2 | 3 | // https://github.com/marcolink/MarkdownTable 4 | namespace MarkdownTable 5 | { 6 | public class MarkdownTableBuilder 7 | { 8 | private string[] header = { }; 9 | private readonly List<object[]> rows = new List<object[]>(); 10 | 11 | private readonly char verticalChar; 12 | private readonly char horizontalChar; 13 | private readonly char outerBorderChar; 14 | private readonly int padding; 15 | private readonly int minColumnWidth; 16 | private readonly StringBuilder rowBuilder; 17 | 18 | private enum Align 19 | { 20 | Left, 21 | Right, 22 | Center 23 | } 24 | 25 | public MarkdownTableBuilder() 26 | { 27 | rowBuilder = new StringBuilder(); 28 | horizontalChar = '-'; 29 | outerBorderChar = ' '; 30 | verticalChar = '|'; 31 | padding = 1; 32 | } 33 | 34 | #region Interface 35 | 36 | public MarkdownTableBuilder WithHeader(params string[] header) 37 | { 38 | this.header = header; 39 | return this; 40 | } 41 | 42 | public MarkdownTableBuilder WithRow(params object[] row) 43 | { 44 | rows.Add(row); 45 | return this; 46 | } 47 | 48 | public MarkdownTableBuilder Clear() 49 | { 50 | header = new string[] { }; 51 | rows.Clear(); 52 | return this; 53 | } 54 | 55 | public override string ToString() 56 | { 57 | var output = new StringBuilder(); 58 | var maxCols = MaxColumns(); 59 | 60 | if (header.Length > 0) 61 | { 62 | output.AppendLine(Row(header, maxCols)); 63 | } 64 | 65 | output.AppendLine(HorizontalLine()); 66 | 67 | rows.ForEach(row => { output.AppendLine(Row(row, maxCols)); }); 68 | 69 | return output.ToString(); 70 | } 71 | 72 | #endregion 73 | 74 | #region Calculation 75 | 76 | private int ColumnWidth(int index) 77 | { 78 | var width = 1; 79 | 80 | if (header != null && index < header.Length) 81 | { 82 | width = header[index].Length; 83 | } 84 | 85 | return Column(index).Length == 0 86 | ? 1 87 | : Math.Max(width, 88 | Column(index).Max(r => r != null ? r.Length : 0)); 89 | } 90 | 91 | private int[] SizeRow() 92 | { 93 | var row = new List<int>(); 94 | var maxCols = MaxColumns(); 95 | for (var i = 0; i < maxCols; i++) 96 | { 97 | row.Add(ColumnWidth(i)); 98 | } 99 | 100 | return row.ToArray(); 101 | } 102 | 103 | private Align[] AlignmentRow() 104 | { 105 | var row = new List<Align>(); 106 | var maxCols = MaxColumns(); 107 | 108 | for (var i = 0; i < maxCols; i++) 109 | { 110 | var alignment = Align.Left; 111 | 112 | row.Add(alignment); 113 | } 114 | 115 | return row.ToArray(); 116 | } 117 | 118 | private int MaxColumns() 119 | { 120 | var result = 0; 121 | if (header != null) 122 | { 123 | result = header.Length; 124 | } 125 | 126 | rows.ForEach(row => { result = Math.Max(row.Length, result); }); 127 | return result; 128 | } 129 | 130 | private string[] Column(int index) 131 | { 132 | var column = new List<string>(); 133 | rows.ForEach(row => { column.Add(index < row.Length ? 
row[index].ToString() : null); }); 134 | return column.ToArray(); 135 | } 136 | 137 | #endregion 138 | 139 | #region Creation 140 | 141 | private static string Fill(int size, char fillChar = ' ') 142 | { 143 | return new string(fillChar, Math.Max(size, 0)); 144 | } 145 | 146 | private string HorizontalLine() 147 | { 148 | var format = Fill(1, outerBorderChar) + "{0}" + Fill(1, outerBorderChar); 149 | var content = SizeRow() 150 | .Select(col => Fill(col + 2 * padding, horizontalChar)) 151 | .Aggregate((a, b) => a + Fill(1, verticalChar) + b); 152 | return string.Format(format, content); 153 | } 154 | 155 | private string Row(object[] row, int maxCols, Align align = Align.Left) 156 | { 157 | rowBuilder.Length = 0; 158 | rowBuilder.Append(outerBorderChar); 159 | 160 | for (var i = 0; i < row.Length; i++) 161 | { 162 | var maxColWidth = ColumnWidth(i); 163 | var format = "{0,-" + maxColWidth + "}"; 164 | 165 | rowBuilder.Append(Fill(padding)); 166 | rowBuilder.Append(string.Format(format, row[i])); 167 | rowBuilder.Append(Fill(padding)); 168 | rowBuilder.Append(i == maxCols - 1 ? outerBorderChar : verticalChar); 169 | } 170 | 171 | var j = row.Length - 1; 172 | while (j++ < maxCols - 1) 173 | { 174 | var maxColWidth = ColumnWidth(j); 175 | rowBuilder.Append(Fill(maxColWidth + 2 * padding)); 176 | rowBuilder.Append(j == maxCols - 1 ? outerBorderChar : verticalChar); 177 | } 178 | 179 | return rowBuilder.ToString(); 180 | } 181 | 182 | #endregion 183 | } 184 | } -------------------------------------------------------------------------------- /vllm/benchmark/ResultsConverter/ResultsConverter/MarkdownTableBuilderExtensions.cs: -------------------------------------------------------------------------------- 1 |  2 | // https://github.com/marcolink/MarkdownTable 3 | namespace MarkdownTable 4 | { 5 | public static class MarkdownTableBuilderExtensions 6 | { 7 | public static string ToMardownTableString<T>(this IEnumerable<T> rows) 8 | { 9 | var builder = new MarkdownTableBuilder(); 10 | var properties = typeof(T).GetProperties().Where(p => p.PropertyType.IsRenderable()).ToArray(); 11 | var fields = typeof(T).GetFields().Where(f => f.FieldType.IsRenderable()).ToArray(); 12 | 13 | builder.WithHeader(properties.Select(p => p.Name).Concat(fields.Select(f => f.Name)).ToArray()); 14 | 15 | foreach (var row in rows) 16 | { 17 | builder.WithRow(properties.Select(p => p.GetValue(row, null)) 18 | .Concat(fields.Select(f => f.GetValue(row))).ToArray()); 19 | } 20 | 21 | return builder.ToString(); 22 | } 23 | 24 | private static bool IsRenderable(this Type type) 25 | { 26 | return type.IsNumeric() 27 | || Type.GetTypeCode(type) == TypeCode.String 28 | || Type.GetTypeCode(type) == TypeCode.Boolean; 29 | } 30 | 31 | private static bool IsNumeric(this Type type) 32 | { 33 | switch (Type.GetTypeCode(type)) 34 | { 35 | case TypeCode.Decimal: 36 | case TypeCode.Double: 37 | case TypeCode.Single: 38 | case TypeCode.Byte: 39 | case TypeCode.Int16: 40 | case TypeCode.Int32: 41 | case TypeCode.Int64: 42 | case TypeCode.SByte: 43 | case TypeCode.UInt16: 44 | case TypeCode.UInt32: 45 | case TypeCode.UInt64: 46 | return true; 47 | case TypeCode.Object: 48 | if (type.IsGenericType && type.GetGenericTypeDefinition() == typeof(Nullable<>)) 49 | { 50 | return Nullable.GetUnderlyingType(type).IsNumeric(); 51 | } 52 | return false; 53 | default: 54 | return false; 55 | } 56 | } 57 | } 58 | }
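A quick usage sketch of the builder and the reflection-based extension above (the `Bench` record is hypothetical, not part of the repo):

```csharp
using System;
using System.Collections.Generic;
using MarkdownTable;

// Any type with public renderable (numeric/string/bool) properties works
record Bench(string Model, double OutputTps);

class Demo
{
    static void Main()
    {
        // Reflection-based: one column per renderable property
        var rows = new List<Bench> { new Bench("gemma-3-27b-it-qat-autoawq", 58.77) };
        Console.WriteLine(rows.ToMardownTableString());

        // Manual: explicit header and rows
        var table = new MarkdownTableBuilder()
            .WithHeader("Model", "Output TPS")
            .WithRow("gemma-3-27b-it-qat-autoawq", 58.77);
        Console.WriteLine(table);
    }
}
```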
-------------------------------------------------------------------------------- /vllm/benchmark/ResultsConverter/ResultsConverter/Program.cs: -------------------------------------------------------------------------------- 1 | // See https://aka.ms/new-console-template for more information 2 | 3 | using System.CommandLine; 4 | using System.Text.Json; 5 | 6 | Console.WriteLine("Hello, World!"); 7 | var resultsDirOpt = new Option<string>("--results-dir", "-i") 8 | { 9 | Required = true, 10 | }; 11 | 12 | var genTableCommand = new Command("gen-table", "Generate md table from results") 13 | { 14 | Options = { resultsDirOpt } 15 | }; 16 | genTableCommand.SetAction(result => GenerateTable(result.GetRequiredValue(resultsDirOpt))); 17 | 18 | var rootCommand = new RootCommand(); 19 | rootCommand.Subcommands.Add(genTableCommand); 20 | return rootCommand.Parse(args).Invoke(); 21 | 22 | 23 | static void GenerateTable(string resultsDir) 24 | { 25 | var results = new List<VllmBenchResult>(); 26 | foreach (var file in Directory.GetFiles(resultsDir, "*.json", SearchOption.TopDirectoryOnly)) 27 | { 28 | Console.WriteLine($"Reading {file}"); 29 | results.Add(JsonSerializer.Deserialize<VllmBenchResult>(File.ReadAllText(file))!); 30 | } 31 | 32 | results = results.OrderBy(x => x.Date).ToList(); 33 | 34 | var table = new MarkdownTable.MarkdownTableBuilder(); 35 | table.WithHeader("date", "rocm", "torch", "vllm", 36 | "triton", "TP", "PwrCap", "Model", "Prompts", 37 | "Threads", "Duration", "RPS", 38 | "Output TPS", "Total TPS", "About"); 39 | foreach (var result in results) 40 | { 41 | var fields = new List<object>(); 42 | fields.Add(result.Date); 43 | fields.Add(result.MetadataRocmVer); 44 | fields.Add(result.MetadataTorchVer); 45 | fields.Add(result.MetadataVllmVer); 46 | fields.Add(result.MetadataTritonVer); 47 | fields.Add(result.MetadataTensorParallelism); 48 | fields.Add(result.MetadataPowerCap); 49 | fields.Add(result.ModelId); 50 | fields.Add(result.NumPrompts.ToString()); 51 | fields.Add(result.MaxConcurrency.ToString()); 52 | 53 | fields.Add(TimeSpan.FromSeconds(result.Duration).ToString()); 54 | fields.Add(result.RequestThroughput.ToString("N2")); 55 | fields.Add(result.OutputThroughput.ToString("N2")); 56 | fields.Add(result.TotalTokenThroughput.ToString("N2")); 57 | 58 | fields.Add(result.MetadataAbout); 59 | //fields.Add(result.MetadataBenchmarkAuthor); 60 | 61 | table.WithRow(fields.ToArray()); 62 | } 63 | 64 | Console.WriteLine(table.ToString()); 65 | } 66 | -------------------------------------------------------------------------------- /vllm/benchmark/ResultsConverter/ResultsConverter/Properties/launchSettings.json: -------------------------------------------------------------------------------- 1 | { 2 | "profiles": { 3 | "ResultsConverter": { 4 | "commandName": "Project", 5 | "commandLineArgs": "gen-table -i \\\\wsl.localhost\\Ubuntu\\home\\mixa3607\\REPOS\\mixa3607\\llama.cpp-gfx906\\vllm\\benchmark\\results" 6 | } 7 | } 8 | } -------------------------------------------------------------------------------- /vllm/benchmark/ResultsConverter/ResultsConverter/ResultsConverter.csproj: -------------------------------------------------------------------------------- 1 | <Project Sdk="Microsoft.NET.Sdk"> 2 | 3 |   <PropertyGroup> 4 |     <OutputType>Exe</OutputType> 5 |     <TargetFramework>net8.0</TargetFramework> 6 |     <ImplicitUsings>enable</ImplicitUsings> 7 |     <Nullable>enable</Nullable> 8 |   </PropertyGroup> 9 | 10 |   <ItemGroup> 11 |     <!-- System.CommandLine is required by Program.cs; package version assumed --> 12 |     <PackageReference Include="System.CommandLine" Version="2.0.0-beta5.25306.1" /> 13 |   </ItemGroup> 14 | 15 | </Project> -------------------------------------------------------------------------------- /vllm/benchmark/ResultsConverter/ResultsConverter/VllmBenchResult.cs: -------------------------------------------------------------------------------- 1 | using System.Text.Json.Serialization; 2 | 3 | public partial class VllmBenchResult 4 | { 5 | [JsonPropertyName("date")] 6 | public string Date { get; set; } 7 | 8 | [JsonPropertyName("endpoint_type")] 
9 | public string EndpointType { get; set; } 10 | 11 | [JsonPropertyName("label")] 12 | public object Label { get; set; } 13 | 14 | [JsonPropertyName("model_id")] 15 | public string ModelId { get; set; } 16 | 17 | [JsonPropertyName("tokenizer_id")] 18 | public string TokenizerId { get; set; } 19 | 20 | [JsonPropertyName("num_prompts")] 21 | public long NumPrompts { get; set; } 22 | 23 | [JsonPropertyName("metadata.rocm_ver")] 24 | public string MetadataRocmVer { get; set; } 25 | 26 | [JsonPropertyName("metadata.torch_ver")] 27 | public string MetadataTorchVer { get; set; } 28 | 29 | [JsonPropertyName("metadata.vision_ver")] 30 | public string MetadataVisionVer { get; set; } 31 | 32 | [JsonPropertyName("metadata.vllm_ver")] 33 | public string MetadataVllmVer { get; set; } 34 | 35 | [JsonPropertyName("metadata.triton_ver")] 36 | public string MetadataTritonVer { get; set; } 37 | 38 | [JsonPropertyName("metadata.image")] 39 | public string MetadataImage { get; set; } 40 | 41 | [JsonPropertyName("metadata.tensor_parallelism")] 42 | public string MetadataTensorParallelism { get; set; } 43 | 44 | [JsonPropertyName("metadata.about")] 45 | public string MetadataAbout { get; set; } 46 | 47 | [JsonPropertyName("metadata.benchmark_author")] 48 | public string MetadataBenchmarkAuthor { get; set; } 49 | 50 | [JsonPropertyName("metadata.power_cap")] 51 | public string MetadataPowerCap { get; set; } 52 | 53 | [JsonPropertyName("request_rate")] 54 | public string RequestRate { get; set; } 55 | 56 | [JsonPropertyName("burstiness")] 57 | public double Burstiness { get; set; } 58 | 59 | [JsonPropertyName("max_concurrency")] 60 | public long MaxConcurrency { get; set; } 61 | 62 | [JsonPropertyName("duration")] 63 | public double Duration { get; set; } 64 | 65 | [JsonPropertyName("completed")] 66 | public long Completed { get; set; } 67 | 68 | [JsonPropertyName("total_input_tokens")] 69 | public long TotalInputTokens { get; set; } 70 | 71 | [JsonPropertyName("total_output_tokens")] 72 | public long TotalOutputTokens { get; set; } 73 | 74 | [JsonPropertyName("request_throughput")] 75 | public double RequestThroughput { get; set; } 76 | 77 | [JsonPropertyName("request_goodput")] 78 | public object RequestGoodput { get; set; } 79 | 80 | [JsonPropertyName("output_throughput")] 81 | public double OutputThroughput { get; set; } 82 | 83 | [JsonPropertyName("total_token_throughput")] 84 | public double TotalTokenThroughput { get; set; } 85 | 86 | [JsonPropertyName("input_lens")] 87 | public long[] InputLens { get; set; } 88 | 89 | [JsonPropertyName("output_lens")] 90 | public long[] OutputLens { get; set; } 91 | 92 | [JsonPropertyName("ttfts")] 93 | public double[] Ttfts { get; set; } 94 | 95 | [JsonPropertyName("itls")] 96 | public double[][] Itls { get; set; } 97 | 98 | [JsonPropertyName("generated_texts")] 99 | public string[] GeneratedTexts { get; set; } 100 | 101 | [JsonPropertyName("errors")] 102 | public string[] Errors { get; set; } 103 | 104 | [JsonPropertyName("mean_ttft_ms")] 105 | public double MeanTtftMs { get; set; } 106 | 107 | [JsonPropertyName("median_ttft_ms")] 108 | public double MedianTtftMs { get; set; } 109 | 110 | [JsonPropertyName("std_ttft_ms")] 111 | public double StdTtftMs { get; set; } 112 | 113 | [JsonPropertyName("p99_ttft_ms")] 114 | public double P99TtftMs { get; set; } 115 | 116 | [JsonPropertyName("mean_tpot_ms")] 117 | public double MeanTpotMs { get; set; } 118 | 119 | [JsonPropertyName("median_tpot_ms")] 120 | public double MedianTpotMs { get; set; } 121 | 122 | 
[JsonPropertyName("std_tpot_ms")] 123 | public double StdTpotMs { get; set; } 124 | 125 | [JsonPropertyName("p99_tpot_ms")] 126 | public double P99TpotMs { get; set; } 127 | 128 | [JsonPropertyName("mean_itl_ms")] 129 | public double MeanItlMs { get; set; } 130 | 131 | [JsonPropertyName("median_itl_ms")] 132 | public double MedianItlMs { get; set; } 133 | 134 | [JsonPropertyName("std_itl_ms")] 135 | public double StdItlMs { get; set; } 136 | 137 | [JsonPropertyName("p99_itl_ms")] 138 | public double P99ItlMs { get; set; } 139 | } -------------------------------------------------------------------------------- /vllm/benchmark/readme.md: -------------------------------------------------------------------------------- 1 | # vLLM benchmark 2 | Run all commands in same pod 3 | 4 | ### 1. fill env vars 5 | ```sh 6 | export VLLM_USE_V1=1 # vllm serve only. Required for gemma3 7 | export VLLM_SLEEP_WHEN_IDLE=1 # vllm serve only. Reduce CPU usage when vLLM is idle 8 | export HUGGING_FACE_HUB_TOKEN=hf_XXXXXXXXXXXXXXXXXXXXXXX # vllm serve only. HF api token 9 | export POWER_CAP=225 # AMD GPU power cap 10 | export TENSOR_PARALLELISM=2 # GPUs count. 1/2/4/8 11 | export BENCHMARK_AUTHOR=mixa3607 # author 12 | export ABOUT="tested on rd450x 256G inside k3s in lxc" # misc info 13 | #export IMAGE_NAME="XXXX" # set if not in env 14 | ``` 15 | 16 | ### 2. Run vllm 17 | ```sh 18 | # Run vllm with gemma3 27B in 4 bit quant 19 | vllm serve gaunernst/gemma-3-27b-it-qat-autoawq --tensor-parallel-size $TENSOR_PARALLELISM --max-model-len 8K 20 | ``` 21 | 22 | ### 3. Run benchmarks 23 | ```sh 24 | # Set power cap and run benchmarks 25 | amd-smi set --power-cap $POWER_CAP 26 | echo -e '75 1\n100 2\n125 3\n150 4' | while read SETUP; do 27 | SETUP=($SETUP) 28 | vllm bench serve \ 29 | --model gaunernst/gemma-3-27b-it-qat-autoawq \ 30 | --host 127.0.0.1 \ 31 | --num-prompts ${SETUP[0]} --max-concurrency ${SETUP[1]} \ 32 | --dataset-name random --random-input-len 1024 --random-output-len 512 --random-range-ratio 0.1 \ 33 | --save-detailed --save-result --metadata \ 34 | metadata.rocm_ver="$(cat /opt/ROCM_VERSION_FULL)" \ 35 | metadata.torch_ver="$(pip show torch | sed -nE 's|^Version: (.+)|\1|p')" \ 36 | metadata.vision_ver="$(pip show torchvision | sed -nE 's|^Version: (.+)|\1|p')" \ 37 | metadata.vllm_ver="$(pip show vllm | sed -nE 's|^Version: (.+)|\1|p')" \ 38 | metadata.triton_ver="$(pip show triton | sed -nE 's|^Version: (.+)|\1|p')" \ 39 | metadata.image="$IMAGE_NAME" \ 40 | metadata.tensor_parallelism="$TENSOR_PARALLELISM" \ 41 | metadata.about="$ABOUT" \ 42 | metadata.benchmark_author="$BENCHMARK_AUTHOR" \ 43 | metadata.tensor_parallelism="$TENSOR_PARALLELISM" \ 44 | metadata.power_cap="$POWER_CAP" 45 | done 46 | ``` 47 | 48 | ### 4. Copy results from pod 49 | ```sh 50 | kubectl exec -n ns-vllm pods/$(kubectl get pods -n ns-vllm -l app=vllm -o jsonpath='{.items[].metadata.name}') -- bash -c 'tar -zcvf - /app/vllm/*.json .' | tar -zxvf - -C results/ 51 | ``` 52 | 53 | ### 3. 
58 | ### 5. Generate table 59 | ```sh 60 | dotnet run --project ./ResultsConverter/ResultsConverter/ResultsConverter.csproj -- gen-table -i ./results/ 61 | ``` 62 | -------------------------------------------------------------------------------- /vllm/build-and-push.vllm.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | set -e 3 | 4 | cd $(dirname $0) 5 | source ../env.sh 6 | 7 | IMAGE_TAGS=( 8 | "$VLLM_IMAGE:${VLLM_PRESET_NAME}-${REPO_GIT_REF}" 9 | "$VLLM_IMAGE:${VLLM_PRESET_NAME}" 10 | ) 11 | 12 | if docker_image_pushed "${IMAGE_TAGS[0]}"; then 13 | echo "${IMAGE_TAGS[0]} already in registry. Skip" 14 | exit 0 15 | fi 16 | 17 | DOCKER_EXTRA_ARGS=() 18 | for (( i=0; i<${#IMAGE_TAGS[@]}; i++ )); do 19 | DOCKER_EXTRA_ARGS+=("-t" "${IMAGE_TAGS[$i]}") 20 | done 21 | 22 | mkdir -p ./logs 23 | docker buildx build ${DOCKER_EXTRA_ARGS[@]} --push \ 24 | --build-arg BASE_PYTORCH_IMAGE=$TORCH_IMAGE:${VLLM_PYTORCH_VERSION}-rocm-${VLLM_ROCM_VERSION} \ 25 | --build-arg VLLM_BRANCH=$VLLM_BRANCH \ 26 | --build-arg TRITON_BRANCH=$VLLM_TRITON_BRANCH \ 27 | --progress=plain --target final -f ./vllm.Dockerfile ./submodules 2>&1 | tee ./logs/build_$(date +%Y%m%d%H%M%S).log 28 | -------------------------------------------------------------------------------- /vllm/env.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | pushd $(dirname ${BASH_SOURCE[0]}) 4 | 5 | if [ "$VLLM_IMAGE" == "" ]; then 6 | VLLM_IMAGE=docker.io/mixa3607/vllm-gfx906 7 | #VLLM_IMAGE=registry.arkprojects.space/apps/vllm-gfx906 8 | fi 9 | 10 | if [ "$VLLM_PRESET_NAME" == "" ]; then VLLM_PRESET_NAME=default; fi 11 | # vllm git checkpoint 12 | if [ "$VLLM_BRANCH" == "" ]; then VLLM_BRANCH="v0.10.2"; fi 13 | # triton git checkpoint 14 | if [ "$VLLM_TRITON_BRANCH" == "" ]; then VLLM_TRITON_BRANCH="v3.4.x"; fi 15 | # rocm version 16 | if [ "$VLLM_ROCM_VERSION" == "" ]; then VLLM_ROCM_VERSION=6.4.4; fi 17 | # torch git checkpoint 18 | if [ "$VLLM_PYTORCH_VERSION" == "" ]; then VLLM_PYTORCH_VERSION="v2.7.1"; fi 19 | 20 | popd 21 | -------------------------------------------------------------------------------- /vllm/preset.0.10.2-rocm-6.4.4.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | export VLLM_PRESET_NAME="0.10.2-rocm-6.4.4" 4 | export VLLM_ROCM_VERSION="6.4.4" 5 | export VLLM_PYTORCH_VERSION="v2.7.1" 6 | export VLLM_BRANCH="gfx906/v0.10.2" 7 | export VLLM_TRITON_BRANCH="gfx906/v3.3.x" 8 | -------------------------------------------------------------------------------- /vllm/preset.0.11.0-rocm-6.3.3.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | export VLLM_PRESET_NAME="0.11.0-rocm-6.3.3" 4 | export VLLM_ROCM_VERSION="6.3.3" 5 | export VLLM_PYTORCH_VERSION="v2.8.0" 6 | export VLLM_BRANCH="gfx906/v0.11.0" 7 | export VLLM_TRITON_BRANCH="gfx906/v3.4.x" 8 | -------------------------------------------------------------------------------- /vllm/preset.0.8.5-rocm-6.3.3.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | export VLLM_PRESET_NAME="0.8.5-rocm-6.3.3" 4 | export VLLM_ROCM_VERSION="6.3.3" 5 | export VLLM_PYTORCH_VERSION="v2.7.1" 6 | export VLLM_BRANCH="v0.8.5+gfx906" 7 | export VLLM_TRITON_BRANCH="gfx906/v3.3.x" 8 | -------------------------------------------------------------------------------- /vllm/readme.md: 
-------------------------------------------------------------------------------- 1 | # VLLM GFX906 2 | Used forks by https://github.com/nlzy: 3 | - https://github.com/nlzy/vllm-gfx906 4 | - https://github.com/nlzy/triton-gfx906 5 | 6 | ## Benchmarks 7 | 8 | Methodology [benchmark](./benchmark/readme.md) 9 | 10 | date | rocm | torch | vllm | triton | TP | PwrCap | Model | Prompts | Threads | Duration | RPS | Output TPS | Total TPS | About 11 | -----------------|-------|--------------------|-----------------------------|-------------------|----|--------|--------------------------------------|---------|---------|------------------|------|------------|-----------|----------------------------------------- 12 | 20251005-210513 | 6.4.4 | 2.7.1a0+gite2d141d | 0.1.dev1+gceec3eaf6.rocm644 | 3.3.0+git2b5c6ef9 | 2 | 150 | gaunernst/gemma-3-27b-it-qat-autoawq | 150 | 4 | 00:20:10.3265325 | 0.12 | 58.77 | 186.03 | tested on rd450x 256G inside k3s in lxc 13 | 20251005-212640 | 6.4.4 | 2.7.1a0+gite2d141d | 0.1.dev1+gceec3eaf6.rocm644 | 3.3.0+git2b5c6ef9 | 2 | 150 | gaunernst/gemma-3-27b-it-qat-autoawq | 125 | 3 | 00:20:00.2988691 | 0.10 | 48.18 | 154.96 | tested on rd450x 256G inside k3s in lxc 14 | 20251005-214604 | 6.4.4 | 2.7.1a0+gite2d141d | 0.1.dev1+gceec3eaf6.rocm644 | 3.3.0+git2b5c6ef9 | 2 | 150 | gaunernst/gemma-3-27b-it-qat-autoawq | 100 | 2 | 00:18:05.4545212 | 0.09 | 41.81 | 136.23 | tested on rd450x 256G inside k3s in lxc 15 | 20251005-221837 | 6.4.4 | 2.7.1a0+gite2d141d | 0.1.dev1+gceec3eaf6.rocm644 | 3.3.0+git2b5c6ef9 | 2 | 150 | gaunernst/gemma-3-27b-it-qat-autoawq | 75 | 1 | 00:27:37.0155547 | 0.05 | 21.18 | 67.61 | tested on rd450x 256G inside k3s in lxc 16 | 20251006-130816 | 6.3.3 | 2.7.1a0+gite2d141d | 0.1.dev1+gceec3eaf6.rocm633 | 3.3.0+git2b5c6ef9 | 2 | 150 | gaunernst/gemma-3-27b-it-qat-autoawq | 75 | 1 | 00:19:16.0905731 | 0.06 | 19.44 | 86.00 | tested on rd450x 256G inside k3s in lxc 17 | 20251006-132621 | 6.3.3 | 2.7.1a0+gite2d141d | 0.1.dev1+gceec3eaf6.rocm633 | 3.3.0+git2b5c6ef9 | 2 | 150 | gaunernst/gemma-3-27b-it-qat-autoawq | 100 | 2 | 00:17:29.1542989 | 0.10 | 41.52 | 139.21 | tested on rd450x 256G inside k3s in lxc 18 | 20251006-134724 | 6.3.3 | 2.7.1a0+gite2d141d | 0.1.dev1+gceec3eaf6.rocm633 | 3.3.0+git2b5c6ef9 | 2 | 150 | gaunernst/gemma-3-27b-it-qat-autoawq | 125 | 3 | 00:20:06.5979349 | 0.10 | 48.32 | 154.54 | tested on rd450x 256G inside k3s in lxc 19 | 20251006-140759 | 6.3.3 | 2.7.1a0+gite2d141d | 0.1.dev1+gceec3eaf6.rocm633 | 3.3.0+git2b5c6ef9 | 2 | 150 | gaunernst/gemma-3-27b-it-qat-autoawq | 150 | 4 | 00:19:38.5187576 | 0.13 | 57.69 | 188.37 | tested on rd450x 256G inside k3s in lxc 20 | 20251007-162504 | 6.4.4 | 2.7.1a0+gite2d141d | 0.1.dev1+gceec3eaf6.rocm644 | 3.3.0+git2b5c6ef9 | 2 | 225 | gaunernst/gemma-3-27b-it-qat-autoawq | 75 | 1 | 00:19:22.7926510 | 0.06 | 20.08 | 86.25 | tested on rd450x 256G inside k3s in lxc 21 | 20251007-171239 | 6.4.4 | 2.7.1a0+gite2d141d | 0.1.dev1+gceec3eaf6.rocm644 | 3.3.0+git2b5c6ef9 | 2 | 225 | gaunernst/gemma-3-27b-it-qat-autoawq | 100 | 2 | 00:16:02.6107616 | 0.10 | 44.64 | 151.11 | tested on rd450x 256G inside k3s in lxc 22 | 20251007-173243 | 6.4.4 | 2.7.1a0+gite2d141d | 0.1.dev1+gceec3eaf6.rocm644 | 3.3.0+git2b5c6ef9 | 2 | 225 | gaunernst/gemma-3-27b-it-qat-autoawq | 125 | 3 | 00:19:21.7160991 | 0.11 | 50.35 | 160.67 | tested on rd450x 256G inside k3s in lxc 23 | 20251007-175203 | 6.4.4 | 2.7.1a0+gite2d141d | 0.1.dev1+gceec3eaf6.rocm644 | 3.3.0+git2b5c6ef9 | 2 | 225 | gaunernst/gemma-3-27b-it-qat-autoawq | 150 | 4 | 
00:18:17.4322852 | 0.14 | 60.88 | 201.22 | tested on rd450x 256G inside k3s in lxc 24 | 20251012-111624 | 6.3.3 | 2.8.0a0+gitba56102 | 0.11.0+gfx906.rocm633 | 3.4.0+gite4f3b253 | 2 | 225 | gaunernst/gemma-3-27b-it-qat-autoawq | 75 | 1 | 00:21:05.0039699 | 0.06 | 16.07 | 76.89 | tested on rd450x 256G inside k3s in lxc 25 | 20251012-112842 | 6.3.3 | 2.8.0a0+gitba56102 | 0.11.0+gfx906.rocm633 | 3.4.0+gite4f3b253 | 2 | 225 | gaunernst/gemma-3-27b-it-qat-autoawq | 100 | 2 | 00:11:41.9394741 | 0.14 | 35.56 | 181.57 | tested on rd450x 256G inside k3s in lxc 26 | 20251012-114201 | 6.3.3 | 2.8.0a0+gitba56102 | 0.11.0+gfx906.rocm633 | 3.4.0+gite4f3b253 | 2 | 225 | gaunernst/gemma-3-27b-it-qat-autoawq | 125 | 3 | 00:12:43.8522526 | 0.16 | 41.50 | 209.29 | tested on rd450x 256G inside k3s in lxc 27 | 20251012-115501 | 6.3.3 | 2.8.0a0+gitba56102 | 0.11.0+gfx906.rocm633 | 3.4.0+gite4f3b253 | 2 | 225 | gaunernst/gemma-3-27b-it-qat-autoawq | 150 | 4 | 00:12:24.0047521 | 0.20 | 48.33 | 255.35 | tested on rd450x 256G inside k3s in lxc 28 | 20251012-121023 | 6.3.3 | 2.8.0a0+gitba56102 | 0.11.0+gfx906.rocm633 | 3.4.0+gite4f3b253 | 2 | 225 | gaunernst/gemma-3-27b-it-qat-autoawq | 200 | 8 | 00:13:31.6286220 | 0.25 | 54.78 | 308.18 | tested on rd450x 256G inside k3s in lxc 29 | 20251012-201017 | 6.3.3 | 2.8.0a0+gitba56102 | 0.11.0+gfx906.rocm633 | 3.4.0+gite4f3b253 | 2 | 225 | gaunernst/gemma-3-27b-it-qat-autoawq | 100 | 16 | 00:07:22.8462734 | 0.23 | 54.01 | 285.44 | tested on rd450x 256G inside k3s in lxc 30 | 20251013-140107 | 6.3.3 | 2.8.0a0+gitba56102 | 0.11.0+gfx906.rocm633 | 3.4.0+gite4f3b253 | 2 | 225 | gaunernst/gemma-3-27b-it-qat-autoawq | 75 | 1 | 00:20:08.6821350 | 0.06 | 15.66 | 79.31 | TdcLimitGfx=150 31 | 20251013-141355 | 6.3.3 | 2.8.0a0+gitba56102 | 0.11.0+gfx906.rocm633 | 3.4.0+gite4f3b253 | 2 | 225 | gaunernst/gemma-3-27b-it-qat-autoawq | 100 | 2 | 00:12:11.5680303 | 0.14 | 34.12 | 174.22 | TdcLimitGfx=150 32 | 20251013-142754 | 6.3.3 | 2.8.0a0+gitba56102 | 0.11.0+gfx906.rocm633 | 3.4.0+gite4f3b253 | 2 | 225 | gaunernst/gemma-3-27b-it-qat-autoawq | 125 | 3 | 00:13:21.9331666 | 0.16 | 39.53 | 199.35 | TdcLimitGfx=150 33 | 20251013-144145 | 6.3.3 | 2.8.0a0+gitba56102 | 0.11.0+gfx906.rocm633 | 3.4.0+gite4f3b253 | 2 | 225 | gaunernst/gemma-3-27b-it-qat-autoawq | 150 | 4 | 00:13:13.4142282 | 0.19 | 46.02 | 240.14 | TdcLimitGfx=150 34 | 35 | ## Run 36 | 37 | ## DockerHub images 38 | > ghcr.io registry is deprecated. 
Use https://hub.docker.com/r/mixa3607/vllm-gfx906 instead 39 | 40 | Vers compatibility table: 41 | | ROCm | PyTorch | vLLM | triton | model | text | images | misc | 42 | | ----- | ------- | ---- | ------ | ----- | ---- | ------ | ----| 43 | | 7.0.0 | ⛔ | ⛔ | ⛔ | ⛔ | ⛔ | ⛔ | can't build any torch ver with llvm20 | 44 | | 6.3.3 | 2.7.1 | 0.10.2 | 3.3.0 | gaunernst/gemma-3-27b-it-qat-autoawq | ✅️ | ✅️ | ok | 45 | | 6.4.4 | 2.7.1 | 0.10.2 | 3.3.0 | gaunernst/gemma-3-27b-it-qat-autoawq | ✅️ | ⛔ | requests with images throw exception | 46 | | 6.3.3 | 2.8.0 | 0.11.0 | 3.4.0 | gaunernst/gemma-3-27b-it-qat-autoawq | ✅️ | ✅️ | ok | 47 | | 6.4.4 | 2.8.0 | 0.11.0 | 3.4.0 | gaunernst/gemma-3-27b-it-qat-autoawq | ⛔ | ⛔ | all requests throw exception | 48 | 49 | Recommend use `docker.io/mixa3607/vllm-gfx906:0.11.0-rocm-6.3.3` 50 | 51 | ### Docker 52 | Basics from amd https://github.com/ROCm/vllm/blob/main/docs/deployment/docker.md 53 | 54 | ### Kubernetes 55 | ```yaml 56 | --- 57 | apiVersion: v1 58 | kind: PersistentVolumeClaim 59 | metadata: 60 | name: vllm-models 61 | namespace: ns-vllm 62 | spec: 63 | accessModes: 64 | - ReadWriteOnce 65 | volumeMode: Filesystem 66 | storageClassName: nfs-ssd-1 67 | resources: 68 | requests: 69 | storage: 64Gi 70 | --- 71 | apiVersion: apps/v1 72 | kind: Deployment 73 | metadata: 74 | name: vllm 75 | namespace: ns-vllm 76 | labels: 77 | app: vllm 78 | spec: 79 | strategy: 80 | type: Recreate 81 | replicas: 1 82 | selector: 83 | matchLabels: 84 | app: vllm 85 | template: 86 | metadata: 87 | labels: 88 | app: vllm 89 | spec: 90 | volumes: 91 | - name: models-volume 92 | persistentVolumeClaim: 93 | claimName: vllm-models 94 | - name: dev-kfd 95 | hostPath: 96 | path: /dev/kfd 97 | - name: dev-dri 98 | hostPath: 99 | path: /dev/dri 100 | - name: shm 101 | emptyDir: 102 | medium: Memory 103 | sizeLimit: 32G 104 | containers: 105 | - name: vllm 106 | image: docker.io/mixa3607/vllm-gfx906:ella 107 | imagePullPolicy: Always 108 | securityContext: 109 | privileged: true 110 | runAsNonRoot: false 111 | runAsGroup: 0 112 | runAsUser: 0 113 | seccompProfile: 114 | type: Unconfined 115 | capabilities: 116 | add: 117 | - SYS_PTRACE 118 | command: [ "/bin/bash", "-c" ] 119 | args: 120 | #- "while true; do sleep 1s; done;" 121 | - | 122 | export VLLM_USE_V1=1 123 | export HUGGING_FACE_HUB_TOKEN=hf_XXXXXXXXXXXXXXXXXXXXXXX 124 | exec vllm serve gaunernst/gemma-3-27b-it-qat-autoawq --tensor-parallel-size 2 --max-model-len 16K 125 | ports: 126 | - containerPort: 8000 127 | resources: 128 | limits: 129 | memory: 64G 130 | requests: 131 | cpu: "6" 132 | memory: 6G 133 | volumeMounts: 134 | - mountPath: /root/.cache/huggingface 135 | name: models-volume 136 | - name: shm 137 | mountPath: /dev/shm 138 | - name: dev-kfd 139 | mountPath: /dev/kfd 140 | - name: dev-dri 141 | mountPath: /dev/dri 142 | ``` 143 | 144 | ## Gemma3 AWQ patch for 0.11.0 145 | ```bash 146 | echo ' 147 | --- /usr/local/lib/python3.12/dist-packages/vllm/config/model.py 2025-10-12 13:22:53.000000000 +0000 148 | +++ /usr/local/lib/python3.12/dist-packages/vllm/config/model.py 2025-10-12 13:59:26.271776131 +0000 149 | @@ -1586,6 +1586,7 @@ 150 | "plamo2": "Numerical instability. Please use bfloat16 or float32 instead.", 151 | "glm4": "Numerical instability. 
Please use bfloat16 or float32 instead.", 152 | } 153 | +_FLOAT16_NOT_SUPPORTED_MODELS = {} 154 | 155 | 156 | def _is_valid_dtype(model_type: str, dtype: torch.dtype):' | patch -d/ -p0 157 | 158 | echo ' 159 | --- /usr/local/lib/python3.12/dist-packages/vllm/model_executor/models/gemma3.py 160 | +++ /usr/local/lib/python3.12/dist-packages/vllm/model_executor/models/gemma3.py 161 | @@ -329,6 +329,9 @@ class Gemma3DecoderLayer(nn.Module): 162 | residual: Optional[torch.Tensor], 163 | **kwargs, 164 | ) -> tuple[torch.Tensor, torch.Tensor]: 165 | + # https://github.com/huggingface/transformers/pull/36832 166 | + if hidden_states.dtype == torch.float16: 167 | + hidden_states = hidden_states.clamp_(-65504, 65504) 168 | if residual is None: 169 | residual = hidden_states 170 | hidden_states = self.input_layernorm(hidden_states) 171 | @@ -341,11 +344,15 @@ class Gemma3DecoderLayer(nn.Module): 172 | **kwargs, 173 | ) 174 | hidden_states = self.post_attention_layernorm(hidden_states) 175 | + if hidden_states.dtype == torch.float16: 176 | + hidden_states = hidden_states.clamp_(-65504, 65504) 177 | 178 | hidden_states, residual = self.pre_feedforward_layernorm( 179 | hidden_states, residual) 180 | hidden_states = self.mlp(hidden_states) 181 | hidden_states = self.post_feedforward_layernorm(hidden_states) 182 | + if hidden_states.dtype == torch.float16: 183 | + hidden_states = hidden_states.clamp_(-65504, 65504) 184 | return hidden_states, residual 185 | 186 | 187 | @@ -552,4 +559,4 @@ class Gemma3ForCausalLM(nn.Module, SupportsLoRA, SupportsPP): 188 | skip_prefixes=(["lm_head."] 189 | if self.config.tie_word_embeddings else None), 190 | ) 191 | - return loader.load_weights(weights) 192 | + return loader.load_weights(weights)' | patch -d/ -p0 193 | ``` 194 | 195 | 196 | ## Build 197 | See build vars in `./env.sh`. You also may use presetis `./preset.*.sh`. Exec `./build-and-push.vllm.sh`: 198 | ```bash 199 | $ . preset.0.11.0-rocm-6.3.3.sh 200 | $ ./build-and-push.vllm.sh 201 | ~/REPOS/mixa3607/llama.cpp-gfx906/rocm ~/REPOS/mixa3607/llama.cpp-gfx906/rocm 202 | ~/REPOS/mixa3607/llama.cpp-gfx906/rocm 203 | ~/REPOS/mixa3607/llama.cpp-gfx906/llama.cpp ~/REPOS/mixa3607/llama.cpp-gfx906/rocm 204 | ~/REPOS/mixa3607/llama.cpp-gfx906/rocm 205 | ~/REPOS/mixa3607/llama.cpp-gfx906/comfyui ~/REPOS/mixa3607/llama.cpp-gfx906/rocm 206 | ~/REPOS/mixa3607/llama.cpp-gfx906/rocm 207 | ~/REPOS/mixa3607/llama.cpp-gfx906/vllm ~/REPOS/mixa3607/llama.cpp-gfx906/rocm 208 | ~/REPOS/mixa3607/llama.cpp-gfx906/rocm 209 | #0 building with "remote" instance using remote driver 210 | #............... 
211 | #14 DONE 583.8s 212 | ``` 213 | -------------------------------------------------------------------------------- /vllm/submodules/.gitkeep: https://raw.githubusercontent.com/mixa3607/ML-gfx906/d95fce7ed5e14ec9cc7b801c668696194a929cda/vllm/submodules/.gitkeep -------------------------------------------------------------------------------- /vllm/vllm.Dockerfile: -------------------------------------------------------------------------------- 1 | ARG BASE_PYTORCH_IMAGE="docker.io/mixa3607/pytorch-gfx906:v2.7.1-rocm-6.3.3" 2 | ARG VLLM_REPO="https://github.com/nlzy/vllm-gfx906.git" 3 | ARG VLLM_BRANCH="main" 4 | ARG TRITON_REPO="https://github.com/nlzy/triton-gfx906.git" 5 | ARG TRITON_BRANCH="main" 6 | 7 | ############# Base image ############# 8 | FROM ${BASE_PYTORCH_IMAGE} AS rocm_base 9 | # Install amdsmi matching the ROCm version detected in the base image 10 | RUN pip install amdsmi==$(cat /opt/ROCM_VERSION_FULL) 11 | 12 | # Set environment variables 13 | ENV PYTORCH_ROCM_ARCH=$ROCM_ARCH 14 | ENV LD_LIBRARY_PATH=/opt/rocm/lib:/usr/local/lib: 15 | ENV RAY_EXPERIMENTAL_NOSET_ROCR_VISIBLE_DEVICES=1 16 | ENV TOKENIZERS_PARALLELISM=false 17 | ENV HIP_FORCE_DEV_KERNARG=1 18 | ENV VLLM_TARGET_DEVICE=rocm 19 | 20 | ############# Build base ############# 21 | FROM rocm_base AS build_base 22 | RUN pip3 install ninja 'cmake<4' wheel pybind11 setuptools_scm 23 | 24 | ############# Build triton ############# 25 | FROM build_base AS build_triton 26 | ARG TRITON_REPO 27 | ARG TRITON_BRANCH 28 | WORKDIR /app 29 | RUN git clone --depth 1 --recurse-submodules --shallow-submodules --jobs 4 --branch ${TRITON_BRANCH} ${TRITON_REPO} triton 30 | WORKDIR /app/triton 31 | # The "if" handles the source layout difference between triton 3.3.0 and 3.4.0 (setup.py moved out of python/) 32 | RUN if [ ! -f setup.py ]; then cd python; fi; python3 setup.py bdist_wheel --dist-dir=/dist 33 | RUN ls /dist 34 | 35 | ############# Build vllm ############# 36 | FROM build_base AS build_vllm 37 | ARG VLLM_REPO 38 | ARG VLLM_BRANCH 39 | WORKDIR /app 40 | RUN git clone --depth 1 --recurse-submodules --shallow-submodules --jobs 4 --branch ${VLLM_BRANCH} ${VLLM_REPO} vllm 41 | WORKDIR /app/vllm 42 | RUN pip install -r requirements/rocm.txt 43 | RUN python3 setup.py bdist_wheel --dist-dir=/dist 44 | RUN ls /dist 45 | 46 | ############# Install all ############# 47 | FROM rocm_base AS final 48 | WORKDIR /app/vllm 49 | RUN --mount=type=bind,from=build_vllm,src=/app/vllm/requirements,target=/app/vllm/requirements \ 50 | --mount=type=bind,from=build_vllm,src=/dist/,target=/dist_vllm \ 51 | --mount=type=bind,from=build_triton,src=/dist/,target=/dist_triton \ 52 | pip install /dist_triton/*.whl /dist_vllm/*.whl && \ 53 | pip install -r requirements/rocm.txt && \ 54 | pip install opentelemetry-sdk opentelemetry-api opentelemetry-semantic-conventions-ai opentelemetry-exporter-otlp && \ 55 | true 56 | 57 | CMD ["/bin/bash"]
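58 | 59 | # For reference, a minimal sketch of running the resulting image under plain Docker 60 | # (device passthrough and shm size mirror the Kubernetes manifest in vllm/readme.md; image tag assumed): 61 | #   docker run -it --rm --device=/dev/kfd --device=/dev/dri \ 62 | #     --shm-size 32g -p 8000:8000 \ 63 | #     docker.io/mixa3607/vllm-gfx906:0.11.0-rocm-6.3.3 \ 64 | #     vllm serve gaunernst/gemma-3-27b-it-qat-autoawq --tensor-parallel-size 2 --max-model-len 16K 65 | --------------------------------------------------------------------------------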