├── .gitattributes ├── .github └── workflows │ ├── ci.yml │ ├── cmd.yml │ ├── prune.yml │ └── sync.yml ├── .gitignore ├── .golangci.yaml ├── Dockerfile ├── LICENSE ├── Makefile ├── README.md ├── cache.go ├── cmd └── gguf-parser │ ├── README.md │ ├── go.mod │ ├── go.sum │ └── main.go ├── file.go ├── file_architecture.go ├── file_architecture_test.go ├── file_estimate__llamacpp.go ├── file_estimate__llamacpp_test.go ├── file_estimate__stablediffusioncpp.go ├── file_estimate__stablediffusioncpp_test.go ├── file_estimate_option.go ├── file_from_distro.go ├── file_from_remote.go ├── file_metadata.go ├── file_metadata_test.go ├── file_option.go ├── file_test.go ├── file_tokenizer.go ├── file_tokenizer_test.go ├── filename.go ├── filename_test.go ├── gen.go ├── gen.regression.go ├── gen.stringer.go ├── ggml.go ├── go.mod ├── go.sum ├── ollama_model.go ├── ollama_model_option.go ├── ollama_model_test.go ├── ollama_registry_authenticate.go ├── scalar.go ├── scalar_test.go ├── util ├── anyx │ └── any.go ├── bytex │ └── pool.go ├── funcx │ └── error.go ├── httpx │ ├── client.go │ ├── client_helper.go │ ├── client_options.go │ ├── file.go │ ├── file_options.go │ ├── proxy.go │ ├── resolver.go │ ├── transport.go │ └── transport_options.go ├── json │ ├── common.go │ ├── jsoniter.go │ └── stdjson.go ├── osx │ ├── env.go │ ├── file.go │ ├── file_mmap.go │ ├── file_mmap_js.go │ ├── file_mmap_unix.go │ ├── file_mmap_windows.go │ ├── file_mmap_windows_386.go │ ├── file_mmap_windows_non386.go │ └── homedir.go ├── ptr │ └── pointer.go ├── signalx │ ├── handler.go │ ├── handler_unix.go │ └── handler_windows.go ├── slicex │ └── search.go └── stringx │ ├── bytes.go │ ├── random.go │ ├── strings.go │ └── sum.go ├── zz_generated.diffusion_model_memory_usage.regression.go ├── zz_generated.ggmltype.stringer.go ├── zz_generated.gguffiletype.stringer.go ├── zz_generated.ggufmagic.stringer.go ├── zz_generated.ggufmetadatavaluetype.stringer.go └── zz_generated.ggufversion.stringer.go 
/.gitattributes: -------------------------------------------------------------------------------- 1 | * text=auto eol=lf 2 | 3 | **/go.sum linguist-generated=true 4 | **/zz_generated.*.go linguist-generated=true 5 | -------------------------------------------------------------------------------- /.github/workflows/ci.yml: -------------------------------------------------------------------------------- 1 | name: ci 2 | 3 | permissions: 4 | contents: read 5 | pull-requests: read 6 | actions: read 7 | 8 | defaults: 9 | run: 10 | shell: bash 11 | 12 | on: 13 | push: 14 | branches: 15 | - 'main' 16 | - 'branch-v*.*' 17 | paths-ignore: 18 | - "docs/**" 19 | - "**.md" 20 | - "**.mdx" 21 | - "**.png" 22 | - "**.jpg" 23 | - ".github/workflows/cmd.yml" 24 | - ".github/workflows/prune.yml" 25 | - ".github/workflows/sync.yml" 26 | pull_request: 27 | branches: 28 | - 'main' 29 | paths-ignore: 30 | - "docs/**" 31 | - "**.md" 32 | - "**.mdx" 33 | - "**.png" 34 | - "**.jpg" 35 | - ".github/workflows/cmd.yml" 36 | - ".github/workflows/prune.yml" 37 | - ".github/workflows/sync.yml" 38 | 39 | jobs: 40 | ci: 41 | timeout-minutes: 15 42 | runs-on: ubuntu-22.04 43 | steps: 44 | - name: Checkout 45 | uses: actions/checkout@v4 46 | with: 47 | fetch-depth: 1 48 | persist-credentials: false 49 | - name: Setup Go 50 | timeout-minutes: 15 51 | uses: actions/setup-go@v5 52 | with: 53 | go-version: "1.22.9" 54 | cache-dependency-path: | 55 | **/go.sum 56 | - name: Setup Toolbox 57 | timeout-minutes: 5 58 | uses: actions/cache@v4 59 | with: 60 | key: toolbox-${{ runner.os }} 61 | path: | 62 | ${{ github.workspace }}/.sbin 63 | - name: Make 64 | run: make ci 65 | env: 66 | LINT_DIRTY: "true" 67 | -------------------------------------------------------------------------------- /.github/workflows/cmd.yml: -------------------------------------------------------------------------------- 1 | name: cmd 2 | 3 | permissions: 4 | contents: write 5 | actions: read 6 | id-token: write 7 | 8 | defaults: 9 | 
run: 10 | shell: bash 11 | 12 | on: 13 | push: 14 | branches: 15 | - 'main' 16 | - 'branch-v*.*' 17 | paths-ignore: 18 | - "docs/**" 19 | - "**.md" 20 | - "**.mdx" 21 | - "**.png" 22 | - "**.jpg" 23 | - ".github/workflows/ci.yml" 24 | - ".github/workflows/prune.yml" 25 | - ".github/workflows/sync.yml" 26 | tags: 27 | - "v*.*.*" 28 | 29 | jobs: 30 | build: 31 | timeout-minutes: 15 32 | runs-on: ubuntu-22.04 33 | steps: 34 | - name: Checkout 35 | uses: actions/checkout@v4 36 | with: 37 | fetch-depth: 1 38 | persist-credentials: false 39 | - name: Setup Go 40 | timeout-minutes: 15 41 | uses: actions/setup-go@v5 42 | with: 43 | go-version: "1.22.9" 44 | cache-dependency-path: | 45 | cmd/**/go.sum 46 | - name: Make 47 | run: make build 48 | env: 49 | VERSION: "${{ github.ref_name }}" 50 | - name: Upload Artifact 51 | uses: actions/upload-artifact@v4 52 | with: 53 | include-hidden-files: true 54 | path: ${{ github.workspace }}/.dist/* 55 | - name: Release 56 | if: ${{ startsWith(github.ref, 'refs/tags/') }} 57 | uses: softprops/action-gh-release@v2 58 | with: 59 | fail_on_unmatched_files: true 60 | tag_name: "${{ github.ref_name }}" 61 | prerelease: ${{ contains(github.ref, 'rc') }} 62 | files: ${{ github.workspace }}/.dist/* 63 | 64 | publish: 65 | needs: 66 | - build 67 | permissions: 68 | contents: write 69 | actions: read 70 | id-token: write 71 | timeout-minutes: 15 72 | runs-on: ubuntu-22.04 73 | env: 74 | PACKAGE_REGISTRY: "gpustack" 75 | PACKAGE_IMAGE: "gguf-parser" 76 | steps: 77 | - name: Checkout 78 | uses: actions/checkout@v4 79 | with: 80 | fetch-depth: 1 81 | persist-credentials: false 82 | - name: Setup QEMU 83 | uses: docker/setup-qemu-action@v3 84 | with: 85 | image: tonistiigi/binfmt:qemu-v9.2.2 86 | platforms: "arm64" 87 | - name: Setup Buildx 88 | uses: docker/setup-buildx-action@v3 89 | - name: Login DockerHub 90 | uses: docker/login-action@v3 91 | with: 92 | username: ${{ secrets.CI_DOCKERHUB_USERNAME }} 93 | password: ${{ 
secrets.CI_DOCKERHUB_PASSWORD }} 94 | - name: Download Artifact 95 | uses: actions/download-artifact@v4 96 | with: 97 | path: ${{ github.workspace }}/.dist 98 | merge-multiple: true 99 | - name: Get Metadata 100 | id: metadata 101 | uses: docker/metadata-action@v5 102 | with: 103 | images: "${{ env.PACKAGE_REGISTRY }}/${{ env.PACKAGE_IMAGE }}" 104 | - name: Package 105 | uses: docker/build-push-action@v6 106 | with: 107 | push: true 108 | file: ${{ github.workspace }}/Dockerfile 109 | context: ${{ github.workspace }} 110 | platforms: "linux/amd64,linux/arm64" 111 | tags: ${{ steps.metadata.outputs.tags }} 112 | labels: ${{ steps.metadata.outputs.labels }} 113 | cache-from: | 114 | type=registry,ref=${{ env.PACKAGE_REGISTRY }}/${{ env.PACKAGE_IMAGE }}:build-cache 115 | cache-to: | 116 | type=registry,mode=max,compression=gzip,ref=${{ env.PACKAGE_REGISTRY }}/${{ env.PACKAGE_IMAGE }}:build-cache,ignore-error=true 117 | provenance: true 118 | sbom: true 119 | -------------------------------------------------------------------------------- /.github/workflows/prune.yml: -------------------------------------------------------------------------------- 1 | name: prune 2 | 3 | permissions: 4 | contents: write 5 | pull-requests: write 6 | actions: write 7 | issues: write 8 | 9 | defaults: 10 | run: 11 | shell: bash 12 | 13 | on: 14 | workflow_dispatch: 15 | inputs: 16 | prune: 17 | description: 'Prune all caches' 18 | required: false 19 | type: boolean 20 | default: false 21 | schedule: 22 | - cron: "0 0 * * *" # every day at 00:00 UTC 23 | 24 | jobs: 25 | close-stale-issues-and-prs: 26 | uses: gpustack/.github/.github/workflows/close-stale-issues-and-prs.yml@main 27 | 28 | clean-stale-caches: 29 | uses: gpustack/.github/.github/workflows/clean-stale-caches.yml@main 30 | with: 31 | # allow to prune all caches on demand 32 | prune: ${{ github.event_name != 'schedule' && inputs.prune || false }} 33 | 
-------------------------------------------------------------------------------- /.github/workflows/sync.yml: -------------------------------------------------------------------------------- 1 | name: sync 2 | 3 | permissions: 4 | contents: read 5 | pull-requests: read 6 | actions: read 7 | 8 | defaults: 9 | run: 10 | shell: bash 11 | 12 | on: 13 | workflow_dispatch: 14 | inputs: 15 | max_releases: 16 | description: "Maximum number of latest releases to sync" 17 | required: false 18 | default: 1 19 | type: number 20 | specific_release_tag: 21 | description: "Specific release tag to sync" 22 | required: false 23 | default: "" 24 | type: string 25 | dry_run: 26 | description: "Skip the actual sync" 27 | required: false 28 | default: false 29 | type: boolean 30 | schedule: 31 | - cron: "0 */12 * * *" # every 12 hours 32 | 33 | jobs: 34 | gitcode: 35 | runs-on: ubuntu-22.04 36 | timeout-minutes: 240 37 | steps: 38 | - name: Checkout 39 | uses: actions/checkout@v4 40 | with: 41 | fetch-depth: 0 42 | persist-credentials: false 43 | - name: Sync 44 | uses: gpustack/.github/.github/actions/mirror-release-gitcode@main 45 | with: 46 | gitcode-username: "${{ secrets.CI_GITCODE_USERNAME }}" 47 | gitcode-password: "${{ secrets.CI_GITCODE_PASSWORD }}" 48 | gitcode-token: "${{ secrets.CI_GITCODE_TOKEN }}" 49 | max-releases: "${{ inputs.max_releases && inputs.max_releases || '1' }}" 50 | specific-release-tag: "${{ inputs.specific_release_tag && inputs.specific_release_tag || '' }}" 51 | code-only: true 52 | dry-run: "${{ inputs.dry_run && inputs.dry_run || 'false' }}" 53 | 54 | gitee: 55 | runs-on: ubuntu-22.04 56 | timeout-minutes: 120 57 | steps: 58 | - name: Checkout 59 | uses: actions/checkout@v4 60 | with: 61 | fetch-depth: 0 62 | persist-credentials: false 63 | - name: Sync 64 | uses: gpustack/.github/.github/actions/mirror-release-gitee@main 65 | with: 66 | gitee-username: "${{ secrets.CI_GITEE_USERNAME }}" 67 | gitee-token: "${{ secrets.CI_GITEE_TOKEN }}" 68 | 
max-releases: "${{ inputs.max_releases && inputs.max_releases || '1' }}" 69 | specific-release-tag: "${{ inputs.specific_release_tag && inputs.specific_release_tag || '' }}" 70 | code-only: true 71 | dry-run: "${{ inputs.dry_run && inputs.dry_run || 'false' }}" 72 | 73 | tencent-cos: 74 | runs-on: ubuntu-22.04 75 | timeout-minutes: 120 76 | steps: 77 | - name: Sync 78 | uses: gpustack/.github/.github/actions/mirror-release-tencent-cos@main 79 | with: 80 | tencent-secret-id: "${{ secrets.CI_TECENTCOS_SECRET_ID }}" 81 | tencent-secret-key: "${{ secrets.CI_TECENTCOS_SECRET_KEY }}" 82 | tencent-cos-region: "ap-guangzhou" 83 | tencent-cos-bucket: "gpustack-1303613262" 84 | max-releases: "${{ inputs.max_releases && inputs.max_releases || '1' }}" 85 | specific-release-tag: "${{ inputs.specific_release_tag && inputs.specific_release_tag || '' }}" 86 | dry-run: "${{ inputs.dry_run && inputs.dry_run || 'false' }}" 87 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Files 2 | .DS_Store 3 | *.lock 4 | *.test 5 | *.out 6 | *.swp 7 | *.swo 8 | *.db 9 | *.exe 10 | *.exe~ 11 | *.dll 12 | *.so 13 | *.dylib 14 | *.log 15 | go.work 16 | go.work.* 17 | 18 | # Dirs 19 | /.idea 20 | /.vscode 21 | /.kube 22 | /.terraform 23 | /.vagrant 24 | /.bundle 25 | /.cache 26 | /.docker 27 | /.entc 28 | /.sbin 29 | /.dist 30 | /log 31 | /certs 32 | -------------------------------------------------------------------------------- /.golangci.yaml: -------------------------------------------------------------------------------- 1 | version: "1" 2 | 3 | run: 4 | timeout: 10m 5 | tests: true 6 | modules-download-mode: readonly 7 | go: "1.22" 8 | 9 | # output configuration options 10 | output: 11 | print-issued-lines: true 12 | print-linter-name: true 13 | path-prefix: "" 14 | sort-results: true 15 | 16 | linters: 17 | disable-all: true 18 | enable: 19 | - asciicheck 20 | - 
bidichk 21 | - decorder 22 | - durationcheck 23 | - errcheck 24 | - errname 25 | - errorlint 26 | - copyloopvar 27 | - godot 28 | - goconst 29 | - gocritic 30 | - gosimple 31 | - gosec 32 | - govet 33 | - gofumpt 34 | - gofmt 35 | - ineffassign 36 | - importas 37 | - lll 38 | - makezero 39 | - misspell 40 | - nakedret 41 | - nilerr 42 | - prealloc 43 | - predeclared 44 | - revive 45 | - staticcheck 46 | - stylecheck 47 | - typecheck 48 | - unconvert 49 | - unparam 50 | - unused 51 | - usestdlibvars 52 | - whitespace 53 | 54 | linters-settings: 55 | decorder: 56 | dec-order: 57 | - const 58 | - var 59 | - func 60 | disable-init-func-first-check: false 61 | disable-dec-order-check: true 62 | errorlint: 63 | errorf: true 64 | asserts: true 65 | comparison: true 66 | godot: 67 | scope: all 68 | exclude: 69 | - "(?i)^ FIXME:" 70 | - "(?i)^ TODO:" 71 | - "(?i)^ SPDX\\-License\\-Identifier:" 72 | - "(?i)^ +" 73 | period: true 74 | capital: false 75 | goconst: 76 | min-len: 3 77 | min-occurrences: 10 78 | gosimple: 79 | checks: [ "all" ] 80 | gosec: 81 | severity: "low" 82 | confidence: "low" 83 | excludes: 84 | - G101 85 | - G107 86 | - G112 87 | - G115 88 | - G404 89 | gofumpt: 90 | extra-rules: true 91 | gofmt: 92 | simplify: true 93 | rewrite-rules: 94 | - pattern: 'interface{}' 95 | replacement: 'any' 96 | - pattern: 'a[b:len(a)]' 97 | replacement: 'a[b:]' 98 | importas: 99 | no-unaliased: true 100 | lll: 101 | line-length: 150 102 | tab-width: 1 103 | makezero: 104 | always: false 105 | misspell: 106 | locale: US 107 | nakedret: 108 | max-func-lines: 60 109 | revive: 110 | rules: 111 | - name: var-naming 112 | disabled: true 113 | arguments: 114 | - [ "HTTP", "ID", "TLS", "TCP", "UDP", "API", "CA", "URL", "DNS" ] 115 | staticcheck: 116 | checks: [ "all", "-SA1019", "-SA2002", "-SA5008" ] 117 | stylecheck: 118 | checks: [ "all", "-ST1003" ] 119 | unparam: 120 | check-exported: false 121 | unused: 122 | field-writes-are-uses: true 123 | post-statements-are-reads: true 
124 | exported-fields-are-used: true 125 | parameters-are-used: true 126 | local-variables-are-used: true 127 | generated-is-used: true 128 | usestdlibvars: 129 | http-method: true 130 | http-status-code: true 131 | time-weekday: true 132 | time-month: true 133 | time-layout: true 134 | crypto-hash: true 135 | 136 | issues: 137 | uniq-by-line: true 138 | exclude-files: 139 | - "doc.go" 140 | - "zz_generated.*.go" 141 | - "gen.*.go" 142 | exclude-rules: 143 | - path: _test\.go 144 | linters: 145 | - errcheck 146 | - gosec 147 | - makezero 148 | - lll 149 | -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | FROM scratch 2 | ARG TARGETOS 3 | ARG TARGETARCH 4 | COPY --chmod=755 .dist/gguf-parser-${TARGETOS}-${TARGETARCH} /bin/gguf-parser 5 | ENTRYPOINT ["/bin/gguf-parser"] 6 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2024 gguf-parser-go authors 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | .SILENT: 2 | .DEFAULT_GOAL := ci 3 | 4 | SHELL := /bin/bash 5 | 6 | SRCDIR := $(patsubst %/,%,$(dir $(abspath $(lastword $(MAKEFILE_LIST))))) 7 | GOOS := $(shell go env GOOS) 8 | GOARCH := $(shell go env GOARCH) 9 | LINT_DIRTY ?= false 10 | VERSION ?= $(shell v=$$(git rev-parse --abbrev-ref HEAD 2>/dev/null) || v="unknown"; echo "$$v" | tr '[:upper:]' '[:lower:]') 11 | # VERSION defaults to the lower-cased current branch name, or "unknown" when git cannot resolve HEAD. 12 | DEPS_UPDATE ?= false 13 | deps: 14 | @echo "+++ $@ +++" 15 | 16 | cd $(SRCDIR) && go mod tidy && go mod download 17 | cd $(SRCDIR)/cmd/gguf-parser && go mod tidy && go mod download 18 | 19 | if [[ "$(DEPS_UPDATE)" == "true" ]]; then \ 20 | cd $(SRCDIR) && go get -u -v ./...; \ 21 | cd $(SRCDIR)/cmd/gguf-parser && go get -u -v ./...; \ 22 | fi 23 | 24 | @echo "--- $@ ---" 25 | 26 | generate: 27 | @echo "+++ $@ +++" 28 | 29 | cd $(SRCDIR) && go generate ./... 30 | cd $(SRCDIR)/cmd/gguf-parser && go generate ./... 31 | 32 | @echo "--- $@ ---" 33 | 34 | lint: 35 | @echo "+++ $@ +++" 36 | 37 | [[ -d "$(SRCDIR)/.sbin" ]] || mkdir -p "$(SRCDIR)/.sbin" 38 | 39 | [[ -f "$(SRCDIR)/.sbin/goimports-reviser" ]] || \ 40 | curl --retry 3 --retry-all-errors --retry-delay 3 -sSfL "https://github.com/incu6us/goimports-reviser/releases/download/v3.8.2/goimports-reviser_3.8.2_$(GOOS)_$(GOARCH).tar.gz" \ 41 | | tar -zxvf - --directory "$(SRCDIR)/.sbin" --no-same-owner --exclude ./LICENSE --exclude ./README.md && chmod +x "$(SRCDIR)/.sbin/goimports-reviser" 42 | cd $(SRCDIR) && \ 43 | go list -f "{{.Dir}}" ./... | xargs -I {} find {} -maxdepth 1 -type f -name '*.go' ! -name 'gen.*' ! 
-name 'zz_generated.*' \ 44 | | xargs -I {} "$(SRCDIR)/.sbin/goimports-reviser" -use-cache -imports-order=std,general,company,project,blanked,dotted -output=file {} 1>/dev/null 2>&1 45 | cd $(SRCDIR)/cmd/gguf-parser && \ 46 | go list -f "{{.Dir}}" ./... | xargs -I {} find {} -maxdepth 1 -type f -name '*.go' ! -name 'gen.*' ! -name 'zz_generated.*' \ 47 | | xargs -I {} "$(SRCDIR)/.sbin/goimports-reviser" -use-cache -imports-order=std,general,company,project,blanked,dotted -output=file {} 1>/dev/null 2>&1 48 | 49 | [[ -f "$(SRCDIR)/.sbin/golangci-lint" ]] || \ 50 | curl --retry 3 --retry-all-errors --retry-delay 3 -sSfL https://raw.githubusercontent.com/golangci/golangci-lint/master/install.sh \ 51 | | sh -s -- -b "$(SRCDIR)/.sbin" "v1.63.4" 52 | cd $(SRCDIR) && \ 53 | "$(SRCDIR)/.sbin/golangci-lint" run --fix ./... 54 | cd $(SRCDIR)/cmd/gguf-parser && \ 55 | "$(SRCDIR)/.sbin/golangci-lint" run --fix ./... 56 | 57 | if [[ "$(LINT_DIRTY)" == "true" ]]; then \ 58 | if [[ -n $$(git status --porcelain) ]]; then \ 59 | echo "Code tree is dirty."; \ 60 | git diff --exit-code; \ 61 | fi; \ 62 | fi 63 | 64 | @echo "--- $@ ---" 65 | 66 | test: 67 | @echo "+++ $@ +++" 68 | 69 | go test -v -failfast -race -cover -timeout=30m $(SRCDIR)/... 70 | 71 | @echo "--- $@ ---" 72 | 73 | benchmark: 74 | @echo "+++ $@ +++" 75 | 76 | go test -v -failfast -run="^Benchmark[A-Z]+" -bench=. -benchmem -timeout=30m $(SRCDIR)/... 
77 | 78 | @echo "--- $@ ---" 79 | # Cross-build gguf-parser for every OS/arch pair, then copy the host-native binary to .dist/gguf-parser. 80 | gguf-parser: 81 | [[ -d "$(SRCDIR)/.dist" ]] || mkdir -p "$(SRCDIR)/.dist" 82 | 83 | cd "$(SRCDIR)/cmd/gguf-parser" && for os in darwin linux windows; do \ 84 | tags="netgo"; \ 85 | if [[ $$os == "windows" ]]; then \ 86 | suffix=".exe"; \ 87 | tags="netcgo"; \ 88 | else \ 89 | suffix=""; \ 90 | fi; \ 91 | for arch in amd64 arm64; do \ 92 | echo "Building gguf-parser for $$os-$$arch $(VERSION)"; \ 93 | GOOS="$$os" GOARCH="$$arch" CGO_ENABLED=1 go build \ 94 | -trimpath \ 95 | -ldflags="-w -s -X main.Version=$(VERSION)" \ 96 | -tags="urfave_cli_no_docs $$tags" \ 97 | -o $(SRCDIR)/.dist/gguf-parser-$$os-$$arch$$suffix; \ 98 | done; \ 99 | if [[ $$os == "darwin" ]]; then \ 100 | [[ -d "$(SRCDIR)/.sbin" ]] || mkdir -p "$(SRCDIR)/.sbin"; \ 101 | [[ -f "$(SRCDIR)/.sbin/lipo" ]] || \ 102 | GOBIN="$(SRCDIR)/.sbin" go install github.com/konoui/lipo@v0.10.0; \ 103 | "$(SRCDIR)/.sbin/lipo" -create -output $(SRCDIR)/.dist/gguf-parser-darwin-universal $(SRCDIR)/.dist/gguf-parser-darwin-amd64 $(SRCDIR)/.dist/gguf-parser-darwin-arm64; \ 104 | fi;\ 105 | if [[ $$os == "$(GOOS)" ]] && [[ -f "$(SRCDIR)/.dist/gguf-parser-$$os-$(GOARCH)$$suffix" ]]; then \ 106 | cp -rf $(SRCDIR)/.dist/gguf-parser-$$os-$(GOARCH)$$suffix $(SRCDIR)/.dist/gguf-parser$$suffix; \ 107 | fi; \ 108 | done 109 | 110 | build: gguf-parser 111 | 112 | PACKAGE_PUBLISH ?= false 113 | PACKAGE_REGISTRY ?= "gpustack" 114 | PACKAGE_IMAGE ?= "gguf-parser" 115 | package: build 116 | @echo "+++ $@ +++" 117 | 118 | if [[ -z $$(command -v docker) ]]; then \ 119 | echo "Docker is not installed."; \ 120 | exit 1; \ 121 | fi; \ 122 | platform="linux/amd64,linux/arm64"; \ 123 | image="$(PACKAGE_IMAGE):$(VERSION)"; \ 124 | if [[ -n "$(PACKAGE_REGISTRY)" ]]; then \ 125 | image="$(PACKAGE_REGISTRY)/$$image"; \ 126 | fi; \ 127 | if [[ "$(PACKAGE_PUBLISH)" == "true" ]]; then \ 128 | if [[ -z $$(docker buildx inspect --builder "gguf-parser") ]]; then \ 129 | docker run --rm --privileged tonistiigi/binfmt:qemu-v9.2.2 --install 
$$platform; \ 130 | docker buildx create --name "gguf-parser" --driver "docker-container" --buildkitd-flags "--allow-insecure-entitlement security.insecure --allow-insecure-entitlement network.host" --bootstrap; \ 131 | fi; \ 132 | docker buildx build --progress=plain --platform=$$platform --builder="gguf-parser" --output="type=image,name=$$image,push=true" "$(SRCDIR)"; \ 133 | else \ 134 | platform="linux/$(GOARCH)"; \ 135 | docker buildx build --progress=plain --platform=$$platform --output="type=docker,name=$$image" "$(SRCDIR)"; \ 136 | fi 137 | 138 | @echo "--- $@ ---" 139 | 140 | ci: deps generate lint test build 141 | -------------------------------------------------------------------------------- /cache.go: -------------------------------------------------------------------------------- 1 | package gguf_parser 2 | 3 | import ( 4 | "errors" 5 | "fmt" 6 | "os" 7 | "path/filepath" 8 | "time" 9 | 10 | "github.com/gpustack/gguf-parser-go/util/json" 11 | "github.com/gpustack/gguf-parser-go/util/osx" 12 | "github.com/gpustack/gguf-parser-go/util/stringx" 13 | ) 14 | 15 | var ( 16 | ErrGGUFFileCacheDisabled = errors.New("GGUF file cache disabled") 17 | ErrGGUFFileCacheMissed = errors.New("GGUF file cache missed") 18 | ErrGGUFFileCacheCorrupted = errors.New("GGUF file cache corrupted") 19 | ) 20 | 21 | type GGUFFileCache string 22 | 23 | func (c GGUFFileCache) getKeyPath(key string) string { 24 | k := stringx.SumByFNV64a(key) 25 | p := filepath.Join(string(c), k[:1], k) 26 | return p 27 | } 28 | 29 | func (c GGUFFileCache) Get(key string, exp time.Duration) (*GGUFFile, error) { 30 | if c == "" { 31 | return nil, ErrGGUFFileCacheDisabled 32 | } 33 | 34 | if key == "" { 35 | return nil, ErrGGUFFileCacheMissed 36 | } 37 | 38 | p := c.getKeyPath(key) 39 | if !osx.Exists(p, func(stat os.FileInfo) bool { 40 | if !stat.Mode().IsRegular() { 41 | return false 42 | } 43 | return exp == 0 || time.Since(stat.ModTime()) < exp 44 | }) { 45 | return nil, ErrGGUFFileCacheMissed 46 | 
} 47 | 48 | var gf GGUFFile 49 | { 50 | bs, err := os.ReadFile(p) 51 | if err != nil { 52 | return nil, fmt.Errorf("GGUF file cache get: %w", err) 53 | } 54 | if err = json.Unmarshal(bs, &gf); err != nil { 55 | return nil, fmt.Errorf("GGUF file cache get: %w", err) 56 | } 57 | } 58 | if len(gf.Header.MetadataKV) == 0 || len(gf.TensorInfos) == 0 { 59 | _ = os.Remove(p) 60 | return nil, ErrGGUFFileCacheCorrupted 61 | } 62 | 63 | return &gf, nil 64 | } 65 | 66 | func (c GGUFFileCache) Put(key string, gf *GGUFFile) error { 67 | if c == "" { 68 | return ErrGGUFFileCacheDisabled 69 | } 70 | 71 | if key == "" || gf == nil { 72 | return nil 73 | } 74 | 75 | bs, err := json.Marshal(gf) 76 | if err != nil { 77 | return fmt.Errorf("GGUF file cache put: %w", err) 78 | } 79 | 80 | p := c.getKeyPath(key) 81 | if err = osx.WriteFile(p, bs, 0o600); err != nil { 82 | return fmt.Errorf("GGUF file cache put: %w", err) 83 | } 84 | return nil 85 | } 86 | 87 | func (c GGUFFileCache) Delete(key string) error { 88 | if c == "" { 89 | return ErrGGUFFileCacheDisabled 90 | } 91 | 92 | if key == "" { 93 | return ErrGGUFFileCacheMissed 94 | } 95 | 96 | p := c.getKeyPath(key) 97 | if !osx.ExistsFile(p) { 98 | return ErrGGUFFileCacheMissed 99 | } 100 | 101 | if err := os.Remove(p); err != nil { 102 | return fmt.Errorf("GGUF file cache delete: %w", err) 103 | } 104 | return nil 105 | } 106 | -------------------------------------------------------------------------------- /cmd/gguf-parser/go.mod: -------------------------------------------------------------------------------- 1 | module github.com/gpustack/gguf-parser-go/cmd/gguf-parser 2 | 3 | go 1.22.0 4 | 5 | toolchain go1.22.9 6 | 7 | replace github.com/gpustack/gguf-parser-go => ../../ 8 | 9 | require ( 10 | github.com/gpustack/gguf-parser-go v0.6.0 11 | github.com/jedib0t/go-pretty/v6 v6.6.1 12 | github.com/urfave/cli/v2 v2.27.5 13 | ) 14 | 15 | require ( 16 | github.com/cpuguy83/go-md2man/v2 v2.0.5 // indirect 17 | 
github.com/henvic/httpretty v0.1.4 // indirect 18 | github.com/json-iterator/go v1.1.12 // indirect 19 | github.com/mattn/go-runewidth v0.0.16 // indirect 20 | github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd // indirect 21 | github.com/modern-go/reflect2 v1.0.2 // indirect 22 | github.com/rivo/uniseg v0.4.7 // indirect 23 | github.com/rs/dnscache v0.0.0-20230804202142-fc85eb664529 // indirect 24 | github.com/russross/blackfriday/v2 v2.1.0 // indirect 25 | github.com/smallnest/ringbuffer v0.0.0-20241116012123-461381446e3d // indirect 26 | github.com/xrash/smetrics v0.0.0-20240521201337-686a1a2994c1 // indirect 27 | golang.org/x/crypto v0.29.0 // indirect 28 | golang.org/x/exp v0.0.0-20241108190413-2d47ceb2692f // indirect 29 | golang.org/x/mod v0.22.0 // indirect 30 | golang.org/x/sync v0.9.0 // indirect 31 | golang.org/x/sys v0.27.0 // indirect 32 | golang.org/x/tools v0.27.0 // indirect 33 | gonum.org/v1/gonum v0.15.1 // indirect 34 | ) 35 | -------------------------------------------------------------------------------- /cmd/gguf-parser/go.sum: -------------------------------------------------------------------------------- 1 | github.com/cpuguy83/go-md2man/v2 v2.0.5 h1:ZtcqGrnekaHpVLArFSe4HK5DoKx1T0rq2DwVB0alcyc= 2 | github.com/cpuguy83/go-md2man/v2 v2.0.5/go.mod h1:tgQtvFlXSQOSOSIRvRPT7W67SCa46tRHOmNcaadrF8o= 3 | github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= 4 | github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= 5 | github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= 6 | github.com/google/gofuzz v1.0.0/go.mod h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg= 7 | github.com/henvic/httpretty v0.1.4 h1:Jo7uwIRWVFxkqOnErcoYfH90o3ddQyVrSANeS4cxYmU= 8 | github.com/henvic/httpretty v0.1.4/go.mod h1:Dn60sQTZfbt2dYsdUSNsCljyF4AfdqnuJFDLJA1I4AM= 9 | github.com/jedib0t/go-pretty/v6 v6.6.1 h1:iJ65Xjb680rHcikRj6DSIbzCex2huitmc7bDtxYVWyc= 10 | 
github.com/jedib0t/go-pretty/v6 v6.6.1/go.mod h1:zbn98qrYlh95FIhwwsbIip0LYpwSG8SUOScs+v9/t0E= 11 | github.com/json-iterator/go v1.1.12 h1:PV8peI4a0ysnczrg+LtxykD8LfKY9ML6u2jnxaEnrnM= 12 | github.com/json-iterator/go v1.1.12/go.mod h1:e30LSqwooZae/UwlEbR2852Gd8hjQvJoHmT4TnhNGBo= 13 | github.com/mattn/go-runewidth v0.0.16 h1:E5ScNMtiwvlvB5paMFdw9p4kSQzbXFikJ5SQO6TULQc= 14 | github.com/mattn/go-runewidth v0.0.16/go.mod h1:Jdepj2loyihRzMpdS35Xk/zdY8IAYHsh153qUoGf23w= 15 | github.com/modern-go/concurrent v0.0.0-20180228061459-e0a39a4cb421/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q= 16 | github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd h1:TRLaZ9cD/w8PVh93nsPXa1VrQ6jlwL5oN8l14QlcNfg= 17 | github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q= 18 | github.com/modern-go/reflect2 v1.0.2 h1:xBagoLtFs94CBntxluKeaWgTMpvLxC4ur3nMaC9Gz0M= 19 | github.com/modern-go/reflect2 v1.0.2/go.mod h1:yWuevngMOJpCy52FWWMvUC8ws7m/LJsjYzDa0/r8luk= 20 | github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= 21 | github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= 22 | github.com/rivo/uniseg v0.2.0/go.mod h1:J6wj4VEh+S6ZtnVlnTBMWIodfgj8LQOQFoIToxlJtxc= 23 | github.com/rivo/uniseg v0.4.7 h1:WUdvkW8uEhrYfLC4ZzdpI2ztxP1I582+49Oc5Mq64VQ= 24 | github.com/rivo/uniseg v0.4.7/go.mod h1:FN3SvrM+Zdj16jyLfmOkMNblXMcoc8DfTHruCPUcx88= 25 | github.com/rs/dnscache v0.0.0-20230804202142-fc85eb664529 h1:18kd+8ZUlt/ARXhljq+14TwAoKa61q6dX8jtwOf6DH8= 26 | github.com/rs/dnscache v0.0.0-20230804202142-fc85eb664529/go.mod h1:qe5TWALJ8/a1Lqznoc5BDHpYX/8HU60Hm2AwRmqzxqA= 27 | github.com/russross/blackfriday/v2 v2.1.0 h1:JIOH55/0cWyOuilr9/qlrm0BSXldqnqwMsf35Ld67mk= 28 | github.com/russross/blackfriday/v2 v2.1.0/go.mod h1:+Rmxgy9KzJVeS9/2gXHxylqXiyQDYRxCVz55jmeOWTM= 29 | github.com/smallnest/ringbuffer v0.0.0-20241116012123-461381446e3d 
h1:3VwvTjiRPA7cqtgOWddEL+JrcijMlXUmj99c/6YyZoY= 30 | github.com/smallnest/ringbuffer v0.0.0-20241116012123-461381446e3d/go.mod h1:tAG61zBM1DYRaGIPloumExGvScf08oHuo0kFoOqdbT0= 31 | github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= 32 | github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI= 33 | github.com/stretchr/testify v1.9.0 h1:HtqpIVDClZ4nwg75+f6Lvsy/wHu+3BoSGCbBAcpTsTg= 34 | github.com/stretchr/testify v1.9.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY= 35 | github.com/urfave/cli/v2 v2.27.5 h1:WoHEJLdsXr6dDWoJgMq/CboDmyY/8HMMH1fTECbih+w= 36 | github.com/urfave/cli/v2 v2.27.5/go.mod h1:3Sevf16NykTbInEnD0yKkjDAeZDS0A6bzhBH5hrMvTQ= 37 | github.com/xrash/smetrics v0.0.0-20240521201337-686a1a2994c1 h1:gEOO8jv9F4OT7lGCjxCBTO/36wtF6j2nSip77qHd4x4= 38 | github.com/xrash/smetrics v0.0.0-20240521201337-686a1a2994c1/go.mod h1:Ohn+xnUBiLI6FVj/9LpzZWtj1/D6lUovWYBkxHVV3aM= 39 | golang.org/x/crypto v0.29.0 h1:L5SG1JTTXupVV3n6sUqMTeWbjAyfPwoda2DLX8J8FrQ= 40 | golang.org/x/crypto v0.29.0/go.mod h1:+F4F4N5hv6v38hfeYwTdx20oUvLLc+QfrE9Ax9HtgRg= 41 | golang.org/x/exp v0.0.0-20241108190413-2d47ceb2692f h1:XdNn9LlyWAhLVp6P/i8QYBW+hlyhrhei9uErw2B5GJo= 42 | golang.org/x/exp v0.0.0-20241108190413-2d47ceb2692f/go.mod h1:D5SMRVC3C2/4+F/DB1wZsLRnSNimn2Sp/NPsCrsv8ak= 43 | golang.org/x/mod v0.22.0 h1:D4nJWe9zXqHOmWqj4VMOJhvzj7bEZg4wEYa759z1pH4= 44 | golang.org/x/mod v0.22.0/go.mod h1:6SkKJ3Xj0I0BrPOZoBy3bdMptDDU9oJrpohJ3eWZ1fY= 45 | golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= 46 | golang.org/x/sync v0.9.0 h1:fEo0HyrW1GIgZdpbhCRO0PkJajUS5H9IFUztCgEo2jQ= 47 | golang.org/x/sync v0.9.0/go.mod h1:Czt+wKu1gCyEFDUtn0jG5QVvpJ6rzVqr5aXyt9drQfk= 48 | golang.org/x/sys v0.27.0 h1:wBqf8DvsY9Y/2P8gAfPDEYNuS30J4lPHJxXSb/nJZ+s= 49 | golang.org/x/sys v0.27.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= 50 | golang.org/x/term v0.26.0 
h1:WEQa6V3Gja/BhNxg540hBip/kkaYtRg3cxg4oXSw4AU= 51 | golang.org/x/term v0.26.0/go.mod h1:Si5m1o57C5nBNQo5z1iq+XDijt21BDBDp2bK0QI8e3E= 52 | golang.org/x/tools v0.27.0 h1:qEKojBykQkQ4EynWy4S8Weg69NumxKdn40Fce3uc/8o= 53 | golang.org/x/tools v0.27.0/go.mod h1:sUi0ZgbwW9ZPAq26Ekut+weQPR5eIM6GQLQ1Yjm1H0Q= 54 | gonum.org/v1/gonum v0.15.1 h1:FNy7N6OUZVUaWG9pTiD+jlhdQ3lMP+/LcTpJ6+a8sQ0= 55 | gonum.org/v1/gonum v0.15.1/go.mod h1:eZTZuRFrzu5pcyjN5wJhcIhnUdNijYxX1T2IcrOGY0o= 56 | gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= 57 | gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= 58 | -------------------------------------------------------------------------------- /file_architecture_test.go: -------------------------------------------------------------------------------- 1 | package gguf_parser 2 | 3 | import ( 4 | "context" 5 | "os" 6 | "testing" 7 | 8 | "github.com/davecgh/go-spew/spew" 9 | ) 10 | 11 | func TestGGUFFile_Architecture(t *testing.T) { 12 | ctx := context.Background() 13 | 14 | f, err := ParseGGUFFileFromHuggingFace( 15 | ctx, 16 | "NousResearch/Hermes-2-Pro-Mistral-7B-GGUF", 17 | "Hermes-2-Pro-Mistral-7B.Q5_K_M.gguf", 18 | SkipLargeMetadata()) 19 | if err != nil { 20 | t.Fatal(err) 21 | return 22 | } 23 | 24 | t.Log("\n", spew.Sdump(f.Architecture()), "\n") 25 | } 26 | 27 | func BenchmarkGGUFFile_Architecture(b *testing.B) { 28 | mp, ok := os.LookupEnv("TEST_MODEL_PATH") 29 | if !ok { 30 | b.Skip("TEST_MODEL_PATH is not set") 31 | return 32 | } 33 | 34 | f, err := ParseGGUFFile(mp, SkipLargeMetadata(), UseMMap()) 35 | if err != nil { 36 | b.Fatal(err) 37 | return 38 | } 39 | 40 | b.ReportAllocs() 41 | 42 | b.ResetTimer() 43 | for i := 0; i < b.N; i++ { 44 | _ = f.Architecture() 45 | } 46 | } 47 | -------------------------------------------------------------------------------- /file_estimate__llamacpp_test.go: -------------------------------------------------------------------------------- 1 | package 
gguf_parser 2 | 3 | import ( 4 | "context" 5 | "testing" 6 | 7 | "github.com/davecgh/go-spew/spew" 8 | ) 9 | 10 | func TestGGUFFile_EstimateLLaMACppRun(t *testing.T) { 11 | ctx := context.Background() 12 | 13 | cases := []struct { 14 | name string 15 | given *GGUFFile 16 | }{ 17 | { 18 | name: "mixtral 7B", 19 | given: func() *GGUFFile { 20 | f, err := ParseGGUFFileFromHuggingFace( 21 | ctx, 22 | "NousResearch/Hermes-2-Pro-Mistral-7B-GGUF", 23 | "Hermes-2-Pro-Mistral-7B.Q5_K_M.gguf", 24 | SkipLargeMetadata()) 25 | if err != nil { 26 | t.Fatal(err) 27 | } 28 | return f 29 | }(), 30 | }, 31 | { 32 | name: "mixtral 8x7B", 33 | given: func() *GGUFFile { 34 | f, err := ParseGGUFFileFromHuggingFace( 35 | ctx, 36 | "NousResearch/Nous-Hermes-2-Mixtral-8x7B-DPO-GGUF", 37 | "Nous-Hermes-2-Mixtral-8x7B-DPO.Q5_K_M.gguf", 38 | SkipLargeMetadata()) 39 | if err != nil { 40 | t.Fatal(err) 41 | } 42 | return f 43 | }(), 44 | }, 45 | { 46 | name: "wizardlm 8x22B", 47 | given: func() *GGUFFile { 48 | f, err := ParseGGUFFileFromHuggingFace( 49 | ctx, 50 | "MaziyarPanahi/WizardLM-2-8x22B-GGUF", 51 | "WizardLM-2-8x22B.IQ1_M.gguf", 52 | SkipLargeMetadata()) 53 | if err != nil { 54 | t.Fatal(err) 55 | } 56 | return f 57 | }(), 58 | }, 59 | } 60 | for _, tc := range cases { 61 | t.Run(tc.name, func(t *testing.T) { 62 | f := tc.given 63 | t.Log("\n", spew.Sdump(f.EstimateLLaMACppRun()), "\n") 64 | }) 65 | } 66 | } 67 | 68 | func TestGGUFFile_EstimateLLaMACppRun_ContextSize(t *testing.T) { 69 | ctx := context.Background() 70 | 71 | f, err := ParseGGUFFileFromHuggingFace( 72 | ctx, 73 | "NousResearch/Hermes-2-Pro-Mistral-7B-GGUF", 74 | "Hermes-2-Pro-Mistral-7B.Q5_K_M.gguf", 75 | SkipLargeMetadata()) 76 | if err != nil { 77 | t.Fatal(err) 78 | return 79 | } 80 | 81 | cases := []struct { 82 | name string 83 | opts []GGUFRunEstimateOption 84 | }{ 85 | {"1024(fp16)", []GGUFRunEstimateOption{WithLLaMACppContextSize(1024)}}, 86 | {"1024(fp32)", []GGUFRunEstimateOption{WithLLaMACppContextSize(1024), 
WithLLaMACppCacheKeyType(GGMLTypeF32), WithLLaMACppCacheValueType(GGMLTypeF32)}}, 87 | {"4096(fp16)", []GGUFRunEstimateOption{WithLLaMACppContextSize(4096)}}, 88 | {"4096(fp32)", []GGUFRunEstimateOption{WithLLaMACppContextSize(4096), WithLLaMACppCacheKeyType(GGMLTypeF32), WithLLaMACppCacheValueType(GGMLTypeF32)}}, 89 | } 90 | for _, tc := range cases { 91 | t.Run(tc.name, func(t *testing.T) { 92 | t.Log("\n", spew.Sdump(f.EstimateLLaMACppRun(tc.opts...)), "\n") 93 | }) 94 | } 95 | } 96 | 97 | func TestGGUFFile_EstimateLLaMACppRun_OffloadLayers(t *testing.T) { 98 | ctx := context.Background() 99 | 100 | f, err := ParseGGUFFileFromHuggingFace( 101 | ctx, 102 | "NousResearch/Hermes-2-Pro-Mistral-7B-GGUF", 103 | "Hermes-2-Pro-Mistral-7B.Q5_K_M.gguf", 104 | SkipLargeMetadata()) 105 | if err != nil { 106 | t.Fatal(err) 107 | return 108 | } 109 | 110 | cases := []struct { 111 | name string 112 | opts []GGUFRunEstimateOption 113 | }{ 114 | {"offload 0 layer", []GGUFRunEstimateOption{WithLLaMACppOffloadLayers(0)}}, 115 | {"offload 1 layer", []GGUFRunEstimateOption{WithLLaMACppOffloadLayers(1)}}, 116 | {"offload 10 layers", []GGUFRunEstimateOption{WithLLaMACppOffloadLayers(10)}}, 117 | {"offload all layers", []GGUFRunEstimateOption{}}, 118 | {"offload 33 layers", []GGUFRunEstimateOption{WithLLaMACppOffloadLayers(33)}}, // exceeds the number of layers 119 | } 120 | for _, tc := range cases { 121 | t.Run(tc.name, func(t *testing.T) { 122 | t.Log("\n", spew.Sdump(f.EstimateLLaMACppRun(tc.opts...)), "\n") 123 | }) 124 | } 125 | } 126 | -------------------------------------------------------------------------------- /file_estimate__stablediffusioncpp_test.go: -------------------------------------------------------------------------------- 1 | package gguf_parser 2 | 3 | import ( 4 | "context" 5 | "testing" 6 | 7 | "github.com/davecgh/go-spew/spew" 8 | ) 9 | 10 | func TestGGUFFile_EstimateStableDiffusionRun(t *testing.T) { 11 | ctx := context.Background() 12 | 13 | cases := 
[]struct { 14 | name string 15 | given *GGUFFile 16 | }{ 17 | { 18 | name: "sd 1.5", 19 | given: func() *GGUFFile { 20 | f, err := ParseGGUFFileFromHuggingFace( 21 | ctx, 22 | "gpustack/stable-diffusion-v1-5-GGUF", 23 | "stable-diffusion-v1-5-FP16.gguf", 24 | SkipLargeMetadata()) 25 | if err != nil { 26 | t.Fatal(err) 27 | } 28 | return f 29 | }(), 30 | }, 31 | { 32 | name: "sd 2.1", 33 | given: func() *GGUFFile { 34 | f, err := ParseGGUFFileFromHuggingFace( 35 | ctx, 36 | "gpustack/stable-diffusion-v2-1-GGUF", 37 | "stable-diffusion-v2-1-Q8_0.gguf", 38 | SkipLargeMetadata()) 39 | if err != nil { 40 | t.Fatal(err) 41 | } 42 | return f 43 | }(), 44 | }, 45 | { 46 | name: "sd xl", 47 | given: func() *GGUFFile { 48 | f, err := ParseGGUFFileFromHuggingFace( 49 | ctx, 50 | "gpustack/stable-diffusion-xl-base-1.0-GGUF", 51 | "stable-diffusion-xl-base-1.0-FP16.gguf", 52 | SkipLargeMetadata()) 53 | if err != nil { 54 | t.Fatal(err) 55 | } 56 | return f 57 | }(), 58 | }, 59 | { 60 | name: "sd 3.5 large", 61 | given: func() *GGUFFile { 62 | f, err := ParseGGUFFileFromHuggingFace( 63 | ctx, 64 | "gpustack/stable-diffusion-v3-5-large-GGUF", 65 | "stable-diffusion-v3-5-large-Q4_0.gguf", 66 | SkipLargeMetadata()) 67 | if err != nil { 68 | t.Fatal(err) 69 | } 70 | return f 71 | }(), 72 | }, 73 | { 74 | name: "flux .1 dev", 75 | given: func() *GGUFFile { 76 | f, err := ParseGGUFFileFromHuggingFace( 77 | ctx, 78 | "gpustack/FLUX.1-dev-GGUF", 79 | "FLUX.1-dev-Q4_0.gguf", 80 | SkipLargeMetadata()) 81 | if err != nil { 82 | t.Fatal(err) 83 | } 84 | return f 85 | }(), 86 | }, 87 | } 88 | for _, tc := range cases { 89 | t.Run(tc.name, func(t *testing.T) { 90 | f := tc.given 91 | t.Log("\n", spew.Sdump(f.EstimateStableDiffusionCppRun()), "\n") 92 | }) 93 | } 94 | } 95 | -------------------------------------------------------------------------------- /file_estimate_option.go: -------------------------------------------------------------------------------- 1 | package gguf_parser 2 | 3 | 
import (
	"slices"

	"github.com/gpustack/gguf-parser-go/util/ptr"
)

type (
	// _GGUFRunEstimateOptions collects everything the GGUFRunEstimateOption
	// functions in this file can configure.
	//
	// Pointer fields stay nil until the matching option assigns them,
	// which lets a consumer tell "not configured" apart from a zero value.
	// NOTE(review): the defaults used for nil fields live in the estimators,
	// which are not part of this file - confirm there.
	_GGUFRunEstimateOptions struct {
		// Common
		ParallelSize        *int32                // (decoding sequences) parallel size, see WithParallelSize.
		FlashAttention      bool                  // set by WithFlashAttention.
		MainGPUIndex        int                   // main device index, see WithMainGPUIndex.
		RPCServers          []string              // set by WithRPCServers.
		TensorSplitFraction []float64             // cumulative fractions ending in 1, see WithTensorSplitFraction.
		DeviceMetrics       []GGUFRunDeviceMetric // set by WithDeviceMetrics.

		// LLaMACpp (LMC) specific
		LMCContextSize        *int32
		LMCInMaxContextSize   bool // limit the context size to the maximum, see WithinLLaMACppMaxContextSize.
		LMCLogicalBatchSize   *int32
		LMCPhysicalBatchSize  *int32
		LMCVisualMaxImageSize *uint32
		LMCMaxProjectedCache  *uint32
		LMCCacheKeyType       *GGMLType // restricted to _GGUFEstimateCacheTypeAllowList by its option.
		LMCCacheValueType     *GGMLType // restricted to _GGUFEstimateCacheTypeAllowList by its option.
		LMCOffloadKVCache     *bool
		LMCOffloadLayers      *uint64
		LMCSplitMode          LLaMACppSplitMode
		LMCFullSizeSWACache   bool
		LMCProjector          *LLaMACppRunEstimate
		LMCDrafter            *LLaMACppRunEstimate
		LMCAdapters           []LLaMACppRunEstimate

		// StableDiffusionCpp (SDC) specific
		SDCOffloadLayers                *uint64
		SDCBatchCount                   *int32
		SDCHeight                       *uint32
		SDCWidth                        *uint32
		SDCOffloadConditioner           *bool
		SDCOffloadAutoencoder           *bool
		SDCAutoencoderTiling            *bool
		SDCFreeComputeMemoryImmediately *bool
		SDCUpscaler                     *StableDiffusionCppRunEstimate
		SDCControlNet                   *StableDiffusionCppRunEstimate
	}

	// GGUFRunDeviceMetric holds the device metric for the estimate.
	//
	// When the device represents a CPU,
	// FLOPS refers to the floating-point operations per second of that CPU,
	// while UpBandwidth indicates the bandwidth of the RAM (since SRAM is typically small and cannot hold all weights,
	// the RAM here refers to the bandwidth of DRAM,
	// unless the device's SRAM can accommodate the corresponding model weights).
	//
	// When the device represents a GPU,
	// FLOPS refers to the floating-point operations per second of that GPU,
	// while UpBandwidth indicates the bandwidth of the VRAM.
	//
	// When the device represents a specific node,
	// FLOPS depends on whether a CPU or GPU is being used,
	// while UpBandwidth refers to the network bandwidth between nodes.
	GGUFRunDeviceMetric struct {
		// FLOPS is the floating-point operations per second of the device.
		FLOPS FLOPSScalar
		// UpBandwidth is the bandwidth of the device to transmit data to calculate,
		// unit is Bps (bytes per second).
		UpBandwidth BytesPerSecondScalar
		// DownBandwidth is the bandwidth of the device to transmit calculated result to next layer,
		// unit is Bps (bytes per second).
		DownBandwidth BytesPerSecondScalar
	}

	// GGUFRunEstimateOption is a functional option for the estimate:
	// it mutates the _GGUFRunEstimateOptions it is given.
	GGUFRunEstimateOption func(*_GGUFRunEstimateOptions)
)

// WithParallelSize sets the (decoding sequences) parallel size for the estimate.
//
// A size that is zero or negative is ignored and leaves the option unset.
func WithParallelSize(size int32) GGUFRunEstimateOption {
	return func(o *_GGUFRunEstimateOptions) {
		if size <= 0 {
			return
		}
		o.ParallelSize = &size
	}
}

// WithFlashAttention sets the flash attention flag.
func WithFlashAttention() GGUFRunEstimateOption {
	return func(o *_GGUFRunEstimateOptions) {
		o.FlashAttention = true
	}
}

// WithMainGPUIndex sets the main device for the estimate.
//
// When split mode is LLaMACppSplitModeNone, the main device is the only device.
// When split mode is LLaMACppSplitModeRow, the main device handles the intermediate results and KV.
//
// WithMainGPUIndex must be combined with WithTensorSplitFraction.
// The index is stored as given; it is not range-checked here.
func WithMainGPUIndex(di int) GGUFRunEstimateOption {
	return func(o *_GGUFRunEstimateOptions) {
		o.MainGPUIndex = di
	}
}

// WithRPCServers sets the RPC servers for the estimate.
109 | func WithRPCServers(srvs []string) GGUFRunEstimateOption { 110 | return func(o *_GGUFRunEstimateOptions) { 111 | if len(srvs) == 0 { 112 | return 113 | } 114 | o.RPCServers = srvs 115 | } 116 | } 117 | 118 | // WithTensorSplitFraction sets the tensor split cumulative fractions for the estimate. 119 | // 120 | // WithTensorSplitFraction accepts a variadic number of fractions, 121 | // all fraction values must be in the range of [0, 1], 122 | // and the last fraction must be 1. 123 | // 124 | // For example, WithTensorSplitFraction(0.2, 0.4, 0.6, 0.8, 1) will split the tensor into five parts with 20% each. 125 | func WithTensorSplitFraction(fractions []float64) GGUFRunEstimateOption { 126 | return func(o *_GGUFRunEstimateOptions) { 127 | if len(fractions) == 0 { 128 | return 129 | } 130 | for _, f := range fractions { 131 | if f < 0 || f > 1 { 132 | return 133 | } 134 | } 135 | if fractions[len(fractions)-1] != 1 { 136 | return 137 | } 138 | o.TensorSplitFraction = fractions 139 | } 140 | } 141 | 142 | // WithDeviceMetrics sets the device metrics for the estimate. 143 | func WithDeviceMetrics(metrics []GGUFRunDeviceMetric) GGUFRunEstimateOption { 144 | return func(o *_GGUFRunEstimateOptions) { 145 | if len(metrics) == 0 { 146 | return 147 | } 148 | o.DeviceMetrics = metrics 149 | } 150 | } 151 | 152 | // WithLLaMACppContextSize sets the context size for the estimate. 153 | func WithLLaMACppContextSize(size int32) GGUFRunEstimateOption { 154 | return func(o *_GGUFRunEstimateOptions) { 155 | if size <= 0 { 156 | return 157 | } 158 | o.LMCContextSize = &size 159 | } 160 | } 161 | 162 | // WithinLLaMACppMaxContextSize limits the context size to the maximum, 163 | // if the context size is over the maximum. 164 | func WithinLLaMACppMaxContextSize() GGUFRunEstimateOption { 165 | return func(o *_GGUFRunEstimateOptions) { 166 | o.LMCInMaxContextSize = true 167 | } 168 | } 169 | 170 | // WithLLaMACppLogicalBatchSize sets the logical batch size for the estimate. 
171 | func WithLLaMACppLogicalBatchSize(size int32) GGUFRunEstimateOption { 172 | return func(o *_GGUFRunEstimateOptions) { 173 | if size <= 0 { 174 | return 175 | } 176 | o.LMCLogicalBatchSize = &size 177 | } 178 | } 179 | 180 | // WithLLaMACppPhysicalBatchSize sets the physical batch size for the estimate. 181 | func WithLLaMACppPhysicalBatchSize(size int32) GGUFRunEstimateOption { 182 | return func(o *_GGUFRunEstimateOptions) { 183 | if size <= 0 { 184 | return 185 | } 186 | o.LMCPhysicalBatchSize = &size 187 | } 188 | } 189 | 190 | // _GGUFEstimateCacheTypeAllowList is the allow list of cache key and value types. 191 | var _GGUFEstimateCacheTypeAllowList = []GGMLType{ 192 | GGMLTypeF32, 193 | GGMLTypeF16, 194 | GGMLTypeBF16, 195 | GGMLTypeQ8_0, 196 | GGMLTypeQ4_0, GGMLTypeQ4_1, 197 | GGMLTypeIQ4_NL, 198 | GGMLTypeQ5_0, GGMLTypeQ5_1, 199 | } 200 | 201 | // WithLLaMACppCacheKeyType sets the cache key type for the estimate. 202 | func WithLLaMACppCacheKeyType(t GGMLType) GGUFRunEstimateOption { 203 | return func(o *_GGUFRunEstimateOptions) { 204 | if slices.Contains(_GGUFEstimateCacheTypeAllowList, t) { 205 | o.LMCCacheKeyType = &t 206 | } 207 | } 208 | } 209 | 210 | // WithLLaMACppCacheValueType sets the cache value type for the estimate. 211 | func WithLLaMACppCacheValueType(t GGMLType) GGUFRunEstimateOption { 212 | return func(o *_GGUFRunEstimateOptions) { 213 | if slices.Contains(_GGUFEstimateCacheTypeAllowList, t) { 214 | o.LMCCacheValueType = &t 215 | } 216 | } 217 | } 218 | 219 | // WithoutLLaMACppOffloadKVCache disables offloading the KV cache. 220 | func WithoutLLaMACppOffloadKVCache() GGUFRunEstimateOption { 221 | return func(o *_GGUFRunEstimateOptions) { 222 | o.LMCOffloadKVCache = ptr.To(false) 223 | } 224 | } 225 | 226 | // WithLLaMACppOffloadLayers sets the number of layers to offload. 
227 | func WithLLaMACppOffloadLayers(layers uint64) GGUFRunEstimateOption { 228 | return func(o *_GGUFRunEstimateOptions) { 229 | o.LMCOffloadLayers = &layers 230 | } 231 | } 232 | 233 | // LLaMACppSplitMode is the split mode for LLaMACpp. 234 | type LLaMACppSplitMode uint 235 | 236 | const ( 237 | LLaMACppSplitModeLayer LLaMACppSplitMode = iota 238 | LLaMACppSplitModeRow 239 | LLaMACppSplitModeNone 240 | _LLAMACppSplitModeMax 241 | ) 242 | 243 | // WithLLaMACppSplitMode sets the split mode for the estimate. 244 | func WithLLaMACppSplitMode(mode LLaMACppSplitMode) GGUFRunEstimateOption { 245 | return func(o *_GGUFRunEstimateOptions) { 246 | if mode < _LLAMACppSplitModeMax { 247 | o.LMCSplitMode = mode 248 | } 249 | } 250 | } 251 | 252 | // WithLLaMACppFullSizeSWACache enables full size sliding window attention cache. 253 | func WithLLaMACppFullSizeSWACache() GGUFRunEstimateOption { 254 | return func(o *_GGUFRunEstimateOptions) { 255 | o.LMCFullSizeSWACache = true 256 | } 257 | } 258 | 259 | // WithLLaMACppVisualMaxImageSize sets the visual maximum image size input for the estimate. 260 | func WithLLaMACppVisualMaxImageSize(size uint32) GGUFRunEstimateOption { 261 | return func(o *_GGUFRunEstimateOptions) { 262 | if size == 0 { 263 | return 264 | } 265 | o.LMCVisualMaxImageSize = &size 266 | } 267 | } 268 | 269 | // WithLLaMACppMaxProjectedCache sets the maximum projected embedding cache for the estimate. 270 | func WithLLaMACppMaxProjectedCache(cacheSize uint32) GGUFRunEstimateOption { 271 | return func(o *_GGUFRunEstimateOptions) { 272 | if cacheSize == 0 { 273 | return 274 | } 275 | o.LMCMaxProjectedCache = ptr.To(cacheSize) 276 | } 277 | } 278 | 279 | // WithLLaMACppDrafter sets the drafter estimate usage. 280 | func WithLLaMACppDrafter(dft *LLaMACppRunEstimate) GGUFRunEstimateOption { 281 | return func(o *_GGUFRunEstimateOptions) { 282 | o.LMCDrafter = dft 283 | } 284 | } 285 | 286 | // WithLLaMACppProjector sets the multimodal projector estimate usage. 
287 | func WithLLaMACppProjector(prj *LLaMACppRunEstimate) GGUFRunEstimateOption { 288 | return func(o *_GGUFRunEstimateOptions) { 289 | o.LMCProjector = prj 290 | } 291 | } 292 | 293 | // WithLLaMACppAdapters sets the adapters estimate usage. 294 | func WithLLaMACppAdapters(adp []LLaMACppRunEstimate) GGUFRunEstimateOption { 295 | return func(o *_GGUFRunEstimateOptions) { 296 | if len(adp) == 0 { 297 | return 298 | } 299 | o.LMCAdapters = adp 300 | } 301 | } 302 | 303 | // WithStableDiffusionCppOffloadLayers sets the number of layers to offload. 304 | func WithStableDiffusionCppOffloadLayers(layers uint64) GGUFRunEstimateOption { 305 | return func(o *_GGUFRunEstimateOptions) { 306 | o.SDCOffloadLayers = &layers 307 | } 308 | } 309 | 310 | // WithStableDiffusionCppBatchCount sets the batch count for the estimate. 311 | func WithStableDiffusionCppBatchCount(count int32) GGUFRunEstimateOption { 312 | return func(o *_GGUFRunEstimateOptions) { 313 | if count == 0 { 314 | return 315 | } 316 | o.SDCBatchCount = ptr.To(count) 317 | } 318 | } 319 | 320 | // WithStableDiffusionCppHeight sets the image height for the estimate. 321 | func WithStableDiffusionCppHeight(height uint32) GGUFRunEstimateOption { 322 | return func(o *_GGUFRunEstimateOptions) { 323 | if height == 0 { 324 | return 325 | } 326 | o.SDCHeight = ptr.To(height) 327 | } 328 | } 329 | 330 | // WithStableDiffusionCppWidth sets the image width for the estimate. 331 | func WithStableDiffusionCppWidth(width uint32) GGUFRunEstimateOption { 332 | return func(o *_GGUFRunEstimateOptions) { 333 | if width == 0 { 334 | return 335 | } 336 | o.SDCWidth = ptr.To(width) 337 | } 338 | } 339 | 340 | // WithoutStableDiffusionCppOffloadConditioner disables offloading the conditioner(text encoder). 
341 | func WithoutStableDiffusionCppOffloadConditioner() GGUFRunEstimateOption { 342 | return func(o *_GGUFRunEstimateOptions) { 343 | o.SDCOffloadConditioner = ptr.To(false) 344 | } 345 | } 346 | 347 | // WithoutStableDiffusionCppOffloadAutoencoder disables offloading the autoencoder. 348 | func WithoutStableDiffusionCppOffloadAutoencoder() GGUFRunEstimateOption { 349 | return func(o *_GGUFRunEstimateOptions) { 350 | o.SDCOffloadAutoencoder = ptr.To(false) 351 | } 352 | } 353 | 354 | // WithStableDiffusionCppAutoencoderTiling enables tiling for the autoencoder. 355 | func WithStableDiffusionCppAutoencoderTiling() GGUFRunEstimateOption { 356 | return func(o *_GGUFRunEstimateOptions) { 357 | o.SDCAutoencoderTiling = ptr.To(true) 358 | } 359 | } 360 | 361 | // WithStableDiffusionCppFreeComputeMemoryImmediately enables freeing compute memory immediately. 362 | func WithStableDiffusionCppFreeComputeMemoryImmediately() GGUFRunEstimateOption { 363 | return func(o *_GGUFRunEstimateOptions) { 364 | o.SDCFreeComputeMemoryImmediately = ptr.To(true) 365 | } 366 | } 367 | 368 | // WithStableDiffusionCppUpscaler sets the upscaler estimate usage. 369 | func WithStableDiffusionCppUpscaler(ups *StableDiffusionCppRunEstimate) GGUFRunEstimateOption { 370 | return func(o *_GGUFRunEstimateOptions) { 371 | o.SDCUpscaler = ups 372 | } 373 | } 374 | 375 | // WithStableDiffusionCppControlNet sets the control net estimate usage. 
376 | func WithStableDiffusionCppControlNet(cn *StableDiffusionCppRunEstimate) GGUFRunEstimateOption { 377 | return func(o *_GGUFRunEstimateOptions) { 378 | o.SDCControlNet = cn 379 | } 380 | } 381 | -------------------------------------------------------------------------------- /file_from_distro.go: -------------------------------------------------------------------------------- 1 | package gguf_parser 2 | 3 | import ( 4 | "context" 5 | "errors" 6 | "fmt" 7 | "net/http" 8 | "path/filepath" 9 | "time" 10 | 11 | "github.com/gpustack/gguf-parser-go/util/httpx" 12 | ) 13 | 14 | var ( 15 | ErrOllamaInvalidModel = errors.New("ollama invalid model") 16 | ErrOllamaBaseLayerNotFound = errors.New("ollama base layer not found") 17 | ) 18 | 19 | // ParseGGUFFileFromOllama parses a GGUF file from Ollama model's base layer, 20 | // and returns a GGUFFile, or an error if any. 21 | func ParseGGUFFileFromOllama(ctx context.Context, model string, opts ...GGUFReadOption) (*GGUFFile, error) { 22 | return ParseGGUFFileFromOllamaModel(ctx, ParseOllamaModel(model), opts...) 23 | } 24 | 25 | // ParseGGUFFileFromOllamaModel is similar to ParseGGUFFileFromOllama, 26 | // but inputs an OllamaModel instead of a string. 27 | // 28 | // The given OllamaModel will be completed(fetching MediaType, Config and Layers) after calling this function. 29 | func ParseGGUFFileFromOllamaModel(ctx context.Context, model *OllamaModel, opts ...GGUFReadOption) (gf *GGUFFile, err error) { 30 | if model == nil { 31 | return nil, ErrOllamaInvalidModel 32 | } 33 | 34 | opts = append(opts[:len(opts):len(opts)], SkipRangeDownloadDetection()) 35 | 36 | var o _GGUFReadOptions 37 | for _, opt := range opts { 38 | opt(&o) 39 | } 40 | 41 | // Cache. 42 | { 43 | if o.CachePath != "" { 44 | o.CachePath = filepath.Join(o.CachePath, "distro", "ollama") 45 | } 46 | c := GGUFFileCache(o.CachePath) 47 | 48 | // Get from cache. 
49 | if gf, err = c.Get(model.String(), o.CacheExpiration); err == nil { 50 | return gf, nil 51 | } 52 | 53 | // Put to cache. 54 | defer func() { 55 | if err == nil { 56 | _ = c.Put(model.String(), gf) 57 | } 58 | }() 59 | } 60 | 61 | var cli *http.Client 62 | cli = httpx.Client( 63 | httpx.ClientOptions(). 64 | WithUserAgent(OllamaUserAgent()). 65 | If(o.Debug, func(x *httpx.ClientOption) *httpx.ClientOption { 66 | return x.WithDebug() 67 | }). 68 | WithTimeout(0). 69 | WithRetryBackoff(1*time.Second, 5*time.Second, 10). 70 | WithRetryIf(func(resp *http.Response, err error) bool { 71 | return httpx.DefaultRetry(resp, err) || OllamaRegistryAuthorizeRetry(resp, cli) 72 | }). 73 | WithTransport( 74 | httpx.TransportOptions(). 75 | WithoutKeepalive(). 76 | TimeoutForDial(10*time.Second). 77 | TimeoutForTLSHandshake(5*time.Second). 78 | If(o.SkipProxy, func(x *httpx.TransportOption) *httpx.TransportOption { 79 | return x.WithoutProxy() 80 | }). 81 | If(o.ProxyURL != nil, func(x *httpx.TransportOption) *httpx.TransportOption { 82 | return x.WithProxy(http.ProxyURL(o.ProxyURL)) 83 | }). 84 | If(o.SkipTLSVerification, func(x *httpx.TransportOption) *httpx.TransportOption { 85 | return x.WithoutInsecureVerify() 86 | }). 
87 | If(o.SkipDNSCache, func(x *httpx.TransportOption) *httpx.TransportOption { 88 | return x.WithoutDNSCache() 89 | }))) 90 | 91 | var ml OllamaModelLayer 92 | { 93 | err := model.Complete(ctx, cli) 94 | if err != nil { 95 | return nil, fmt.Errorf("complete ollama model: %w", err) 96 | } 97 | 98 | var ok bool 99 | ml, ok = model.GetLayer("application/vnd.ollama.image.model") 100 | if !ok { 101 | return nil, ErrOllamaBaseLayerNotFound 102 | } 103 | } 104 | 105 | return parseGGUFFileFromRemote(ctx, cli, ml.BlobURL().String(), o) 106 | } 107 | -------------------------------------------------------------------------------- /file_from_remote.go: -------------------------------------------------------------------------------- 1 | package gguf_parser 2 | 3 | import ( 4 | "context" 5 | "fmt" 6 | "io" 7 | "net/http" 8 | "path/filepath" 9 | "strings" 10 | "time" 11 | 12 | "github.com/gpustack/gguf-parser-go/util/httpx" 13 | "github.com/gpustack/gguf-parser-go/util/osx" 14 | ) 15 | 16 | // ParseGGUFFileFromHuggingFace parses a GGUF file from Hugging Face(https://huggingface.co/), 17 | // and returns a GGUFFile, or an error if any. 18 | func ParseGGUFFileFromHuggingFace(ctx context.Context, repo, file string, opts ...GGUFReadOption) (*GGUFFile, error) { 19 | ep := osx.Getenv("HF_ENDPOINT", "https://huggingface.co") 20 | return ParseGGUFFileRemote(ctx, fmt.Sprintf("%s/%s/resolve/main/%s", ep, repo, file), opts...) 21 | } 22 | 23 | // ParseGGUFFileFromModelScope parses a GGUF file from Model Scope(https://modelscope.cn/), 24 | // and returns a GGUFFile, or an error if any. 25 | func ParseGGUFFileFromModelScope(ctx context.Context, repo, file string, opts ...GGUFReadOption) (*GGUFFile, error) { 26 | ep := osx.Getenv("MS_ENDPOINT", "https://modelscope.cn") 27 | opts = append(opts[:len(opts):len(opts)], SkipRangeDownloadDetection()) 28 | return ParseGGUFFileRemote(ctx, fmt.Sprintf("%s/models/%s/resolve/master/%s", ep, repo, file), opts...) 
29 | } 30 | 31 | // ParseGGUFFileRemote parses a GGUF file from a remote BlobURL, 32 | // and returns a GGUFFile, or an error if any. 33 | func ParseGGUFFileRemote(ctx context.Context, url string, opts ...GGUFReadOption) (gf *GGUFFile, err error) { 34 | var o _GGUFReadOptions 35 | for _, opt := range opts { 36 | opt(&o) 37 | } 38 | 39 | // Cache. 40 | { 41 | if o.CachePath != "" { 42 | o.CachePath = filepath.Join(o.CachePath, "remote") 43 | if o.SkipLargeMetadata { 44 | o.CachePath = filepath.Join(o.CachePath, "brief") 45 | } 46 | } 47 | c := GGUFFileCache(o.CachePath) 48 | 49 | // Get from cache. 50 | if gf, err = c.Get(url, o.CacheExpiration); err == nil { 51 | return gf, nil 52 | } 53 | 54 | // Put to cache. 55 | defer func() { 56 | if err == nil { 57 | _ = c.Put(url, gf) 58 | } 59 | }() 60 | } 61 | 62 | cli := httpx.Client( 63 | httpx.ClientOptions(). 64 | WithUserAgent("gguf-parser-go"). 65 | If(o.Debug, 66 | func(x *httpx.ClientOption) *httpx.ClientOption { 67 | return x.WithDebug() 68 | }, 69 | ). 70 | If(o.BearerAuthToken != "", 71 | func(x *httpx.ClientOption) *httpx.ClientOption { 72 | return x.WithBearerAuth(o.BearerAuthToken) 73 | }, 74 | ). 75 | WithTimeout(0). 76 | WithTransport( 77 | httpx.TransportOptions(). 78 | WithoutKeepalive(). 79 | TimeoutForDial(5*time.Second). 80 | TimeoutForTLSHandshake(5*time.Second). 81 | TimeoutForResponseHeader(5*time.Second). 82 | If(o.SkipProxy, 83 | func(x *httpx.TransportOption) *httpx.TransportOption { 84 | return x.WithoutProxy() 85 | }, 86 | ). 87 | If(o.ProxyURL != nil, 88 | func(x *httpx.TransportOption) *httpx.TransportOption { 89 | return x.WithProxy(http.ProxyURL(o.ProxyURL)) 90 | }, 91 | ). 92 | If(o.SkipTLSVerification || !strings.HasPrefix(url, "https://"), 93 | func(x *httpx.TransportOption) *httpx.TransportOption { 94 | return x.WithoutInsecureVerify() 95 | }, 96 | ). 
97 | If(o.SkipDNSCache, 98 | func(x *httpx.TransportOption) *httpx.TransportOption { 99 | return x.WithoutDNSCache() 100 | }, 101 | ), 102 | ), 103 | ) 104 | 105 | return parseGGUFFileFromRemote(ctx, cli, url, o) 106 | } 107 | 108 | func parseGGUFFileFromRemote(ctx context.Context, cli *http.Client, url string, o _GGUFReadOptions) (*GGUFFile, error) { 109 | var urls []string 110 | { 111 | rs := CompleteShardGGUFFilename(url) 112 | if rs != nil { 113 | urls = rs 114 | } else { 115 | urls = []string{url} 116 | } 117 | } 118 | 119 | fs := make([]_GGUFFileReadSeeker, 0, len(urls)) 120 | defer func() { 121 | for i := range fs { 122 | osx.Close(fs[i]) 123 | } 124 | }() 125 | 126 | for i := range urls { 127 | req, err := httpx.NewGetRequestWithContext(ctx, urls[i]) 128 | if err != nil { 129 | return nil, fmt.Errorf("new request: %w", err) 130 | } 131 | 132 | sf, err := httpx.OpenSeekerFile(cli, req, 133 | httpx.SeekerFileOptions(). 134 | WithBufferSize(o.BufferSize). 135 | If(o.SkipRangeDownloadDetection, 136 | func(x *httpx.SeekerFileOption) *httpx.SeekerFileOption { 137 | return x.WithoutRangeDownloadDetect() 138 | }, 139 | ), 140 | ) 141 | if err != nil { 142 | return nil, fmt.Errorf("open http file: %w", err) 143 | } 144 | 145 | fs = append(fs, _GGUFFileReadSeeker{ 146 | Closer: sf, 147 | ReadSeeker: io.NewSectionReader(sf, 0, sf.Len()), 148 | Size: sf.Len(), 149 | }) 150 | } 151 | 152 | return parseGGUFFile(fs, o) 153 | } 154 | -------------------------------------------------------------------------------- /file_metadata_test.go: -------------------------------------------------------------------------------- 1 | package gguf_parser 2 | 3 | import ( 4 | "context" 5 | "fmt" 6 | "os" 7 | "strings" 8 | "testing" 9 | 10 | "github.com/davecgh/go-spew/spew" 11 | "github.com/stretchr/testify/assert" 12 | ) 13 | 14 | func TestGGUFFile_Metadata(t *testing.T) { 15 | ctx := context.Background() 16 | 17 | f, err := ParseGGUFFileFromHuggingFace( 18 | ctx, 19 | 
"NousResearch/Hermes-2-Pro-Mistral-7B-GGUF", 20 | "Hermes-2-Pro-Mistral-7B.Q5_K_M.gguf", 21 | SkipLargeMetadata()) 22 | if err != nil { 23 | t.Fatal(err) 24 | return 25 | } 26 | 27 | t.Log("\n", spew.Sdump(f.Metadata()), "\n") 28 | } 29 | 30 | func BenchmarkGGUFFile_Metadata(b *testing.B) { 31 | mp, ok := os.LookupEnv("TEST_MODEL_PATH") 32 | if !ok { 33 | b.Skip("TEST_MODEL_PATH is not set") 34 | return 35 | } 36 | 37 | f, err := ParseGGUFFile(mp, UseMMap(), SkipLargeMetadata()) 38 | if err != nil { 39 | b.Fatal(err) 40 | return 41 | } 42 | 43 | b.ReportAllocs() 44 | 45 | b.ResetTimer() 46 | for i := 0; i < b.N; i++ { 47 | _ = f.Metadata() 48 | } 49 | } 50 | 51 | func TestGGUFFile_extractFileType(t *testing.T) { 52 | ctx := context.Background() 53 | 54 | repo := "NousResearch/Hermes-2-Pro-Mistral-7B-GGUF" 55 | cases := []string{ 56 | "Q2_K", 57 | "Q3_K_L", 58 | "Q3_K_M", 59 | "Q3_K_S", 60 | "Q4_0", 61 | "Q4_K_M", 62 | "Q4_K_S", 63 | "Q5_0", 64 | "Q5_K_M", 65 | "Q5_K_S", 66 | "Q6_K", 67 | "Q8_0", 68 | } 69 | for _, tc := range cases { 70 | t.Run(repo+"/"+tc, func(t *testing.T) { 71 | gf, err := ParseGGUFFileFromHuggingFace( 72 | ctx, 73 | repo, 74 | fmt.Sprintf("Hermes-2-Pro-Mistral-7B.%s.gguf", tc)) 75 | if err != nil { 76 | t.Fatal(err) 77 | return 78 | } 79 | md := gf.Metadata() 80 | ft, ftd := gf.extractFileType(md.Architecture) 81 | assert.Equal(t, md.FileType.String(), ft.String(), tc+" file type should be equal") 82 | assert.Equal(t, tc, ftd, tc+" file type descriptor should be equal") 83 | }) 84 | } 85 | 86 | repo = "Mungert/Qwen2.5-VL-3B-Instruct-GGUF" 87 | cases = []string{ 88 | "IQ2_M", 89 | "IQ2_S", 90 | "IQ2_XS", 91 | "IQ2_XXS", 92 | "IQ3_M", 93 | "IQ3_S", 94 | "IQ3_XS", 95 | "IQ3_XXS", 96 | "IQ4_NL", 97 | "IQ4_XS", 98 | "Q2_K_L", 99 | "Q2_K_S", 100 | "Q3_K_L", 101 | "Q3_K_M", 102 | "Q3_K_S", 103 | "Q4_0", 104 | "Q4_0_L", 105 | "Q4_1", 106 | "Q4_1_L", 107 | "Q4_K_L", 108 | "Q4_K_M", 109 | "Q4_K_S", 110 | "Q5_0", 111 | "Q5_0_L", 112 | "Q5_K_L", 113 | 
"Q5_K_M", 114 | "Q5_K_S", 115 | "Q6_K_L", 116 | // "Q6_K_M", == "Q6_K" 117 | "Q8_0", 118 | } 119 | for _, tc := range cases { 120 | t.Run(repo+"/"+tc, func(t *testing.T) { 121 | gf, err := ParseGGUFFileFromHuggingFace( 122 | ctx, 123 | repo, 124 | fmt.Sprintf("Qwen2.5-VL-3B-Instruct-%s.gguf", strings.ToLower(tc))) 125 | if err != nil { 126 | t.Fatal(err) 127 | return 128 | } 129 | md := gf.Metadata() 130 | ft, ftd := gf.extractFileType(md.Architecture) 131 | assert.Equal(t, md.FileType.String(), ft.String(), tc+" file type should be equal") 132 | assert.Equal(t, tc, ftd, tc+" file type descriptor should be equal") 133 | }) 134 | } 135 | 136 | repo = "unsloth/DeepSeek-R1-Distill-Qwen-1.5B-GGUF" 137 | cases = []string{ 138 | "BF16", 139 | "Q2_K", 140 | "Q2_K_L", 141 | "Q3_K_M", 142 | "Q4_K_M", 143 | "Q5_K_M", 144 | "Q6_K", 145 | "Q8_0", 146 | } 147 | for _, tc := range cases { 148 | t.Run(repo+"/"+tc, func(t *testing.T) { 149 | gf, err := ParseGGUFFileFromHuggingFace( 150 | ctx, 151 | repo, 152 | fmt.Sprintf("DeepSeek-R1-Distill-Qwen-1.5B-%s.gguf", tc)) 153 | if err != nil { 154 | t.Fatal(err) 155 | return 156 | } 157 | md := gf.Metadata() 158 | ft, ftd := gf.extractFileType(md.Architecture) 159 | assert.Equal(t, md.FileType.String(), ft.String(), tc+" file type should be equal") 160 | assert.Equal(t, tc, ftd, tc+" file type descriptor should be equal") 161 | }) 162 | } 163 | 164 | repo = "unsloth/DeepSeek-R1-Distill-Qwen-1.5B-GGUF" 165 | cases = []string{ 166 | "IQ1_M", 167 | "IQ1_S", 168 | "IQ2_M", 169 | "IQ2_XXS", 170 | "IQ3_XXS", 171 | "IQ4_XS", 172 | // "Q2_K_XL" == "Q2_K_L" 173 | // "Q3_K_XL" == "Q3_K_M" 174 | // "Q4_K_XL" == "Q4_K_M" 175 | } 176 | for _, tc := range cases { 177 | t.Run(repo+"/"+tc, func(t *testing.T) { 178 | gf, err := ParseGGUFFileFromHuggingFace( 179 | ctx, 180 | repo, 181 | fmt.Sprintf("DeepSeek-R1-Distill-Qwen-1.5B-UD-%s.gguf", tc)) 182 | if err != nil { 183 | t.Fatal(err) 184 | return 185 | } 186 | md := gf.Metadata() 187 | ft, ftd := 
gf.extractFileType(md.Architecture) 188 | assert.Equal(t, md.FileType.String(), ft.String(), tc+" file type should be equal") 189 | assert.Equal(t, tc, ftd, tc+" file type descriptor should be equal") 190 | }) 191 | } 192 | } 193 | -------------------------------------------------------------------------------- /file_option.go: -------------------------------------------------------------------------------- 1 | package gguf_parser 2 | 3 | import ( 4 | "net/url" 5 | "path/filepath" 6 | "runtime" 7 | "strings" 8 | "time" 9 | 10 | "github.com/gpustack/gguf-parser-go/util/osx" 11 | ) 12 | 13 | type ( 14 | _GGUFReadOptions struct { 15 | Debug bool 16 | SkipLargeMetadata bool 17 | 18 | // Local. 19 | MMap bool 20 | 21 | // Remote. 22 | BearerAuthToken string 23 | ProxyURL *url.URL 24 | SkipProxy bool 25 | SkipTLSVerification bool 26 | SkipDNSCache bool 27 | BufferSize int 28 | SkipRangeDownloadDetection bool 29 | CachePath string 30 | CacheExpiration time.Duration 31 | } 32 | 33 | // GGUFReadOption is the option for reading the file. 34 | GGUFReadOption func(o *_GGUFReadOptions) 35 | ) 36 | 37 | // UseDebug uses debug mode to read the file. 38 | func UseDebug() GGUFReadOption { 39 | return func(o *_GGUFReadOptions) { 40 | o.Debug = true 41 | } 42 | } 43 | 44 | // SkipLargeMetadata skips reading large GGUFMetadataKV items, 45 | // which are not necessary for most cases. 46 | func SkipLargeMetadata() GGUFReadOption { 47 | return func(o *_GGUFReadOptions) { 48 | o.SkipLargeMetadata = true 49 | } 50 | } 51 | 52 | // UseMMap uses mmap to read the local file. 53 | func UseMMap() GGUFReadOption { 54 | return func(o *_GGUFReadOptions) { 55 | o.MMap = true 56 | } 57 | } 58 | 59 | // UseBearerAuth uses the given token as a bearer auth when reading from remote. 60 | func UseBearerAuth(token string) GGUFReadOption { 61 | return func(o *_GGUFReadOptions) { 62 | o.BearerAuthToken = token 63 | } 64 | } 65 | 66 | // UseProxy uses the given url as a proxy when reading from remote. 
67 | func UseProxy(url *url.URL) GGUFReadOption { 68 | return func(o *_GGUFReadOptions) { 69 | o.ProxyURL = url 70 | } 71 | } 72 | 73 | // SkipProxy skips the proxy when reading from remote. 74 | func SkipProxy() GGUFReadOption { 75 | return func(o *_GGUFReadOptions) { 76 | o.SkipProxy = true 77 | } 78 | } 79 | 80 | // SkipTLSVerification skips the TLS verification when reading from remote. 81 | func SkipTLSVerification() GGUFReadOption { 82 | return func(o *_GGUFReadOptions) { 83 | o.SkipTLSVerification = true 84 | } 85 | } 86 | 87 | // SkipDNSCache skips the DNS cache when reading from remote. 88 | func SkipDNSCache() GGUFReadOption { 89 | return func(o *_GGUFReadOptions) { 90 | o.SkipDNSCache = true 91 | } 92 | } 93 | 94 | // UseBufferSize sets the buffer size when reading from remote. 95 | func UseBufferSize(size int) GGUFReadOption { 96 | const minSize = 32 * 1024 97 | if size < minSize { 98 | size = minSize 99 | } 100 | return func(o *_GGUFReadOptions) { 101 | o.BufferSize = size 102 | } 103 | } 104 | 105 | // SkipRangeDownloadDetection skips the range download detection when reading from remote. 106 | func SkipRangeDownloadDetection() GGUFReadOption { 107 | return func(o *_GGUFReadOptions) { 108 | o.SkipRangeDownloadDetection = true 109 | } 110 | } 111 | 112 | // UseCache caches the remote reading result. 113 | func UseCache() GGUFReadOption { 114 | return func(o *_GGUFReadOptions) { 115 | o.CachePath = DefaultCachePath() 116 | o.CacheExpiration = 24 * time.Hour 117 | } 118 | } 119 | 120 | // SkipCache skips the cache when reading from remote. 121 | func SkipCache() GGUFReadOption { 122 | return func(o *_GGUFReadOptions) { 123 | o.CachePath = "" 124 | o.CacheExpiration = 0 125 | } 126 | } 127 | 128 | // DefaultCachePath returns the default cache path. 
129 | func DefaultCachePath() string { 130 | cd := filepath.Join(osx.UserHomeDir(), ".cache") 131 | if runtime.GOOS == "windows" { 132 | cd = osx.Getenv("APPDATA", cd) 133 | } 134 | return filepath.Join(cd, "gguf-parser") 135 | } 136 | 137 | // UseCachePath uses the given path to cache the remote reading result. 138 | func UseCachePath(path string) GGUFReadOption { 139 | path = strings.TrimSpace(filepath.Clean(osx.InlineTilde(path))) 140 | return func(o *_GGUFReadOptions) { 141 | if path == "" { 142 | return 143 | } 144 | o.CachePath = path 145 | } 146 | } 147 | 148 | // UseCacheExpiration uses the given expiration to cache the remote reading result. 149 | // 150 | // Disable cache expiration by setting it to 0. 151 | func UseCacheExpiration(expiration time.Duration) GGUFReadOption { 152 | if expiration < 0 { 153 | expiration = 0 154 | } 155 | return func(o *_GGUFReadOptions) { 156 | o.CacheExpiration = expiration 157 | } 158 | } 159 | -------------------------------------------------------------------------------- /file_test.go: -------------------------------------------------------------------------------- 1 | package gguf_parser 2 | 3 | import ( 4 | "context" 5 | "os" 6 | "testing" 7 | "time" 8 | 9 | "github.com/davecgh/go-spew/spew" 10 | ) 11 | 12 | func TestParseGGUFFile(t *testing.T) { 13 | mp, ok := os.LookupEnv("TEST_MODEL_PATH") 14 | if !ok { 15 | t.Skip("TEST_MODEL_PATH is not set") 16 | return 17 | } 18 | 19 | // Slow read. 20 | { 21 | f, err := ParseGGUFFile(mp) 22 | if err != nil { 23 | t.Fatal(err) 24 | return 25 | } 26 | s := spew.ConfigState{ 27 | Indent: " ", 28 | MaxDepth: 5, // Avoid console overflow. 29 | } 30 | t.Log("\n", s.Sdump(f), "\n") 31 | } 32 | 33 | // Fast read. 
34 | { 35 | f, err := ParseGGUFFile(mp, SkipLargeMetadata(), UseMMap()) 36 | if err != nil { 37 | t.Fatal(err) 38 | return 39 | } 40 | t.Log("\n", spew.Sdump(f), "\n") 41 | } 42 | } 43 | 44 | func BenchmarkParseGGUFFileMMap(b *testing.B) { 45 | mp, ok := os.LookupEnv("TEST_MODEL_PATH") 46 | if !ok { 47 | b.Skip("TEST_MODEL_PATH is not set") 48 | return 49 | } 50 | 51 | b.ReportAllocs() 52 | 53 | b.ResetTimer() 54 | b.Run("Normal", func(b *testing.B) { 55 | for i := 0; i < b.N; i++ { 56 | _, err := ParseGGUFFile(mp) 57 | if err != nil { 58 | b.Fatal(err) 59 | return 60 | } 61 | } 62 | }) 63 | 64 | b.ResetTimer() 65 | b.Run("UseMMap", func(b *testing.B) { 66 | for i := 0; i < b.N; i++ { 67 | _, err := ParseGGUFFile(mp, UseMMap()) 68 | if err != nil { 69 | b.Fatal(err) 70 | return 71 | } 72 | } 73 | }) 74 | } 75 | 76 | func BenchmarkParseGGUFFileSkipLargeMetadata(b *testing.B) { 77 | mp, ok := os.LookupEnv("TEST_MODEL_PATH") 78 | if !ok { 79 | b.Skip("TEST_MODEL_PATH is not set") 80 | return 81 | } 82 | 83 | b.ReportAllocs() 84 | 85 | b.ResetTimer() 86 | b.Run("Normal", func(b *testing.B) { 87 | for i := 0; i < b.N; i++ { 88 | _, err := ParseGGUFFile(mp, UseMMap()) 89 | if err != nil { 90 | b.Fatal(err) 91 | return 92 | } 93 | } 94 | }) 95 | 96 | b.ResetTimer() 97 | b.Run("SkipLargeMetadata", func(b *testing.B) { 98 | for i := 0; i < b.N; i++ { 99 | _, err := ParseGGUFFile(mp, SkipLargeMetadata(), UseMMap()) 100 | if err != nil { 101 | b.Fatal(err) 102 | return 103 | } 104 | } 105 | }) 106 | } 107 | 108 | func TestParseGGUFFileRemote(t *testing.T) { 109 | const u = "https://huggingface.co/NousResearch/Hermes-2-Theta-Llama-3-8B-GGUF" + 110 | "/resolve/main/Hermes-2-Pro-Llama-3-Instruct-Merged-DPO-Q4_K_M.gguf" 111 | 112 | ctx := context.Background() 113 | 114 | // Slow read. 
115 | { 116 | f, err := ParseGGUFFileRemote(ctx, u, UseDebug()) 117 | if err != nil { 118 | t.Fatal(err) 119 | return 120 | } 121 | s := spew.ConfigState{ 122 | Indent: " ", 123 | MaxDepth: 5, // Avoid console overflow. 124 | } 125 | t.Log("\n", s.Sdump(f), "\n") 126 | } 127 | 128 | // Fast read. 129 | { 130 | f, err := ParseGGUFFileRemote(ctx, u, UseDebug(), SkipLargeMetadata()) 131 | if err != nil { 132 | t.Fatal(err) 133 | return 134 | } 135 | t.Log("\n", spew.Sdump(f), "\n") 136 | } 137 | } 138 | 139 | func BenchmarkParseGGUFFileRemoteWithBufferSize(b *testing.B) { 140 | const u = "https://huggingface.co/NousResearch/Hermes-2-Theta-Llama-3-8B-GGUF" + 141 | "/resolve/main/Hermes-2-Pro-Llama-3-Instruct-Merged-DPO-Q4_K_M.gguf" 142 | 143 | ctx := context.Background() 144 | 145 | b.ReportAllocs() 146 | 147 | b.ResetTimer() 148 | b.Run("256KibBuffer", func(b *testing.B) { 149 | for i := 0; i < b.N; i++ { 150 | _, err := ParseGGUFFileRemote(ctx, u, SkipLargeMetadata(), UseBufferSize(256*1024)) 151 | if err != nil { 152 | b.Fatal(err) 153 | return 154 | } 155 | } 156 | }) 157 | 158 | b.ResetTimer() 159 | b.Run("1MibBuffer", func(b *testing.B) { 160 | for i := 0; i < b.N; i++ { 161 | _, err := ParseGGUFFileRemote(ctx, u, SkipLargeMetadata(), UseBufferSize(1024*1024)) 162 | if err != nil { 163 | b.Fatal(err) 164 | return 165 | } 166 | } 167 | }) 168 | 169 | b.ResetTimer() 170 | b.Run("4MibBuffer", func(b *testing.B) { 171 | for i := 0; i < b.N; i++ { 172 | _, err := ParseGGUFFileRemote(ctx, u, SkipLargeMetadata(), UseBufferSize(4*1024*1024)) 173 | if err != nil { 174 | b.Fatal(err) 175 | return 176 | } 177 | } 178 | }) 179 | } 180 | 181 | func TestParseGGUFFileFromHuggingFace(t *testing.T) { 182 | ctx := context.Background() 183 | 184 | cases := [][2]string{ 185 | { 186 | "TheBloke/Llama-2-13B-chat-GGUF", 187 | "llama-2-13b-chat.Q8_0.gguf", 188 | }, 189 | { 190 | "lmstudio-community/Yi-1.5-9B-Chat-GGUF", 191 | "Yi-1.5-9B-Chat-Q5_K_M.gguf", 192 | }, 193 | { 194 | 
"bartowski/gemma-2-9b-it-GGUF", 195 | "gemma-2-9b-it-Q3_K_M.gguf", 196 | }, 197 | } 198 | for _, tc := range cases { 199 | t.Run(tc[0]+"/"+tc[1], func(t *testing.T) { 200 | f, err := ParseGGUFFileFromHuggingFace(ctx, tc[0], tc[1], SkipLargeMetadata()) 201 | if err != nil { 202 | t.Fatal(err) 203 | return 204 | } 205 | t.Log("\n", spew.Sdump(f), "\n") 206 | }) 207 | } 208 | } 209 | 210 | func TestParseGGUFFileFromModelScope(t *testing.T) { 211 | ctx := context.Background() 212 | 213 | cases := [][2]string{ 214 | { 215 | "qwen/Qwen1.5-0.5B-Chat-GGUF", 216 | "qwen1_5-0_5b-chat-q5_k_m.gguf", 217 | }, 218 | { 219 | "HIT-SCIR/huozi3-gguf", 220 | "huozi3-q2_k.gguf", 221 | }, 222 | { 223 | "shaowenchen/chinese-alpaca-2-13b-16k-gguf", 224 | "chinese-alpaca-2-13b-16k.Q5_K.gguf", 225 | }, 226 | } 227 | for _, tc := range cases { 228 | t.Run(tc[0]+"/"+tc[1], func(t *testing.T) { 229 | f, err := ParseGGUFFileFromModelScope(ctx, tc[0], tc[1], SkipLargeMetadata()) 230 | if err != nil { 231 | t.Fatal(err) 232 | return 233 | } 234 | t.Log("\n", spew.Sdump(f), "\n") 235 | }) 236 | } 237 | } 238 | 239 | func TestParseGGUFFileFromOllama(t *testing.T) { 240 | ctx := context.Background() 241 | 242 | cases := []string{ 243 | "gemma2", 244 | "llama3.1", 245 | "qwen2:72b-instruct-q3_K_M", 246 | } 247 | for _, tc := range cases { 248 | t.Run(tc, func(t *testing.T) { 249 | start := time.Now() 250 | f, err := ParseGGUFFileFromOllama(ctx, tc, SkipLargeMetadata()) 251 | if err != nil { 252 | t.Fatal(err) 253 | return 254 | } 255 | t.Logf("cost: %v\n", time.Since(start)) 256 | t.Log("\n", spew.Sdump(f), "\n") 257 | }) 258 | } 259 | } 260 | -------------------------------------------------------------------------------- /file_tokenizer.go: -------------------------------------------------------------------------------- 1 | package gguf_parser 2 | 3 | // GGUFTokenizer represents the tokenizer metadata of a GGUF file. 
type GGUFTokenizer struct {
	/* Basic */

	// Model is the model of the tokenizer.
	Model string `json:"model"`
	// TokensLength is the size of tokens.
	TokensLength uint64 `json:"tokensLength"`
	// MergesLength is the size of merges.
	MergesLength uint64 `json:"mergesLength"`
	// AddedTokensLength is the size of added tokens after training.
	//
	// NOTE: the JSON key is the singular "addedTokenLength";
	// it is kept as-is because it is part of the serialized output.
	AddedTokensLength uint64 `json:"addedTokenLength"`
	// BOSTokenID is the ID of the beginning of sentence token.
	//
	// Use -1 if the token is not found.
	BOSTokenID int64 `json:"bosTokenID"`
	// EOSTokenID is the ID of the end of sentence token.
	//
	// Use -1 if the token is not found.
	EOSTokenID int64 `json:"eosTokenID"`
	// EOTTokenID is the ID of the end of text token.
	//
	// Use -1 if the token is not found.
	EOTTokenID int64 `json:"eotTokenID"`
	// EOMTokenID is the ID of the end of message token.
	//
	// Use -1 if the token is not found.
	EOMTokenID int64 `json:"eomTokenID"`
	// UnknownTokenID is the ID of the unknown token.
	//
	// Use -1 if the token is not found.
	UnknownTokenID int64 `json:"unknownTokenID"`
	// SeparatorTokenID is the ID of the separator token.
	//
	// Use -1 if the token is not found.
	SeparatorTokenID int64 `json:"separatorTokenID"`
	// PaddingTokenID is the ID of the padding token.
	//
	// Use -1 if the token is not found.
	PaddingTokenID int64 `json:"paddingTokenID"`

	/* Appendix */

	// TokensSize is the size of tokens in bytes.
	TokensSize int64 `json:"tokensSize"`
	// MergesSize is the size of merges in bytes.
	MergesSize int64 `json:"mergesSize"`
}

// Tokenizer returns the tokenizer metadata of a GGUF file.
53 | func (gf *GGUFFile) Tokenizer() (gt GGUFTokenizer) { 54 | const ( 55 | modelKey = "tokenizer.ggml.model" 56 | tokensKey = "tokenizer.ggml.tokens" 57 | mergesKey = "tokenizer.ggml.merges" 58 | addedTokensKey = "tokenizer.ggml.added_tokens" 59 | bosTokenIDKey = "tokenizer.ggml.bos_token_id" 60 | eosTokenIDKey = "tokenizer.ggml.eos_token_id" 61 | eotTokenIDKey = "tokenizer.ggml.eot_token_id" 62 | eomTokenIDKey = "tokenizer.ggml.eom_token_id" 63 | unknownTokenIDKey = "tokenizer.ggml.unknown_token_id" 64 | separatorTokenIDKey = "tokenizer.ggml.separator_token_id" 65 | paddingTokenIDKey = "tokenizer.ggml.padding_token_id" 66 | ) 67 | 68 | m, _ := gf.Header.MetadataKV.Index([]string{ 69 | modelKey, 70 | tokensKey, 71 | mergesKey, 72 | addedTokensKey, 73 | bosTokenIDKey, 74 | eosTokenIDKey, 75 | eotTokenIDKey, 76 | eomTokenIDKey, 77 | unknownTokenIDKey, 78 | separatorTokenIDKey, 79 | paddingTokenIDKey, 80 | }) 81 | 82 | gt.BOSTokenID = -1 83 | gt.EOSTokenID = -1 84 | gt.EOTTokenID = -1 85 | gt.EOMTokenID = -1 86 | gt.UnknownTokenID = -1 87 | gt.SeparatorTokenID = -1 88 | gt.PaddingTokenID = -1 89 | 90 | if v, ok := m[modelKey]; ok { 91 | gt.Model = v.ValueString() 92 | } 93 | if v, ok := m[tokensKey]; ok { 94 | arr := v.ValueArray() 95 | gt.TokensLength = arr.Len 96 | gt.TokensSize = arr.Size 97 | } 98 | if v, ok := m[mergesKey]; ok { 99 | arr := v.ValueArray() 100 | gt.MergesLength = arr.Len 101 | gt.MergesSize = arr.Size 102 | } 103 | if v, ok := m[addedTokensKey]; ok { 104 | gt.AddedTokensLength = v.ValueArray().Len 105 | } 106 | if v, ok := m[bosTokenIDKey]; ok { 107 | gt.BOSTokenID = ValueNumeric[int64](v) 108 | } 109 | if v, ok := m[eosTokenIDKey]; ok { 110 | gt.EOSTokenID = ValueNumeric[int64](v) 111 | } 112 | if v, ok := m[eotTokenIDKey]; ok { 113 | gt.EOTTokenID = ValueNumeric[int64](v) 114 | } 115 | if v, ok := m[eomTokenIDKey]; ok { 116 | gt.EOMTokenID = ValueNumeric[int64](v) 117 | } 118 | if v, ok := m[unknownTokenIDKey]; ok { 119 | gt.UnknownTokenID = 
ValueNumeric[int64](v) 120 | } 121 | if v, ok := m[separatorTokenIDKey]; ok { 122 | gt.SeparatorTokenID = ValueNumeric[int64](v) 123 | } 124 | if v, ok := m[paddingTokenIDKey]; ok { 125 | gt.PaddingTokenID = ValueNumeric[int64](v) 126 | } 127 | 128 | return gt 129 | } 130 | -------------------------------------------------------------------------------- /file_tokenizer_test.go: -------------------------------------------------------------------------------- 1 | package gguf_parser 2 | 3 | import ( 4 | "context" 5 | "os" 6 | "testing" 7 | 8 | "github.com/davecgh/go-spew/spew" 9 | ) 10 | 11 | func TestGGUFFile_Tokenizer(t *testing.T) { 12 | ctx := context.Background() 13 | 14 | f, err := ParseGGUFFileFromHuggingFace( 15 | ctx, 16 | "NousResearch/Hermes-2-Pro-Mistral-7B-GGUF", 17 | "Hermes-2-Pro-Mistral-7B.Q5_K_M.gguf", 18 | SkipLargeMetadata()) 19 | if err != nil { 20 | t.Fatal(err) 21 | return 22 | } 23 | 24 | t.Log("\n", spew.Sdump(f.Tokenizer()), "\n") 25 | } 26 | 27 | func BenchmarkGGUFFile_Tokenizer(b *testing.B) { 28 | mp, ok := os.LookupEnv("TEST_MODEL_PATH") 29 | if !ok { 30 | b.Skip("TEST_MODEL_PATH is not set") 31 | return 32 | } 33 | 34 | f, err := ParseGGUFFile(mp, SkipLargeMetadata(), UseMMap()) 35 | if err != nil { 36 | b.Fatal(err) 37 | return 38 | } 39 | 40 | b.ReportAllocs() 41 | 42 | b.ResetTimer() 43 | for i := 0; i < b.N; i++ { 44 | _ = f.Tokenizer() 45 | } 46 | } 47 | -------------------------------------------------------------------------------- /filename.go: -------------------------------------------------------------------------------- 1 | package gguf_parser 2 | 3 | import ( 4 | "fmt" 5 | "regexp" 6 | "strconv" 7 | "strings" 8 | 9 | "github.com/gpustack/gguf-parser-go/util/funcx" 10 | "github.com/gpustack/gguf-parser-go/util/ptr" 11 | ) 12 | 13 | // GGUFFilename represents a GGUF filename, 14 | // see https://github.com/ggerganov/ggml/blob/master/docs/gguf.md#gguf-naming-convention. 
15 | type GGUFFilename struct { 16 | BaseName string `json:"baseName"` 17 | SizeLabel string `json:"sizeLabel"` 18 | FineTune string `json:"fineTune"` 19 | Version string `json:"version"` 20 | Encoding string `json:"encoding"` 21 | Type string `json:"type"` 22 | Shard *int `json:"shard,omitempty"` 23 | ShardTotal *int `json:"shardTotal,omitempty"` 24 | } 25 | 26 | var GGUFFilenameRegex = regexp.MustCompile(`^(?P[A-Za-z\s][A-Za-z0-9._\s]*(?:(?:-(?:(?:[A-Za-z\s][A-Za-z0-9._\s]*)|(?:[0-9._\s]*)))*))-(?:(?P(?:\d+x)?(?:\d+\.)?\d+[A-Za-z](?:-[A-Za-z]+(\d+\.)?\d+[A-Za-z]+)?)(?:-(?P[A-Za-z][A-Za-z0-9\s_-]+[A-Za-z](?i:[^BFKIQ])))?)?(?:-(?P[vV]\d+(?:\.\d+)*))?(?i:-(?P(BF16|F32|F16|([KI]?Q[0-9][A-Z0-9_]*))))?(?:-(?PLoRA|vocab))?(?:-(?P\d{5})-of-(?P\d{5}))?\.gguf$`) // nolint:lll 27 | 28 | // ParseGGUFFilename parses the given GGUF filename string, 29 | // and returns the GGUFFilename, or nil if the filename is invalid. 30 | func ParseGGUFFilename(name string) *GGUFFilename { 31 | n := name 32 | if !strings.HasSuffix(n, ".gguf") { 33 | n += ".gguf" 34 | } 35 | 36 | m := make(map[string]string) 37 | { 38 | r := GGUFFilenameRegex.FindStringSubmatch(n) 39 | for i, ne := range GGUFFilenameRegex.SubexpNames() { 40 | if i != 0 && i <= len(r) { 41 | m[ne] = r[i] 42 | } 43 | } 44 | } 45 | if m["BaseName"] == "" { 46 | return nil 47 | } 48 | 49 | var gn GGUFFilename 50 | gn.BaseName = strings.ReplaceAll(m["BaseName"], "-", " ") 51 | gn.SizeLabel = m["SizeLabel"] 52 | gn.FineTune = m["FineTune"] 53 | gn.Version = m["Version"] 54 | gn.Encoding = m["Encoding"] 55 | gn.Type = m["Type"] 56 | if v := m["Shard"]; v != "" { 57 | gn.Shard = ptr.To(parseInt(v)) 58 | } 59 | if v := m["ShardTotal"]; v != "" { 60 | gn.ShardTotal = ptr.To(parseInt(v)) 61 | } 62 | return &gn 63 | } 64 | 65 | func (gn GGUFFilename) String() string { 66 | if gn.BaseName == "" { 67 | return "" 68 | } 69 | 70 | var sb strings.Builder 71 | sb.WriteString(strings.ReplaceAll(gn.BaseName, " ", "-")) 72 | if gn.SizeLabel != 
"" { 73 | sb.WriteString("-") 74 | sb.WriteString(gn.SizeLabel) 75 | } 76 | if gn.FineTune != "" { 77 | sb.WriteString("-") 78 | sb.WriteString(gn.FineTune) 79 | } 80 | if gn.Version != "" { 81 | sb.WriteString("-") 82 | sb.WriteString(gn.Version) 83 | } 84 | if gn.Encoding != "" { 85 | sb.WriteString("-") 86 | sb.WriteString(gn.Encoding) 87 | } 88 | if gn.Type != "" { 89 | sb.WriteString("-") 90 | sb.WriteString(gn.Type) 91 | } 92 | if m, n := ptr.Deref(gn.Shard, 0), ptr.Deref(gn.ShardTotal, 0); m > 0 && n > 0 { 93 | sb.WriteString("-") 94 | sb.WriteString(fmt.Sprintf("%05d", m)) 95 | sb.WriteString("-of-") 96 | sb.WriteString(fmt.Sprintf("%05d", n)) 97 | } 98 | sb.WriteString(".gguf") 99 | return sb.String() 100 | } 101 | 102 | // IsShard returns true if the GGUF filename is a shard. 103 | func (gn GGUFFilename) IsShard() bool { 104 | return ptr.Deref(gn.Shard, 0) > 0 && ptr.Deref(gn.ShardTotal, 0) > 0 105 | } 106 | 107 | var ShardGGUFFilenameRegex = regexp.MustCompile(`^(?P.*)-(?:(?P\d{5})-of-(?P\d{5}))\.gguf$`) 108 | 109 | // IsShardGGUFFilename returns true if the given filename is a shard GGUF filename. 110 | func IsShardGGUFFilename(name string) bool { 111 | n := name 112 | if !strings.HasSuffix(n, ".gguf") { 113 | n += ".gguf" 114 | } 115 | 116 | m := make(map[string]string) 117 | { 118 | r := ShardGGUFFilenameRegex.FindStringSubmatch(n) 119 | for i, ne := range ShardGGUFFilenameRegex.SubexpNames() { 120 | if i != 0 && i <= len(r) { 121 | m[ne] = r[i] 122 | } 123 | } 124 | } 125 | 126 | var shard, shardTotal int 127 | if v := m["Shard"]; v != "" { 128 | shard = parseInt(v) 129 | } 130 | if v := m["ShardTotal"]; v != "" { 131 | shardTotal = parseInt(v) 132 | } 133 | return shard > 0 && shardTotal > 0 134 | } 135 | 136 | // CompleteShardGGUFFilename returns the list of shard GGUF filenames that are related to the given shard GGUF filename. 137 | // 138 | // Only available if the given filename is a shard GGUF filename. 
139 | func CompleteShardGGUFFilename(name string) []string { 140 | n := name 141 | if !strings.HasSuffix(n, ".gguf") { 142 | n += ".gguf" 143 | } 144 | 145 | m := make(map[string]string) 146 | { 147 | r := ShardGGUFFilenameRegex.FindStringSubmatch(n) 148 | for i, ne := range ShardGGUFFilenameRegex.SubexpNames() { 149 | if i != 0 && i <= len(r) { 150 | m[ne] = r[i] 151 | } 152 | } 153 | } 154 | 155 | var shard, shardTotal int 156 | if v := m["Shard"]; v != "" { 157 | shard = parseInt(v) 158 | } 159 | if v := m["ShardTotal"]; v != "" { 160 | shardTotal = parseInt(v) 161 | } 162 | 163 | if shard <= 0 || shardTotal <= 0 { 164 | return nil 165 | } 166 | 167 | names := make([]string, 0, shardTotal) 168 | for i := 1; i <= shardTotal; i++ { 169 | names = append(names, fmt.Sprintf("%s-%05d-of-%05d.gguf", m["Prefix"], i, shardTotal)) 170 | } 171 | return names 172 | } 173 | 174 | func parseInt(v string) int { 175 | return int(funcx.MustNoError(strconv.ParseInt(v, 10, 64))) 176 | } 177 | -------------------------------------------------------------------------------- /filename_test.go: -------------------------------------------------------------------------------- 1 | package gguf_parser 2 | 3 | import ( 4 | "testing" 5 | 6 | "github.com/stretchr/testify/assert" 7 | 8 | "github.com/gpustack/gguf-parser-go/util/ptr" 9 | ) 10 | 11 | func TestParseGGUFFilename(t *testing.T) { 12 | cases := []struct { 13 | given string 14 | expected *GGUFFilename 15 | }{ 16 | { 17 | given: "Mixtral-8x7B-V0.1-KQ2.gguf", 18 | expected: &GGUFFilename{ 19 | BaseName: "Mixtral", 20 | SizeLabel: "8x7B", 21 | Version: "V0.1", 22 | Encoding: "KQ2", 23 | }, 24 | }, 25 | { 26 | given: "Grok-100B-v1.0-Q4_0-00003-of-00009.gguf", 27 | expected: &GGUFFilename{ 28 | BaseName: "Grok", 29 | SizeLabel: "100B", 30 | Version: "v1.0", 31 | Encoding: "Q4_0", 32 | Shard: ptr.To(3), 33 | ShardTotal: ptr.To(9), 34 | }, 35 | }, 36 | { 37 | given: "Hermes-2-Pro-Llama-3-8B-F16.gguf", 38 | expected: &GGUFFilename{ 39 | 
BaseName: "Hermes 2 Pro Llama 3", 40 | SizeLabel: "8B", 41 | Encoding: "F16", 42 | }, 43 | }, 44 | { 45 | given: "Phi-3-mini-3.8B-ContextLength4k-instruct-v1.0.gguf", 46 | expected: &GGUFFilename{ 47 | BaseName: "Phi 3 mini", 48 | SizeLabel: "3.8B-ContextLength4k", 49 | FineTune: "instruct", 50 | Version: "v1.0", 51 | }, 52 | }, 53 | { 54 | given: "Meta-Llama-3.1-405B-Instruct-XelotX-BF16-00001-of-00018.gguf", 55 | expected: &GGUFFilename{ 56 | BaseName: "Meta Llama 3.1", 57 | SizeLabel: "405B", 58 | FineTune: "Instruct-XelotX", 59 | Encoding: "BF16", 60 | Shard: ptr.To(1), 61 | ShardTotal: ptr.To(18), 62 | }, 63 | }, 64 | { 65 | given: "qwen2-72b-instruct-q6_k-00001-of-00002.gguf", 66 | expected: &GGUFFilename{ 67 | BaseName: "qwen2", 68 | SizeLabel: "72b", 69 | FineTune: "instruct", 70 | Encoding: "q6_k", 71 | Shard: ptr.To(1), 72 | ShardTotal: ptr.To(2), 73 | }, 74 | }, 75 | { 76 | given: "Meta-Llama-3.1-405B-Instruct.Q2_K.gguf-00001-of-00009.gguf", 77 | expected: nil, 78 | }, 79 | { 80 | given: "not-a-known-arrangement.gguf", 81 | expected: nil, 82 | }, 83 | } 84 | for _, tc := range cases { 85 | t.Run(tc.given, func(t *testing.T) { 86 | actual := ParseGGUFFilename(tc.given) 87 | assert.Equal(t, tc.expected, actual) 88 | }) 89 | } 90 | } 91 | 92 | func TestGGUFFilenameString(t *testing.T) { 93 | cases := []struct { 94 | given GGUFFilename 95 | expected string 96 | }{ 97 | { 98 | given: GGUFFilename{ 99 | BaseName: "Mixtral", 100 | SizeLabel: "8x7B", 101 | Version: "v0.1", 102 | Encoding: "KQ2", 103 | }, 104 | expected: "Mixtral-8x7B-v0.1-KQ2.gguf", 105 | }, 106 | { 107 | given: GGUFFilename{ 108 | BaseName: "Grok", 109 | SizeLabel: "100B", 110 | Version: "v1.0", 111 | Encoding: "Q4_0", 112 | Shard: ptr.To(3), 113 | ShardTotal: ptr.To(9), 114 | }, 115 | expected: "Grok-100B-v1.0-Q4_0-00003-of-00009.gguf", 116 | }, 117 | { 118 | given: GGUFFilename{ 119 | BaseName: "Hermes 2 Pro Llama 3", 120 | SizeLabel: "8B", 121 | Encoding: "F16", 122 | }, 123 | expected: 
"Hermes-2-Pro-Llama-3-8B-F16.gguf", 124 | }, 125 | { 126 | given: GGUFFilename{ 127 | BaseName: "Phi 3 mini", 128 | SizeLabel: "3.8B-ContextLength4k", 129 | FineTune: "instruct", 130 | Version: "v1.0", 131 | }, 132 | expected: "Phi-3-mini-3.8B-ContextLength4k-instruct-v1.0.gguf", 133 | }, 134 | { 135 | given: GGUFFilename{}, 136 | expected: "", 137 | }, 138 | } 139 | for _, tc := range cases { 140 | t.Run(tc.expected, func(t *testing.T) { 141 | actual := tc.given.String() 142 | assert.Equal(t, tc.expected, actual) 143 | }) 144 | } 145 | } 146 | 147 | func TestIsShardGGUFFilename(t *testing.T) { 148 | cases := []struct { 149 | given string 150 | expected bool 151 | }{ 152 | { 153 | given: "qwen2-72b-instruct-q6_k-00001-of-00002.gguf", 154 | expected: true, 155 | }, 156 | { 157 | given: "Grok-100B-v1.0-Q4_0-00003-of-00009.gguf", 158 | expected: true, 159 | }, 160 | { 161 | given: "Meta-Llama-3.1-405B-Instruct.Q2_K.gguf-00001-of-00009.gguf", 162 | expected: true, 163 | }, 164 | { 165 | given: "Meta-Llama-3.1-405B-Instruct-XelotX-BF16-00001-of-00018.gguf", 166 | expected: true, 167 | }, 168 | { 169 | given: "not-a-known-arrangement.gguf", 170 | expected: false, 171 | }, 172 | } 173 | for _, tc := range cases { 174 | t.Run(tc.given, func(t *testing.T) { 175 | actual := IsShardGGUFFilename(tc.given) 176 | assert.Equal(t, tc.expected, actual) 177 | }) 178 | } 179 | } 180 | 181 | func TestCompleteShardGGUFFilename(t *testing.T) { 182 | cases := []struct { 183 | given string 184 | expected []string 185 | }{ 186 | { 187 | given: "qwen2-72b-instruct-q6_k-00001-of-00002.gguf", 188 | expected: []string{ 189 | "qwen2-72b-instruct-q6_k-00001-of-00002.gguf", 190 | "qwen2-72b-instruct-q6_k-00002-of-00002.gguf", 191 | }, 192 | }, 193 | { 194 | given: "Grok-100B-v1.0-Q4_0-00003-of-00009.gguf", 195 | expected: []string{ 196 | "Grok-100B-v1.0-Q4_0-00001-of-00009.gguf", 197 | "Grok-100B-v1.0-Q4_0-00002-of-00009.gguf", 198 | "Grok-100B-v1.0-Q4_0-00003-of-00009.gguf", 199 | 
"Grok-100B-v1.0-Q4_0-00004-of-00009.gguf", 200 | "Grok-100B-v1.0-Q4_0-00005-of-00009.gguf", 201 | "Grok-100B-v1.0-Q4_0-00006-of-00009.gguf", 202 | "Grok-100B-v1.0-Q4_0-00007-of-00009.gguf", 203 | "Grok-100B-v1.0-Q4_0-00008-of-00009.gguf", 204 | "Grok-100B-v1.0-Q4_0-00009-of-00009.gguf", 205 | }, 206 | }, 207 | { 208 | given: "Meta-Llama-3.1-405B-Instruct.Q2_K.gguf-00001-of-00009.gguf", 209 | expected: []string{ 210 | "Meta-Llama-3.1-405B-Instruct.Q2_K.gguf-00001-of-00009.gguf", 211 | "Meta-Llama-3.1-405B-Instruct.Q2_K.gguf-00002-of-00009.gguf", 212 | "Meta-Llama-3.1-405B-Instruct.Q2_K.gguf-00003-of-00009.gguf", 213 | "Meta-Llama-3.1-405B-Instruct.Q2_K.gguf-00004-of-00009.gguf", 214 | "Meta-Llama-3.1-405B-Instruct.Q2_K.gguf-00005-of-00009.gguf", 215 | "Meta-Llama-3.1-405B-Instruct.Q2_K.gguf-00006-of-00009.gguf", 216 | "Meta-Llama-3.1-405B-Instruct.Q2_K.gguf-00007-of-00009.gguf", 217 | "Meta-Llama-3.1-405B-Instruct.Q2_K.gguf-00008-of-00009.gguf", 218 | "Meta-Llama-3.1-405B-Instruct.Q2_K.gguf-00009-of-00009.gguf", 219 | }, 220 | }, 221 | { 222 | given: "Meta-Llama-3.1-405B-Instruct-XelotX-BF16-00001-of-00018.gguf", 223 | expected: []string{ 224 | "Meta-Llama-3.1-405B-Instruct-XelotX-BF16-00001-of-00018.gguf", 225 | "Meta-Llama-3.1-405B-Instruct-XelotX-BF16-00002-of-00018.gguf", 226 | "Meta-Llama-3.1-405B-Instruct-XelotX-BF16-00003-of-00018.gguf", 227 | "Meta-Llama-3.1-405B-Instruct-XelotX-BF16-00004-of-00018.gguf", 228 | "Meta-Llama-3.1-405B-Instruct-XelotX-BF16-00005-of-00018.gguf", 229 | "Meta-Llama-3.1-405B-Instruct-XelotX-BF16-00006-of-00018.gguf", 230 | "Meta-Llama-3.1-405B-Instruct-XelotX-BF16-00007-of-00018.gguf", 231 | "Meta-Llama-3.1-405B-Instruct-XelotX-BF16-00008-of-00018.gguf", 232 | "Meta-Llama-3.1-405B-Instruct-XelotX-BF16-00009-of-00018.gguf", 233 | "Meta-Llama-3.1-405B-Instruct-XelotX-BF16-00010-of-00018.gguf", 234 | "Meta-Llama-3.1-405B-Instruct-XelotX-BF16-00011-of-00018.gguf", 235 | "Meta-Llama-3.1-405B-Instruct-XelotX-BF16-00012-of-00018.gguf", 236 
| "Meta-Llama-3.1-405B-Instruct-XelotX-BF16-00013-of-00018.gguf", 237 | "Meta-Llama-3.1-405B-Instruct-XelotX-BF16-00014-of-00018.gguf", 238 | "Meta-Llama-3.1-405B-Instruct-XelotX-BF16-00015-of-00018.gguf", 239 | "Meta-Llama-3.1-405B-Instruct-XelotX-BF16-00016-of-00018.gguf", 240 | "Meta-Llama-3.1-405B-Instruct-XelotX-BF16-00017-of-00018.gguf", 241 | "Meta-Llama-3.1-405B-Instruct-XelotX-BF16-00018-of-00018.gguf", 242 | }, 243 | }, 244 | { 245 | given: "not-a-known-arrangement.gguf", 246 | expected: nil, 247 | }, 248 | } 249 | for _, tc := range cases { 250 | t.Run(tc.given, func(t *testing.T) { 251 | actual := CompleteShardGGUFFilename(tc.given) 252 | assert.Equal(t, tc.expected, actual) 253 | }) 254 | } 255 | } 256 | -------------------------------------------------------------------------------- /gen.go: -------------------------------------------------------------------------------- 1 | //go:generate go generate -tags stringer gen.stringer.go 2 | //go:generate go generate -tags regression gen.regression.go 3 | package gguf_parser 4 | -------------------------------------------------------------------------------- /gen.stringer.go: -------------------------------------------------------------------------------- 1 | //go:build stringer 2 | 3 | //go:generate go run golang.org/x/tools/cmd/stringer -linecomment -type GGUFMagic -output zz_generated.ggufmagic.stringer.go -trimprefix GGUFMagic 4 | //go:generate go run golang.org/x/tools/cmd/stringer -linecomment -type GGUFVersion -output zz_generated.ggufversion.stringer.go -trimprefix GGUFVersion 5 | //go:generate go run golang.org/x/tools/cmd/stringer -linecomment -type GGUFMetadataValueType -output zz_generated.ggufmetadatavaluetype.stringer.go -trimprefix GGUFMetadataValueType 6 | //go:generate go run golang.org/x/tools/cmd/stringer -linecomment -type GGUFFileType -output zz_generated.gguffiletype.stringer.go -trimprefix GGUFFileType 7 | //go:generate go run golang.org/x/tools/cmd/stringer -linecomment -type GGMLType 
// Types for GGMLType.
type (
	// GGMLType is a type of GGML tensor,
	// see https://github.com/ggerganov/llama.cpp/blob/b34e02348064c2f0cef1f89b44d9bee4eb15b9e7/ggml/include/ggml.h#L363-L401.
	//
	// Values are assigned by iota in the const block below,
	// deprecated entries still occupy their slot in that sequence.
	GGMLType uint32

	// GGMLTypeTrait holds the trait of a GGMLType,
	// see https://github.com/ggerganov/llama.cpp/blob/b34e02348064c2f0cef1f89b44d9bee4eb15b9e7/ggml/src/ggml.c#L663-L1082.
	//
	// GGMLType.RowSizeOf combines the two sizes as TypeSize * elements / BlockSize.
	GGMLTypeTrait struct {
		// BlockSize is the divisor applied to the element count of a row.
		BlockSize uint64 // Original is int, in order to reduce conversion, here we use uint64.
		// TypeSize is the multiplier applied to the element count of a row.
		TypeSize uint64 // Original is uint32, in order to reduce conversion, here we use uint64.
		// Quantized is the value reported by GGMLType.IsQuantized.
		Quantized bool
	}
)

// GGMLType constants.
//
// GGMLTypeQ4_2, GGMLTypeQ4_3 are deprecated.
// GGMLTypeQ4_0_4_4, GGMLTypeQ4_0_4_8, GGMLTypeQ4_0_8_8 are deprecated.
// GGMLTypeIQ4_NL_4_4, GGMLTypeIQ4_NL_4_8, GGMLTypeIQ4_NL_8_8 are deprecated.
const (
	GGMLTypeF32 GGMLType = iota
	GGMLTypeF16
	GGMLTypeQ4_0
	GGMLTypeQ4_1
	GGMLTypeQ4_2
	GGMLTypeQ4_3
	GGMLTypeQ5_0
	GGMLTypeQ5_1
	GGMLTypeQ8_0
	GGMLTypeQ8_1
	GGMLTypeQ2_K
	GGMLTypeQ3_K
	GGMLTypeQ4_K
	GGMLTypeQ5_K
	GGMLTypeQ6_K
	GGMLTypeQ8_K
	GGMLTypeIQ2_XXS
	GGMLTypeIQ2_XS
	GGMLTypeIQ3_XXS
	GGMLTypeIQ1_S
	GGMLTypeIQ4_NL
	GGMLTypeIQ3_S
	GGMLTypeIQ2_S
	GGMLTypeIQ4_XS
	GGMLTypeI8
	GGMLTypeI16
	GGMLTypeI32
	GGMLTypeI64
	GGMLTypeF64
	GGMLTypeIQ1_M
	GGMLTypeBF16
	GGMLTypeQ4_0_4_4
	GGMLTypeQ4_0_4_8
	GGMLTypeQ4_0_8_8
	GGMLTypeTQ1_0
	GGMLTypeTQ2_0
	GGMLTypeIQ4_NL_4_4
	GGMLTypeIQ4_NL_4_8
	GGMLTypeIQ4_NL_8_8
	_GGMLTypeCount // Unknown
)

// _GGMLTypeTraits is a table of GGMLTypeTrait for GGMLType.
//
// GGMLTypeQ4_2 and GGMLTypeQ4_3 are registered with a zero BlockSize and TypeSize,
// _GGMLTypeCount has no entry at all.
var _GGMLTypeTraits = map[GGMLType]GGMLTypeTrait{
	GGMLTypeF32:        {BlockSize: 1, TypeSize: 4},
	GGMLTypeF16:        {BlockSize: 1, TypeSize: 2},
	GGMLTypeQ4_0:       {BlockSize: 32, TypeSize: 18, Quantized: true},
	GGMLTypeQ4_1:       {BlockSize: 32, TypeSize: 20, Quantized: true},
	GGMLTypeQ4_2:       {BlockSize: 0, TypeSize: 0}, // Deprecated
	GGMLTypeQ4_3:       {BlockSize: 0, TypeSize: 0}, // Deprecated
	GGMLTypeQ5_0:       {BlockSize: 32, TypeSize: 22, Quantized: true},
	GGMLTypeQ5_1:       {BlockSize: 32, TypeSize: 24, Quantized: true},
	GGMLTypeQ8_0:       {BlockSize: 32, TypeSize: 34, Quantized: true},
	GGMLTypeQ8_1:       {BlockSize: 32, TypeSize: 36, Quantized: true},
	GGMLTypeQ2_K:       {BlockSize: 256, TypeSize: 84, Quantized: true},
	GGMLTypeQ3_K:       {BlockSize: 256, TypeSize: 110, Quantized: true},
	GGMLTypeQ4_K:       {BlockSize: 256, TypeSize: 144, Quantized: true},
	GGMLTypeQ5_K:       {BlockSize: 256, TypeSize: 176, Quantized: true},
	GGMLTypeQ6_K:       {BlockSize: 256, TypeSize: 210, Quantized: true},
	GGMLTypeQ8_K:       {BlockSize: 256, TypeSize: 292, Quantized: true},
	GGMLTypeIQ2_XXS:    {BlockSize: 256, TypeSize: 66, Quantized: true},
	GGMLTypeIQ2_XS:     {BlockSize: 256, TypeSize: 74, Quantized: true},
	GGMLTypeIQ3_XXS:    {BlockSize: 256, TypeSize: 98, Quantized: true},
	GGMLTypeIQ1_S:      {BlockSize: 256, TypeSize: 50, Quantized: true},
	GGMLTypeIQ4_NL:     {BlockSize: 32, TypeSize: 18, Quantized: true},
	GGMLTypeIQ3_S:      {BlockSize: 256, TypeSize: 110, Quantized: true},
	GGMLTypeIQ2_S:      {BlockSize: 256, TypeSize: 82, Quantized: true},
	GGMLTypeIQ4_XS:     {BlockSize: 256, TypeSize: 136, Quantized: true},
	GGMLTypeI8:         {BlockSize: 1, TypeSize: 1},
	GGMLTypeI16:        {BlockSize: 1, TypeSize: 2},
	GGMLTypeI32:        {BlockSize: 1, TypeSize: 4},
	GGMLTypeI64:        {BlockSize: 1, TypeSize: 8},
	GGMLTypeF64:        {BlockSize: 1, TypeSize: 8},
	GGMLTypeIQ1_M:      {BlockSize: 256, TypeSize: 56, Quantized: true},
	GGMLTypeBF16:       {BlockSize: 1, TypeSize: 2},
	GGMLTypeQ4_0_4_4:   {BlockSize: 32, TypeSize: 18, Quantized: true},
	GGMLTypeQ4_0_4_8:   {BlockSize: 32, TypeSize: 18, Quantized: true},
	GGMLTypeQ4_0_8_8:   {BlockSize: 32, TypeSize: 18, Quantized: true},
	GGMLTypeTQ1_0:      {BlockSize: 256, TypeSize: 54, Quantized: true},
	GGMLTypeTQ2_0:      {BlockSize: 256, TypeSize: 66, Quantized: true},
	GGMLTypeIQ4_NL_4_4: {BlockSize: 32, TypeSize: 18, Quantized: true},
	GGMLTypeIQ4_NL_4_8: {BlockSize: 32, TypeSize: 18, Quantized: true},
	GGMLTypeIQ4_NL_8_8: {BlockSize: 32, TypeSize: 18, Quantized: true},
}

// Trait returns the GGMLTypeTrait of the GGMLType,
// and false if the GGMLType has no entry in _GGMLTypeTraits.
func (t GGMLType) Trait() (GGMLTypeTrait, bool) {
	tt, ok := _GGMLTypeTraits[t]
	return tt, ok
}
122 | func (t GGMLType) IsQuantized() bool { 123 | tt, ok := t.Trait() 124 | if !ok { 125 | return false 126 | } 127 | return tt.Quantized 128 | } 129 | 130 | // RowSizeOf returns the size of the given dimensions according to the GGMLType's GGMLTypeTrait, 131 | // which is inspired by 132 | // https://github.com/ggerganov/ggml/blob/0cbb7c0e053f5419cfbebb46fbf4d4ed60182cf5/src/ggml.c#L3142-L3145. 133 | // 134 | // The index of the given dimensions means the number of dimension, 135 | // i.e. 0 is the first dimension, 1 is the second dimension, and so on. 136 | // 137 | // The value of the item is the number of elements in the corresponding dimension. 138 | func (t GGMLType) RowSizeOf(dimensions []uint64) uint64 { 139 | if len(dimensions) == 0 { 140 | panic(errors.New("no dimensions")) 141 | } 142 | 143 | tt, ok := t.Trait() 144 | if !ok { 145 | panic(fmt.Errorf("invalid type: %v", t)) 146 | } 147 | 148 | // https://github.com/ggerganov/ggml/blob/a10a8b880c059b3b29356eb9a9f8df72f03cdb6a/src/ggml.c#L2640-L2643 149 | ds := tt.TypeSize * dimensions[0] / tt.BlockSize // Row size 150 | for i := 1; i < len(dimensions); i++ { 151 | ds *= dimensions[i] 152 | } 153 | return ds 154 | } 155 | 156 | // GGMLMemoryPadding returns the padded size of the given size according to GGML memory padding, 157 | // see https://github.com/ggerganov/ggml/blob/0cbb7c0/include/ggml/ggml.h#L238-L243. 158 | func GGMLMemoryPadding(size uint64) uint64 { 159 | const align = 16 160 | return GGMLPadding(size, align) 161 | } 162 | 163 | // GGMLPadding returns the padded size of the given size according to given align, 164 | // see https://github.com/ggerganov/ggml/blob/0cbb7c0e053f5419cfbebb46fbf4d4ed60182cf5/include/ggml/ggml.h#L255. 165 | func GGMLPadding(size, align uint64) uint64 { 166 | return (size + align - 1) &^ (align - 1) 167 | } 168 | 169 | // GGML tensor constants. 
// GGMLTensorSize is the size of GGML tensor in bytes,
// see https://github.com/ggerganov/ggml/blob/0cbb7c0e053f5419cfbebb46fbf4d4ed60182cf5/include/ggml/ggml.h#L606.
const GGMLTensorSize = 368

// GGMLObjectSize is the size of GGML object in bytes,
// see https://github.com/ggerganov/ggml/blob/0cbb7c0e053f5419cfbebb46fbf4d4ed60182cf5/include/ggml/ggml.h#L563.
const GGMLObjectSize = 32

// GGMLTensorOverhead is the overhead of GGML tensor in bytes,
// i.e. the sum of GGMLObjectSize and GGMLTensorSize,
// see https://github.com/ggerganov/ggml/blob/0cbb7c0e053f5419cfbebb46fbf4d4ed60182cf5/src/ggml.c#L2765-L2767.
func GGMLTensorOverhead() uint64 {
	const overhead = GGMLTensorSize + GGMLObjectSize
	return overhead
}

// GGMLComputationGraphSize is the size of GGML computation graph in bytes.
const GGMLComputationGraphSize = 80

// GGMLComputationBitsetSize is the size of GGML computation bitset in bytes,
// see https://github.com/ggml-org/llama.cpp/blob/master/ggml/src/ggml-impl.h#L165.
const GGMLComputationBitsetSize = 4
198 | func GGMLComputationGraphOverhead(nodes uint64, grads bool) uint64 { 199 | const ps = 8 // c++ pointer size 200 | 201 | hs := GGMLHashSize(nodes * 2) 202 | 203 | var g uint64 = GGMLComputationGraphSize // graph 204 | g += GGMLPadding(nodes*ps, ps) // nodes 205 | g += GGMLPadding(nodes*ps, ps) // leafs 206 | g += GGMLPadding(nodes*ps, ps) // parents 207 | g += GGMLPadding(hs*ps, ps) // hash keys 208 | if grads { 209 | g += GGMLPadding(hs*ps, ps) // grads 210 | g += GGMLPadding(hs*ps, ps) // grad_accs 211 | } 212 | g += GGMLPadding(GGMLBitsetSize(hs)*GGMLComputationBitsetSize, GGMLComputationBitsetSize) // bitset 213 | 214 | return GGMLObjectSize + GGMLMemoryPadding(g) 215 | } 216 | 217 | // GGMLHashSize returns the size of the hash table for the given base, 218 | // see https://github.com/ggerganov/ggml/blob/0cbb7c0e053f5419cfbebb46fbf4d4ed60182cf5/src/ggml.c#L17698-L17722. 219 | func GGMLHashSize(base uint64) uint64 { 220 | primes := []uint64{ 221 | 2, 3, 5, 11, 17, 37, 67, 131, 257, 521, 1031, 222 | 2053, 4099, 8209, 16411, 32771, 65537, 131101, 223 | 262147, 524309, 1048583, 2097169, 4194319, 8388617, 224 | 16777259, 33554467, 67108879, 134217757, 268435459, 225 | 536870923, 1073741827, 2147483659, 226 | } 227 | i, ok := slices.BinarySearchFunc(primes, base, func(e, t uint64) int { 228 | if t >= e { 229 | return 0 230 | } 231 | return -1 232 | }) 233 | if !ok { 234 | return base | 1 235 | } 236 | return primes[i] 237 | } 238 | 239 | // GGMLBitsetSize returns the size of the bitset for the given number of bits, 240 | // see https://github.com/ggml-org/llama.cpp/blob/ec9e0301fef6476df83e94842c3b625501c95566/ggml/src/ggml-impl.h#L166-L171. 
241 | func GGMLBitsetSize(n uint64) uint64 { 242 | return (n + (GGMLComputationBitsetSize*8 - 1)) >> 5 243 | } 244 | -------------------------------------------------------------------------------- /go.mod: -------------------------------------------------------------------------------- 1 | module github.com/gpustack/gguf-parser-go 2 | 3 | go 1.22.0 4 | 5 | toolchain go1.22.9 6 | 7 | require ( 8 | github.com/davecgh/go-spew v1.1.1 9 | github.com/henvic/httpretty v0.1.4 10 | github.com/json-iterator/go v1.1.12 11 | github.com/rs/dnscache v0.0.0-20230804202142-fc85eb664529 12 | github.com/smallnest/ringbuffer v0.0.0-20241116012123-461381446e3d 13 | github.com/stretchr/testify v1.9.0 14 | golang.org/x/crypto v0.29.0 15 | golang.org/x/exp v0.0.0-20241108190413-2d47ceb2692f 16 | golang.org/x/sync v0.9.0 17 | golang.org/x/sys v0.27.0 18 | golang.org/x/tools v0.27.0 19 | gonum.org/v1/gonum v0.15.1 20 | ) 21 | 22 | require ( 23 | github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd // indirect 24 | github.com/modern-go/reflect2 v1.0.2 // indirect 25 | github.com/pmezard/go-difflib v1.0.0 // indirect 26 | golang.org/x/mod v0.22.0 // indirect 27 | gopkg.in/yaml.v3 v3.0.1 // indirect 28 | ) 29 | -------------------------------------------------------------------------------- /go.sum: -------------------------------------------------------------------------------- 1 | github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= 2 | github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= 3 | github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= 4 | github.com/google/gofuzz v1.0.0/go.mod h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg= 5 | github.com/henvic/httpretty v0.1.4 h1:Jo7uwIRWVFxkqOnErcoYfH90o3ddQyVrSANeS4cxYmU= 6 | github.com/henvic/httpretty v0.1.4/go.mod h1:Dn60sQTZfbt2dYsdUSNsCljyF4AfdqnuJFDLJA1I4AM= 7 | github.com/json-iterator/go v1.1.12 
h1:PV8peI4a0ysnczrg+LtxykD8LfKY9ML6u2jnxaEnrnM= 8 | github.com/json-iterator/go v1.1.12/go.mod h1:e30LSqwooZae/UwlEbR2852Gd8hjQvJoHmT4TnhNGBo= 9 | github.com/modern-go/concurrent v0.0.0-20180228061459-e0a39a4cb421/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q= 10 | github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd h1:TRLaZ9cD/w8PVh93nsPXa1VrQ6jlwL5oN8l14QlcNfg= 11 | github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q= 12 | github.com/modern-go/reflect2 v1.0.2 h1:xBagoLtFs94CBntxluKeaWgTMpvLxC4ur3nMaC9Gz0M= 13 | github.com/modern-go/reflect2 v1.0.2/go.mod h1:yWuevngMOJpCy52FWWMvUC8ws7m/LJsjYzDa0/r8luk= 14 | github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= 15 | github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= 16 | github.com/rs/dnscache v0.0.0-20230804202142-fc85eb664529 h1:18kd+8ZUlt/ARXhljq+14TwAoKa61q6dX8jtwOf6DH8= 17 | github.com/rs/dnscache v0.0.0-20230804202142-fc85eb664529/go.mod h1:qe5TWALJ8/a1Lqznoc5BDHpYX/8HU60Hm2AwRmqzxqA= 18 | github.com/smallnest/ringbuffer v0.0.0-20241116012123-461381446e3d h1:3VwvTjiRPA7cqtgOWddEL+JrcijMlXUmj99c/6YyZoY= 19 | github.com/smallnest/ringbuffer v0.0.0-20241116012123-461381446e3d/go.mod h1:tAG61zBM1DYRaGIPloumExGvScf08oHuo0kFoOqdbT0= 20 | github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= 21 | github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI= 22 | github.com/stretchr/testify v1.9.0 h1:HtqpIVDClZ4nwg75+f6Lvsy/wHu+3BoSGCbBAcpTsTg= 23 | github.com/stretchr/testify v1.9.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY= 24 | golang.org/x/crypto v0.29.0 h1:L5SG1JTTXupVV3n6sUqMTeWbjAyfPwoda2DLX8J8FrQ= 25 | golang.org/x/crypto v0.29.0/go.mod h1:+F4F4N5hv6v38hfeYwTdx20oUvLLc+QfrE9Ax9HtgRg= 26 | golang.org/x/exp v0.0.0-20241108190413-2d47ceb2692f 
h1:XdNn9LlyWAhLVp6P/i8QYBW+hlyhrhei9uErw2B5GJo= 27 | golang.org/x/exp v0.0.0-20241108190413-2d47ceb2692f/go.mod h1:D5SMRVC3C2/4+F/DB1wZsLRnSNimn2Sp/NPsCrsv8ak= 28 | golang.org/x/mod v0.22.0 h1:D4nJWe9zXqHOmWqj4VMOJhvzj7bEZg4wEYa759z1pH4= 29 | golang.org/x/mod v0.22.0/go.mod h1:6SkKJ3Xj0I0BrPOZoBy3bdMptDDU9oJrpohJ3eWZ1fY= 30 | golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= 31 | golang.org/x/sync v0.9.0 h1:fEo0HyrW1GIgZdpbhCRO0PkJajUS5H9IFUztCgEo2jQ= 32 | golang.org/x/sync v0.9.0/go.mod h1:Czt+wKu1gCyEFDUtn0jG5QVvpJ6rzVqr5aXyt9drQfk= 33 | golang.org/x/sys v0.27.0 h1:wBqf8DvsY9Y/2P8gAfPDEYNuS30J4lPHJxXSb/nJZ+s= 34 | golang.org/x/sys v0.27.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= 35 | golang.org/x/term v0.26.0 h1:WEQa6V3Gja/BhNxg540hBip/kkaYtRg3cxg4oXSw4AU= 36 | golang.org/x/term v0.26.0/go.mod h1:Si5m1o57C5nBNQo5z1iq+XDijt21BDBDp2bK0QI8e3E= 37 | golang.org/x/tools v0.27.0 h1:qEKojBykQkQ4EynWy4S8Weg69NumxKdn40Fce3uc/8o= 38 | golang.org/x/tools v0.27.0/go.mod h1:sUi0ZgbwW9ZPAq26Ekut+weQPR5eIM6GQLQ1Yjm1H0Q= 39 | gonum.org/v1/gonum v0.15.1 h1:FNy7N6OUZVUaWG9pTiD+jlhdQ3lMP+/LcTpJ6+a8sQ0= 40 | gonum.org/v1/gonum v0.15.1/go.mod h1:eZTZuRFrzu5pcyjN5wJhcIhnUdNijYxX1T2IcrOGY0o= 41 | gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405 h1:yhCVgyC4o1eVCa2tZl7eS0r+SDo693bJlVdllGtEeKM= 42 | gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= 43 | gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= 44 | gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= 45 | -------------------------------------------------------------------------------- /ollama_model.go: -------------------------------------------------------------------------------- 1 | package gguf_parser 2 | 3 | import ( 4 | "context" 5 | "fmt" 6 | "net/http" 7 | "net/url" 8 | "regexp" 9 | "strings" 10 | 11 | "golang.org/x/sync/errgroup" 12 | 13 | 
"github.com/gpustack/gguf-parser-go/util/httpx" 14 | "github.com/gpustack/gguf-parser-go/util/json" 15 | "github.com/gpustack/gguf-parser-go/util/stringx" 16 | ) 17 | 18 | // Inspired by https://github.com/ollama/ollama/blob/380e06e5bea06ae8ded37f47c37bd5d604194d3e/types/model/name.go, 19 | // and https://github.com/ollama/ollama/blob/380e06e5bea06ae8ded37f47c37bd5d604194d3e/server/modelpath.go. 20 | 21 | const ( 22 | OllamaDefaultScheme = "https" 23 | OllamaDefaultRegistry = "registry.ollama.ai" 24 | OllamaDefaultNamespace = "library" 25 | OllamaDefaultTag = "latest" 26 | ) 27 | 28 | type ( 29 | // OllamaModel represents an Ollama model, 30 | // its manifest(including MediaType, Config and Layers) can be completed further by calling the Complete method. 31 | OllamaModel struct { 32 | Schema string `json:"schema"` 33 | Registry string `json:"registry"` 34 | Namespace string `json:"namespace"` 35 | Repository string `json:"repository"` 36 | Tag string `json:"tag"` 37 | SchemaVersion uint32 `json:"schemaVersion"` 38 | MediaType string `json:"mediaType"` 39 | Config OllamaModelLayer `json:"config"` 40 | Layers []OllamaModelLayer `json:"layers"` 41 | 42 | // Client is the http client used to complete the OllamaModel's network operations. 43 | // 44 | // When this field is nil, 45 | // it will be set to the client used by OllamaModel.Complete. 46 | // 47 | // When this field is offered, 48 | // the network operations will be done with this client. 49 | Client *http.Client `json:"-"` 50 | } 51 | 52 | // OllamaModelLayer represents an Ollama model layer, 53 | // its digest can be used to download the artifact. 54 | OllamaModelLayer struct { 55 | MediaType string `json:"mediaType"` 56 | Size uint64 `json:"size"` 57 | Digest string `json:"digest"` 58 | 59 | // Root points to the root OllamaModel, 60 | // which is never serialized or deserialized. 61 | // 62 | // When called OllamaModel.Complete, 63 | // this field will be set to the OllamaModel itself. 
64 | // If not, this field will be nil, 65 | // and must be set manually to the root OllamaModel before calling the method of OllamaModelLayer. 66 | Root *OllamaModel `json:"-"` 67 | } 68 | ) 69 | 70 | // ParseOllamaModel parses the given Ollama model string, 71 | // and returns the OllamaModel, or nil if the model is invalid. 72 | func ParseOllamaModel(model string, opts ...OllamaModelOption) *OllamaModel { 73 | if model == "" { 74 | return nil 75 | } 76 | 77 | var o _OllamaModelOptions 78 | for _, opt := range opts { 79 | opt(&o) 80 | } 81 | 82 | om := OllamaModel{ 83 | Schema: OllamaDefaultScheme, 84 | Registry: OllamaDefaultRegistry, 85 | Namespace: OllamaDefaultNamespace, 86 | Tag: OllamaDefaultTag, 87 | } 88 | { 89 | if o.DefaultScheme != "" { 90 | om.Schema = o.DefaultScheme 91 | } 92 | if o.DefaultRegistry != "" { 93 | om.Registry = o.DefaultRegistry 94 | } 95 | if o.DefaultNamespace != "" { 96 | om.Namespace = o.DefaultNamespace 97 | } 98 | if o.DefaultTag != "" { 99 | om.Tag = o.DefaultTag 100 | } 101 | } 102 | 103 | m := model 104 | 105 | // Drop digest. 106 | m, _, _ = stringx.CutFromRight(m, "@") 107 | 108 | // Get tag. 109 | m, s, ok := stringx.CutFromRight(m, ":") 110 | if ok && s != "" { 111 | om.Tag = s 112 | } 113 | 114 | // Get repository. 115 | m, s, ok = stringx.CutFromRight(m, "/") 116 | if ok && s != "" { 117 | om.Repository = s 118 | } else if m != "" { 119 | om.Repository = m 120 | m = "" 121 | } 122 | 123 | // Get namespace. 124 | m, s, ok = stringx.CutFromRight(m, "/") 125 | if ok && s != "" { 126 | om.Namespace = s 127 | } else if m != "" { 128 | om.Namespace = m 129 | m = "" 130 | } 131 | 132 | // Get registry. 
133 | m, s, ok = stringx.CutFromLeft(m, "://") 134 | if ok && s != "" { 135 | om.Schema = m 136 | om.Registry = s 137 | } else if m != "" { 138 | om.Registry = m 139 | } 140 | 141 | if om.Repository == "" { 142 | return nil 143 | } 144 | return &om 145 | } 146 | 147 | func (om *OllamaModel) String() string { 148 | var b strings.Builder 149 | if om.Registry != "" { 150 | b.WriteString(om.Registry) 151 | b.WriteByte('/') 152 | } 153 | if om.Namespace != "" { 154 | b.WriteString(om.Namespace) 155 | b.WriteByte('/') 156 | } 157 | b.WriteString(om.Repository) 158 | if om.Tag != "" { 159 | b.WriteByte(':') 160 | b.WriteString(om.Tag) 161 | } 162 | return b.String() 163 | } 164 | 165 | // GetLayer returns the OllamaModelLayer with the given media type, 166 | // and true if found, and false otherwise. 167 | func (om *OllamaModel) GetLayer(mediaType string) (OllamaModelLayer, bool) { 168 | for i := range om.Layers { 169 | if om.Layers[i].MediaType == mediaType { 170 | return om.Layers[i], true 171 | } 172 | } 173 | return OllamaModelLayer{}, false 174 | } 175 | 176 | // SearchLayers returns a list of OllamaModelLayer with the media type that matches the given regex. 177 | func (om *OllamaModel) SearchLayers(mediaTypeRegex *regexp.Regexp) []OllamaModelLayer { 178 | var ls []OllamaModelLayer 179 | for i := range om.Layers { 180 | if mediaTypeRegex.MatchString(om.Layers[i].MediaType) { 181 | ls = append(ls, om.Layers[i]) 182 | } 183 | } 184 | return ls 185 | } 186 | 187 | // WebPageURL returns the Ollama web page URL of the OllamaModel. 188 | func (om *OllamaModel) WebPageURL() *url.URL { 189 | u := &url.URL{ 190 | Scheme: om.Schema, 191 | Host: om.Registry, 192 | } 193 | return u.JoinPath(om.Namespace, om.Repository+":"+om.Tag) 194 | } 195 | 196 | // Complete completes the OllamaModel with the given context and http client. 
197 | func (om *OllamaModel) Complete(ctx context.Context, cli *http.Client) error { 198 | if om.Client == nil { 199 | om.Client = cli 200 | } 201 | 202 | u := &url.URL{ 203 | Scheme: om.Schema, 204 | Host: om.Registry, 205 | } 206 | u = u.JoinPath("v2", om.Namespace, om.Repository, "manifests", om.Tag) 207 | 208 | req, err := httpx.NewGetRequestWithContext(ctx, u.String()) 209 | if err != nil { 210 | return fmt.Errorf("new request: %w", err) 211 | } 212 | req.Header.Set("Accept", "application/vnd.docker.distribution.manifest.v2+json") 213 | 214 | err = httpx.Do(om.Client, req, func(resp *http.Response) error { 215 | if resp.StatusCode != http.StatusOK { 216 | return fmt.Errorf("status code %d", resp.StatusCode) 217 | } 218 | return json.NewDecoder(resp.Body).Decode(om) 219 | }) 220 | if err != nil { 221 | return fmt.Errorf("do request %s: %w", u, err) 222 | } 223 | 224 | // Connect. 225 | om.Config.Root = om 226 | for i := range om.Layers { 227 | om.Layers[i].Root = om 228 | } 229 | 230 | return nil 231 | } 232 | 233 | // Params returns the parameters of the OllamaModel. 
234 | func (om *OllamaModel) Params(ctx context.Context, cli *http.Client) (map[string]any, error) { 235 | if cli == nil { 236 | cli = om.Client 237 | } 238 | if cli == nil { 239 | return nil, fmt.Errorf("no client") 240 | } 241 | 242 | mls := om.SearchLayers(regexp.MustCompile(`^application/vnd\.ollama\.image\.params$`)) 243 | if len(mls) == 0 { 244 | return nil, nil 245 | } 246 | 247 | rs := make([]map[string]any, len(mls)) 248 | eg, ctx := errgroup.WithContext(ctx) 249 | for i := range mls { 250 | x := i 251 | eg.Go(func() error { 252 | bs, err := mls[x].FetchBlob(ctx, cli) 253 | if err == nil { 254 | p := make(map[string]any) 255 | if err = json.Unmarshal(bs, &p); err == nil { 256 | rs[x] = p 257 | } 258 | } 259 | return err 260 | }) 261 | } 262 | if err := eg.Wait(); err != nil { 263 | return nil, fmt.Errorf("fetch blob: %w", err) 264 | } 265 | 266 | r := make(map[string]any) 267 | for i := range rs { 268 | for k, v := range rs[i] { 269 | r[k] = v 270 | } 271 | } 272 | return r, nil 273 | } 274 | 275 | // Template returns the template of the OllamaModel. 276 | func (om *OllamaModel) Template(ctx context.Context, cli *http.Client) (string, error) { 277 | if cli == nil { 278 | cli = om.Client 279 | } 280 | if cli == nil { 281 | return "", fmt.Errorf("no client") 282 | } 283 | 284 | mls := om.SearchLayers(regexp.MustCompile(`^application/vnd\.ollama\.image\.(prompt|template)$`)) 285 | if len(mls) == 0 { 286 | return "", nil 287 | } 288 | 289 | ml := mls[len(mls)-1] 290 | bs, err := ml.FetchBlob(ctx, cli) 291 | if err != nil { 292 | return "", fmt.Errorf("fetch blob: %w", err) 293 | } 294 | return stringx.FromBytes(&bs), nil 295 | } 296 | 297 | // System returns the system message of the OllamaModel. 
298 | func (om *OllamaModel) System(ctx context.Context, cli *http.Client) (string, error) { 299 | if cli == nil { 300 | cli = om.Client 301 | } 302 | if cli == nil { 303 | return "", fmt.Errorf("no client") 304 | } 305 | 306 | mls := om.SearchLayers(regexp.MustCompile(`^application/vnd\.ollama\.image\.system$`)) 307 | if len(mls) == 0 { 308 | return "", nil 309 | } 310 | 311 | ml := mls[len(mls)-1] 312 | bs, err := ml.FetchBlob(ctx, cli) 313 | if err != nil { 314 | return "", fmt.Errorf("fetch blob: %w", err) 315 | } 316 | return stringx.FromBytes(&bs), nil 317 | } 318 | 319 | // License returns the license of the OllamaModel. 320 | func (om *OllamaModel) License(ctx context.Context, cli *http.Client) ([]string, error) { 321 | if cli == nil { 322 | cli = om.Client 323 | } 324 | if cli == nil { 325 | return nil, fmt.Errorf("no client") 326 | } 327 | 328 | mls := om.SearchLayers(regexp.MustCompile(`^application/vnd\.ollama\.image\.license$`)) 329 | if len(mls) == 0 { 330 | return nil, nil 331 | } 332 | 333 | rs := make([]string, len(mls)) 334 | eg, ctx := errgroup.WithContext(ctx) 335 | for i := range mls { 336 | x := i 337 | eg.Go(func() error { 338 | bs, err := mls[x].FetchBlob(ctx, cli) 339 | if err == nil { 340 | rs[x] = stringx.FromBytes(&bs) 341 | } 342 | return err 343 | }) 344 | } 345 | if err := eg.Wait(); err != nil { 346 | return nil, fmt.Errorf("fetch blob: %w", err) 347 | } 348 | return rs, nil 349 | } 350 | 351 | // Messages returns the messages of the OllamaModel. 
352 | func (om *OllamaModel) Messages(ctx context.Context, cli *http.Client) ([]json.RawMessage, error) { 353 | if cli == nil { 354 | cli = om.Client 355 | } 356 | if cli == nil { 357 | return nil, fmt.Errorf("no client") 358 | } 359 | 360 | mls := om.SearchLayers(regexp.MustCompile(`^application/vnd\.ollama\.image\.messages$`)) 361 | if len(mls) == 0 { 362 | return nil, nil 363 | } 364 | 365 | rs := make([]json.RawMessage, len(mls)) 366 | eg, ctx := errgroup.WithContext(ctx) 367 | for i := range mls { 368 | x := i 369 | eg.Go(func() error { 370 | bs, err := mls[x].FetchBlob(ctx, cli) 371 | if err == nil { 372 | rs[x] = bs 373 | } 374 | return err 375 | }) 376 | } 377 | if err := eg.Wait(); err != nil { 378 | return nil, fmt.Errorf("fetch blob: %w", err) 379 | } 380 | return rs, nil 381 | } 382 | 383 | // BlobURL returns the blob URL of the OllamaModelLayer. 384 | func (ol *OllamaModelLayer) BlobURL() *url.URL { 385 | if ol.Root == nil { 386 | return nil 387 | } 388 | 389 | u := &url.URL{ 390 | Scheme: ol.Root.Schema, 391 | Host: ol.Root.Registry, 392 | } 393 | return u.JoinPath("v2", ol.Root.Namespace, ol.Root.Repository, "blobs", ol.Digest) 394 | } 395 | 396 | // FetchBlob fetches the blob of the OllamaModelLayer with the given context and http client, 397 | // and returns the response body as bytes. 398 | func (ol *OllamaModelLayer) FetchBlob(ctx context.Context, cli *http.Client) ([]byte, error) { 399 | var b []byte 400 | err := ol.FetchBlobFunc(ctx, cli, func(resp *http.Response) error { 401 | b = httpx.BodyBytes(resp) 402 | return nil 403 | }) 404 | return b, err 405 | } 406 | 407 | // FetchBlobFunc fetches the blob of the OllamaModelLayer with the given context and http client, 408 | // and processes the response with the given function. 
409 | func (ol *OllamaModelLayer) FetchBlobFunc(ctx context.Context, cli *http.Client, process func(*http.Response) error) error { 410 | if cli == nil { 411 | cli = ol.Root.Client 412 | } 413 | if cli == nil { 414 | return fmt.Errorf("no client") 415 | } 416 | 417 | u := ol.BlobURL() 418 | if u == nil { 419 | return fmt.Errorf("no blob URL") 420 | } 421 | 422 | req, err := httpx.NewGetRequestWithContext(ctx, u.String()) 423 | if err != nil { 424 | return fmt.Errorf("new request: %w", err) 425 | } 426 | 427 | err = httpx.Do(cli, req, process) 428 | if err != nil { 429 | return fmt.Errorf("do request %s: %w", u, err) 430 | } 431 | return nil 432 | } 433 | -------------------------------------------------------------------------------- /ollama_model_option.go: -------------------------------------------------------------------------------- 1 | package gguf_parser 2 | 3 | import ( 4 | "net/url" 5 | "strings" 6 | ) 7 | 8 | type ( 9 | _OllamaModelOptions struct { 10 | DefaultScheme string 11 | DefaultRegistry string 12 | DefaultNamespace string 13 | DefaultTag string 14 | } 15 | OllamaModelOption func(*_OllamaModelOptions) 16 | ) 17 | 18 | // SetOllamaModelBaseURL parses the given base URL, 19 | // and sets default schema/registry for OllamaModel. 20 | func SetOllamaModelBaseURL(baseURL string) OllamaModelOption { 21 | baseURL = strings.TrimSpace(baseURL) 22 | return func(o *_OllamaModelOptions) { 23 | if baseURL == "" { 24 | return 25 | } 26 | 27 | if !strings.Contains(baseURL, "://") { 28 | baseURL = "https://" + baseURL 29 | } 30 | 31 | u, err := url.Parse(baseURL) 32 | if err != nil { 33 | return 34 | } 35 | 36 | o.DefaultScheme = u.Scheme 37 | o.DefaultRegistry = u.Host 38 | } 39 | } 40 | 41 | // SetOllamaModelDefaultScheme sets the default scheme for OllamaModel. 
42 | func SetOllamaModelDefaultScheme(scheme string) OllamaModelOption { 43 | return func(o *_OllamaModelOptions) { 44 | if scheme == "" { 45 | return 46 | } 47 | o.DefaultScheme = scheme 48 | } 49 | } 50 | 51 | // SetOllamaModelDefaultRegistry sets the default registry for OllamaModel. 52 | func SetOllamaModelDefaultRegistry(registry string) OllamaModelOption { 53 | return func(o *_OllamaModelOptions) { 54 | if registry == "" { 55 | return 56 | } 57 | o.DefaultRegistry = registry 58 | } 59 | } 60 | 61 | // SetOllamaModelDefaultNamespace sets the default namespace for OllamaModel. 62 | func SetOllamaModelDefaultNamespace(namespace string) OllamaModelOption { 63 | return func(o *_OllamaModelOptions) { 64 | if namespace == "" { 65 | return 66 | } 67 | o.DefaultNamespace = namespace 68 | } 69 | } 70 | 71 | // SetOllamaModelDefaultTag sets the default tag for OllamaModel. 72 | func SetOllamaModelDefaultTag(tag string) OllamaModelOption { 73 | return func(o *_OllamaModelOptions) { 74 | if tag == "" { 75 | return 76 | } 77 | o.DefaultTag = tag 78 | } 79 | } 80 | -------------------------------------------------------------------------------- /ollama_model_test.go: -------------------------------------------------------------------------------- 1 | package gguf_parser 2 | 3 | import ( 4 | "testing" 5 | 6 | "github.com/stretchr/testify/assert" 7 | ) 8 | 9 | func TestParseOllamaModel(t *testing.T) { 10 | cases := []struct { 11 | given string 12 | expected *OllamaModel 13 | }{ 14 | { 15 | given: "gemma2", 16 | expected: &OllamaModel{ 17 | Schema: OllamaDefaultScheme, 18 | Registry: OllamaDefaultRegistry, 19 | Namespace: OllamaDefaultNamespace, 20 | Repository: "gemma2", 21 | Tag: OllamaDefaultTag, 22 | }, 23 | }, 24 | { 25 | given: "gemma2:awesome", 26 | expected: &OllamaModel{ 27 | Schema: OllamaDefaultScheme, 28 | Registry: OllamaDefaultRegistry, 29 | Namespace: OllamaDefaultNamespace, 30 | Repository: "gemma2", 31 | Tag: "awesome", 32 | }, 33 | }, 34 | { 35 | given: 
"gemma2:awesome@sha256:1234567890abcdef", 36 | expected: &OllamaModel{ 37 | Schema: OllamaDefaultScheme, 38 | Registry: OllamaDefaultRegistry, 39 | Namespace: OllamaDefaultNamespace, 40 | Repository: "gemma2", 41 | Tag: "awesome", 42 | }, 43 | }, 44 | { 45 | given: "awesome/gemma2:latest@sha256:1234567890abcdef", 46 | expected: &OllamaModel{ 47 | Schema: OllamaDefaultScheme, 48 | Registry: OllamaDefaultRegistry, 49 | Namespace: "awesome", 50 | Repository: "gemma2", 51 | Tag: "latest", 52 | }, 53 | }, 54 | { 55 | given: "mysite.com/library/gemma2:latest@sha256:1234567890abcdef", 56 | expected: &OllamaModel{ 57 | Schema: OllamaDefaultScheme, 58 | Registry: "mysite.com", 59 | Namespace: "library", 60 | Repository: "gemma2", 61 | Tag: "latest", 62 | }, 63 | }, 64 | { 65 | given: "http://mysite.com/library/gemma2:latest@sha256:1234567890abcdef", 66 | expected: &OllamaModel{ 67 | Schema: "http", 68 | Registry: "mysite.com", 69 | Namespace: "library", 70 | Repository: "gemma2", 71 | Tag: "latest", 72 | }, 73 | }, 74 | } 75 | for _, tc := range cases { 76 | t.Run(tc.given, func(t *testing.T) { 77 | actual := ParseOllamaModel(tc.given) 78 | assert.Equal(t, tc.expected, actual) 79 | }) 80 | } 81 | } 82 | -------------------------------------------------------------------------------- /ollama_registry_authenticate.go: -------------------------------------------------------------------------------- 1 | package gguf_parser 2 | 3 | import ( 4 | "bytes" 5 | "context" 6 | "crypto/ed25519" 7 | "crypto/rand" 8 | "encoding/base64" 9 | "encoding/json" 10 | "encoding/pem" 11 | "errors" 12 | "fmt" 13 | "net/http" 14 | "net/url" 15 | "os" 16 | "path/filepath" 17 | "runtime" 18 | "strconv" 19 | "strings" 20 | "time" 21 | 22 | "golang.org/x/crypto/ssh" 23 | 24 | "github.com/gpustack/gguf-parser-go/util/funcx" 25 | "github.com/gpustack/gguf-parser-go/util/httpx" 26 | "github.com/gpustack/gguf-parser-go/util/osx" 27 | "github.com/gpustack/gguf-parser-go/util/stringx" 28 | ) 29 | 30 | const ( 
31 | httpHeaderWWWAuthenticate = "WWW-Authenticate" 32 | httpHeaderAuthorization = "Authorization" 33 | ) 34 | 35 | // OllamaUserAgent returns the user agent string for Ollama, 36 | // since llama3.1, the user agent is required to be set, 37 | // otherwise the request will be rejected by 412. 38 | func OllamaUserAgent() string { 39 | return fmt.Sprintf("ollama/9.9.9 (%s %s) Go/%s", runtime.GOARCH, runtime.GOOS, runtime.Version()) 40 | } 41 | 42 | // OllamaRegistryAuthorizeRetry returns true if the request should be retried with authorization. 43 | // 44 | // OllamaRegistryAuthorizeRetry leverages OllamaRegistryAuthorize to obtain an authorization token, 45 | // and configures the request with the token. 46 | func OllamaRegistryAuthorizeRetry(resp *http.Response, cli *http.Client) bool { 47 | if resp == nil || cli == nil { 48 | return false 49 | } 50 | 51 | if resp.StatusCode != http.StatusUnauthorized && resp.Request == nil { 52 | // Not unauthorized, return. 53 | return false 54 | } 55 | 56 | req := resp.Request 57 | if req.Header.Get(httpHeaderAuthorization) != "" { 58 | // Already authorized, return. 59 | return false 60 | } 61 | 62 | const tokenPrefix = "Bearer " 63 | authnToken := strings.TrimPrefix(resp.Header.Get(httpHeaderWWWAuthenticate), tokenPrefix) 64 | if authnToken == "" { 65 | // No authentication token, return. 66 | return false 67 | } 68 | authzToken := funcx.MustNoError(OllamaRegistryAuthorize(req.Context(), cli, authnToken)) 69 | req.Header.Set(httpHeaderAuthorization, tokenPrefix+authzToken) 70 | return true 71 | } 72 | 73 | // OllamaRegistryAuthorize authorizes the request with the given authentication token, 74 | // and returns the authorization token. 
75 | func OllamaRegistryAuthorize(ctx context.Context, cli *http.Client, authnToken string) (string, error) { 76 | priKey, err := OllamaSingKeyLoad() 77 | if err != nil { 78 | return "", fmt.Errorf("load sign key: %w", err) 79 | } 80 | 81 | var authzUrl string 82 | { 83 | ss := strings.Split(authnToken, ",") 84 | if len(ss) < 3 { 85 | return "", errors.New("invalid authn token") 86 | } 87 | 88 | var realm, service, scope string 89 | for _, s := range ss { 90 | sp := strings.SplitN(s, "=", 2) 91 | if len(sp) < 2 { 92 | continue 93 | } 94 | sp[1] = strings.TrimFunc(sp[1], func(r rune) bool { 95 | return r == '"' || r == '\'' 96 | }) 97 | switch sp[0] { 98 | case "realm": 99 | realm = sp[1] 100 | case "service": 101 | service = sp[1] 102 | case "scope": 103 | scope = sp[1] 104 | } 105 | } 106 | 107 | u, err := url.Parse(realm) 108 | if err != nil { 109 | return "", fmt.Errorf("parse realm: %w", err) 110 | } 111 | 112 | qs := u.Query() 113 | qs.Add("service", service) 114 | for _, s := range strings.Split(scope, " ") { 115 | qs.Add("scope", s) 116 | } 117 | qs.Add("ts", strconv.FormatInt(time.Now().Unix(), 10)) 118 | qs.Add("nonce", stringx.RandomBase64(16)) 119 | u.RawQuery = qs.Encode() 120 | 121 | authzUrl = u.String() 122 | } 123 | 124 | var authnData string 125 | { 126 | pubKey := ssh.MarshalAuthorizedKey(priKey.PublicKey()) 127 | pubKeyp := bytes.Split(pubKey, []byte(" ")) 128 | if len(pubKeyp) < 2 { 129 | return "", errors.New("malformed public key") 130 | } 131 | 132 | nc := base64.StdEncoding.EncodeToString([]byte(stringx.SumBytesBySHA256(nil))) 133 | py := []byte(fmt.Sprintf("%s,%s,%s", http.MethodGet, authzUrl, nc)) 134 | sd, err := priKey.Sign(rand.Reader, py) 135 | if err != nil { 136 | return "", fmt.Errorf("signing data: %w", err) 137 | } 138 | authnData = fmt.Sprintf("%s:%s", bytes.TrimSpace(pubKeyp[1]), base64.StdEncoding.EncodeToString(sd.Blob)) 139 | } 140 | 141 | req, err := httpx.NewGetRequestWithContext(ctx, authzUrl) 142 | if err != nil { 143 | 
return "", fmt.Errorf("new request: %w", err) 144 | } 145 | req.Header.Add(httpHeaderAuthorization, authnData) 146 | 147 | var authzToken string 148 | err = httpx.Do(cli, req, func(resp *http.Response) error { 149 | if resp.StatusCode != http.StatusOK { 150 | return fmt.Errorf("status code %d", resp.StatusCode) 151 | } 152 | var tok struct { 153 | Token string `json:"token"` 154 | } 155 | if err = json.NewDecoder(resp.Body).Decode(&tok); err != nil { 156 | return err 157 | } 158 | if tok.Token == "" { 159 | return errors.New("empty token") 160 | } 161 | authzToken = tok.Token 162 | return nil 163 | }) 164 | if err != nil { 165 | return "", fmt.Errorf("do request %s: %w", authzUrl, err) 166 | } 167 | 168 | return authzToken, nil 169 | } 170 | 171 | // OllamaSingKeyLoad loads the signing key for Ollama, 172 | // and generates a new key if not exists. 173 | func OllamaSingKeyLoad() (ssh.Signer, error) { 174 | hd := filepath.Join(osx.UserHomeDir(), ".ollama") 175 | 176 | priKeyPath := filepath.Join(hd, "id_ed25519") 177 | if !osx.ExistsFile(priKeyPath) { 178 | // Generate key if not exists. 
179 | pubKey, priKey, err := ed25519.GenerateKey(rand.Reader) 180 | if err != nil { 181 | return nil, fmt.Errorf("generate key: %w", err) 182 | } 183 | 184 | priKeyPem, err := ssh.MarshalPrivateKey(priKey, "") 185 | if err != nil { 186 | return nil, fmt.Errorf("marshal private key: %w", err) 187 | } 188 | priKeyBs := pem.EncodeToMemory(priKeyPem) 189 | 190 | sshPubKey, err := ssh.NewPublicKey(pubKey) 191 | if err != nil { 192 | return nil, fmt.Errorf("new public key: %w", err) 193 | } 194 | pubKeyBs := ssh.MarshalAuthorizedKey(sshPubKey) 195 | 196 | if err = osx.WriteFile(priKeyPath, priKeyBs, 0o600); err != nil { 197 | return nil, fmt.Errorf("write private key: %w", err) 198 | } 199 | if err = osx.WriteFile(priKeyPath+".pub", pubKeyBs, 0o644); err != nil { 200 | _ = os.Remove(priKeyPath) 201 | return nil, fmt.Errorf("write public key: %w", err) 202 | } 203 | } 204 | 205 | priKeyBs, err := os.ReadFile(priKeyPath) 206 | if err != nil { 207 | return nil, fmt.Errorf("read private key: %w", err) 208 | } 209 | priKey, err := ssh.ParsePrivateKey(priKeyBs) 210 | if err != nil { 211 | return nil, fmt.Errorf("parse private key: %w", err) 212 | } 213 | return priKey, nil 214 | } 215 | -------------------------------------------------------------------------------- /scalar.go: -------------------------------------------------------------------------------- 1 | package gguf_parser 2 | 3 | import ( 4 | "errors" 5 | "strconv" 6 | "strings" 7 | ) 8 | 9 | const ( 10 | _Ki = 1 << ((iota + 1) * 10) 11 | _Mi 12 | _Gi 13 | _Ti 14 | _Pi 15 | ) 16 | 17 | const ( 18 | _K = 1e3 19 | _M = 1e6 20 | _G = 1e9 21 | _T = 1e12 22 | _P = 1e15 23 | ) 24 | 25 | const ( 26 | _Thousand = 1e3 27 | _Million = 1e6 28 | _Billion = 1e9 29 | _Trillion = 1e12 30 | _Quadrillion = 1e15 31 | ) 32 | 33 | type ( 34 | // SizeScalar is the scalar for size. 35 | SizeScalar uint64 36 | 37 | // FLOPSScalar is the scalar for FLOPS. 
38 | FLOPSScalar uint64 39 | 40 | // BytesPerSecondScalar is the scalar for bytes per second (Bps). 41 | BytesPerSecondScalar uint64 42 | ) 43 | 44 | var ( 45 | // _GeneralBaseUnitMatrix is the base unit matrix for bytes. 46 | _GeneralBaseUnitMatrix = []struct { 47 | Base float64 48 | Unit string 49 | }{ 50 | {_Pi, "Pi"}, 51 | {_P, "P"}, 52 | {_Ti, "Ti"}, 53 | {_T, "T"}, 54 | {_Gi, "Gi"}, 55 | {_G, "G"}, 56 | {_Mi, "Mi"}, 57 | {_M, "M"}, 58 | {_Ki, "Ki"}, 59 | {_K, "K"}, 60 | } 61 | 62 | // _SizeBaseUnitMatrix is the base unit matrix for size. 63 | _SizeBaseUnitMatrix = []struct { 64 | Base float64 65 | Unit string 66 | }{ 67 | {_Pi, "P"}, 68 | {_Ti, "T"}, 69 | {_Gi, "G"}, 70 | {_Mi, "M"}, 71 | {_Ki, "K"}, 72 | } 73 | 74 | // _NumberBaseUnitMatrix is the base unit matrix for numbers. 75 | _NumberBaseUnitMatrix = []struct { 76 | Base float64 77 | Unit string 78 | }{ 79 | {_Quadrillion, "Q"}, 80 | {_Trillion, "T"}, 81 | {_Billion, "B"}, 82 | {_Million, "M"}, 83 | {_Thousand, "K"}, 84 | } 85 | ) 86 | 87 | // ParseSizeScalar parses the SizeScalar from the string. 
88 | func ParseSizeScalar(s string) (_ SizeScalar, err error) { 89 | if s == "" { 90 | return 0, errors.New("invalid SizeScalar") 91 | } 92 | b := float64(1) 93 | for i := range _SizeBaseUnitMatrix { 94 | if strings.HasSuffix(s, _SizeBaseUnitMatrix[i].Unit) { 95 | b = _SizeBaseUnitMatrix[i].Base 96 | s = strings.TrimSuffix(s, _SizeBaseUnitMatrix[i].Unit) 97 | break 98 | } 99 | } 100 | f, err := strconv.ParseFloat(strings.TrimSpace(s), 64) 101 | if err != nil { 102 | return 0, err 103 | } 104 | return SizeScalar(f * b), nil 105 | } 106 | 107 | func (s SizeScalar) String() string { 108 | if s == 0 { 109 | return "0" 110 | } 111 | b, u := float64(1), "" 112 | for i := range _SizeBaseUnitMatrix { 113 | if float64(s) >= _SizeBaseUnitMatrix[i].Base { 114 | b = _SizeBaseUnitMatrix[i].Base 115 | u = _SizeBaseUnitMatrix[i].Unit 116 | break 117 | } 118 | } 119 | f := strconv.FormatFloat(float64(s)/b, 'f', 2, 64) 120 | return strings.TrimSuffix(f, ".00") + " " + u 121 | } 122 | 123 | // ParseFLOPSScalar parses the FLOPSScalar from the string. 
124 | func ParseFLOPSScalar(s string) (_ FLOPSScalar, err error) { 125 | if s == "" { 126 | return 0, errors.New("invalid FLOPSScalar") 127 | } 128 | s = strings.TrimSuffix(s, "FLOPS") 129 | b := float64(1) 130 | for i := range _GeneralBaseUnitMatrix { 131 | if strings.HasSuffix(s, _GeneralBaseUnitMatrix[i].Unit) { 132 | b = _GeneralBaseUnitMatrix[i].Base 133 | s = strings.TrimSuffix(s, _GeneralBaseUnitMatrix[i].Unit) 134 | break 135 | } 136 | } 137 | f, err := strconv.ParseFloat(strings.TrimSpace(s), 64) 138 | if err != nil { 139 | return 0, err 140 | } 141 | return FLOPSScalar(f * b), nil 142 | } 143 | 144 | func (s FLOPSScalar) String() string { 145 | if s == 0 { 146 | return "0 FLOPS" 147 | } 148 | b, u := float64(1), "" 149 | for i := range _GeneralBaseUnitMatrix { 150 | if float64(s) >= _GeneralBaseUnitMatrix[i].Base { 151 | b = _GeneralBaseUnitMatrix[i].Base 152 | u = _GeneralBaseUnitMatrix[i].Unit 153 | break 154 | } 155 | } 156 | f := strconv.FormatFloat(float64(s)/b, 'f', 2, 64) 157 | return strings.TrimSuffix(f, ".00") + " " + u + "FLOPS" 158 | } 159 | 160 | // ParseBytesPerSecondScalar parses the BytesPerSecondScalar from the string. 
161 | func ParseBytesPerSecondScalar(s string) (_ BytesPerSecondScalar, err error) { 162 | if s == "" { 163 | return 0, errors.New("invalid BytesPerSecondScalar") 164 | } 165 | b := float64(1) 166 | o := float64(1) 167 | switch { 168 | case strings.HasSuffix(s, "Bps") || strings.HasSuffix(s, "B/s"): 169 | s = strings.TrimSuffix(strings.TrimSuffix(s, "Bps"), "B/s") 170 | case strings.HasSuffix(s, "bps") || strings.HasSuffix(s, "b/s"): 171 | s = strings.TrimSuffix(strings.TrimSuffix(s, "bps"), "b/s") 172 | o = 8 173 | } 174 | for i := range _GeneralBaseUnitMatrix { 175 | if strings.HasSuffix(s, _GeneralBaseUnitMatrix[i].Unit) { 176 | b = _GeneralBaseUnitMatrix[i].Base 177 | s = strings.TrimSuffix(s, _GeneralBaseUnitMatrix[i].Unit) 178 | break 179 | } 180 | } 181 | f, err := strconv.ParseFloat(strings.TrimSpace(s), 64) 182 | if err != nil { 183 | return 0, err 184 | } 185 | return BytesPerSecondScalar(f * b / o), nil 186 | } 187 | 188 | func (s BytesPerSecondScalar) String() string { 189 | if s == 0 { 190 | return "0 Bps" 191 | } 192 | b, u := float64(1), "" 193 | for i := range _GeneralBaseUnitMatrix { 194 | if float64(s) >= _GeneralBaseUnitMatrix[i].Base { 195 | b = _GeneralBaseUnitMatrix[i].Base 196 | u = _GeneralBaseUnitMatrix[i].Unit 197 | break 198 | } 199 | } 200 | f := strconv.FormatFloat(float64(s)/b, 'f', 2, 64) 201 | return strings.TrimSuffix(f, ".00") + " " + u + "Bps" 202 | } 203 | 204 | type ( 205 | // GGUFBytesScalar is the scalar for bytes. 206 | GGUFBytesScalar uint64 207 | 208 | // GGUFParametersScalar is the scalar for parameters. 209 | GGUFParametersScalar uint64 210 | 211 | // GGUFBitsPerWeightScalar is the scalar for bits per weight. 212 | GGUFBitsPerWeightScalar float64 213 | 214 | // GGUFTokensPerSecondScalar is the scalar for tokens per second. 215 | GGUFTokensPerSecondScalar float64 216 | ) 217 | 218 | // ParseGGUFBytesScalar parses the GGUFBytesScalar from the string. 
219 | func ParseGGUFBytesScalar(s string) (_ GGUFBytesScalar, err error) { 220 | if s == "" { 221 | return 0, errors.New("invalid GGUFBytesScalar") 222 | } 223 | s = strings.TrimSuffix(s, "B") 224 | b := float64(1) 225 | for i := range _GeneralBaseUnitMatrix { 226 | if strings.HasSuffix(s, _GeneralBaseUnitMatrix[i].Unit) { 227 | b = _GeneralBaseUnitMatrix[i].Base 228 | s = strings.TrimSuffix(s, _GeneralBaseUnitMatrix[i].Unit) 229 | break 230 | } 231 | } 232 | f, err := strconv.ParseFloat(strings.TrimSpace(s), 64) 233 | if err != nil { 234 | return 0, err 235 | } 236 | return GGUFBytesScalar(f * b), nil 237 | } 238 | 239 | // GGUFBytesScalarStringInMiBytes is the flag to show the GGUFBytesScalar string in MiB. 240 | var GGUFBytesScalarStringInMiBytes bool 241 | 242 | func (s GGUFBytesScalar) String() string { 243 | if s == 0 { 244 | return "0 B" 245 | } 246 | b, u := float64(1), "" 247 | if GGUFBytesScalarStringInMiBytes { 248 | b = _Mi 249 | u = "Mi" 250 | } else { 251 | for i := range _GeneralBaseUnitMatrix { 252 | if float64(s) >= _GeneralBaseUnitMatrix[i].Base { 253 | b = _GeneralBaseUnitMatrix[i].Base 254 | u = _GeneralBaseUnitMatrix[i].Unit 255 | break 256 | } 257 | } 258 | } 259 | f := strconv.FormatFloat(float64(s)/b, 'f', 2, 64) 260 | return strings.TrimSuffix(f, ".00") + " " + u + "B" 261 | } 262 | 263 | func (s GGUFParametersScalar) String() string { 264 | if s == 0 { 265 | return "0" 266 | } 267 | b, u := float64(1), "" 268 | for i := range _NumberBaseUnitMatrix { 269 | if float64(s) >= _NumberBaseUnitMatrix[i].Base { 270 | b = _NumberBaseUnitMatrix[i].Base 271 | u = _NumberBaseUnitMatrix[i].Unit 272 | break 273 | } 274 | } 275 | f := strconv.FormatFloat(float64(s)/b, 'f', 2, 64) 276 | return strings.TrimSuffix(f, ".00") + " " + u 277 | } 278 | 279 | func (s GGUFBitsPerWeightScalar) String() string { 280 | if s <= 0 { 281 | return "0 bpw" 282 | } 283 | return strconv.FormatFloat(float64(s), 'f', 2, 64) + " bpw" 284 | } 285 | 286 | func (s 
GGUFTokensPerSecondScalar) String() string { 287 | if s <= 0 { 288 | return "0 tps" 289 | } 290 | return strconv.FormatFloat(float64(s), 'f', 2, 64) + " tps" 291 | } 292 | -------------------------------------------------------------------------------- /scalar_test.go: -------------------------------------------------------------------------------- 1 | package gguf_parser 2 | 3 | import ( 4 | "testing" 5 | 6 | "github.com/stretchr/testify/assert" 7 | ) 8 | 9 | func TestParseSizeScalar(t *testing.T) { 10 | testCases := []struct { 11 | given string 12 | expected SizeScalar 13 | }{ 14 | {"1", 1}, 15 | {"1K", 1 * _Ki}, 16 | {"1M", 1 * _Mi}, 17 | {"1G", 1 * _Gi}, 18 | {"1T", 1 * _Ti}, 19 | {"1P", 1 * _Pi}, 20 | } 21 | for _, tc := range testCases { 22 | t.Run(tc.given, func(t *testing.T) { 23 | actual, err := ParseSizeScalar(tc.given) 24 | if !assert.NoError(t, err) { 25 | return 26 | } 27 | assert.Equal(t, tc.expected, actual) 28 | }) 29 | } 30 | } 31 | 32 | func TestParseFLOPSScalar(t *testing.T) { 33 | testCases := []struct { 34 | given string 35 | expected FLOPSScalar 36 | }{ 37 | {"1FLOPS", 1}, 38 | {"1KFLOPS", 1 * _K}, 39 | {"1MFLOPS", 1 * _M}, 40 | {"1GFLOPS", 1 * _G}, 41 | {"1TFLOPS", 1 * _T}, 42 | {"1PFLOPS", 1 * _P}, 43 | } 44 | for _, tc := range testCases { 45 | t.Run(tc.given, func(t *testing.T) { 46 | actual, err := ParseFLOPSScalar(tc.given) 47 | if !assert.NoError(t, err) { 48 | return 49 | } 50 | assert.Equal(t, tc.expected, actual) 51 | }) 52 | } 53 | } 54 | 55 | func TestParseBytesPerSecondScalar(t *testing.T) { 56 | testCases := []struct { 57 | given string 58 | expected BytesPerSecondScalar 59 | }{ 60 | {"1B/s", 1}, 61 | {"1KB/s", 1 * _K}, 62 | {"1MB/s", 1 * _M}, 63 | {"1GB/s", 1 * _G}, 64 | {"1TB/s", 1 * _T}, 65 | {"1PB/s", 1 * _P}, 66 | {"1KiBps", 1 * _Ki}, 67 | {"1MiBps", 1 * _Mi}, 68 | {"1GiBps", 1 * _Gi}, 69 | {"1TiBps", 1 * _Ti}, 70 | {"1PiBps", 1 * _Pi}, 71 | {"8b/s", 1}, 72 | {"1Kbps", 1 * _K >> 3}, 73 | {"1Mbps", 1 * _M >> 3}, 74 | 
{"1Gbps", 1 * _G >> 3}, 75 | {"1Tbps", 1 * _T >> 3}, 76 | {"1Pbps", 1 * _P >> 3}, 77 | {"1Kibps", 1 * _Ki >> 3}, 78 | {"1Mibps", 1 * _Mi >> 3}, 79 | {"1Gibps", 1 * _Gi >> 3}, 80 | {"1Tibps", 1 * _Ti >> 3}, 81 | {"1Pibps", 1 * _Pi >> 3}, 82 | } 83 | for _, tc := range testCases { 84 | t.Run(tc.given, func(t *testing.T) { 85 | actual, err := ParseBytesPerSecondScalar(tc.given) 86 | if !assert.NoError(t, err) { 87 | return 88 | } 89 | assert.Equal(t, tc.expected, actual) 90 | }) 91 | } 92 | } 93 | 94 | func TestParseGGUFBytesScalar(t *testing.T) { 95 | testCases := []struct { 96 | given string 97 | expected GGUFBytesScalar 98 | }{ 99 | {"1B", 1}, 100 | {"1KB", 1 * _K}, 101 | {"1MB", 1 * _M}, 102 | {"1GB", 1 * _G}, 103 | {"1TB", 1 * _T}, 104 | {"1PB", 1 * _P}, 105 | {"1KiB", 1 * _Ki}, 106 | {"1MiB", 1 * _Mi}, 107 | {"1GiB", 1 * _Gi}, 108 | {"1TiB", 1 * _Ti}, 109 | {"1PiB", 1 * _Pi}, 110 | } 111 | for _, tc := range testCases { 112 | t.Run(tc.given, func(t *testing.T) { 113 | actual, err := ParseGGUFBytesScalar(tc.given) 114 | if !assert.NoError(t, err) { 115 | return 116 | } 117 | assert.Equal(t, tc.expected, actual) 118 | }) 119 | } 120 | } 121 | -------------------------------------------------------------------------------- /util/anyx/any.go: -------------------------------------------------------------------------------- 1 | package anyx 2 | 3 | import ( 4 | "encoding/json" 5 | "fmt" 6 | "strconv" 7 | 8 | "golang.org/x/exp/constraints" 9 | ) 10 | 11 | // Number converts any type to the specified number type. 
12 | func Number[T constraints.Integer | constraints.Float](v any) T { 13 | switch vv := v.(type) { 14 | case int: 15 | return T(vv) 16 | case int8: 17 | return T(vv) 18 | case int16: 19 | return T(vv) 20 | case int32: 21 | return T(vv) 22 | case int64: 23 | return T(vv) 24 | case uint: 25 | return T(vv) 26 | case uint8: 27 | return T(vv) 28 | case uint16: 29 | return T(vv) 30 | case uint32: 31 | return T(vv) 32 | case uint64: 33 | return T(vv) 34 | case float32: 35 | return T(vv) 36 | case float64: 37 | return T(vv) 38 | case bool: 39 | if vv { 40 | return T(1) 41 | } 42 | return T(0) 43 | case string: 44 | x, err := strconv.ParseInt(vv, 10, 64) 45 | if err != nil { 46 | y, err := strconv.ParseFloat(vv, 64) 47 | if err != nil { 48 | return T(0) 49 | } else { 50 | return T(y) 51 | } 52 | } 53 | return T(x) 54 | case json.Number: 55 | x, err := vv.Int64() 56 | if err != nil { 57 | y, err := vv.Float64() 58 | if err != nil { 59 | return T(0) 60 | } else { 61 | return T(y) 62 | } 63 | } 64 | return T(x) 65 | default: 66 | return T(0) 67 | } 68 | } 69 | 70 | // Bool converts any type to a bool. 71 | func Bool(v any) bool { 72 | switch vv := v.(type) { 73 | case bool: 74 | return vv 75 | case int, int8, int16, int32, int64, uint, uint8, uint16, uint32, uint64, uintptr: 76 | return vv != 0 77 | case float32, float64: 78 | return vv != 0 79 | case string: 80 | return vv != "0" 81 | case fmt.Stringer: 82 | return vv.String() != "0" 83 | default: 84 | return false 85 | } 86 | } 87 | 88 | // String converts any type to a string. 
89 | func String(v any) string { 90 | switch vv := v.(type) { 91 | case string: 92 | return vv 93 | case []byte: 94 | return string(vv) 95 | case int: 96 | return strconv.FormatInt(int64(vv), 10) 97 | case int8: 98 | return strconv.FormatInt(int64(vv), 10) 99 | case int16: 100 | return strconv.FormatInt(int64(vv), 10) 101 | case int32: 102 | return strconv.FormatInt(int64(vv), 10) 103 | case int64: 104 | return strconv.FormatInt(vv, 10) 105 | case uint: 106 | return strconv.FormatUint(uint64(vv), 10) 107 | case uint8: 108 | return strconv.FormatUint(uint64(vv), 10) 109 | case uint16: 110 | return strconv.FormatUint(uint64(vv), 10) 111 | case uint32: 112 | return strconv.FormatUint(uint64(vv), 10) 113 | case uint64: 114 | return strconv.FormatUint(vv, 10) 115 | case float32: 116 | return strconv.FormatFloat(float64(vv), 'f', -1, 32) 117 | case float64: 118 | return strconv.FormatFloat(vv, 'f', -1, 64) 119 | case bool: 120 | return strconv.FormatBool(vv) 121 | case fmt.Stringer: 122 | return vv.String() 123 | case json.RawMessage: 124 | return string(vv) 125 | default: 126 | return fmt.Sprintf("%v", v) 127 | } 128 | } 129 | -------------------------------------------------------------------------------- /util/bytex/pool.go: -------------------------------------------------------------------------------- 1 | package bytex 2 | 3 | import ( 4 | "bytes" 5 | "sync" 6 | ) 7 | 8 | const defaultSize = 32 * 1024 9 | 10 | type ( 11 | Bytes = []byte 12 | BytesBuffer = *bytes.Buffer 13 | ) 14 | 15 | var gp = sync.Pool{ 16 | New: func() any { 17 | buf := make(Bytes, defaultSize) 18 | return &buf 19 | }, 20 | } 21 | 22 | // GetBytes gets a bytes buffer from the pool, 23 | // which can specify with a size, 24 | // default is 32k. 
25 | func GetBytes(size ...uint64) Bytes { 26 | buf := *(gp.Get().(*Bytes)) 27 | 28 | s := defaultSize 29 | if len(size) != 0 { 30 | s = int(size[0]) 31 | if s == 0 { 32 | s = defaultSize 33 | } 34 | } 35 | if cap(buf) >= s { 36 | return buf[:s] 37 | } 38 | 39 | gp.Put(&buf) 40 | 41 | ns := s 42 | if ns < defaultSize { 43 | ns = defaultSize 44 | } 45 | buf = make(Bytes, ns) 46 | return buf[:s] 47 | } 48 | 49 | // WithBytes relies on GetBytes to get a buffer, 50 | // calls the function with the buffer, 51 | // finally, puts it back to the pool after the function returns. 52 | func WithBytes(fn func(Bytes) error, size ...uint64) error { 53 | if fn == nil { 54 | return nil 55 | } 56 | 57 | buf := GetBytes(size...) 58 | defer Put(buf) 59 | return fn(buf) 60 | } 61 | 62 | // GetBuffer is similar to GetBytes, 63 | // but it returns the bytes buffer wrapped by bytes.Buffer. 64 | func GetBuffer(size ...uint64) BytesBuffer { 65 | return bytes.NewBuffer(GetBytes(size...)[:0]) 66 | } 67 | 68 | // WithBuffer relies on GetBuffer to get a buffer, 69 | // calls the function with the buffer, 70 | // finally, puts it back to the pool after the function returns. 71 | func WithBuffer(fn func(BytesBuffer) error, size ...uint64) error { 72 | if fn == nil { 73 | return nil 74 | } 75 | 76 | buf := GetBuffer(size...) 77 | defer Put(buf) 78 | return fn(buf) 79 | } 80 | 81 | // Put puts the buffer(either Bytes or BytesBuffer) back to the pool. 82 | func Put[T Bytes | BytesBuffer](buf T) { 83 | switch v := any(buf).(type) { 84 | case Bytes: 85 | gp.Put(&v) 86 | case BytesBuffer: 87 | bs := v.Bytes() 88 | gp.Put(&bs) 89 | v.Reset() 90 | } 91 | } 92 | -------------------------------------------------------------------------------- /util/funcx/error.go: -------------------------------------------------------------------------------- 1 | package funcx 2 | 3 | // NoError ignores the given error, 4 | // it is usually a nice helper for chain function calling. 
func NoError[T any](t T, _ error) T {
	return t
}

// NoError2 drops the trailing error and hands back the two leading values,
// which allows calling a (T, U, error) function inside an expression.
func NoError2[T, U any](t T, u U, _ error) (T, U) {
	return t, u
}

// NoError3 drops the trailing error and hands back the three leading values,
// which allows calling a (T, U, V, error) function inside an expression.
func NoError3[T, U, V any](t T, u U, v V, _ error) (T, U, V) {
	return t, u, v
}

// NoError4 drops the trailing error and hands back the four leading values,
// which allows calling a (T, U, V, W, error) function inside an expression.
func NoError4[T, U, V, W any](t T, u U, v V, w W, _ error) (T, U, V, W) {
	return t, u, v, w
}

// MustNoError returns the given value when the error is nil,
// and panics with the error otherwise.
func MustNoError[T any](t T, e error) T {
	if e == nil {
		return t
	}
	panic(e)
}

// MustNoError2 returns the two given values when the error is nil,
// and panics with the error otherwise.
func MustNoError2[T, U any](t T, u U, e error) (T, U) {
	if e == nil {
		return t, u
	}
	panic(e)
}

// MustNoError3 returns the three given values when the error is nil,
// and panics with the error otherwise.
func MustNoError3[T, U, V any](t T, u U, v V, e error) (T, U, V) {
	if e == nil {
		return t, u, v
	}
	panic(e)
}

// MustNoError4 is similar to NoError4,
// but it panics if the given error is not nil,
// it is usually a nice helper for chain function calling.
60 | func MustNoError4[T, U, V, W any](t T, u U, v V, w W, e error) (T, U, V, W) { 61 | if e != nil { 62 | panic(e) 63 | } 64 | return t, u, v, w 65 | } 66 | -------------------------------------------------------------------------------- /util/httpx/client.go: -------------------------------------------------------------------------------- 1 | package httpx 2 | 3 | import ( 4 | "context" 5 | "fmt" 6 | "io" 7 | "net/http" 8 | "time" 9 | 10 | "github.com/henvic/httpretty" 11 | 12 | "github.com/gpustack/gguf-parser-go/util/bytex" 13 | ) 14 | 15 | // DefaultClient is similar to the default http.Client used by the package. 16 | // 17 | // It is used for requests pooling. 18 | var DefaultClient = &http.Client{ 19 | Transport: DefaultTransport, 20 | } 21 | 22 | // DefaultInsecureClient is the default http.Client used by the package, 23 | // with TLS insecure skip verify. 24 | // 25 | // It is used for requests pooling. 26 | var DefaultInsecureClient = &http.Client{ 27 | Transport: DefaultInsecureTransport, 28 | } 29 | 30 | // Client returns a new http.Client with the given options, 31 | // the result http.Client is used for fast-consuming requests. 32 | // 33 | // If you want a requests pool management, use DefaultClient instead. 
34 | func Client(opts ...*ClientOption) *http.Client { 35 | var o *ClientOption 36 | if len(opts) > 0 { 37 | o = opts[0] 38 | } else { 39 | o = ClientOptions() 40 | } 41 | 42 | root := DefaultTransport 43 | if o.transport != nil { 44 | root = o.transport 45 | } 46 | 47 | if o.debug { 48 | pretty := &httpretty.Logger{ 49 | Time: true, 50 | TLS: true, 51 | RequestHeader: true, 52 | RequestBody: true, 53 | MaxRequestBody: 1024, 54 | ResponseHeader: true, 55 | ResponseBody: true, 56 | MaxResponseBody: 1024, 57 | Formatters: []httpretty.Formatter{&JSONFormatter{}}, 58 | } 59 | root = pretty.RoundTripper(root) 60 | } 61 | 62 | rtc := RoundTripperChain{ 63 | Next: root, 64 | } 65 | for i := range o.roundTrippers { 66 | rtc = RoundTripperChain{ 67 | Do: o.roundTrippers[i], 68 | Next: rtc, 69 | } 70 | } 71 | 72 | var rt http.RoundTripper = rtc 73 | if o.retryIf != nil { 74 | rt = RoundTripperFunc(func(req *http.Request) (*http.Response, error) { 75 | for i := 0; ; i++ { 76 | resp, err := rtc.RoundTrip(req) 77 | if !o.retryIf(resp, err) { 78 | return resp, err 79 | } 80 | w, ok := o.retryBackoff(i+1, resp) 81 | if !ok { 82 | return resp, err 83 | } 84 | wt := time.NewTimer(w) 85 | select { 86 | case <-req.Context().Done(): 87 | wt.Stop() 88 | return resp, req.Context().Err() 89 | case <-wt.C: 90 | } 91 | } 92 | }) 93 | } 94 | 95 | return &http.Client{ 96 | Transport: rt, 97 | Timeout: o.timeout, 98 | } 99 | } 100 | 101 | // NewGetRequestWithContext returns a new http.MethodGet request, 102 | // which is saving your life from http.NewRequestWithContext. 103 | func NewGetRequestWithContext(ctx context.Context, uri string) (*http.Request, error) { 104 | return http.NewRequestWithContext(ctx, http.MethodGet, uri, nil) 105 | } 106 | 107 | // NewGetRequest returns a new http.MethodGet request, 108 | // which is saving your life from http.NewRequest. 
func NewGetRequest(uri string) (*http.Request, error) {
	return http.NewRequestWithContext(context.Background(), http.MethodGet, uri, nil)
}

// NewHeadRequestWithContext builds a body-less HEAD request for the given URI,
// bound to the given context.
func NewHeadRequestWithContext(ctx context.Context, uri string) (*http.Request, error) {
	const method = http.MethodHead
	return http.NewRequestWithContext(ctx, method, uri, nil)
}

// NewHeadRequest builds a body-less HEAD request for the given URI,
// bound to the background context.
func NewHeadRequest(uri string) (*http.Request, error) {
	return NewHeadRequestWithContext(context.Background(), uri)
}

// NewPostRequestWithContext builds a POST request for the given URI and body,
// bound to the given context.
func NewPostRequestWithContext(ctx context.Context, uri string, body io.Reader) (*http.Request, error) {
	const method = http.MethodPost
	return http.NewRequestWithContext(ctx, method, uri, body)
}

// NewPostRequest builds a POST request for the given URI and body,
// bound to the background context.
func NewPostRequest(uri string, body io.Reader) (*http.Request, error) {
	return NewPostRequestWithContext(context.Background(), uri, body)
}

// NewPutRequestWithContext builds a PUT request for the given URI and body,
// bound to the given context.
func NewPutRequestWithContext(ctx context.Context, uri string, body io.Reader) (*http.Request, error) {
	const method = http.MethodPut
	return http.NewRequestWithContext(ctx, method, uri, body)
}

// NewPutRequest returns a new http.MethodPut request,
// which is saving your life from http.NewRequest.
func NewPutRequest(uri string, body io.Reader) (*http.Request, error) {
	return http.NewRequestWithContext(context.Background(), http.MethodPut, uri, body)
}

// NewPatchRequestWithContext builds a PATCH request for the given URI and body,
// bound to the given context.
func NewPatchRequestWithContext(ctx context.Context, uri string, body io.Reader) (*http.Request, error) {
	const method = http.MethodPatch
	return http.NewRequestWithContext(ctx, method, uri, body)
}

// NewPatchRequest builds a PATCH request for the given URI and body,
// bound to the background context.
func NewPatchRequest(uri string, body io.Reader) (*http.Request, error) {
	return NewPatchRequestWithContext(context.Background(), uri, body)
}

// NewDeleteRequestWithContext builds a body-less DELETE request for the given URI,
// bound to the given context.
func NewDeleteRequestWithContext(ctx context.Context, uri string) (*http.Request, error) {
	const method = http.MethodDelete
	return http.NewRequestWithContext(ctx, method, uri, nil)
}

// NewDeleteRequest builds a body-less DELETE request for the given URI,
// bound to the background context.
func NewDeleteRequest(uri string) (*http.Request, error) {
	return NewDeleteRequestWithContext(context.Background(), uri)
}

// NewConnectRequestWithContext builds a body-less CONNECT request for the given URI,
// bound to the given context.
func NewConnectRequestWithContext(ctx context.Context, uri string) (*http.Request, error) {
	const method = http.MethodConnect
	return http.NewRequestWithContext(ctx, method, uri, nil)
}

// NewConnectRequest returns a new http.MethodConnect request,
// which is saving your life from http.NewRequest.
func NewConnectRequest(uri string) (*http.Request, error) {
	return http.NewRequestWithContext(context.Background(), http.MethodConnect, uri, nil)
}

// NewOptionsRequestWithContext builds a body-less OPTIONS request for the given URI,
// bound to the given context.
func NewOptionsRequestWithContext(ctx context.Context, uri string) (*http.Request, error) {
	const method = http.MethodOptions
	return http.NewRequestWithContext(ctx, method, uri, nil)
}

// NewOptionsRequest builds a body-less OPTIONS request for the given URI,
// bound to the background context.
func NewOptionsRequest(uri string) (*http.Request, error) {
	return NewOptionsRequestWithContext(context.Background(), uri)
}

// NewTraceRequestWithContext builds a body-less TRACE request for the given URI,
// bound to the given context.
func NewTraceRequestWithContext(ctx context.Context, uri string) (*http.Request, error) {
	const method = http.MethodTrace
	return http.NewRequestWithContext(ctx, method, uri, nil)
}

// NewTraceRequest builds a body-less TRACE request for the given URI,
// bound to the background context.
func NewTraceRequest(uri string) (*http.Request, error) {
	return NewTraceRequestWithContext(context.Background(), uri)
}

// Error replies to the request like http.Error does,
// using the standard status text of the given code as the message.
func Error(rw http.ResponseWriter, code int) {
	msg := http.StatusText(code)
	http.Error(rw, msg, code)
}

// Close closes the body of the given http response and ignores the closing error,
// a nil response or a nil body is left alone.
func Close(resp *http.Response) {
	if resp == nil || resp.Body == nil {
		return
	}
	_ = resp.Body.Close()
}

// BodyBytes returns the body of the http response as a byte slice.
223 | func BodyBytes(resp *http.Response) []byte { 224 | buf := bytex.GetBytes() 225 | defer bytex.Put(buf) 226 | 227 | w := bytex.GetBuffer() 228 | _, _ = io.CopyBuffer(w, resp.Body, buf) 229 | return w.Bytes() 230 | } 231 | 232 | // BodyString returns the body of the http response as a string. 233 | func BodyString(resp *http.Response) string { 234 | return string(BodyBytes(resp)) 235 | } 236 | 237 | // Do is a helper function to execute the given http request with the given http client, 238 | // and execute the given function with the http response. 239 | // 240 | // It is useful to avoid forgetting to close the http response body. 241 | // 242 | // Do will return the error if failed to execute the http request or the given function. 243 | func Do(cli *http.Client, req *http.Request, respFunc func(*http.Response) error) error { 244 | resp, err := cli.Do(req) 245 | if err != nil { 246 | return fmt.Errorf("do request: %w", err) 247 | } 248 | defer Close(resp) 249 | if respFunc == nil { 250 | return nil 251 | } 252 | return respFunc(resp) 253 | } 254 | -------------------------------------------------------------------------------- /util/httpx/client_helper.go: -------------------------------------------------------------------------------- 1 | package httpx 2 | 3 | import ( 4 | "bytes" 5 | "errors" 6 | "io" 7 | "net/http" 8 | "regexp" 9 | 10 | "github.com/henvic/httpretty" 11 | 12 | "github.com/gpustack/gguf-parser-go/util/json" 13 | ) 14 | 15 | var _ httpretty.Formatter = (*JSONFormatter)(nil) 16 | 17 | // JSONFormatter is copied from httpretty.JSONFormatter, 18 | // but use our own json package. 19 | type JSONFormatter struct{} 20 | 21 | var jsonTypeRE = regexp.MustCompile(`[/+]json($|;)`) 22 | 23 | // Match JSON media type. 24 | func (j *JSONFormatter) Match(mediatype string) bool { 25 | return jsonTypeRE.MatchString(mediatype) 26 | } 27 | 28 | // Format JSON content. 
29 | func (j *JSONFormatter) Format(w io.Writer, src []byte) error { 30 | if !json.Valid(src) { 31 | // We want to get the error of json.checkValid, not unmarshal it. 32 | // The happy path has been optimized, maybe prematurely. 33 | if err := json.Unmarshal(src, &json.RawMessage{}); err != nil { 34 | return err 35 | } 36 | } 37 | // Avoiding allocation as we use *bytes.Buffer to store the formatted body before printing 38 | dst, ok := w.(*bytes.Buffer) 39 | if !ok { 40 | // Mitigating panic to avoid upsetting anyone who uses this directly 41 | return errors.New("underlying writer for JSONFormatter must be *bytes.Buffer") 42 | } 43 | return json.Indent(dst, src, "", " ") 44 | } 45 | 46 | type RoundTripperChain struct { 47 | Do func(req *http.Request) error 48 | Next http.RoundTripper 49 | } 50 | 51 | func (c RoundTripperChain) RoundTrip(req *http.Request) (*http.Response, error) { 52 | if c.Do != nil { 53 | if err := c.Do(req); err != nil { 54 | return nil, err 55 | } 56 | } 57 | if c.Next != nil { 58 | return c.Next.RoundTrip(req) 59 | } 60 | return nil, nil 61 | } 62 | 63 | type RoundTripperFunc func(*http.Request) (*http.Response, error) 64 | 65 | func (fn RoundTripperFunc) RoundTrip(req *http.Request) (*http.Response, error) { 66 | return fn(req) 67 | } 68 | -------------------------------------------------------------------------------- /util/httpx/client_options.go: -------------------------------------------------------------------------------- 1 | package httpx 2 | 3 | import ( 4 | "math" 5 | "net/http" 6 | "strconv" 7 | "strings" 8 | "time" 9 | ) 10 | 11 | type ClientOption struct { 12 | *TransportOption 13 | 14 | timeout time.Duration 15 | debug bool 16 | retryIf RetryFunc 17 | retryBackoff func(attemptNum int, resp *http.Response) (wait time.Duration, ok bool) 18 | roundTrippers []func(req *http.Request) error 19 | } 20 | 21 | func ClientOptions() *ClientOption { 22 | return &ClientOption{ 23 | TransportOption: TransportOptions().WithoutKeepalive(), 24 | 
timeout: 30 * time.Second, 25 | retryIf: DefaultRetry, 26 | retryBackoff: createRetryBackoff(100*time.Millisecond, 5*time.Second, 5), 27 | } 28 | } 29 | 30 | // WithTransport sets the TransportOption. 31 | func (o *ClientOption) WithTransport(opt *TransportOption) *ClientOption { 32 | if o == nil || opt == nil { 33 | return o 34 | } 35 | o.TransportOption = opt 36 | return o 37 | } 38 | 39 | // WithTimeout sets the request timeout. 40 | // 41 | // This timeout controls the sum of [network dial], [tls handshake], [request], [response header reading] and [response body reading]. 42 | // 43 | // Use 0 to disable timeout. 44 | func (o *ClientOption) WithTimeout(timeout time.Duration) *ClientOption { 45 | if o == nil || timeout < 0 { 46 | return o 47 | } 48 | o.timeout = timeout 49 | return o 50 | } 51 | 52 | // WithDebug sets the debug mode. 53 | func (o *ClientOption) WithDebug() *ClientOption { 54 | if o == nil { 55 | return o 56 | } 57 | o.debug = true 58 | return o 59 | } 60 | 61 | type RetryFunc func(resp *http.Response, err error) (retry bool) 62 | 63 | // WithRetryIf specifies the if-condition of retry operation for request, 64 | // or stops retrying if setting with `nil`. 65 | func (o *ClientOption) WithRetryIf(retryIf RetryFunc) *ClientOption { 66 | if o == nil { 67 | return o 68 | } 69 | o.retryIf = retryIf 70 | return o 71 | } 72 | 73 | // WithRetryBackoff specifies the retry-backoff mechanism for request. 74 | func (o *ClientOption) WithRetryBackoff(waitMin, waitMax time.Duration, attemptMax int) *ClientOption { 75 | if o == nil || waitMin < 0 || waitMax < 0 || waitMax < waitMin || attemptMax <= 0 { 76 | return o 77 | } 78 | o.retryBackoff = createRetryBackoff(waitMin, waitMax, attemptMax) 79 | return o 80 | } 81 | 82 | // WithUserAgent sets the user agent. 
83 | func (o *ClientOption) WithUserAgent(ua string) *ClientOption { 84 | return o.WithRoundTripper(func(req *http.Request) error { 85 | req.Header.Set("User-Agent", ua) 86 | return nil 87 | }) 88 | } 89 | 90 | // WithBearerAuth sets the bearer token. 91 | func (o *ClientOption) WithBearerAuth(token string) *ClientOption { 92 | return o.WithRoundTripper(func(req *http.Request) error { 93 | req.Header.Set("Authorization", "Bearer "+token) 94 | return nil 95 | }) 96 | } 97 | 98 | // WithBasicAuth sets the basic authentication. 99 | func (o *ClientOption) WithBasicAuth(username, password string) *ClientOption { 100 | return o.WithRoundTripper(func(req *http.Request) error { 101 | req.SetBasicAuth(username, password) 102 | return nil 103 | }) 104 | } 105 | 106 | // WithHeader sets the header. 107 | func (o *ClientOption) WithHeader(key, value string) *ClientOption { 108 | return o.WithRoundTripper(func(req *http.Request) error { 109 | req.Header.Set(key, value) 110 | return nil 111 | }) 112 | } 113 | 114 | // WithHeaders sets the headers. 115 | func (o *ClientOption) WithHeaders(headers map[string]string) *ClientOption { 116 | return o.WithRoundTripper(func(req *http.Request) error { 117 | for k, v := range headers { 118 | req.Header.Set(k, v) 119 | } 120 | return nil 121 | }) 122 | } 123 | 124 | // WithRoundTripper sets the round tripper. 125 | func (o *ClientOption) WithRoundTripper(rt func(req *http.Request) error) *ClientOption { 126 | if o == nil || rt == nil { 127 | return o 128 | } 129 | o.roundTrippers = append(o.roundTrippers, rt) 130 | return o 131 | } 132 | 133 | // If is a conditional option, 134 | // which receives a boolean condition to trigger the given function or not. 
135 | func (o *ClientOption) If(condition bool, then func(*ClientOption) *ClientOption) *ClientOption { 136 | if condition { 137 | return then(o) 138 | } 139 | return o 140 | } 141 | 142 | // DefaultRetry is the default retry condition, 143 | // inspired by https://github.com/hashicorp/go-retryablehttp/blob/40b0cad1633fd521cee5884724fcf03d039aaf3f/client.go#L68-L86. 144 | func DefaultRetry(resp *http.Response, respErr error) bool { 145 | if respErr != nil { 146 | switch errMsg := respErr.Error(); { 147 | case strings.Contains(errMsg, `redirects`): 148 | return false 149 | case strings.Contains(errMsg, `unsupported protocol scheme`): 150 | return false 151 | case strings.Contains(errMsg, `certificate is not trusted`): 152 | return false 153 | case strings.Contains(errMsg, `invalid header`): 154 | return false 155 | case strings.Contains(errMsg, `failed to verify certificate`): 156 | return false 157 | } 158 | 159 | // Retry if receiving connection closed. 160 | return true 161 | } 162 | 163 | // Retry if receiving rate-limited of server. 164 | if resp.StatusCode == http.StatusTooManyRequests { 165 | return true 166 | } 167 | 168 | // Retry if receiving unexpected responses. 169 | if resp.StatusCode == 0 || (resp.StatusCode >= 500 && resp.StatusCode != http.StatusNotImplemented) { 170 | return true 171 | } 172 | 173 | return false 174 | } 175 | 176 | // createRetryBackoff creates a backoff function for retry operation. 
177 | func createRetryBackoff(waitMin, waitMax time.Duration, attemptMax int) func(int, *http.Response) (time.Duration, bool) { 178 | return func(attemptNum int, resp *http.Response) (wait time.Duration, ok bool) { 179 | if attemptNum > attemptMax { 180 | return 0, false 181 | } 182 | 183 | if resp != nil && (resp.StatusCode == http.StatusTooManyRequests || resp.StatusCode == http.StatusServiceUnavailable) { 184 | if retryAfter := resp.Header.Get("Retry-After"); retryAfter != "" { 185 | if seconds, err := strconv.Atoi(retryAfter); err == nil { 186 | return time.Duration(seconds) * time.Second, true 187 | } 188 | } 189 | } 190 | 191 | wait = time.Duration(math.Pow(2, float64(attemptNum)) * float64(waitMin)) 192 | return min(wait, waitMax), true 193 | } 194 | } 195 | -------------------------------------------------------------------------------- /util/httpx/file.go: -------------------------------------------------------------------------------- 1 | package httpx 2 | 3 | import ( 4 | "errors" 5 | "fmt" 6 | "io" 7 | "net/http" 8 | "strings" 9 | "syscall" 10 | 11 | "github.com/smallnest/ringbuffer" 12 | 13 | "github.com/gpustack/gguf-parser-go/util/bytex" 14 | ) 15 | 16 | type SeekerFile struct { 17 | cli *http.Client 18 | req *http.Request 19 | b *ringbuffer.RingBuffer 20 | c int64 21 | l int64 22 | } 23 | 24 | // OpenSeekerFile tries the GET http.Request as a SeekerFile, 25 | // and returns a SeekerFile, or an error if any. 
26 | func OpenSeekerFile(cli *http.Client, req *http.Request, opts ...*SeekerFileOption) (*SeekerFile, error) { 27 | if cli == nil { 28 | return nil, errors.New("client is nil") 29 | } 30 | if req == nil { 31 | return nil, errors.New("request is nil") 32 | } 33 | if req.Method != http.MethodGet { 34 | return nil, errors.New("request method is not GET") 35 | } 36 | 37 | var o *SeekerFileOption 38 | if len(opts) > 0 { 39 | o = opts[0] 40 | } else { 41 | o = SeekerFileOptions() 42 | } 43 | if o.bufSize <= 0 { 44 | o.bufSize = 4 * 1024 * 1024 // 4mb 45 | } 46 | 47 | var l int64 48 | { 49 | if !o.skipRangeDownloadDetect { 50 | req := req.Clone(req.Context()) 51 | req.Method = http.MethodHead 52 | err := Do(cli, req, func(resp *http.Response) error { 53 | if resp.StatusCode != http.StatusOK { 54 | return fmt.Errorf("stat: status code %d", resp.StatusCode) 55 | } 56 | if !strings.EqualFold(resp.Header.Get("Accept-Ranges"), "bytes") { 57 | return fmt.Errorf("stat: not support range download") 58 | } 59 | l = resp.ContentLength 60 | return nil 61 | }) 62 | if err != nil { 63 | return nil, fmt.Errorf("stat: do head request: %w", err) 64 | } 65 | } else { 66 | req := req.Clone(req.Context()) 67 | err := Do(cli, req, func(resp *http.Response) error { 68 | if resp.StatusCode != http.StatusOK { 69 | return fmt.Errorf("stat: status code %d", resp.StatusCode) 70 | } 71 | l = resp.ContentLength 72 | return nil 73 | }) 74 | if err != nil { 75 | return nil, fmt.Errorf("stat: do get request: %w", err) 76 | } 77 | } 78 | switch sz := int64(o.size); { 79 | case sz > l: 80 | return nil, fmt.Errorf("size %d is greater than limit %d", o.size, l) 81 | case sz <= 0: 82 | default: 83 | l = sz 84 | } 85 | } 86 | 87 | b := ringbuffer.New(o.bufSize).WithCancel(req.Context()) 88 | return &SeekerFile{cli: cli, req: req, b: b, c: 1<<63 - 1, l: l}, nil 89 | } 90 | 91 | func (f *SeekerFile) Close() error { 92 | if f.b != nil { 93 | f.b.CloseWriter() 94 | } 95 | return nil 96 | } 97 | 98 | func (f 
*SeekerFile) Len() int64 { 99 | return f.l 100 | } 101 | 102 | func (f *SeekerFile) ReadAt(p []byte, off int64) (int, error) { 103 | if off < 0 { 104 | return 0, syscall.EINVAL 105 | } 106 | if off > f.Len() { 107 | return 0, io.EOF 108 | } 109 | 110 | // Sync and move to new offset, if backward or empty buffer. 111 | if f.c > off || f.b.IsEmpty() { 112 | if err := f.sync(off, true); err != nil { 113 | return 0, err 114 | } 115 | } 116 | 117 | var ( 118 | remain = int64(f.b.Length()) 119 | capacity = int64(f.b.Capacity()) 120 | need = int64(len(p)) 121 | ) 122 | 123 | switch { 124 | case f.c+remain >= off+need: // Skip and move to new offset, if enough to forward. 125 | if err := f.skip(off - f.c); err != nil { 126 | return 0, err 127 | } 128 | return f.Read(p) 129 | case f.c+capacity >= off+need: // Sync and move to new offset, if enough to forward after synced. 130 | if err := f.sync(f.c+remain, false); err != nil { 131 | return 0, err 132 | } 133 | if err := f.skip(off - f.c); err != nil { 134 | return 0, err 135 | } 136 | return f.Read(p) 137 | default: 138 | } 139 | 140 | // Otherwise, read directly. 141 | 142 | f.b.Reset() 143 | f.c = off 144 | 145 | // Request remain needing. 
146 | lim := off + int64(len(p)) - 1 147 | if lim > f.Len() { 148 | lim = f.Len() 149 | } 150 | req := f.req.Clone(f.req.Context()) 151 | req.Header.Set("Range", fmt.Sprintf("bytes=%d-%d", off, lim)) 152 | resp, err := f.cli.Do(req) 153 | if err != nil { 154 | return 0, err 155 | } 156 | defer Close(resp) 157 | if resp.StatusCode != http.StatusPartialContent && resp.StatusCode != http.StatusOK { 158 | return 0, errors.New(resp.Status) 159 | } 160 | n, err := resp.Body.Read(p) 161 | f.c += int64(n) 162 | return n, err 163 | } 164 | 165 | func (f *SeekerFile) Read(p []byte) (int, error) { 166 | n, err := f.b.Read(p) 167 | f.c += int64(n) 168 | return n, err 169 | } 170 | 171 | func (f *SeekerFile) sync(off int64, reset bool) error { 172 | lim := off + int64(f.b.Free()) - 1 173 | if lim > f.Len() { 174 | lim = f.Len() 175 | } 176 | req := f.req.Clone(f.req.Context()) 177 | req.Header.Set("Range", fmt.Sprintf("bytes=%d-%d", off, lim)) 178 | 179 | resp, err := f.cli.Do(req) 180 | if err != nil { 181 | return err 182 | } 183 | defer Close(resp) 184 | if resp.StatusCode != http.StatusPartialContent && resp.StatusCode != http.StatusOK { 185 | return errors.New(resp.Status) 186 | } 187 | 188 | buf := bytex.GetBytes() 189 | defer bytex.Put(buf) 190 | if reset { 191 | f.b.Reset() 192 | f.c = off 193 | } 194 | 195 | _, err = io.CopyBuffer(_WriterOnly{w: f.b}, resp.Body, buf) 196 | if err != nil { 197 | return err 198 | } 199 | 200 | return nil 201 | } 202 | 203 | func (f *SeekerFile) skip(dif int64) error { 204 | if dif <= 0 { 205 | return nil 206 | } 207 | 208 | buf := bytex.GetBytes(uint64(dif)) 209 | defer bytex.Put(buf) 210 | n, err := f.b.Read(buf) 211 | f.c += int64(n) 212 | if err != nil { 213 | return err 214 | } 215 | return nil 216 | } 217 | 218 | // _WriterOnly is a wrapper to expose the io.Writer method only, 219 | // which to avoid calling the io.ReaderFrom method. 
220 | type _WriterOnly struct { 221 | w io.Writer 222 | } 223 | 224 | func (w _WriterOnly) Write(p []byte) (int, error) { 225 | return w.w.Write(p) 226 | } 227 | -------------------------------------------------------------------------------- /util/httpx/file_options.go: -------------------------------------------------------------------------------- 1 | package httpx 2 | 3 | type SeekerFileOption struct { 4 | bufSize int 5 | size int 6 | skipRangeDownloadDetect bool 7 | } 8 | 9 | func SeekerFileOptions() *SeekerFileOption { 10 | return &SeekerFileOption{ 11 | bufSize: 4 * 1024 * 1024, // 4mb 12 | } 13 | } 14 | 15 | // WithBufferSize sets the size of the buffer to read the file, 16 | // 17 | // Default is 4mb. 18 | func (o *SeekerFileOption) WithBufferSize(bufSize int) *SeekerFileOption { 19 | if o == nil || bufSize <= 0 { 20 | return o 21 | } 22 | o.bufSize = bufSize 23 | return o 24 | } 25 | 26 | // WithSize sets the size of the file to read, 27 | // 28 | // If the size is greater than the content size of the file, it will return an error. 29 | func (o *SeekerFileOption) WithSize(size int) *SeekerFileOption { 30 | if o == nil || size <= 0 { 31 | return o 32 | } 33 | o.size = size 34 | return o 35 | } 36 | 37 | // WithoutRangeDownloadDetect disables range download detection. 38 | // 39 | // Usually, OpenSeekerFile sends a "HEAD" HTTP request to destination to get the content size from the "Content-Length" header, 40 | // and confirms whether supports range download via the "Accept-Ranges" header. 41 | // However, some servers may not support the "HEAD" method, or the "Accept-Ranges" header is not set correctly. 42 | // 43 | // With this option, OpenSeekerFile sends "GET" HTTP request to get the content size as usual, 44 | // and does not confirm whether supports range download. But during the seeking read, 45 | // it still uses the "Range" header to read the file. 
46 | func (o *SeekerFileOption) WithoutRangeDownloadDetect() *SeekerFileOption { 47 | if o == nil { 48 | return o 49 | } 50 | o.skipRangeDownloadDetect = true 51 | return o 52 | } 53 | 54 | // If is a conditional option, 55 | // which receives a boolean condition to trigger the given function or not. 56 | func (o *SeekerFileOption) If(condition bool, then func(*SeekerFileOption) *SeekerFileOption) *SeekerFileOption { 57 | if condition { 58 | return then(o) 59 | } 60 | return o 61 | } 62 | -------------------------------------------------------------------------------- /util/httpx/proxy.go: -------------------------------------------------------------------------------- 1 | package httpx 2 | 3 | import ( 4 | "net" 5 | "net/http" 6 | "net/url" 7 | "strings" 8 | 9 | "github.com/gpustack/gguf-parser-go/util/osx" 10 | ) 11 | 12 | var noProxies []*net.IPNet 13 | 14 | func init() { 15 | noProxyEnv := osx.Getenv("NO_PROXY", osx.Getenv("no_proxy")) 16 | noProxyRules := strings.Split(noProxyEnv, ",") 17 | for i := range noProxyRules { 18 | _, cidr, _ := net.ParseCIDR(noProxyRules[i]) 19 | if cidr != nil { 20 | noProxies = append(noProxies, cidr) 21 | } 22 | } 23 | } 24 | 25 | // ProxyFromEnvironment is similar to http.ProxyFromEnvironment, 26 | // but it also respects the NO_PROXY environment variable. 
27 | func ProxyFromEnvironment(r *http.Request) (*url.URL, error) { 28 | if ip := net.ParseIP(r.URL.Hostname()); ip != nil { 29 | for i := range noProxies { 30 | if noProxies[i].Contains(ip) { 31 | return nil, nil 32 | } 33 | } 34 | } 35 | 36 | return http.ProxyFromEnvironment(r) 37 | } 38 | -------------------------------------------------------------------------------- /util/httpx/resolver.go: -------------------------------------------------------------------------------- 1 | package httpx 2 | 3 | import ( 4 | "context" 5 | "net" 6 | "slices" 7 | "strings" 8 | "time" 9 | 10 | "github.com/rs/dnscache" 11 | ) 12 | 13 | // DefaultResolver is the default DNS resolver used by the package, 14 | // which caches DNS lookups in memory. 15 | var DefaultResolver = &dnscache.Resolver{ 16 | // NB(thxCode): usually, a high latency DNS is about 3s, 17 | // so we set the timeout to 5s here. 18 | Timeout: 5 * time.Second, 19 | Resolver: net.DefaultResolver, 20 | } 21 | 22 | func init() { 23 | go func() { 24 | t := time.NewTimer(5 * time.Minute) 25 | defer t.Stop() 26 | for range t.C { 27 | DefaultResolver.RefreshWithOptions(dnscache.ResolverRefreshOptions{ 28 | ClearUnused: true, 29 | PersistOnFailure: false, 30 | }) 31 | } 32 | }() 33 | } 34 | 35 | func DNSCacheDialContext(dialer *net.Dialer) func(context.Context, string, string) (net.Conn, error) { 36 | return func(ctx context.Context, nw, addr string) (conn net.Conn, err error) { 37 | h, p, err := net.SplitHostPort(addr) 38 | if err != nil { 39 | return nil, err 40 | } 41 | ips, err := DefaultResolver.LookupHost(ctx, h) 42 | if err != nil { 43 | return nil, err 44 | } 45 | switch len(ips) { 46 | case 0: 47 | return nil, net.UnknownNetworkError("failed to resolve host") 48 | case 1: 49 | return dialer.DialContext(ctx, nw, net.JoinHostPort(ips[0], p)) 50 | default: 51 | } 52 | // Sort IPs to put IPv4 first, then IPv6. 
53 | slices.SortFunc(ips, func(a, b string) int { 54 | aIPv4, bIPv4 := strings.Contains(a, "."), strings.Contains(b, ".") 55 | if (aIPv4 && bIPv4) || (!aIPv4 && !bIPv4) { 56 | return 0 57 | } 58 | if !aIPv4 { 59 | return 1 60 | } 61 | return -1 62 | }) 63 | // Try to connect to each IP address in order. 64 | for _, ip := range ips { 65 | conn, err = dialer.DialContext(ctx, nw, net.JoinHostPort(ip, p)) 66 | if err == nil { 67 | break 68 | } 69 | } 70 | return conn, err 71 | } 72 | } 73 | -------------------------------------------------------------------------------- /util/httpx/transport.go: -------------------------------------------------------------------------------- 1 | package httpx 2 | 3 | import ( 4 | "net/http" 5 | ) 6 | 7 | // DefaultTransport is similar to the default http.DefaultTransport used by the package. 8 | var DefaultTransport http.RoundTripper = Transport() 9 | 10 | // DefaultInsecureTransport is the default http.DefaultTransport used by the package, 11 | // with TLS insecure skip verify. 12 | var DefaultInsecureTransport http.RoundTripper = Transport(TransportOptions().WithoutInsecureVerify()) 13 | 14 | // Transport returns a new http.Transport with the given options, 15 | // the result http.Transport is used for constructing http.Client. 
16 | func Transport(opts ...*TransportOption) *http.Transport { 17 | var o *TransportOption 18 | if len(opts) > 0 { 19 | o = opts[0] 20 | } else { 21 | o = TransportOptions() 22 | } 23 | 24 | return o.transport 25 | } 26 | -------------------------------------------------------------------------------- /util/httpx/transport_options.go: -------------------------------------------------------------------------------- 1 | package httpx 2 | 3 | import ( 4 | "crypto/tls" 5 | "net" 6 | "net/http" 7 | "net/url" 8 | "time" 9 | ) 10 | 11 | type TransportOption struct { 12 | dialer *net.Dialer 13 | transport *http.Transport 14 | } 15 | 16 | func TransportOptions() *TransportOption { 17 | dialer := &net.Dialer{ 18 | Timeout: 30 * time.Second, 19 | KeepAlive: 30 * time.Second, 20 | } 21 | transport := &http.Transport{ 22 | Proxy: ProxyFromEnvironment, 23 | TLSClientConfig: &tls.Config{ 24 | MinVersion: tls.VersionTLS12, 25 | }, 26 | DialContext: DNSCacheDialContext(dialer), 27 | ForceAttemptHTTP2: true, 28 | MaxIdleConns: 100, 29 | IdleConnTimeout: 90 * time.Second, 30 | TLSHandshakeTimeout: 10 * time.Second, 31 | ExpectContinueTimeout: 1 * time.Second, 32 | } 33 | 34 | return &TransportOption{ 35 | dialer: dialer, 36 | transport: transport, 37 | } 38 | } 39 | 40 | // WithProxy sets the proxy. 41 | func (o *TransportOption) WithProxy(proxy func(*http.Request) (*url.URL, error)) *TransportOption { 42 | if o == nil || o.transport == nil { 43 | return o 44 | } 45 | o.transport.Proxy = proxy 46 | return o 47 | } 48 | 49 | // WithoutProxy disables the proxy. 50 | func (o *TransportOption) WithoutProxy() *TransportOption { 51 | if o == nil || o.transport == nil { 52 | return o 53 | } 54 | o.transport.Proxy = nil 55 | return o 56 | } 57 | 58 | // WithKeepalive sets the keepalive. 
59 | func (o *TransportOption) WithKeepalive(timeoutAndKeepalive ...time.Duration) *TransportOption { 60 | if o == nil || o.transport == nil || o.dialer == nil { 61 | return o 62 | } 63 | tak := [2]time.Duration{30 * time.Second, 30 * time.Second} 64 | if len(timeoutAndKeepalive) > 0 { 65 | tak[0] = timeoutAndKeepalive[0] 66 | if len(timeoutAndKeepalive) > 1 { 67 | tak[1] = timeoutAndKeepalive[1] 68 | } 69 | } 70 | o.dialer.Timeout, o.dialer.KeepAlive = tak[0], tak[1] 71 | o.transport.MaxIdleConns = 100 72 | o.transport.IdleConnTimeout = 90 * time.Second 73 | return o 74 | } 75 | 76 | // WithoutKeepalive disables the keepalive. 77 | func (o *TransportOption) WithoutKeepalive() *TransportOption { 78 | if o == nil || o.transport == nil { 79 | return o 80 | } 81 | o.dialer.KeepAlive = -1 82 | o.transport.MaxIdleConns = 0 83 | o.transport.IdleConnTimeout = 0 84 | return o 85 | } 86 | 87 | // WithInsecureVerify verifies the insecure connection. 88 | func (o *TransportOption) WithInsecureVerify() *TransportOption { 89 | if o == nil || o.transport == nil || o.transport.TLSClientConfig == nil { 90 | return o 91 | } 92 | o.transport.TLSClientConfig.InsecureSkipVerify = false 93 | return o 94 | } 95 | 96 | // WithoutInsecureVerify skips the insecure connection verify. 97 | func (o *TransportOption) WithoutInsecureVerify() *TransportOption { 98 | if o == nil || o.transport == nil || o.transport.TLSClientConfig == nil { 99 | return o 100 | } 101 | o.transport.TLSClientConfig.InsecureSkipVerify = true 102 | return o 103 | } 104 | 105 | // TimeoutForDial sets the timeout for network dial. 106 | // 107 | // This timeout controls the [network dial] only. 108 | // 109 | // Use 0 to disable timeout. 110 | func (o *TransportOption) TimeoutForDial(timeout time.Duration) *TransportOption { 111 | if o == nil || o.dialer == nil { 112 | return o 113 | } 114 | o.dialer.Timeout = timeout 115 | return o 116 | } 117 | 118 | // TimeoutForResponseHeader sets the timeout for response header. 
119 | // 120 | // This timeout controls the [response header reading] only. 121 | // 122 | // Use 0 to disable timeout. 123 | func (o *TransportOption) TimeoutForResponseHeader(timeout time.Duration) *TransportOption { 124 | if o == nil || o.transport == nil { 125 | return o 126 | } 127 | o.transport.ResponseHeaderTimeout = timeout 128 | return o 129 | } 130 | 131 | // TimeoutForTLSHandshake sets the timeout for tls handshake. 132 | // 133 | // This timeout controls the [tls handshake] only. 134 | // 135 | // Use 0 to disable timeout. 136 | func (o *TransportOption) TimeoutForTLSHandshake(timeout time.Duration) *TransportOption { 137 | if o == nil || o.transport == nil { 138 | return o 139 | } 140 | o.transport.TLSHandshakeTimeout = timeout 141 | return o 142 | } 143 | 144 | // TimeoutForIdleConn sets the timeout for idle connection. 145 | // 146 | // This timeout controls the [idle connection lifetime] only. 147 | // 148 | // Use 0 to disable timeout. 149 | func (o *TransportOption) TimeoutForIdleConn(timeout time.Duration) *TransportOption { 150 | if o == nil || o.transport == nil { 151 | return o 152 | } 153 | o.transport.IdleConnTimeout = timeout 154 | return o 155 | } 156 | 157 | // WithTLSClientConfig sets the tls.Config. 158 | func (o *TransportOption) WithTLSClientConfig(config *tls.Config) *TransportOption { 159 | if o == nil || o.transport == nil { 160 | return o 161 | } 162 | o.transport.TLSClientConfig = config 163 | return o 164 | } 165 | 166 | // WithoutDNSCache disables the dns cache. 167 | func (o *TransportOption) WithoutDNSCache() *TransportOption { 168 | if o == nil || o.transport == nil || o.dialer == nil { 169 | return o 170 | } 171 | o.transport.DialContext = o.dialer.DialContext 172 | return o 173 | } 174 | 175 | // WithDialer sets the dialer. 
176 | func (o *TransportOption) WithDialer(dialer *net.Dialer) *TransportOption { 177 | if o == nil || o.transport == nil || dialer == nil { 178 | return o 179 | } 180 | o.dialer = dialer 181 | o.transport.DialContext = DNSCacheDialContext(o.dialer) 182 | return o 183 | } 184 | 185 | // Customize sets the transport. 186 | func (o *TransportOption) Customize(fn func(*http.Transport)) *TransportOption { 187 | if o == nil || o.transport == nil { 188 | return o 189 | } 190 | o.dialer = nil 191 | fn(o.transport) 192 | return o 193 | } 194 | 195 | // If is a conditional option, 196 | // which receives a boolean condition to trigger the given function or not. 197 | func (o *TransportOption) If(condition bool, then func(*TransportOption) *TransportOption) *TransportOption { 198 | if condition { 199 | return then(o) 200 | } 201 | return o 202 | } 203 | -------------------------------------------------------------------------------- /util/json/common.go: -------------------------------------------------------------------------------- 1 | package json 2 | 3 | import ( 4 | stdjson "encoding/json" 5 | "fmt" 6 | ) 7 | 8 | type RawMessage = stdjson.RawMessage 9 | 10 | var ( 11 | MarshalIndent = stdjson.MarshalIndent 12 | Indent = stdjson.Indent 13 | NewEncoder = stdjson.NewEncoder 14 | Valid = stdjson.Valid 15 | ) 16 | 17 | // MustMarshal is similar to Marshal, 18 | // but panics if found error. 19 | func MustMarshal(v any) []byte { 20 | bs, err := Marshal(v) 21 | if err != nil { 22 | panic(fmt.Errorf("error marshaling json: %w", err)) 23 | } 24 | 25 | return bs 26 | } 27 | 28 | // MustUnmarshal is similar to Unmarshal, 29 | // but panics if found error. 30 | func MustUnmarshal(data []byte, v any) { 31 | err := Unmarshal(data, v) 32 | if err != nil { 33 | panic(fmt.Errorf("error unmarshaling json: %w", err)) 34 | } 35 | } 36 | 37 | // MustMarshalIndent is similar to MarshalIndent, 38 | // but panics if found error. 
39 | func MustMarshalIndent(v any, prefix, indent string) []byte { 40 | bs, err := MarshalIndent(v, prefix, indent) 41 | if err != nil { 42 | panic(fmt.Errorf("error marshaling indent json: %w", err)) 43 | } 44 | 45 | return bs 46 | } 47 | 48 | // ShouldMarshal is similar to Marshal, 49 | // but never return error. 50 | func ShouldMarshal(v any) []byte { 51 | bs, _ := Marshal(v) 52 | return bs 53 | } 54 | 55 | // ShouldUnmarshal is similar to Unmarshal, 56 | // but never return error. 57 | func ShouldUnmarshal(data []byte, v any) { 58 | _ = Unmarshal(data, v) 59 | } 60 | 61 | // ShouldMarshalIndent is similar to MarshalIndent, 62 | // but never return error. 63 | func ShouldMarshalIndent(v any, prefix, indent string) []byte { 64 | bs, _ := MarshalIndent(v, prefix, indent) 65 | return bs 66 | } 67 | -------------------------------------------------------------------------------- /util/json/jsoniter.go: -------------------------------------------------------------------------------- 1 | //go:build !stdjson 2 | 3 | package json 4 | 5 | import ( 6 | stdjson "encoding/json" 7 | "strconv" 8 | "unsafe" 9 | 10 | jsoniter "github.com/json-iterator/go" 11 | ) 12 | 13 | var json = jsoniter.ConfigCompatibleWithStandardLibrary 14 | 15 | func init() { 16 | // borrowed from https://github.com/json-iterator/go/issues/145#issuecomment-323483602 17 | decodeNumberAsInt64IfPossible := func(ptr unsafe.Pointer, iter *jsoniter.Iterator) { 18 | switch iter.WhatIsNext() { 19 | case jsoniter.NumberValue: 20 | var number stdjson.Number 21 | 22 | iter.ReadVal(&number) 23 | i, err := strconv.ParseInt(string(number), 10, 64) 24 | 25 | if err == nil { 26 | *(*any)(ptr) = i 27 | return 28 | } 29 | 30 | f, err := strconv.ParseFloat(string(number), 64) 31 | if err == nil { 32 | *(*any)(ptr) = f 33 | return 34 | } 35 | default: 36 | *(*any)(ptr) = iter.Read() 37 | } 38 | } 39 | jsoniter.RegisterTypeDecoderFunc("interface {}", decodeNumberAsInt64IfPossible) 40 | 
jsoniter.RegisterTypeDecoderFunc("any", decodeNumberAsInt64IfPossible) 41 | } 42 | 43 | var ( 44 | Marshal = json.Marshal 45 | Unmarshal = json.Unmarshal 46 | NewDecoder = json.NewDecoder 47 | ) 48 | -------------------------------------------------------------------------------- /util/json/stdjson.go: -------------------------------------------------------------------------------- 1 | //go:build stdjson 2 | 3 | package json 4 | 5 | import ( 6 | "encoding/json" 7 | ) 8 | 9 | var ( 10 | Marshal = json.Marshal 11 | Unmarshal = json.Unmarshal 12 | NewDecoder = json.NewDecoder 13 | ) 14 | -------------------------------------------------------------------------------- /util/osx/env.go: -------------------------------------------------------------------------------- 1 | package osx 2 | 3 | import ( 4 | "os" 5 | ) 6 | 7 | // ExistEnv checks if the environment variable named by the key exists. 8 | func ExistEnv(key string) bool { 9 | _, ok := os.LookupEnv(key) 10 | return ok 11 | } 12 | 13 | // Getenv retrieves the value of the environment variable named by the key. 14 | // It returns the default, which will be empty if the variable is not present. 15 | // To distinguish between an empty value and an unset value, use LookupEnv. 16 | func Getenv(key string, def ...string) string { 17 | e, ok := os.LookupEnv(key) 18 | if !ok && len(def) != 0 { 19 | return def[0] 20 | } 21 | 22 | return e 23 | } 24 | 25 | // ExpandEnv is similar to Getenv, 26 | // but replaces ${var} or $var in the result. 27 | func ExpandEnv(key string, def ...string) string { 28 | return os.ExpandEnv(Getenv(key, def...)) 29 | } 30 | -------------------------------------------------------------------------------- /util/osx/file.go: -------------------------------------------------------------------------------- 1 | package osx 2 | 3 | import ( 4 | "io" 5 | "os" 6 | "path/filepath" 7 | "strings" 8 | ) 9 | 10 | // InlineTilde replaces the leading ~ with the home directory. 
func InlineTilde(path string) string {
	// Only a bare "~", or a "~" followed by the path separator, refers to the home directory;
	// anything else, like "~user" or an empty path, is returned as is.
	// The bare "~" matters as filepath.Clean turns "~/" into it.
	if path != "~" && !strings.HasPrefix(path, "~"+string(filepath.Separator)) {
		return path
	}
	hd, err := os.UserHomeDir()
	if err != nil {
		// Keep the path untouched if the home directory is unknown.
		return path
	}
	return filepath.Join(hd, path[1:])
}

// Open is similar to os.Open but supports ~ as the home directory.
func Open(path string) (*os.File, error) {
	p := filepath.Clean(path)
	p = InlineTilde(p)
	return os.Open(p)
}

// Exists checks if the given path exists.
//
// The path supports ~ as the home directory, and is not followed if it is a symbolic link.
// All the given checks, nil ones excluded, must accept the file info of the path.
func Exists(path string, checks ...func(os.FileInfo) bool) bool {
	p := filepath.Clean(path)
	p = InlineTilde(p)

	stat, err := os.Lstat(p)
	if err != nil {
		return false
	}

	for i := range checks {
		if checks[i] == nil {
			continue
		}

		if !checks[i](stat) {
			return false
		}
	}

	return true
}

// ExistsDir checks if the given path exists and is a directory.
func ExistsDir(path string) bool {
	return Exists(path, func(stat os.FileInfo) bool {
		return stat.Mode().IsDir()
	})
}

// ExistsLink checks if the given path exists and is a symbolic link.
func ExistsLink(path string) bool {
	return Exists(path, func(stat os.FileInfo) bool {
		return stat.Mode()&os.ModeSymlink != 0
	})
}

// ExistsFile checks if the given path exists and is a regular file.
func ExistsFile(path string) bool {
	return Exists(path, func(stat os.FileInfo) bool {
		return stat.Mode().IsRegular()
	})
}

// ExistsSocket checks if the given path exists and is a socket.
func ExistsSocket(path string) bool {
	return Exists(path, func(stat os.FileInfo) bool {
		return stat.Mode()&os.ModeSocket != 0
	})
}

// ExistsDevice checks if the given path exists and is a device.
83 | func ExistsDevice(path string) bool { 84 | return Exists(path, func(stat os.FileInfo) bool { 85 | return stat.Mode()&os.ModeDevice != 0 86 | }) 87 | } 88 | 89 | // Close closes the given io.Closer without error. 90 | func Close(c io.Closer) { 91 | if c == nil { 92 | return 93 | } 94 | _ = c.Close() 95 | } 96 | 97 | // WriteFile is similar to os.WriteFile but supports ~ as the home directory, 98 | // and also supports the parent directory creation. 99 | func WriteFile(name string, data []byte, perm os.FileMode) error { 100 | p := filepath.Clean(name) 101 | p = InlineTilde(p) 102 | 103 | if err := os.MkdirAll(filepath.Dir(p), 0o700); err != nil { 104 | return err 105 | } 106 | 107 | return os.WriteFile(p, data, perm) 108 | } 109 | 110 | // CreateFile is similar to os.Create but supports ~ as the home directory, 111 | // and also supports the parent directory creation. 112 | func CreateFile(name string, perm os.FileMode) (*os.File, error) { 113 | p := filepath.Clean(name) 114 | p = InlineTilde(p) 115 | 116 | if err := os.MkdirAll(filepath.Dir(p), 0o700); err != nil { 117 | return nil, err 118 | } 119 | 120 | return os.OpenFile(p, os.O_RDWR|os.O_CREATE|os.O_TRUNC, perm) 121 | } 122 | 123 | // OpenFile is similar to os.OpenFile but supports ~ as the home directory, 124 | // and also supports the parent directory creation. 
125 | func OpenFile(name string, flag int, perm os.FileMode) (*os.File, error) { 126 | p := filepath.Clean(name) 127 | p = InlineTilde(p) 128 | 129 | if err := os.MkdirAll(filepath.Dir(p), 0o700); err != nil { 130 | return nil, err 131 | } 132 | 133 | return os.OpenFile(p, flag, perm) 134 | } 135 | -------------------------------------------------------------------------------- /util/osx/file_mmap.go: -------------------------------------------------------------------------------- 1 | // Copyright 2018 The Prometheus Authors 2 | // Licensed under the Apache License, Version 2.0 (the "License"); 3 | // you may not use this file except in compliance with the License. 4 | // You may obtain a copy of the License at 5 | // 6 | // http://www.apache.org/licenses/LICENSE-2.0 7 | // 8 | // Unless required by applicable law or agreed to in writing, software 9 | // distributed under the License is distributed on an "AS IS" BASIS, 10 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 11 | // See the License for the specific language governing permissions and 12 | // limitations under the License. 
13 | 14 | package osx 15 | 16 | import ( 17 | "errors" 18 | "fmt" 19 | "io" 20 | "os" 21 | "path/filepath" 22 | "runtime/debug" 23 | "syscall" 24 | ) 25 | 26 | type MmapFile struct { 27 | f *os.File 28 | b []byte 29 | } 30 | 31 | func OpenMmapFile(path string) (*MmapFile, error) { 32 | return OpenMmapFileWithSize(path, 0) 33 | } 34 | 35 | func OpenMmapFileWithSize(path string, size int) (*MmapFile, error) { 36 | p := filepath.Clean(path) 37 | p = InlineTilde(p) 38 | 39 | f, err := os.Open(p) 40 | if err != nil { 41 | return nil, fmt.Errorf("try lock file: %w", err) 42 | } 43 | if size <= 0 { 44 | info, err := f.Stat() 45 | if err != nil { 46 | Close(f) 47 | return nil, fmt.Errorf("stat: %w", err) 48 | } 49 | size = int(info.Size()) 50 | } 51 | 52 | b, err := mmap(f, size) 53 | if err != nil { 54 | Close(f) 55 | return nil, fmt.Errorf("mmap, size %d: %w", size, err) 56 | } 57 | 58 | return &MmapFile{f: f, b: b}, nil 59 | } 60 | 61 | func (f *MmapFile) Close() error { 62 | err0 := munmap(f.b) 63 | err1 := f.f.Close() 64 | 65 | if err0 != nil { 66 | return err0 67 | } 68 | return err1 69 | } 70 | 71 | func (f *MmapFile) Bytes() []byte { 72 | return f.b 73 | } 74 | 75 | func (f *MmapFile) Len() int64 { 76 | return int64(len(f.b)) 77 | } 78 | 79 | var ErrPageFault = errors.New("page fault occurred while reading from memory map") 80 | 81 | func (f *MmapFile) ReadAt(p []byte, off int64) (_ int, err error) { 82 | if off < 0 { 83 | return 0, syscall.EINVAL 84 | } 85 | if off > f.Len() { 86 | return 0, io.EOF 87 | } 88 | 89 | old := debug.SetPanicOnFault(true) 90 | defer func() { 91 | debug.SetPanicOnFault(old) 92 | if recover() != nil { 93 | err = ErrPageFault 94 | } 95 | }() 96 | 97 | n := copy(p, f.b[off:]) 98 | if n < len(p) { 99 | err = io.EOF 100 | } 101 | return n, err 102 | } 103 | -------------------------------------------------------------------------------- /util/osx/file_mmap_js.go: -------------------------------------------------------------------------------- 
// Copyright 2022 The Prometheus Authors
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package osx

import (
	"errors"
	"os"
)

// mmap is the placeholder used by this file's build target: memory mapping
// is not available, so it always fails with an "unsupported" error.
// NOTE(review): the file carries no //go:build line; it is presumably
// selected through its _js.go file-name suffix — confirm.
func mmap(f *os.File, length int) ([]byte, error) {
	return nil, errors.New("unsupported")
}

// munmap is the counterpart placeholder of mmap; it always fails with an
// "unsupported" error.
func munmap(b []byte) (err error) {
	return errors.New("unsupported")
}

--------------------------------------------------------------------------------
/util/osx/file_mmap_unix.go:
--------------------------------------------------------------------------------
// Copyright 2017 The Prometheus Authors
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
13 | 14 | //go:build aix || darwin || dragonfly || freebsd || linux || netbsd || openbsd || solaris 15 | 16 | package osx 17 | 18 | import ( 19 | "os" 20 | 21 | "golang.org/x/sys/unix" 22 | ) 23 | 24 | func mmap(f *os.File, length int) ([]byte, error) { 25 | return unix.Mmap(int(f.Fd()), 0, length, unix.PROT_READ, unix.MAP_SHARED) 26 | } 27 | 28 | func munmap(b []byte) (err error) { 29 | return unix.Munmap(b) 30 | } 31 | -------------------------------------------------------------------------------- /util/osx/file_mmap_windows.go: -------------------------------------------------------------------------------- 1 | package osx 2 | 3 | import ( 4 | "os" 5 | "syscall" 6 | "unsafe" 7 | ) 8 | 9 | func mmap(f *os.File, size int) ([]byte, error) { 10 | low, high := uint32(size), uint32(size>>32) 11 | h, errno := syscall.CreateFileMapping(syscall.Handle(f.Fd()), nil, syscall.PAGE_READONLY, high, low, nil) 12 | if h == 0 { 13 | return nil, os.NewSyscallError("CreateFileMapping", errno) 14 | } 15 | 16 | addr, errno := syscall.MapViewOfFile(h, syscall.FILE_MAP_READ, 0, 0, uintptr(size)) 17 | if addr == 0 { 18 | return nil, os.NewSyscallError("MapViewOfFile", errno) 19 | } 20 | 21 | if err := syscall.CloseHandle(h); err != nil { 22 | return nil, os.NewSyscallError("CloseHandle", err) 23 | } 24 | 25 | return (*[maxMapSize]byte)(unsafe.Pointer(addr))[:size], nil 26 | } 27 | 28 | func munmap(b []byte) error { 29 | if err := syscall.UnmapViewOfFile((uintptr)(unsafe.Pointer(&b[0]))); err != nil { 30 | return os.NewSyscallError("UnmapViewOfFile", err) 31 | } 32 | return nil 33 | } 34 | -------------------------------------------------------------------------------- /util/osx/file_mmap_windows_386.go: -------------------------------------------------------------------------------- 1 | // Copyright 2018 The Prometheus Authors 2 | // Licensed under the Apache License, Version 2.0 (the "License"); 3 | // you may not use this file except in compliance with the License. 
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package osx

// maxMapSize is the length of the array type through which the Windows mmap
// converts a mapped address into a byte slice; it bounds the largest
// mapping on this target.
const maxMapSize = 0x7FFFFFFF // 2GB

--------------------------------------------------------------------------------
/util/osx/file_mmap_windows_non386.go:
--------------------------------------------------------------------------------
// Copyright 2018 The Prometheus Authors
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

//go:build windows && !386

package osx

// maxMapSize is the length of the array type through which the Windows mmap
// converts a mapped address into a byte slice; it bounds the largest
// mapping on non-386 Windows targets.
const maxMapSize = 0xFFFFFFFFFFFF // 256TB

--------------------------------------------------------------------------------
/util/osx/homedir.go:
--------------------------------------------------------------------------------
package osx

import (
	"os"
	"path/filepath"
	"time"
)

// UserHomeDir is similar to os.UserHomeDir, but never fails:
// if the home dir is not found, it returns a directory path under the
// temp dir that is named after the current date.
func UserHomeDir() string {
	hd, err := os.UserHomeDir()
	if err != nil {
		// Fallback: <temp dir>/<YYYY-MM-DD>. The directory is only named
		// here, it is not created.
		hd = filepath.Join(os.TempDir(), time.Now().Format(time.DateOnly))
	}
	return hd
}

--------------------------------------------------------------------------------
/util/ptr/pointer.go:
--------------------------------------------------------------------------------
package ptr

import (
	"time"

	"golang.org/x/exp/constraints"
)

// Int returns a pointer to a copy of v.
func Int(v int) *int {
	return Ref(v)
}

// IntDeref returns *v, or def when v is nil.
func IntDeref(v *int, def int) int {
	return Deref(v, def)
}

// Int8 returns a pointer to a copy of v.
func Int8(v int8) *int8 {
	return Ref(v)
}

// Int8Deref returns *v, or def when v is nil.
func Int8Deref(v *int8, def int8) int8 {
	return Deref(v, def)
}

// Int16 returns a pointer to a copy of v.
func Int16(v int16) *int16 {
	return Ref(v)
}

// Int16Deref returns *v, or def when v is nil.
func Int16Deref(v *int16, def int16) int16 {
	return Deref(v, def)
}

// Int32 returns a pointer to a copy of v.
func Int32(v int32) *int32 {
	return Ref(v)
}

// Int32Deref returns *v, or def when v is nil.
func Int32Deref(v *int32, def int32) int32 {
	return Deref(v, def)
}

// Int64 returns a pointer to a copy of v.
func Int64(v int64) *int64 {
	return Ref(v)
}

// Int64Deref returns *v, or def when v is nil.
func Int64Deref(v *int64, def int64) int64 {
	return Deref(v, def)
}

// Uint returns a pointer to a copy of v.
func Uint(v uint) *uint {
	return Ref(v)
}

// UintDeref returns *v, or def when v is nil.
func UintDeref(v *uint, def uint) uint {
	return Deref(v, def)
}

// Uint8 returns a pointer to a copy of v.
func Uint8(v uint8) *uint8 {
	return Ref(v)
}

// Uint8Deref returns *v, or def when v is nil.
func Uint8Deref(v *uint8, def uint8) uint8 {
	return Deref(v, def)
}

// Uint16 returns a pointer to a copy of v.
func Uint16(v uint16) *uint16 {
	return Ref(v)
}

// Uint16Deref returns *v, or def when v is nil.
func Uint16Deref(v *uint16, def uint16) uint16 {
	return Deref(v, def)
}

// Uint32 returns a pointer to a copy of v.
func Uint32(v uint32) *uint32 {
	return Ref(v)
}

// Uint32Deref returns *v, or def when v is nil.
func Uint32Deref(v *uint32, def uint32) uint32 {
	return Deref(v, def)
}

// Uint64 returns a pointer to a copy of v.
func Uint64(v uint64) *uint64 {
	return Ref(v)
}

// Uint64Deref returns *v, or def when v is nil.
func Uint64Deref(v *uint64, def uint64) uint64 {
	return Deref(v, def)
}

// Float32 returns a pointer to a copy of v.
func Float32(v float32) *float32 {
	return Ref(v)
}

// Float32Deref returns *v, or def when v is nil.
func Float32Deref(v *float32, def float32) float32 {
	return Deref(v, def)
}

// Float64 returns a pointer to a copy of v.
func Float64(v float64) *float64 {
	return Ref(v)
}

// Float64Deref returns *v, or def when v is nil.
func Float64Deref(v *float64, def float64) float64 {
	return Deref(v, def)
}

// String returns a pointer to a copy of v.
func String(v string) *string {
	return Ref(v)
}

// StringDeref returns *v, or def when v is nil.
func StringDeref(v *string, def string) string {
	return Deref(v, def)
}

// Bool returns a pointer to a copy of v.
func Bool(v bool) *bool {
	return Ref(v)
}

// BoolDeref returns *v, or def when v is nil.
func BoolDeref(v *bool, def bool) bool {
	return Deref(v, def)
}

// Duration returns a pointer to a copy of v.
func Duration(v time.Duration) *time.Duration {
	return Ref(v)
}

// DurationDeref returns *v, or def when v is nil.
func DurationDeref(v *time.Duration, def time.Duration) time.Duration {
	return Deref(v, def)
}

// Time returns a pointer to a copy of v.
func Time(v time.Time) *time.Time {
	return Ref(v)
}

// TimeDeref returns *v, or def when v is nil.
func TimeDeref(v *time.Time, def time.Time) time.Time {
	return Deref(v, def)
}

// Pointerable is the set of types the generic helpers in this package
// accept: every ordered type, bool-like types, and time.Time.
type Pointerable interface {
	constraints.Ordered | ~bool | time.Time
}

// Ref returns a pointer to a copy of v.
func Ref[T Pointerable](v T) *T {
	return &v
}

// To is an alias of Ref.
func To[T Pointerable](v T) *T {
	return Ref(v)
}

// Deref returns *ptr, or def when ptr is nil.
func Deref[T Pointerable](ptr *T, def T) T {
	if ptr != nil {
		return *ptr
	}

	return def
}

// Equal reports whether a and b are both non-nil and point to equal values
// (compared with ==). If either pointer is nil — including when both are —
// it returns false.
// NOTE(review): for time.Time, == is a struct comparison rather than
// time.Time.Equal; confirm that is what callers want.
func Equal[T Pointerable](a, b *T) bool {
	if a != nil && b != nil {
		return *a == *b
	}

	return false
}

--------------------------------------------------------------------------------
/util/signalx/handler.go:
--------------------------------------------------------------------------------
package signalx

import (
	"context"
	"os"
	"os/signal"
)

// registered is closed by the first call to Handler; closing it a second
// time panics, which enforces that Handler is called at most once.
var registered = make(chan struct{})

// Handler registers for signals and returns a context.
// The returned context is cancelled when the first registered signal
// arrives; a second signal terminates the process with exit code 1.
// Handler must be called at most once per process.
func Handler() context.Context {
	close(registered) // Panics when called twice.

	// Buffer one slot per registered signal so that delivery does not depend
	// on the goroutine below being ready to receive.
	sigChan := make(chan os.Signal, len(sigs))
	ctx, cancel := context.WithCancel(context.Background())

	// Register for signals.
	signal.Notify(sigChan, sigs...)

	// Process signals.
	go func() {
		var exited bool
		for range sigChan {
			if exited {
				// Second signal: the caller did not shut down in time.
				os.Exit(1)
			}
			cancel()
			exited = true
		}
	}()

	return ctx
}

--------------------------------------------------------------------------------
/util/signalx/handler_unix.go:
--------------------------------------------------------------------------------
//go:build !windows

package signalx

import (
	"os"
	"syscall"
)

// sigs lists the signals Handler listens for on non-Windows targets.
var sigs = []os.Signal{syscall.SIGINT, syscall.SIGTERM}

--------------------------------------------------------------------------------
/util/signalx/handler_windows.go:
--------------------------------------------------------------------------------
package signalx

import (
	"os"
	"syscall"
)

// sigs lists the signals Handler listens for on Windows.
var sigs = []os.Signal{syscall.SIGINT}

--------------------------------------------------------------------------------
/util/slicex/search.go:
--------------------------------------------------------------------------------
package slicex

import "golang.org/x/exp/constraints"

// UpperBound returns the index of the first element of s that is greater
// than e, or len(s) if there is none. s must be sorted in ascending order.
func UpperBound[T constraints.Integer | constraints.Float](s []T, e T) int {
	// Binary search over the half-open range [l, r); everything left of l
	// is <= e and everything from r on is > e.
	l, r := 0, len(s)
	for l < r {
		m := l + (r-l)/2 // midpoint computed without overflowing l+r
		if s[m] <= e {
			l = m + 1
		} else {
			r = m
		}
	}
	return l
}

--------------------------------------------------------------------------------
/util/stringx/bytes.go:
--------------------------------------------------------------------------------
package stringx

import "unsafe"

// FromBytes converts a byte slice to a string without copying.
// The string shares the slice's memory, so the slice must not be modified
// while the string is in use.
func FromBytes(b *[]byte) string {
	return unsafe.String(unsafe.SliceData(*b), len(*b))
}

// ToBytes converts a string to a byte slice without copying.
// The slice shares the string's memory and must not be modified.
func ToBytes(s *string) (bs []byte) {
	return unsafe.Slice(unsafe.StringData(*s), len(*s))
}

--------------------------------------------------------------------------------
/util/stringx/random.go:
--------------------------------------------------------------------------------
package stringx

// Borrowed from github.com/thanhpk/randstr.

import (
	"bytes"
	"crypto/rand"
	"encoding/binary"
	"encoding/hex"
)

// list of default letters that can be used to make a random string when calling RandomString
// function with no letters provided.
var defLetters = []rune("0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ")

// RandomBytes generates n random bytes using crypto/rand.
// It panics if the random source cannot be read.
func RandomBytes(n int) []byte {
	b := make([]byte, n)

	_, err := rand.Read(b)
	if err != nil {
		panic(err)
	}

	return b
}

// RandomHex generates a random hex string with length of n
// e.g: 67aab2d956bd7cc621af22cfb169cba8.
30 | func RandomHex(n int) string { return hex.EncodeToString(RandomBytes(n)) } 31 | 32 | // RandomString generates a random string using only letters provided in the letters parameter 33 | // if user omit letters parameters, this function will use defLetters instead. 34 | func RandomString(n int, letters ...string) string { 35 | var ( 36 | letterRunes []rune 37 | bb bytes.Buffer 38 | ) 39 | 40 | if len(letters) == 0 { 41 | letterRunes = defLetters 42 | } else { 43 | letterRunes = []rune(letters[0]) 44 | } 45 | 46 | bb.Grow(n) 47 | 48 | l := uint32(len(letterRunes)) 49 | // On each loop, generate one random rune and append to output. 50 | for i := 0; i < n; i++ { 51 | bb.WriteRune(letterRunes[binary.BigEndian.Uint32(RandomBytes(4))%l]) 52 | } 53 | 54 | return bb.String() 55 | } 56 | 57 | // RandomBase64 generates a random base64 string with length of n, 58 | // safe for URL. 59 | func RandomBase64(n int) string { 60 | return RandomString(n, "0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ-_") 61 | } 62 | -------------------------------------------------------------------------------- /util/stringx/strings.go: -------------------------------------------------------------------------------- 1 | package stringx 2 | 3 | import "strings" 4 | 5 | // CutFromLeft is the same as strings.Cut, 6 | // which starts from left to right, 7 | // slices s around the first instance of sep, 8 | // returning the text before and after sep. 9 | // The found result reports whether sep appears in s. 10 | // If sep does not appear in s, cut returns s, "", false. 11 | func CutFromLeft(s, sep string) (before, after string, found bool) { 12 | return strings.Cut(s, sep) 13 | } 14 | 15 | // CutFromRight takes the same arguments as CutFromLeft, 16 | // but starts from right to left, 17 | // slices s around the last instance of sep, 18 | // return the text before and after sep. 19 | // The found result reports whether sep appears in s. 
20 | // If sep does not appear in s, cut returns s, "", false. 21 | func CutFromRight(s, sep string) (before, after string, found bool) { 22 | if i := strings.LastIndex(s, sep); i >= 0 { 23 | return s[:i], s[i+len(sep):], true 24 | } 25 | return s, "", false 26 | } 27 | 28 | // ReplaceAllFunc is similar to strings.ReplaceAll, 29 | // but it replaces each rune in s with the result of f(r). 30 | func ReplaceAllFunc(s string, f func(rune) rune) string { 31 | var b strings.Builder 32 | for _, r := range s { 33 | b.WriteRune(f(r)) 34 | } 35 | return b.String() 36 | } 37 | 38 | // HasSuffixes checks if s has any of the suffixes in prefixes. 39 | func HasSuffixes(s string, suffixes ...string) bool { 40 | for _, suffix := range suffixes { 41 | if strings.HasSuffix(s, suffix) { 42 | return true 43 | } 44 | } 45 | return false 46 | } 47 | -------------------------------------------------------------------------------- /util/stringx/sum.go: -------------------------------------------------------------------------------- 1 | package stringx 2 | 3 | import ( 4 | "crypto/sha256" 5 | "encoding/hex" 6 | "hash/fnv" 7 | ) 8 | 9 | // SumByFNV64a sums up the string(s) by FNV-64a hash algorithm. 10 | func SumByFNV64a(s string, ss ...string) string { 11 | h := fnv.New64a() 12 | 13 | _, _ = h.Write(ToBytes(&s)) 14 | for i := range ss { 15 | _, _ = h.Write(ToBytes(&ss[i])) 16 | } 17 | 18 | sum := h.Sum(nil) 19 | return hex.EncodeToString(sum) 20 | } 21 | 22 | // SumBytesByFNV64a sums up the byte slice(s) by FNV-64a hash algorithm. 23 | func SumBytesByFNV64a(bs []byte, bss ...[]byte) string { 24 | h := fnv.New64a() 25 | 26 | _, _ = h.Write(bs) 27 | for i := range bss { 28 | _, _ = h.Write(bss[i]) 29 | } 30 | 31 | sum := h.Sum(nil) 32 | return hex.EncodeToString(sum) 33 | } 34 | 35 | // SumBySHA256 sums up the string(s) by SHA256 hash algorithm. 
36 | func SumBySHA256(s string, ss ...string) string { 37 | h := sha256.New() 38 | 39 | _, _ = h.Write(ToBytes(&s)) 40 | for i := range ss { 41 | _, _ = h.Write(ToBytes(&ss[i])) 42 | } 43 | 44 | sum := h.Sum(nil) 45 | return hex.EncodeToString(sum) 46 | } 47 | 48 | // SumBytesBySHA256 sums up the byte slice(s) by SHA256 hash algorithm. 49 | func SumBytesBySHA256(bs []byte, bss ...[]byte) string { 50 | h := sha256.New() 51 | 52 | _, _ = h.Write(bs) 53 | for i := range bss { 54 | _, _ = h.Write(bss[i]) 55 | } 56 | 57 | sum := h.Sum(nil) 58 | return hex.EncodeToString(sum) 59 | } 60 | 61 | // SumBySHA224 sums up the string(s) by SHA224 hash algorithm. 62 | func SumBySHA224(s string, ss ...string) string { 63 | h := sha256.New224() 64 | 65 | _, _ = h.Write(ToBytes(&s)) 66 | for i := range ss { 67 | _, _ = h.Write(ToBytes(&ss[i])) 68 | } 69 | 70 | sum := h.Sum(nil) 71 | return hex.EncodeToString(sum) 72 | } 73 | 74 | // SumBytesBySHA224 sums up the byte slice(s) by SHA224 hash algorithm. 75 | func SumBytesBySHA224(bs []byte, bss ...[]byte) string { 76 | h := sha256.New224() 77 | 78 | _, _ = h.Write(bs) 79 | for i := range bss { 80 | _, _ = h.Write(bss[i]) 81 | } 82 | 83 | sum := h.Sum(nil) 84 | return hex.EncodeToString(sum) 85 | } 86 | -------------------------------------------------------------------------------- /zz_generated.diffusion_model_memory_usage.regression.go: -------------------------------------------------------------------------------- 1 | package gguf_parser 2 | 3 | import "math" 4 | 5 | // GuessSD1DiffusionModelMemoryUsage returns the memory usage in bytes for the given width and height, 6 | // which is calculated by linear regression or polynomial regression. 
func GuessSD1DiffusionModelMemoryUsage(width, height uint32, flashAttention bool) uint64 {
	// flashAttention is accepted for signature parity; this model has a
	// single fit.
	coefficients := []float64{7876368.5672, 161.4230198633, 0.0078124893}
	degree := 2
	// Widen before multiplying: a uint32 product wraps at 2^32 pixels.
	x := float64(width) * float64(height)

	y := float64(0)
	for i := 0; i <= degree; i++ {
		y += coefficients[i] * math.Pow(x, float64(i))
	}

	// Converting an out-of-range float to uint64 is implementation-dependent,
	// so clamp the estimate into the representable range first.
	switch {
	case y <= 0:
		return 0
	case y >= math.MaxUint64:
		return math.MaxUint64
	}
	return uint64(y)
}

// GuessSD2DiffusionModelMemoryUsage returns the memory usage in bytes for the given width and height,
// which is calculated by linear regression or polynomial regression.
// Where the fitted curve dips below zero (small images without flash
// attention) the estimate is reported as 0.
func GuessSD2DiffusionModelMemoryUsage(width, height uint32, flashAttention bool) uint64 {
	coefficients := []float64{-355043979.0562, -1193.3271458642, 0.0054023818}
	degree := 2
	// Widen before multiplying: a uint32 product wraps at 2^32 pixels.
	x := float64(width) * float64(height)

	if flashAttention {
		coefficients = []float64{3780681.28078, 513.2102510935}
		degree = 1
	}

	y := float64(0)
	for i := 0; i <= degree; i++ {
		y += coefficients[i] * math.Pow(x, float64(i))
	}

	// Converting an out-of-range float to uint64 is implementation-dependent,
	// so clamp the estimate into the representable range first.
	switch {
	case y <= 0:
		return 0
	case y >= math.MaxUint64:
		return math.MaxUint64
	}
	return uint64(y)
}

// GuessSDXLDiffusionModelMemoryUsage returns the memory usage in bytes for the given width and height,
// which is calculated by linear regression or polynomial regression.
// Where the fitted line dips below zero (very small images with flash
// attention) the estimate is reported as 0.
func GuessSDXLDiffusionModelMemoryUsage(width, height uint32, flashAttention bool) uint64 {
	coefficients := []float64{55541290.3893, 138.3196116655, 0.0006109455}
	degree := 2
	// Widen before multiplying: a uint32 product wraps at 2^32 pixels.
	x := float64(width) * float64(height)

	if flashAttention {
		coefficients = []float64{-5958802.78052, 500.0687898915}
		degree = 1
	}

	y := float64(0)
	for i := 0; i <= degree; i++ {
		y += coefficients[i] * math.Pow(x, float64(i))
	}

	// Converting an out-of-range float to uint64 is implementation-dependent,
	// so clamp the estimate into the representable range first.
	switch {
	case y <= 0:
		return 0
	case y >= math.MaxUint64:
		return math.MaxUint64
	}
	return uint64(y)
}

// GuessSDXLRefinerDiffusionModelMemoryUsage returns the memory usage in bytes for the given width and height,
// which is calculated by linear regression or polynomial regression.
func GuessSDXLRefinerDiffusionModelMemoryUsage(width, height uint32, flashAttention bool) uint64 {
	coefficients := []float64{49395992.3449, 155.2477810191, 0.0007351736}
	degree := 2
	// Widen before multiplying: a uint32 product wraps at 2^32 pixels.
	x := float64(width) * float64(height)

	if flashAttention {
		coefficients = []float64{7031343.31998, 599.4137437227}
		degree = 1
	}

	y := float64(0)
	for i := 0; i <= degree; i++ {
		y += coefficients[i] * math.Pow(x, float64(i))
	}

	// Converting an out-of-range float to uint64 is implementation-dependent,
	// so clamp the estimate into the representable range first.
	switch {
	case y <= 0:
		return 0
	case y >= math.MaxUint64:
		return math.MaxUint64
	}
	return uint64(y)
}

// GuessSD3MediumDiffusionModelMemoryUsage returns the memory usage in bytes for the given width and height,
// which is calculated by linear regression or polynomial regression.
func GuessSD3MediumDiffusionModelMemoryUsage(width, height uint32, flashAttention bool) uint64 {
	// flashAttention is accepted for signature parity; this model has a
	// single fit.
	coefficients := []float64{16529921.3700, 234.6656247718, 0.0014648995}
	degree := 2
	// Widen before multiplying: a uint32 product wraps at 2^32 pixels.
	x := float64(width) * float64(height)

	y := float64(0)
	for i := 0; i <= degree; i++ {
		y += coefficients[i] * math.Pow(x, float64(i))
	}

	// Converting an out-of-range float to uint64 is implementation-dependent,
	// so clamp the estimate into the representable range first.
	switch {
	case y <= 0:
		return 0
	case y >= math.MaxUint64:
		return math.MaxUint64
	}
	return uint64(y)
}

// GuessSD35MediumDiffusionModelMemoryUsage returns the memory usage in bytes for the given width and height,
// which is calculated by linear regression or polynomial regression.
func GuessSD35MediumDiffusionModelMemoryUsage(width, height uint32, flashAttention bool) uint64 {
	// flashAttention is accepted for signature parity; this model has a
	// single fit.
	coefficients := []float64{17441103.4726, 281.6956819806, 0.0014651233}
	degree := 2
	// Widen before multiplying: a uint32 product wraps at 2^32 pixels.
	x := float64(width) * float64(height)

	y := float64(0)
	for i := 0; i <= degree; i++ {
		y += coefficients[i] * math.Pow(x, float64(i))
	}

	// Converting an out-of-range float to uint64 is implementation-dependent,
	// so clamp the estimate into the representable range first.
	switch {
	case y <= 0:
		return 0
	case y >= math.MaxUint64:
		return math.MaxUint64
	}
	return uint64(y)
}

// GuessSD35LargeDiffusionModelMemoryUsage returns the memory usage in bytes for the given width and height,
// which is calculated by linear regression or polynomial regression.
106 | func GuessSD35LargeDiffusionModelMemoryUsage(width, height uint32, flashAttention bool) uint64 { 107 | coefficients := []float64{23204369.2029, 410.3731196298, 0.0023195947} 108 | degree := 2 109 | x := float64(width * height) 110 | 111 | y := float64(0) 112 | for i := 0; i <= degree; i++ { 113 | y += coefficients[i] * math.Pow(x, float64(i)) 114 | } 115 | return uint64(y) 116 | } 117 | 118 | // GuessFLUXDiffusionModelMemoryUsage returns the memory usage in bytes for the given width and height, 119 | // which is calculated by linear regression or polynomial regression. 120 | func GuessFLUXDiffusionModelMemoryUsage(width, height uint32, flashAttention bool) uint64 { 121 | coefficients := []float64{46511668.6742, 997.7758807792, 0.0014573393} 122 | degree := 2 123 | x := float64(width * height) 124 | 125 | y := float64(0) 126 | for i := 0; i <= degree; i++ { 127 | y += coefficients[i] * math.Pow(x, float64(i)) 128 | } 129 | return uint64(y) 130 | } 131 | -------------------------------------------------------------------------------- /zz_generated.ggmltype.stringer.go: -------------------------------------------------------------------------------- 1 | // Code generated by "stringer -linecomment -type GGMLType -output zz_generated.ggmltype.stringer.go -trimprefix GGMLType"; DO NOT EDIT. 2 | 3 | package gguf_parser 4 | 5 | import "strconv" 6 | 7 | func _() { 8 | // An "invalid array index" compiler error signifies that the constant values have changed. 9 | // Re-run the stringer command to generate them again. 
10 | var x [1]struct{} 11 | _ = x[GGMLTypeF32-0] 12 | _ = x[GGMLTypeF16-1] 13 | _ = x[GGMLTypeQ4_0-2] 14 | _ = x[GGMLTypeQ4_1-3] 15 | _ = x[GGMLTypeQ4_2-4] 16 | _ = x[GGMLTypeQ4_3-5] 17 | _ = x[GGMLTypeQ5_0-6] 18 | _ = x[GGMLTypeQ5_1-7] 19 | _ = x[GGMLTypeQ8_0-8] 20 | _ = x[GGMLTypeQ8_1-9] 21 | _ = x[GGMLTypeQ2_K-10] 22 | _ = x[GGMLTypeQ3_K-11] 23 | _ = x[GGMLTypeQ4_K-12] 24 | _ = x[GGMLTypeQ5_K-13] 25 | _ = x[GGMLTypeQ6_K-14] 26 | _ = x[GGMLTypeQ8_K-15] 27 | _ = x[GGMLTypeIQ2_XXS-16] 28 | _ = x[GGMLTypeIQ2_XS-17] 29 | _ = x[GGMLTypeIQ3_XXS-18] 30 | _ = x[GGMLTypeIQ1_S-19] 31 | _ = x[GGMLTypeIQ4_NL-20] 32 | _ = x[GGMLTypeIQ3_S-21] 33 | _ = x[GGMLTypeIQ2_S-22] 34 | _ = x[GGMLTypeIQ4_XS-23] 35 | _ = x[GGMLTypeI8-24] 36 | _ = x[GGMLTypeI16-25] 37 | _ = x[GGMLTypeI32-26] 38 | _ = x[GGMLTypeI64-27] 39 | _ = x[GGMLTypeF64-28] 40 | _ = x[GGMLTypeIQ1_M-29] 41 | _ = x[GGMLTypeBF16-30] 42 | _ = x[GGMLTypeQ4_0_4_4-31] 43 | _ = x[GGMLTypeQ4_0_4_8-32] 44 | _ = x[GGMLTypeQ4_0_8_8-33] 45 | _ = x[GGMLTypeTQ1_0-34] 46 | _ = x[GGMLTypeTQ2_0-35] 47 | _ = x[GGMLTypeIQ4_NL_4_4-36] 48 | _ = x[GGMLTypeIQ4_NL_4_8-37] 49 | _ = x[GGMLTypeIQ4_NL_8_8-38] 50 | _ = x[_GGMLTypeCount-39] 51 | } 52 | 53 | const _GGMLType_name = "F32F16Q4_0Q4_1Q4_2Q4_3Q5_0Q5_1Q8_0Q8_1Q2_KQ3_KQ4_KQ5_KQ6_KQ8_KIQ2_XXSIQ2_XSIQ3_XXSIQ1_SIQ4_NLIQ3_SIQ2_SIQ4_XSI8I16I32I64F64IQ1_MBF16Q4_0_4_4Q4_0_4_8Q4_0_8_8TQ1_0TQ2_0IQ4_NL_4_4IQ4_NL_4_8IQ4_NL_8_8Unknown" 54 | 55 | var _GGMLType_index = [...]uint8{0, 3, 6, 10, 14, 18, 22, 26, 30, 34, 38, 42, 46, 50, 54, 58, 62, 69, 75, 82, 87, 93, 98, 103, 109, 111, 114, 117, 120, 123, 128, 132, 140, 148, 156, 161, 166, 176, 186, 196, 203} 56 | 57 | func (i GGMLType) String() string { 58 | if i >= GGMLType(len(_GGMLType_index)-1) { 59 | return "GGMLType(" + strconv.FormatInt(int64(i), 10) + ")" 60 | } 61 | return _GGMLType_name[_GGMLType_index[i]:_GGMLType_index[i+1]] 62 | } 63 | -------------------------------------------------------------------------------- 
/zz_generated.gguffiletype.stringer.go: -------------------------------------------------------------------------------- 1 | // Code generated by "stringer -linecomment -type GGUFFileType -output zz_generated.gguffiletype.stringer.go -trimprefix GGUFFileType"; DO NOT EDIT. 2 | 3 | package gguf_parser 4 | 5 | import "strconv" 6 | 7 | func _() { 8 | // An "invalid array index" compiler error signifies that the constant values have changed. 9 | // Re-run the stringer command to generate them again. 10 | var x [1]struct{} 11 | _ = x[GGUFFileTypeMostlyF32-0] 12 | _ = x[GGUFFileTypeMostlyF16-1] 13 | _ = x[GGUFFileTypeMostlyQ4_0-2] 14 | _ = x[GGUFFileTypeMostlyQ4_1-3] 15 | _ = x[GGUFFileTypeMostlyQ4_1_SOME_F16-4] 16 | _ = x[GGUFFileTypeMostlyQ4_2-5] 17 | _ = x[GGUFFileTypeMostlyQ4_3-6] 18 | _ = x[GGUFFileTypeMostlyQ8_0-7] 19 | _ = x[GGUFFileTypeMostlyQ5_0-8] 20 | _ = x[GGUFFileTypeMostlyQ5_1-9] 21 | _ = x[GGUFFileTypeMostlyQ2_K-10] 22 | _ = x[GGUFFileTypeMostlyQ3_K_S-11] 23 | _ = x[GGUFFileTypeMostlyQ3_K_M-12] 24 | _ = x[GGUFFileTypeMostlyQ3_K_L-13] 25 | _ = x[GGUFFileTypeMostlyQ4_K_S-14] 26 | _ = x[GGUFFileTypeMostlyQ4_K_M-15] 27 | _ = x[GGUFFileTypeMostlyQ5_K_S-16] 28 | _ = x[GGUFFileTypeMostlyQ5_K_M-17] 29 | _ = x[GGUFFileTypeMostlyQ6_K-18] 30 | _ = x[GGUFFileTypeMostlyIQ2_XXS-19] 31 | _ = x[GGUFFileTypeMostlyIQ2_XS-20] 32 | _ = x[GGUFFileTypeMostlyQ2_K_S-21] 33 | _ = x[GGUFFileTypeMostlyIQ3_XS-22] 34 | _ = x[GGUFFileTypeMostlyIQ3_XXS-23] 35 | _ = x[GGUFFileTypeMostlyIQ1_S-24] 36 | _ = x[GGUFFileTypeMostlyIQ4_NL-25] 37 | _ = x[GGUFFileTypeMostlyIQ3_S-26] 38 | _ = x[GGUFFileTypeMostlyIQ3_M-27] 39 | _ = x[GGUFFileTypeMostlyIQ2_S-28] 40 | _ = x[GGUFFileTypeMostlyIQ2_M-29] 41 | _ = x[GGUFFileTypeMostlyIQ4_XS-30] 42 | _ = x[GGUFFileTypeMostlyIQ1_M-31] 43 | _ = x[GGUFFileTypeMostlyBF16-32] 44 | _ = x[GGUFFileTypeMostlyQ4_0_4_4-33] 45 | _ = x[GGUFFileTypeMostlyQ4_0_4_8-34] 46 | _ = x[GGUFFileTypeMostlyQ4_0_8_8-35] 47 | _ = x[_GGUFFileTypeCount-36] 48 | } 49 | 50 | const 
_GGUFFileType_name = "MOSTLY_F32MOSTLY_F16MOSTLY_Q4_0MOSTLY_Q4_1MOSTLY_Q4_1_SOME_F16MOSTLY_Q4_2MOSTLY_Q4_3MOSTLY_Q8_0MOSTLY_Q5_0MOSTLY_Q5_1MOSTLY_Q2_KMOSTLY_Q3_K_SMOSTLY_Q3_K_MMOSTLY_Q3_K_LMOSTLY_Q4_K_SMOSTLY_Q4_K_MMOSTLY_Q5_K_SMOSTLY_Q5_K_MMOSTLY_Q6_KMOSTLY_IQ2_XXSMOSTLY_IQ2_XSMOSTLY_Q2_K_SMOSTLY_IQ3_XSMOSTLY_IQ3_XXSMOSTLY_IQ1_SMOSTLY_IQ4_NLMOSTLY_IQ3_SMOSTLY_IQ3_MMOSTLY_IQ2_SMOSTLY_IQ2_MMOSTLY_IQ4_XSMOSTLY_IQ1_MMOSTLY_BF16MOSTLY_Q4_0_4_4MOSTLY_Q4_0_4_8MOSTLY_Q4_0_8_8Unknown" 51 | 52 | var _GGUFFileType_index = [...]uint16{0, 10, 20, 31, 42, 62, 73, 84, 95, 106, 117, 128, 141, 154, 167, 180, 193, 206, 219, 230, 244, 257, 270, 283, 297, 309, 322, 334, 346, 358, 370, 383, 395, 406, 421, 436, 451, 458} 53 | 54 | func (i GGUFFileType) String() string { 55 | if i >= GGUFFileType(len(_GGUFFileType_index)-1) { 56 | return "GGUFFileType(" + strconv.FormatInt(int64(i), 10) + ")" 57 | } 58 | return _GGUFFileType_name[_GGUFFileType_index[i]:_GGUFFileType_index[i+1]] 59 | } 60 | -------------------------------------------------------------------------------- /zz_generated.ggufmagic.stringer.go: -------------------------------------------------------------------------------- 1 | // Code generated by "stringer -linecomment -type GGUFMagic -output zz_generated.ggufmagic.stringer.go -trimprefix GGUFMagic"; DO NOT EDIT. 2 | 3 | package gguf_parser 4 | 5 | import "strconv" 6 | 7 | func _() { 8 | // An "invalid array index" compiler error signifies that the constant values have changed. 9 | // Re-run the stringer command to generate them again. 
10 | var x [1]struct{} 11 | _ = x[GGUFMagicGGML-1734831468] 12 | _ = x[GGUFMagicGGMF-1734831462] 13 | _ = x[GGUFMagicGGJT-1734830708] 14 | _ = x[GGUFMagicGGUFLe-1179993927] 15 | _ = x[GGUFMagicGGUFBe-1195857222] 16 | } 17 | 18 | const ( 19 | _GGUFMagic_name_0 = "GGUF" 20 | _GGUFMagic_name_1 = "GGUF" 21 | _GGUFMagic_name_2 = "GGJT" 22 | _GGUFMagic_name_3 = "GGMF" 23 | _GGUFMagic_name_4 = "GGML" 24 | ) 25 | 26 | func (i GGUFMagic) String() string { 27 | switch { 28 | case i == 1179993927: 29 | return _GGUFMagic_name_0 30 | case i == 1195857222: 31 | return _GGUFMagic_name_1 32 | case i == 1734830708: 33 | return _GGUFMagic_name_2 34 | case i == 1734831462: 35 | return _GGUFMagic_name_3 36 | case i == 1734831468: 37 | return _GGUFMagic_name_4 38 | default: 39 | return "GGUFMagic(" + strconv.FormatInt(int64(i), 10) + ")" 40 | } 41 | } 42 | -------------------------------------------------------------------------------- /zz_generated.ggufmetadatavaluetype.stringer.go: -------------------------------------------------------------------------------- 1 | // Code generated by "stringer -linecomment -type GGUFMetadataValueType -output zz_generated.ggufmetadatavaluetype.stringer.go -trimprefix GGUFMetadataValueType"; DO NOT EDIT. 2 | 3 | package gguf_parser 4 | 5 | import "strconv" 6 | 7 | func _() { 8 | // An "invalid array index" compiler error signifies that the constant values have changed. 9 | // Re-run the stringer command to generate them again. 
10 | var x [1]struct{} 11 | _ = x[GGUFMetadataValueTypeUint8-0] 12 | _ = x[GGUFMetadataValueTypeInt8-1] 13 | _ = x[GGUFMetadataValueTypeUint16-2] 14 | _ = x[GGUFMetadataValueTypeInt16-3] 15 | _ = x[GGUFMetadataValueTypeUint32-4] 16 | _ = x[GGUFMetadataValueTypeInt32-5] 17 | _ = x[GGUFMetadataValueTypeFloat32-6] 18 | _ = x[GGUFMetadataValueTypeBool-7] 19 | _ = x[GGUFMetadataValueTypeString-8] 20 | _ = x[GGUFMetadataValueTypeArray-9] 21 | _ = x[GGUFMetadataValueTypeUint64-10] 22 | _ = x[GGUFMetadataValueTypeInt64-11] 23 | _ = x[GGUFMetadataValueTypeFloat64-12] 24 | _ = x[_GGUFMetadataValueTypeCount-13] 25 | } 26 | 27 | const _GGUFMetadataValueType_name = "Uint8Int8Uint16Int16Uint32Int32Float32BoolStringArrayUint64Int64Float64Unknown" 28 | 29 | var _GGUFMetadataValueType_index = [...]uint8{0, 5, 9, 15, 20, 26, 31, 38, 42, 48, 53, 59, 64, 71, 78} 30 | 31 | func (i GGUFMetadataValueType) String() string { 32 | if i >= GGUFMetadataValueType(len(_GGUFMetadataValueType_index)-1) { 33 | return "GGUFMetadataValueType(" + strconv.FormatInt(int64(i), 10) + ")" 34 | } 35 | return _GGUFMetadataValueType_name[_GGUFMetadataValueType_index[i]:_GGUFMetadataValueType_index[i+1]] 36 | } 37 | -------------------------------------------------------------------------------- /zz_generated.ggufversion.stringer.go: -------------------------------------------------------------------------------- 1 | // Code generated by "stringer -linecomment -type GGUFVersion -output zz_generated.ggufversion.stringer.go -trimprefix GGUFVersion"; DO NOT EDIT. 2 | 3 | package gguf_parser 4 | 5 | import "strconv" 6 | 7 | func _() { 8 | // An "invalid array index" compiler error signifies that the constant values have changed. 9 | // Re-run the stringer command to generate them again. 
10 | var x [1]struct{} 11 | _ = x[GGUFVersionV1-1] 12 | _ = x[GGUFVersionV2-2] 13 | _ = x[GGUFVersionV3-3] 14 | } 15 | 16 | const _GGUFVersion_name = "V1V2V3" 17 | 18 | var _GGUFVersion_index = [...]uint8{0, 2, 4, 6} 19 | 20 | func (i GGUFVersion) String() string { 21 | i -= 1 22 | if i >= GGUFVersion(len(_GGUFVersion_index)-1) { 23 | return "GGUFVersion(" + strconv.FormatInt(int64(i+1), 10) + ")" 24 | } 25 | return _GGUFVersion_name[_GGUFVersion_index[i]:_GGUFVersion_index[i+1]] 26 | } 27 | --------------------------------------------------------------------------------