├── .github └── workflows │ └── on_publish.yaml ├── .gitignore ├── .gitmodules ├── LICENSE ├── Makefile ├── README.md ├── cmd ├── api │ ├── delete.go │ ├── download.go │ ├── main.go │ ├── models.go │ └── ping.go └── whisper │ ├── delete.go │ ├── download.go │ ├── main.go │ ├── models.go │ ├── server.go │ ├── transcribe.go │ └── version.go ├── doc ├── API.md └── build.md ├── etc ├── Dockerfile ├── Dockerfile.cuda ├── Dockerfile.vulkan └── entrypoint.sh ├── go.mod ├── go.sum ├── opt.go ├── pkg ├── api │ ├── logging.go │ ├── models.go │ ├── register.go │ └── transcribe.go ├── client │ ├── client.go │ └── opts.go ├── pool │ ├── contextpool.go │ ├── contextpool_test.go │ ├── pool.go │ └── pool_test.go ├── schema │ ├── model.go │ ├── segment.go │ └── transcription.go ├── store │ ├── doc.go │ ├── store.go │ └── writer.go ├── task │ ├── context.go │ └── transcription.go └── version │ └── version.go ├── samples ├── OlivierL.wav ├── de-podcast.wav ├── en-audiobook.mp3 ├── en-office.mp3 └── jfk.wav ├── sys ├── pkg-config │ └── main.go └── whisper │ ├── alignment_aheads_preset.go │ ├── contextparams.go │ ├── contextparams_test.go │ ├── error.go │ ├── fullparams.go │ ├── fullparams_test.go │ ├── generate.go │ ├── generate_cuda.go │ ├── generate_vulkan.go │ ├── logging.go │ ├── model.go │ ├── model_test.go │ ├── token.go │ ├── whisper.go │ └── whisper_test.go ├── whisper.go └── whisper_test.go /.github/workflows/on_publish.yaml: -------------------------------------------------------------------------------- 1 | name: Build docker container on publish 2 | on: 3 | release: 4 | types: [ created, edited ] 5 | workflow_dispatch: 6 | inputs: 7 | tag: 8 | description: 'tag' 9 | required: true 10 | default: 'latest' 11 | type: string 12 | jobs: 13 | var: 14 | name: Set variables 15 | runs-on: ubuntu-latest 16 | outputs: 17 | image: "ghcr.io/mutablelogic/go-whisper" 18 | tag: ${{ steps.var.outputs.tag }} 19 | steps: 20 | - name: Checkout 21 | uses: actions/checkout@v4 22 | with: 23 | 
fetch-depth: 0 24 | - name: Set variables 25 | id: var 26 | run: | 27 | if [ "${{ github.event_name }}" != "release" ] && [ "${{ inputs.tag }}" != "latest" ]; then 28 | TAG="${{ inputs.tag }}" && echo "tag=${TAG#v}" >> $GITHUB_OUTPUT 29 | else 30 | TAG="$(git describe --tags)" && echo "tag=${TAG#v}" >> $GITHUB_OUTPUT 31 | fi 32 | build: 33 | name: Build 34 | needs: var 35 | strategy: 36 | matrix: 37 | arch: [ amd64, arm64 ] 38 | runs-on: ${{ matrix.arch == 'amd64' && 'ubuntu-latest' || matrix.arch == 'arm64' && 'nx1' }} # Builds on NX1 self-hosted 39 | env: 40 | ARCH: ${{ matrix.arch }} 41 | OS: linux 42 | DOCKER_REGISTRY: ghcr.io/mutablelogic 43 | steps: 44 | - name: Install git 45 | run: | 46 | sudo apt -y update 47 | sudo apt -y install build-essential git 48 | git config --global advice.detachedHead false 49 | - name: Checkout 50 | uses: actions/checkout@v4 51 | with: 52 | fetch-depth: 0 53 | - name: Login 54 | uses: docker/login-action@v3 55 | with: 56 | registry: ghcr.io 57 | username: ${{ github.repository_owner }} 58 | password: ${{ secrets.GITHUB_TOKEN }} 59 | - name: Build and Push 60 | id: build 61 | run: | 62 | git checkout v${{ needs.var.outputs.tag }} 63 | make docker && make docker-push 64 | manifest: 65 | name: Manifest 66 | needs: 67 | - var 68 | - build 69 | strategy: 70 | matrix: 71 | include: 72 | - tag: ${{ needs.var.outputs.tag }} 73 | - tag: latest # TODO: Skip this if the event calling is not publishing a release 74 | runs-on: ubuntu-latest 75 | steps: 76 | - name: Login 77 | uses: docker/login-action@v3 78 | with: 79 | registry: ghcr.io 80 | username: ${{ github.repository_owner }} 81 | password: ${{ secrets.GITHUB_TOKEN }} 82 | - name: Create 83 | run: | 84 | docker manifest create ${{ needs.var.outputs.image }}:${{ matrix.tag }} \ 85 | --amend ${{ needs.var.outputs.image }}-linux-amd64:${{ needs.var.outputs.tag }} \ 86 | --amend ${{ needs.var.outputs.image }}-linux-arm64:${{ needs.var.outputs.tag }} 87 | - name: Annotate 88 | run: | 89 | 
docker manifest annotate --arch arm64 --os linux \ 90 | ${{ needs.var.outputs.image }}:${{ matrix.tag }} \ 91 | ${{ needs.var.outputs.image }}-linux-arm64:${{ needs.var.outputs.tag }} 92 | docker manifest annotate --arch amd64 --os linux \ 93 | ${{ needs.var.outputs.image }}:${{ matrix.tag }} \ 94 | ${{ needs.var.outputs.image }}-linux-amd64:${{ needs.var.outputs.tag }} 95 | docker manifest push ${{ needs.var.outputs.image }}:${{ matrix.tag }} 96 | - name: Push 97 | run: | 98 | docker manifest push ${{ needs.var.outputs.image }}:${{ matrix.tag }} 99 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Binaries for programs and plugins 2 | *.exe 3 | *.exe~ 4 | *.dll 5 | *.so 6 | *.dylib 7 | *.test 8 | *.out 9 | 10 | # Dependency directories (remove the comment below to include it) 11 | vendor 12 | build 13 | models 14 | 15 | # Other 16 | .vscode 17 | 18 | -------------------------------------------------------------------------------- /.gitmodules: -------------------------------------------------------------------------------- 1 | [submodule "third_party/whisper.cpp"] 2 | path = third_party/whisper.cpp 3 | url = https://github.com/ggerganov/whisper.cpp 4 | 5 | [submodule "third_party/go-media"] 6 | path = third_party/go-media 7 | url = https://github.com/mutablelogic/go-media 8 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 
11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 
47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. 
Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. 
You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 
122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. 
In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. 
We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright [yyyy] [name of copyright owner] 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 202 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | # Paths to packages 2 | DOCKER=$(shell which docker) 3 | GIT=$(shell which git) 4 | GO=$(shell which go) 5 | CMAKE=$(shell which cmake) 6 | 7 | # Set OS and Architecture 8 | ARCH ?= $(shell arch | tr A-Z a-z | sed 's/x86_64/amd64/' | sed 's/i386/amd64/' | sed 's/armv7l/arm/' | sed 's/aarch64/arm64/') 9 | OS ?= $(shell uname | tr A-Z a-z) 10 | VERSION ?= $(shell git describe --tags --always | sed 's/^v//') 11 | DOCKER_REGISTRY ?= ghcr.io/mutablelogic 12 | DOCKER_FILE ?= etc/Dockerfile 13 | 14 | # Set docker tag, etc 15 | BUILD_TAG := ${DOCKER_REGISTRY}/go-whisper-${OS}-${ARCH}:${VERSION} 16 | ROOT_PATH := $(CURDIR) 17 | BUILD_DIR ?= "build" 18 | PREFIX ?= ${BUILD_DIR}/install 19 | 20 | # Build flags 21 | BUILD_MODULE := $(shell cat go.mod | head -1 | cut -d ' ' -f 2) 22 | BUILD_LD_FLAGS += -X $(BUILD_MODULE)/pkg/version.GitSource=${BUILD_MODULE} 23 | BUILD_LD_FLAGS += -X $(BUILD_MODULE)/pkg/version.GitTag=$(shell git describe --tags --always) 24 | 
BUILD_LD_FLAGS += -X $(BUILD_MODULE)/pkg/version.GitBranch=$(shell git name-rev HEAD --name-only --always) 25 | BUILD_LD_FLAGS += -X $(BUILD_MODULE)/pkg/version.GitHash=$(shell git rev-parse HEAD) 26 | BUILD_LD_FLAGS += -X $(BUILD_MODULE)/pkg/version.GoBuildTime=$(shell date -u '+%Y-%m-%dT%H:%M:%SZ') 27 | BUILD_FLAGS = -ldflags "-s -w $(BUILD_LD_FLAGS)" 28 | TEST_FLAGS = -v 29 | CMAKE_FLAGS = -DBUILD_SHARED_LIBS=OFF 30 | 31 | # If GGML_CUDA is set, then add a cuda tag for the go ${BUILD FLAGS} 32 | ifeq ($(GGML_CUDA),1) 33 | TEST_FLAGS += -tags cuda 34 | BUILD_FLAGS += -tags cuda 35 | CUDA_DOCKER_ARCH ?= all 36 | CMAKE_FLAGS += -DGGML_CUDA=ON 37 | BUILD_TAG := "${BUILD_TAG}-cuda" 38 | DOCKER_FILE = etc/Dockerfile.cuda 39 | endif 40 | 41 | # If GGML_VULKAN is set, then add a vulkan tag for the go ${BUILD FLAGS} 42 | ifeq ($(GGML_VULKAN),1) 43 | TEST_FLAGS += -tags vulkan 44 | BUILD_FLAGS += -tags vulkan 45 | CMAKE_FLAGS += -DGGML_VULKAN=ON 46 | BUILD_TAG := "${BUILD_TAG}-vulkan" 47 | DOCKER_FILE = etc/Dockerfile.vulkan 48 | endif 49 | 50 | # Targets 51 | all: whisper api 52 | 53 | # Generate the pkg-config files 54 | generate: mkdir go-tidy libwhisper 55 | @echo "Generating pkg-config" 56 | @mkdir -p ${BUILD_DIR}/lib/pkgconfig 57 | @PKG_CONFIG_PATH=$(shell realpath ${PREFIX})/lib/pkgconfig PREFIX="$(shell realpath ${PREFIX})" go generate ./sys/whisper 58 | 59 | # Make whisper 60 | whisper: generate libwhisper libffmpeg 61 | @echo "Building whisper" 62 | @PKG_CONFIG_PATH=$(shell realpath ${PREFIX})/lib/pkgconfig CGO_LDFLAGS_ALLOW="-(W|D).*" ${GO} build ${BUILD_FLAGS} -o ${BUILD_DIR}/whisper ./cmd/whisper 63 | 64 | # Make api 65 | api: mkdir go-tidy 66 | @echo "Building api" 67 | @${GO} build ${BUILD_FLAGS} -o ${BUILD_DIR}/api ./cmd/api 68 | 69 | # Test whisper bindings 70 | test: generate libwhisper 71 | @echo "Running tests (sys) with ${PREFIX}/lib" 72 | PKG_CONFIG_PATH=$(shell realpath ${PREFIX})/lib ${GO} test ${TEST_FLAGS} ./sys/whisper/... 
73 | @echo "Running tests (pkg)" 74 | @PKG_CONFIG_PATH=$(shell realpath ${PREFIX})/lib ${GO} test ${TEST_FLAGS} ./pkg/... 75 | @echo "Running tests (whisper)" 76 | @PKG_CONFIG_PATH=$(shell realpath ${PREFIX})/lib ${GO} test ${TEST_FLAGS} ./ 77 | 78 | # make libwhisper and install at ${PREFIX} 79 | libwhisper: mkdir submodule cmake-dep 80 | @echo "Making libwhisper with ${CMAKE_FLAGS}" 81 | @${CMAKE} -S third_party/whisper.cpp -B ${BUILD_DIR} -DCMAKE_BUILD_TYPE=Release ${CMAKE_FLAGS} 82 | @${CMAKE} --build ${BUILD_DIR} -j --config Release 83 | @${CMAKE} --install ${BUILD_DIR} --prefix $(shell realpath ${PREFIX}) 84 | 85 | # make ffmpeg libraries and install at ${PREFIX} 86 | libffmpeg: mkdir submodule 87 | @echo "Making ffmpeg libraries => ${PREFIX}" 88 | @mkdir -p ${BUILD_DIR} 89 | @mkdir -p ${PREFIX} 90 | @BUILD_DIR=$(shell realpath ${BUILD_DIR}) PREFIX=$(shell realpath ${PREFIX}) make -C third_party/go-media ffmpeg 91 | 92 | # Build docker container 93 | docker: docker-dep submodule 94 | @echo build docker image: ${BUILD_TAG} for ${OS}/${ARCH} 95 | @${DOCKER} build \ 96 | --tag ${BUILD_TAG} \ 97 | --build-arg ARCH=${ARCH} \ 98 | --build-arg OS=${OS} \ 99 | --build-arg SOURCE=${BUILD_MODULE} \ 100 | --build-arg VERSION=${VERSION} \ 101 | --build-arg GGML_CUDA=${GGML_CUDA} \ 102 | -f ${DOCKER_FILE} . 
103 | 104 | # Push docker container 105 | docker-push: docker-dep 106 | @echo push docker image: ${BUILD_TAG} 107 | @${DOCKER} push ${BUILD_TAG} 108 | 109 | # Update submodule to the latest version 110 | submodule-update: git-dep 111 | @echo "Updating submodules" 112 | @${GIT} submodule foreach git pull origin master 113 | 114 | # Submodule checkout 115 | submodule: git-dep 116 | @echo "Checking out submodules" 117 | @${GIT} submodule update --init --recursive --remote 118 | 119 | # Submodule clean 120 | submodule-clean: git-dep 121 | @echo "Cleaning submodules" 122 | @${GIT} reset --hard 123 | @${GIT} submodule sync --recursive 124 | @${GIT} submodule update --init --force --recursive 125 | @${GIT} clean -ffdx 126 | @${GIT} submodule foreach --recursive git clean -ffdx 127 | 128 | # Check for docker 129 | docker-dep: 130 | @test -f "${DOCKER}" && test -x "${DOCKER}" || (echo "Missing docker binary" && exit 1) 131 | 132 | # Check for cmake 133 | cmake-dep: 134 | @test -f "${CMAKE}" && test -x "${CMAKE}" || (echo "Missing cmake binary" && exit 1) 135 | 136 | # Check for git 137 | git-dep: 138 | @test -f "${GIT}" && test -x "${GIT}" || (echo "Missing git binary" && exit 1) 139 | 140 | # Check for go 141 | go-dep: 142 | @test -f "${GO}" && test -x "${GO}" || (echo "Missing go binary" && exit 1) 143 | 144 | # Make build directory 145 | mkdir: 146 | @echo Mkdir ${BUILD_DIR} 147 | @install -d ${BUILD_DIR} 148 | @echo Mkdir ${PREFIX} 149 | @install -d ${PREFIX} 150 | 151 | # go mod tidy 152 | go-tidy: go-dep 153 | @echo Tidy 154 | @${GO} mod tidy 155 | @${GO} clean -cache 156 | 157 | # Clean 158 | clean: submodule-clean go-tidy 159 | @echo "Cleaning" 160 | @rm -rf ${BUILD_DIR} 161 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # go-whisper 2 | 3 | Speech-to-Text in golang. This is an early development version. 
4 | 5 | * `cmd` contains an OpenAI-API compatible service 6 | * `pkg` contains the `whisper` service and client 7 | * `sys` contains the `whisper` bindings to the `whisper.cpp` library 8 | * `third_party` is a submodule for the whisper.cpp source 9 | 10 | ## Running 11 | 12 | You can either run the whisper service as a CLI command or in a docker container. 13 | There are docker images for arm64 and amd64 (Intel). The arm64 image is built for 14 | Jetson GPU support specifically, but it will also run on Raspberry Pi's. 15 | 16 | In order to utilize an NVIDIA GPU, you'll need to install the 17 | [NVIDIA Container Toolkit](https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/latest/install-guide.html) first. 18 | 19 | A docker volume called "whisper" should be created, which is used for storing the Whisper language 20 | models. You can see which models are available to download locally [here](https://huggingface.co/ggerganov/whisper.cpp). 21 | 22 | The following command will run the server on port 8080 for an NVIDIA GPU: 23 | 24 | ```bash 25 | docker run \ 26 | --name whisper-server --rm \ 27 | --runtime nvidia --gpus all \ # When using a NVIDIA GPU 28 | -v whisper:/data -p 8080:80 \ 29 | ghcr.io/mutablelogic/go-whisper:latest 30 | ``` 31 | 32 | The API is then 33 | available at `http://localhost:8080/v1` and it generally conforms to the 34 | [OpenAI API](https://platform.openai.com/docs/api-reference/audio) spec. 
35 | 36 | ### Sample Usage 37 | 38 | In order to download a model, you can use the following command (for example): 39 | 40 | ```bash 41 | curl -X POST -H "Content-Type: application/json" -d '{"Path" : "ggml-medium-q5_0.bin" }' localhost:8080/v1/models\?stream=true 42 | ``` 43 | 44 | To list the models available, you can use the following command: 45 | 46 | ```bash 47 | curl -X GET localhost:8080/v1/models 48 | ``` 49 | 50 | To delete a model, you can use the following command: 51 | 52 | ```bash 53 | curl -X DELETE localhost:8080/v1/models/ggml-medium-q5_0 54 | ``` 55 | 56 | To transcribe a media file into its original language, you can use the following command: 57 | 58 | ```bash 59 | curl -F model=ggml-medium-q5_0 -F file=@samples/jfk.wav localhost:8080/v1/audio/transcriptions\?stream=true 60 | ``` 61 | 62 | To translate a media file into a different language, you can use the following command: 63 | 64 | ```bash 65 | curl -F model=ggml-medium-q5_0 -F file=@samples/de-podcast.wav -F language=en localhost:8080/v1/audio/translations\?stream=true 66 | ``` 67 | 68 | There's more information on the API [here](doc/API.md). 
69 | 70 | ## Building 71 | 72 | If you are building a docker image, you just need make and docker installed: 73 | 74 | * `DOCKER_REGISTRY=docker.io/user make docker` - builds a docker container with the 75 | server binary for CUDA, tagged to a specific registry 76 | * `OS=linux GGML_CUDA=0 DOCKER_REGISTRY=docker.io/user make docker` - builds a docker container 77 | for Linux, with the server binary without CUDA, tagged to a specific registry 78 | 79 | If you want to build the server without docker, you can use the `Makefile` in the root 80 | directory and have the following dependencies met: 81 | 82 | * Recent version of Go (i.e., 1.22+) 83 | * C++ compiler and cmake 84 | * FFmpeg 6.1 libraries (see [here](doc/build.md) for more information) 85 | * For CUDA, you'll need the CUDA toolkit installed including the `nvcc` compiler 86 | 87 | The following `Makefile` targets can be used: 88 | 89 | * `make whisper` - creates the server binary, and places it in the `build` directory. Should 90 | link to Metal on macOS 91 | * `GGML_CUDA=1 make whisper` - creates the server binary linked to CUDA, and places it 92 | in the `build` directory. Should work for amd64 and arm64 (Jetson) platforms 93 | 94 | See all the other targets in the `Makefile` for more information. 95 | 96 | ## Developing 97 | 98 | TODO 99 | 100 | ## Status 101 | 102 | Still in development. See this [issue](https://github.com/mutablelogic/go-whisper/issues/1) for 103 | remaining tasks to be completed. 104 | 105 | ## Contributing & Distribution 106 | 107 | __This module is currently in development and subject to change.__ 108 | 109 | Please do file feature requests and bugs [here](https://github.com/mutablelogic/go-whisper/issues). 110 | The license is Apache 2 so feel free to redistribute. 
Redistributions in either source 111 | code or binary form must reproduce the copyright notice, and please link back to this 112 | repository for more information: 113 | 114 | > __go-whisper__\ 115 | > [https://github.com/mutablelogic/go-whisper/](https://github.com/mutablelogic/go-whisper/)\ 116 | > Copyright (c) 2023-2024 David Thorpe, All rights reserved. 117 | > 118 | > __whisper.cpp__\ 119 | > [https://github.com/ggerganov/whisper.cpp](https://github.com/ggerganov/whisper.cpp)\ 120 | > Copyright (c) 2023-2024 The ggml authors 121 | 122 | This software links to static libraries of [whisper.cpp](https://github.com/ggerganov/whisper.cpp) licensed under 123 | the [MIT License](https://opensource.org/licenses/MIT). 124 | -------------------------------------------------------------------------------- /cmd/api/delete.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | type DeleteCmd struct { 4 | Model string `arg:"" name:"model" help:"Model to delete"` 5 | } 6 | 7 | func (cmd *DeleteCmd) Run(ctx *Globals) error { 8 | if err := ctx.client.DeleteModel(ctx.ctx, cmd.Model); err != nil { 9 | return err 10 | } 11 | return nil 12 | } 13 | -------------------------------------------------------------------------------- /cmd/api/download.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "fmt" 5 | 6 | // Packages 7 | "github.com/djthorpe/go-tablewriter" 8 | ) 9 | 10 | type DownloadCmd struct { 11 | Model string `arg:"" name:"model" help:"Model to download (must end in .bin)"` 12 | } 13 | 14 | func (cmd *DownloadCmd) Run(ctx *Globals) error { 15 | model, err := ctx.client.DownloadModel(ctx.ctx, cmd.Model, func(status string, cur, total int64) { 16 | pct := fmt.Sprintf("%02d%%", int(100*float64(cur)/float64(total))) 17 | ctx.writer.Writeln(pct, status) 18 | }) 19 | if err != nil { 20 | return err 21 | } 22 | return 
ctx.writer.Write(model, tablewriter.OptHeader()) 23 | } 24 | -------------------------------------------------------------------------------- /cmd/api/main.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "context" 5 | "os" 6 | "os/signal" 7 | "path/filepath" 8 | "syscall" 9 | 10 | // Packages 11 | kong "github.com/alecthomas/kong" 12 | tablewriter "github.com/djthorpe/go-tablewriter" 13 | opt "github.com/mutablelogic/go-client" 14 | client "github.com/mutablelogic/go-whisper/pkg/client" 15 | ) 16 | 17 | //////////////////////////////////////////////////////////////////////////////// 18 | // TYPES 19 | 20 | type Globals struct { 21 | Url string `name:"url" help:"URL of whisper service (can be set from WHISPER_URL env)" default:"${WHISPER_URL}"` 22 | Debug bool `name:"debug" help:"Enable debug output"` 23 | 24 | // Writer, service and context 25 | writer *tablewriter.Writer 26 | client *client.Client 27 | ctx context.Context 28 | } 29 | 30 | type CLI struct { 31 | Globals 32 | 33 | Ping PingCmd `cmd:"ping" help:"Ping the whisper service"` 34 | Models ModelsCmd `cmd:"models" help:"List models"` 35 | Download DownloadCmd `cmd:"download" help:"Download a model"` 36 | Delete DeleteCmd `cmd:"delete" help:"Delete a model"` 37 | } 38 | 39 | //////////////////////////////////////////////////////////////////////////////// 40 | // GLOBALS 41 | 42 | const ( 43 | defaultEndpoint = "http://localhost:8080/api/v1" 44 | ) 45 | 46 | //////////////////////////////////////////////////////////////////////////////// 47 | // MAIN 48 | 49 | func main() { 50 | // The name of the executable 51 | name, err := os.Executable() 52 | if err != nil { 53 | panic(err) 54 | } else { 55 | name = filepath.Base(name) 56 | } 57 | 58 | // Create a cli parser 59 | cli := CLI{} 60 | cmd := kong.Parse(&cli, 61 | kong.Name(name), 62 | kong.Description("speech transcription and translation service client"), 63 | kong.UsageOnError(), 64 
| kong.ConfigureHelp(kong.HelpOptions{Compact: true}), 65 | kong.Vars{ 66 | "WHISPER_URL": envOrDefault("WHISPER_URL", defaultEndpoint), 67 | }, 68 | ) 69 | 70 | // Set whisper client options 71 | opts := []opt.ClientOpt{} 72 | if cli.Globals.Debug { 73 | opts = append(opts, opt.OptTrace(os.Stderr, true)) 74 | } 75 | 76 | // Create a whisper client 77 | client, err := client.New(cli.Globals.Url, opts...) 78 | if err != nil { 79 | cmd.FatalIfErrorf(err) 80 | return 81 | } else { 82 | cli.Globals.client = client 83 | } 84 | 85 | // Create a tablewriter object with text output 86 | writer := tablewriter.New(os.Stdout, tablewriter.OptOutputText()) 87 | cli.Globals.writer = writer 88 | 89 | // Create a context 90 | var cancel context.CancelFunc 91 | cli.Globals.ctx, cancel = signal.NotifyContext(context.Background(), os.Interrupt, syscall.SIGTERM) 92 | defer cancel() 93 | 94 | // Run the command 95 | if err := cmd.Run(&cli.Globals); err != nil { 96 | cmd.FatalIfErrorf(err) 97 | } 98 | } 99 | 100 | func envOrDefault(name, def string) string { 101 | if value := os.Getenv(name); value != "" { 102 | return value 103 | } else { 104 | return def 105 | } 106 | } 107 | -------------------------------------------------------------------------------- /cmd/api/models.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import "github.com/djthorpe/go-tablewriter" 4 | 5 | type ModelsCmd struct{} 6 | 7 | func (cmd *ModelsCmd) Run(ctx *Globals) error { 8 | if models, err := ctx.client.ListModels(ctx.ctx); err != nil { 9 | return err 10 | } else { 11 | return ctx.writer.Write(models, tablewriter.OptHeader()) 12 | } 13 | } 14 | -------------------------------------------------------------------------------- /cmd/api/ping.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | type PingCmd struct{} 4 | 5 | func (cmd *PingCmd) Run(ctx *Globals) error { 6 | if err := 
ctx.client.Ping(ctx.ctx); err != nil { 7 | return err 8 | } 9 | return ctx.writer.Writeln("OK") 10 | } 11 | -------------------------------------------------------------------------------- /cmd/whisper/delete.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | type DeleteCmd struct { 4 | Model string `arg:"" help:"Model id to delete"` 5 | } 6 | 7 | func (cmd *DeleteCmd) Run(ctx *Globals) error { 8 | if err := ctx.service.DeleteModelById(cmd.Model); err != nil { 9 | return err 10 | } 11 | return ModelsCmd{}.Run(ctx) 12 | } 13 | -------------------------------------------------------------------------------- /cmd/whisper/download.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "log" 5 | "time" 6 | 7 | // Packages 8 | "github.com/djthorpe/go-tablewriter" 9 | ) 10 | 11 | type DownloadCmd struct { 12 | Model string `arg:"" help:"Model to download"` 13 | } 14 | 15 | func (cmd *DownloadCmd) Run(ctx *Globals) error { 16 | t := time.Now() 17 | model, err := ctx.service.DownloadModel(ctx.ctx, cmd.Model, func(curBytes, totalBytes uint64) { 18 | if time.Since(t) > time.Second { 19 | pct := float64(curBytes) / float64(totalBytes) * 100 20 | log.Printf("Downloaded %.0f%%", pct) 21 | t = time.Now() 22 | } 23 | }) 24 | if err != nil { 25 | return err 26 | } 27 | return ctx.writer.Write(model, tablewriter.OptHeader()) 28 | } 29 | -------------------------------------------------------------------------------- /cmd/whisper/main.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "context" 5 | "log" 6 | "os" 7 | "os/signal" 8 | "path/filepath" 9 | "syscall" 10 | 11 | // Packages 12 | kong "github.com/alecthomas/kong" 13 | tablewriter "github.com/djthorpe/go-tablewriter" 14 | whisper "github.com/mutablelogic/go-whisper" 15 | ) 16 | 17 | type Globals struct { 18 | NoGPU bool 
`name:"nogpu" help:"Disable GPU acceleration"` 19 | Debug bool `name:"debug" help:"Enable debug output"` 20 | Dir string `name:"dir" help:"Path to model store, uses ${WHISPER_DIR} " default:"${WHISPER_DIR}"` 21 | 22 | // Writer, service and context 23 | writer *tablewriter.Writer 24 | service *whisper.Whisper 25 | ctx context.Context 26 | } 27 | 28 | type CLI struct { 29 | Globals 30 | Transcribe TranscribeCmd `cmd:"transcribe" help:"Transcribe from file"` 31 | Translate TranslateCmd `cmd:"translate" help:"Translate to english from file"` 32 | Models ModelsCmd `cmd:"models" help:"List models"` 33 | Download DownloadCmd `cmd:"download" help:"Download a model"` 34 | Delete DeleteCmd `cmd:"delete" help:"Delete a model"` 35 | Server ServerCmd `cmd:"server" help:"Run the whisper service"` 36 | Version VersionCmd `cmd:"version" help:"Print version information"` 37 | } 38 | 39 | func main() { 40 | // The name of the executable 41 | name, err := os.Executable() 42 | if err != nil { 43 | panic(err) 44 | } else { 45 | name = filepath.Base(name) 46 | } 47 | 48 | // Create a cli parser 49 | cli := CLI{} 50 | cmd := kong.Parse(&cli, 51 | kong.Name(name), 52 | kong.Description("speech transcription and translation service"), 53 | kong.UsageOnError(), 54 | kong.ConfigureHelp(kong.HelpOptions{Compact: true}), 55 | kong.Vars{ 56 | "WHISPER_DIR": dirEnvOrDefault(name), 57 | }, 58 | ) 59 | 60 | // Create a whisper server - set options 61 | opts := []whisper.Opt{ 62 | whisper.OptLog(func(line string) { 63 | if cli.Globals.Debug { 64 | log.Println(line) 65 | } 66 | }), 67 | } 68 | if cli.Globals.Debug { 69 | opts = append(opts, whisper.OptDebug()) 70 | } 71 | if cli.Globals.NoGPU { 72 | opts = append(opts, whisper.OptNoGPU()) 73 | } 74 | 75 | // Create directory if it doesn't exist 76 | if err := os.MkdirAll(cli.Globals.Dir, 0755); err != nil { 77 | cmd.FatalIfErrorf(err) 78 | return 79 | } 80 | 81 | // Create a whisper server - create 82 | service, err := whisper.New(cli.Globals.Dir, 
opts...) 83 | if err != nil { 84 | cmd.FatalIfErrorf(err) 85 | return 86 | } else { 87 | cli.Globals.service = service 88 | } 89 | defer service.Close() 90 | 91 | // Create a tablewriter object with text output 92 | writer := tablewriter.New(os.Stdout, tablewriter.OptOutputText()) 93 | cli.Globals.writer = writer 94 | 95 | // Create a context 96 | var cancel context.CancelFunc 97 | cli.Globals.ctx, cancel = signal.NotifyContext(context.Background(), os.Interrupt, syscall.SIGQUIT) 98 | defer cancel() 99 | 100 | // Run the command 101 | if err := cmd.Run(&cli.Globals); err != nil { 102 | cmd.FatalIfErrorf(err) 103 | } 104 | } 105 | 106 | func dirEnvOrDefault(name string) string { 107 | if dir := os.Getenv("WHISPER_DIR"); dir != "" { 108 | return dir 109 | } 110 | if dir, err := os.UserCacheDir(); err == nil { 111 | return filepath.Join(dir, name) 112 | } 113 | return os.TempDir() 114 | } 115 | -------------------------------------------------------------------------------- /cmd/whisper/models.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "errors" 5 | 6 | // Packages 7 | "github.com/djthorpe/go-tablewriter" 8 | ) 9 | 10 | type ModelsCmd struct{} 11 | 12 | func (ModelsCmd) Run(ctx *Globals) error { 13 | models := ctx.service.ListModels() 14 | if len(models) == 0 { 15 | return errors.New("no models found") 16 | } else { 17 | return ctx.writer.Write(ctx.service.ListModels(), tablewriter.OptHeader()) 18 | } 19 | } 20 | -------------------------------------------------------------------------------- /cmd/whisper/server.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "log" 5 | 6 | // Packages 7 | "github.com/mutablelogic/go-server/pkg/httpserver" 8 | "github.com/mutablelogic/go-whisper/pkg/api" 9 | ) 10 | 11 | type ServerCmd struct { 12 | Endpoint string `name:"endpoint" help:"Endpoint for the server" default:"/api/v1"` 13 | 
Listen string `name:"listen" help:"Listen address for the server" default:"localhost:8080"` 14 | } 15 | 16 | func (cmd *ServerCmd) Run(ctx *Globals) error { 17 | // Create a new HTTP server 18 | log.Println("Listen address", cmd.Listen) 19 | server, err := httpserver.New(cmd.Listen, api.RegisterEndpoints(cmd.Endpoint, ctx.service, nil), nil) 20 | if err != nil { 21 | return err 22 | } 23 | 24 | // Run the server until CTRL+C 25 | log.Println("Press CTRL+C to exit") 26 | return server.Run(ctx.ctx) 27 | } 28 | -------------------------------------------------------------------------------- /cmd/whisper/transcribe.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "bytes" 5 | "fmt" 6 | "os" 7 | "time" 8 | 9 | // Packages 10 | segmenter "github.com/mutablelogic/go-media/pkg/segmenter" 11 | whisper "github.com/mutablelogic/go-whisper" 12 | client "github.com/mutablelogic/go-whisper/pkg/client" 13 | schema "github.com/mutablelogic/go-whisper/pkg/schema" 14 | task "github.com/mutablelogic/go-whisper/pkg/task" 15 | 16 | // Namespace imports 17 | . 
"github.com/djthorpe/go-errors" 18 | ) 19 | 20 | //////////////////////////////////////////////////////////////////////////////// 21 | // TYPES 22 | 23 | type TranslateCmd struct { 24 | Model string `arg:"" help:"Model to use"` 25 | Path string `arg:"" help:"Path to audio file"` 26 | Format string `flag:"" help:"Output format" default:"text" enum:"json,verbose_json,text,vtt,srt"` 27 | Segments time.Duration `flag:"" help:"Segment size for reading audio file"` 28 | Api bool `flag:"api" help:"Use API for translation" default:"false"` 29 | } 30 | 31 | type TranscribeCmd struct { 32 | TranslateCmd 33 | Language string `flag:"language" help:"Language to transcribe" default:"auto"` 34 | } 35 | 36 | //////////////////////////////////////////////////////////////////////////////// 37 | // GLOBALS 38 | 39 | const ( 40 | remoteUrl = "https://api.openai.com/" 41 | ) 42 | 43 | //////////////////////////////////////////////////////////////////////////////// 44 | // PUBLIC METHODS 45 | 46 | func (cmd *TranscribeCmd) Run(app *Globals) error { 47 | if cmd.Api { 48 | return run_remote(app, cmd.Model, cmd.Path, cmd.Language, cmd.Format, cmd.Segments, false) 49 | } else { 50 | return run_local(app, cmd.Model, cmd.Path, cmd.Language, cmd.Format, cmd.Segments, false) 51 | } 52 | } 53 | 54 | func (cmd *TranslateCmd) Run(app *Globals) error { 55 | if cmd.Api { 56 | return run_remote(app, cmd.Model, cmd.Path, "", cmd.Format, cmd.Segments, true) 57 | } else { 58 | return run_local(app, cmd.Model, cmd.Path, "", cmd.Format, cmd.Segments, true) 59 | } 60 | } 61 | 62 | func run_local(app *Globals, model, path, language, format string, segments time.Duration, translate bool) error { 63 | // Get the model 64 | model_ := app.service.GetModelById(model) 65 | if model_ == nil { 66 | return ErrNotFound.With(model) 67 | } 68 | 69 | // Open the audio file 70 | f, err := os.Open(path) 71 | if err != nil { 72 | return err 73 | } 74 | defer f.Close() 75 | 76 | // Create a segmenter - read segments based 
on requested segment size 77 | segmenter, err := segmenter.NewReader(f, segments, whisper.SampleRate) 78 | if err != nil { 79 | return err 80 | } 81 | defer segmenter.Close() 82 | 83 | // Perform the transcription 84 | return app.service.WithModel(model_, func(taskctx *task.Context) error { 85 | // Transcribe or Translate 86 | taskctx.SetTranslate(translate) 87 | taskctx.SetDiarize(false) 88 | 89 | // Set language 90 | if language != "" { 91 | if err := taskctx.SetLanguage(language); err != nil { 92 | return err 93 | } 94 | } 95 | 96 | // Read samples and transcribe them 97 | if err := segmenter.DecodeFloat32(app.ctx, func(ts time.Duration, buf []float32) error { 98 | // Perform the transcription, return any errors 99 | return taskctx.Transcribe(app.ctx, ts, buf, func(segment *schema.Segment) { 100 | var buf bytes.Buffer 101 | switch format { 102 | case "json", "verbose_json": 103 | app.writer.Writeln(segment) 104 | case "srt": 105 | task.WriteSegmentSrt(&buf, segment) 106 | app.writer.Writeln(buf.String()) 107 | case "vtt": 108 | task.WriteSegmentVtt(&buf, segment) 109 | app.writer.Writeln(buf.String()) 110 | case "text": 111 | task.WriteSegmentText(&buf, segment) 112 | app.writer.Writeln(buf.String()) 113 | } 114 | }) 115 | }); err != nil { 116 | return err 117 | } 118 | 119 | return nil 120 | }) 121 | } 122 | 123 | func run_remote(app *Globals, model, path, language, format string, segments time.Duration, translate bool) error { 124 | // Open the audio file 125 | f, err := os.Open(path) 126 | if err != nil { 127 | return err 128 | } 129 | defer f.Close() 130 | 131 | // Create a client for the whisper service 132 | remote, err := client.New(remoteUrl) 133 | if err != nil { 134 | return err 135 | } 136 | 137 | // Create a segmenter - read segments based on requested segment size 138 | segmenter, err := segmenter.NewReader(f, segments, whisper.SampleRate) 139 | if err != nil { 140 | return err 141 | } 142 | defer segmenter.Close() 143 | 144 | // Read samples and 
transcribe or translate them 145 | return segmenter.DecodeFloat32(app.ctx, func(ts time.Duration, buf []float32) error { 146 | // Make a WAV file from the float32 samples 147 | var wav bytes.Buffer 148 | 149 | if translate { 150 | translation, err := remote.Translate(app.ctx, model, &wav) 151 | if err != nil { 152 | return err 153 | } 154 | fmt.Println(translation) 155 | } else { 156 | transcription, err := remote.Transcribe(app.ctx, model, &wav, client.OptLanguage(language)) 157 | if err != nil { 158 | return err 159 | } 160 | fmt.Println(transcription) 161 | } 162 | 163 | return nil 164 | }) 165 | } 166 | -------------------------------------------------------------------------------- /cmd/whisper/version.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "runtime" 5 | 6 | // Packages 7 | "github.com/mutablelogic/go-whisper/pkg/version" 8 | ) 9 | 10 | type VersionCmd struct{} 11 | 12 | func (cmd *VersionCmd) Run(ctx *Globals) error { 13 | type kv struct { 14 | Key string `json:"name"` 15 | Value string `json:"value" writer:",width:60"` 16 | } 17 | var metadata = []kv{} 18 | if version.GitSource != "" { 19 | metadata = append(metadata, kv{"source", version.GitSource}) 20 | } 21 | if version.GitBranch != "" { 22 | metadata = append(metadata, kv{"branch", version.GitBranch}) 23 | } 24 | if version.GitTag != "" { 25 | metadata = append(metadata, kv{"tag", version.GitTag}) 26 | } 27 | if version.GitHash != "" { 28 | metadata = append(metadata, kv{"hash", version.GitHash}) 29 | } 30 | if version.GoBuildTime != "" { 31 | metadata = append(metadata, kv{"build time", version.GoBuildTime}) 32 | } 33 | metadata = append(metadata, kv{"go version", runtime.Version()}) 34 | metadata = append(metadata, kv{"os", runtime.GOOS + "/" + runtime.GOARCH}) 35 | 36 | return ctx.writer.Write(metadata) 37 | } 38 | -------------------------------------------------------------------------------- /doc/API.md: 
-------------------------------------------------------------------------------- 1 | # Whisper server API 2 | 3 | Based on OpenAPI docs 4 | 5 | ## Ping 6 | 7 | ```html 8 | GET /v1/ping 9 | ``` 10 | 11 | Returns an OK status to indicate the API is up and running. 12 | 13 | ## Models 14 | 15 | ### List Models 16 | 17 | ```html 18 | GET /v1/models 19 | ``` 20 | 21 | Returns a list of available models. Example response: 22 | 23 | ```json 24 | { 25 | "object": "list", 26 | "models": [ 27 | { 28 | "id": "ggml-large-v3", 29 | "object": "model", 30 | "path": "ggml-large-v3.bin", 31 | "created": 1722090121 32 | }, 33 | { 34 | "id": "ggml-medium-q5_0", 35 | "object": "model", 36 | "path": "ggml-medium-q5_0.bin", 37 | "created": 1722081999 38 | } 39 | ] 40 | } 41 | ``` 42 | 43 | ### Download Model 44 | 45 | ```html 46 | POST /v1/models 47 | POST /v1/models?stream={bool} 48 | ``` 49 | 50 | The request should be an application/json, multipart/form-data or application/x-www-form-urlencoded request with the following fields: 51 | 52 | ```json 53 | { 54 | "path": "ggml-large-v3.bin" 55 | } 56 | ``` 57 | 58 | Downloads a model from the remote Hugging Face repository. If the optional `stream` argument is true, 59 | the progress is streamed back to the client as a series of [text/event-stream](https://html.spec.whatwg.org/multipage/server-sent-events.html) events. 60 | 61 | If the model is already downloaded, a 200 OK status is returned. If the model was downloaded, a 201 Created status is returned.
62 | Example streaming response: 63 | 64 | ```text 65 | event: ping 66 | 67 | event: progress 68 | data: {"status":"downloading ggml-medium-q5_0.bin","total":539212467,"completed":10159256} 69 | 70 | event: progress 71 | data: {"status":"downloading ggml-medium-q5_0.bin","total":539212467,"completed":21895036} 72 | 73 | event: progress 74 | data: {"status":"downloading ggml-medium-q5_0.bin","total":539212467,"completed":33540592} 75 | 76 | event: ok 77 | data: {"id":"ggml-medium-q5_0","object":"model","path":"ggml-medium-q5_0.bin","created":1722411778} 78 | ``` 79 | 80 | 81 | ### Delete Model 82 | 83 | ```html 84 | DELETE /v1/models/{model-id} 85 | ``` 86 | 87 | Deletes a model by its ID. If the model is deleted, a 200 OK status is returned. 88 | 89 | ## Transcription and translation with file upload 90 | 91 | ### Transcription 92 | 93 | This endpoint's purpose is to transcribe media files into text, in the language of the media file. 94 | 95 | ```html 96 | POST /v1/audio/transcriptions 97 | POST /v1/audio/transcriptions?stream={bool} 98 | ``` 99 | 100 | The request should be a multipart/form-data request with the following fields: 101 | 102 | ```json 103 | { 104 | "model": "", 105 | "file": "", 106 | "language": "", 107 | "response_format": "" 108 | } 109 | ``` 110 | 111 | Transcribes audio into the input language. 112 | 113 | `file` (required) The audio file object (not file name) to transcribe. This can be audio or video, and the format is auto-detected. The "best" audio stream is selected from the file, and the audio is converted to 16 kHz mono PCM format during transcription. 114 | 115 | `model` (required) ID of the model to use. This should have previously been downloaded. 116 | 117 | `language` (optional) The language of the input audio in ISO-639-1 format. If not set, then the language is auto-detected. 118 | 119 | `response_format` (optional, defaults to `json`).
The format of the transcript output, in one of these options: json, text, srt, verbose_json, or vtt. 120 | 121 | If the optional `stream` argument is true, the segments of the transcription are returned as a series of [text/event-stream](https://html.spec.whatwg.org/multipage/server-sent-events.html) events. Otherwise, the full transcription is returned in the response body. 122 | 123 | Example streaming response: 124 | 125 | ```text 126 | event: ping 127 | 128 | event: task 129 | data: {"task":"translate","language":"en","duration":62.6155} 130 | 131 | event: ping 132 | 133 | event: segment 134 | data: {"id":0,"start":0,"end":14.2,"text":" What do you think about new media like Facebook, emails and cell phones?"} 135 | 136 | event: segment 137 | data: {"id":1,"start":14.2,"end":18.2,"text":" The new media make our life much easier."} 138 | 139 | event: segment 140 | data: {"id":2,"start":18.2,"end":23,"text":" You can get in touch with people much faster than before."} 141 | 142 | event: ok 143 | ``` 144 | 145 | ### Translation 146 | 147 | This is the same as transcription (above) except that the `language` parameter is always set to 'en', to translate the audio into English. 148 | 149 | ```html 150 | POST /v1/audio/translations 151 | POST /v1/audio/translations?stream={bool} 152 | ``` 153 | 154 | ### Diarization 155 | 156 | To diarize an English-language audio file, use the following endpoint: 157 | 158 | ```html 159 | POST /v1/audio/diarize 160 | POST /v1/audio/diarize?stream={bool} 161 | ``` 162 | 163 | The segments returned include a "speaker_turn" field which indicates that the segment is a new speaker. It requires a separate download of a [diarization model](https://huggingface.co/akashmjn/tinydiarize-whisper.cpp).
164 | -------------------------------------------------------------------------------- /doc/build.md: -------------------------------------------------------------------------------- 1 | # Notes on building 2 | 3 | ## Package Config 4 | 5 | libwhisper.pc 6 | 7 | ```pkg-config 8 | prefix=/Users/djt/Projects/go-whisper/ 9 | 10 | Name: libwhisper 11 | Description: Whisper is a C/C++ library for speech transcription, translation and diarization. 12 | Version: 0.0.0 13 | Cflags: -I${prefix}/third_party/whisper.cpp/include -I${prefix}/third_party/whisper.cpp/ggml/include 14 | Libs: -L${prefix}/third_party/whisper.cpp -lwhisper -lggml -lm -lstdc++ 15 | ``` 16 | 17 | libwhisper-darwin.pc 18 | 19 | ```pkg-config 20 | prefix=/Users/djt/Projects/go-whisper/ 21 | 22 | Name: libwhisper-darwin 23 | Description: Whisper is a C/C++ library for speech transcription, translation and diarization. 24 | Version: 0.0.0 25 | Libs: -framework Accelerate -framework Metal -framework Foundation -framework CoreGraphics 26 | ``` 27 | 28 | I don't know what the Windows one should be as I don't have a Windows machine. 29 | 30 | ## FFmpeg 31 | 32 | Required for decoding media files into audio which is suitable for audio detection and transcription. 33 | 34 | ### MacOS 35 | 36 | On Macintosh with homebrew, for example: 37 | 38 | ```bash 39 | brew install ffmpeg@6 make 40 | brew link ffmpeg@6 41 | ``` 42 | 43 | ### Debian 44 | 45 | If you're using Debian you may not be able to get ffmpeg 6 unless you first add the deb-multimedia repository.
You can do this by adding the following line to your /etc/apt/sources.list file: 46 | 47 | Add the repository as privileged user: 48 | 49 | ```bash 50 | echo "deb https://www.deb-multimedia.org $(lsb_release -sc) main" >> /etc/apt/sources.list 51 | apt update -y -oAcquire::AllowInsecureRepositories=true 52 | apt install -y --force-yes deb-multimedia-keyring 53 | apt install -y libavcodec-dev libavdevice-dev libavfilter-dev libavutil-dev libswscale-dev libswresample-dev 54 | ``` 55 | 56 | ### Ubuntu 22.04 57 | 58 | Easier with Ubuntu! Installing FFmpeg 6.1 libraries: 59 | 60 | ```bash 61 | add-apt-repository -y ppa:ubuntuhandbook1/ffmpeg6 62 | apt-get update 63 | apt-get install -y libavcodec-dev libavdevice-dev libavfilter-dev libavutil-dev libswscale-dev libswresample-dev 64 | ``` 65 | -------------------------------------------------------------------------------- /etc/Dockerfile: -------------------------------------------------------------------------------- 1 | ARG BASE_DEV_CONTAINER=ubuntu:22.04 2 | ARG BASE_RUN_CONTAINER=ubuntu:22.04 3 | ARG GO_VERSION=1.24.3 4 | ARG ARCH 5 | ARG OS 6 | 7 | # Setup build container 8 | FROM ${BASE_DEV_CONTAINER} AS build 9 | ARG GO_VERSION 10 | ARG ARCH 11 | ARG OS 12 | 13 | RUN apt-get -y update \ 14 | && apt-get -y install software-properties-common build-essential cmake git pkg-config curl \ 15 | && add-apt-repository -y ppa:ubuntuhandbook1/ffmpeg6 \ 16 | && apt-get -y update \ 17 | && apt-get -y install libavformat-dev libavcodec-dev libavdevice-dev libavfilter-dev libavutil-dev libswscale-dev libswresample-dev 18 | 19 | # Install go 20 | RUN curl -sL https://golang.org/dl/go${GO_VERSION}.${OS}-${ARCH}.tar.gz | tar -C /usr/local -xz 21 | ENV PATH=$PATH:/usr/local/go/bin 22 | 23 | # Copy source 24 | WORKDIR /app 25 | COPY . . 
26 | 27 | # Make whisper-server 28 | RUN make -j$(nproc) 29 | 30 | # Setup runtime container 31 | FROM ${BASE_RUN_CONTAINER} AS runtime 32 | RUN apt-get -y update \ 33 | && apt-get -y install software-properties-common \ 34 | && add-apt-repository -y ppa:ubuntuhandbook1/ffmpeg6 \ 35 | && apt-get -y update \ 36 | && apt-get -y install libavformat60 libavcodec60 libavdevice60 libavfilter9 libavutil58 libswscale7 libswresample4 \ 37 | && apt -y remove software-properties-common \ 38 | && apt -y autoremove 39 | COPY --from=build --chmod=755 /app/build/whisper /usr/local/bin/whisper 40 | COPY --from=build --chmod=755 /app/build/api /usr/local/bin/api 41 | COPY --chmod=755 etc/entrypoint.sh . 42 | 43 | # Entrypoint when running the server 44 | ENTRYPOINT [ "/entrypoint.sh" ] 45 | STOPSIGNAL SIGQUIT 46 | EXPOSE 80 443 47 | VOLUME [ "/data" ] 48 | -------------------------------------------------------------------------------- /etc/Dockerfile.cuda: -------------------------------------------------------------------------------- 1 | ARG BASE_TAG=1.0.2 2 | ARG BASE_DEV_CONTAINER=ghcr.io/mutablelogic/cuda-dev:${BASE_TAG} 3 | ARG BASE_RUN_CONTAINER=ghcr.io/mutablelogic/cuda-rt:${BASE_TAG} 4 | ARG CUDA_DOCKER_ARCH=all 5 | ARG GGML_CUDA=1 6 | ARG GO_VERSION=1.24.3 7 | ARG ARCH 8 | ARG OS 9 | 10 | # Setup build container 11 | FROM ${BASE_DEV_CONTAINER} AS build 12 | ARG CUDA_DOCKER_ARCH 13 | ARG GGML_CUDA 14 | ARG GO_VERSION 15 | ARG ARCH 16 | ARG OS 17 | 18 | RUN apt-get -y update \ 19 | && apt-get -y install software-properties-common build-essential cmake git pkg-config curl libgomp1 \ 20 | && add-apt-repository -y ppa:ubuntuhandbook1/ffmpeg6 \ 21 | && apt-get -y update \ 22 | && apt-get -y install libavformat-dev libavcodec-dev libavdevice-dev libavfilter-dev libavutil-dev libswscale-dev libswresample-dev 23 | 24 | # Install go 25 | RUN curl -sL https://golang.org/dl/go${GO_VERSION}.${OS}-${ARCH}.tar.gz | tar -C /usr/local -xz 26 | ENV PATH=$PATH:/usr/local/go/bin 27 | 28 | # 
Copy source 29 | WORKDIR /app 30 | COPY . . 31 | 32 | # Make whisper-server 33 | ENV CUDA_DOCKER_ARCH=${CUDA_DOCKER_ARCH} 34 | ENV GGML_CUDA=${GGML_CUDA} 35 | RUN make -j$(nproc) 36 | 37 | # Setup runtime container 38 | FROM ${BASE_RUN_CONTAINER} AS runtime 39 | RUN apt-get -y update \ 40 | && apt-get -y install software-properties-common libgomp1 \ 41 | && add-apt-repository -y ppa:ubuntuhandbook1/ffmpeg6 \ 42 | && apt-get -y update \ 43 | && apt-get -y install libavformat60 libavcodec60 libavdevice60 libavfilter9 libavutil58 libswscale7 libswresample4 \ 44 | && apt -y remove software-properties-common \ 45 | && apt -y autoremove 46 | COPY --from=build --chmod=755 /app/build/whisper /usr/local/bin/whisper 47 | COPY --from=build --chmod=755 /app/build/api /usr/local/bin/api 48 | COPY --chmod=755 etc/entrypoint.sh . 49 | 50 | # Entrypoint when running the server 51 | ENTRYPOINT [ "/entrypoint.sh" ] 52 | STOPSIGNAL SIGQUIT 53 | EXPOSE 80 54 | VOLUME [ "/data" ] 55 | 56 | -------------------------------------------------------------------------------- /etc/Dockerfile.vulkan: -------------------------------------------------------------------------------- 1 | ARG BASE_DEV_CONTAINER=ubuntu:22.04 2 | ARG BASE_RUN_CONTAINER=ubuntu:22.04 3 | ARG GO_VERSION=1.24.3 4 | ARG GGML_VULKAN=1 5 | ARG ARCH 6 | ARG OS 7 | 8 | # Setup build container 9 | FROM ${BASE_DEV_CONTAINER} AS build 10 | ARG GO_VERSION 11 | ARG ARCH 12 | ARG OS 13 | 14 | RUN apt-get -y update \ 15 | && apt-get -y install software-properties-common build-essential cmake git pkg-config curl \ 16 | && add-apt-repository -y ppa:ubuntuhandbook1/ffmpeg6 \ 17 | && apt-get -y update \ 18 | && apt-get -y install libavformat-dev libavcodec-dev libavdevice-dev libavfilter-dev libavutil-dev libswscale-dev libswresample-dev 19 | 20 | # Install go 21 | RUN curl -sL https://golang.org/dl/go${GO_VERSION}.${OS}-${ARCH}.tar.gz | tar -C /usr/local -xz 22 | ENV PATH=$PATH:/usr/local/go/bin 23 | 24 | # Copy source 25 | WORKDIR 
#!/bin/bash
# Container entrypoint: with no arguments, run the whisper server against the
# persistent /data folder; otherwise run the given command instead.
set -e
umask 022

if [ -z "$1" ]; then
	# Create the persistent data folder if it doesn't exist
	install -d -m 0755 /data || exit 1

	# Run as a server. exec replaces this shell with the server process so the
	# container's stop signal is delivered to the server and not to bash.
	exec /usr/local/bin/whisper server --dir /data --listen :80 --endpoint /v1
else
	exec "$@"
fi
| 18 | require ( 19 | github.com/Azure/go-ntlmssp v0.0.0-20221128193559-754e69321358 // indirect 20 | github.com/davecgh/go-spew v1.1.1 // indirect 21 | github.com/djthorpe/go-pg v1.0.6 // indirect 22 | github.com/go-asn1-ber/asn1-ber v1.5.8-0.20250403174932-29230038a667 // indirect 23 | github.com/go-audio/audio v1.0.0 // indirect 24 | github.com/go-audio/riff v1.0.0 // indirect 25 | github.com/go-ldap/ldap/v3 v3.4.11 // indirect 26 | github.com/google/uuid v1.6.0 // indirect 27 | github.com/jackc/pgpassfile v1.0.0 // indirect 28 | github.com/jackc/pgservicefile v0.0.0-20240606120523-5a60cdf6a761 // indirect 29 | github.com/jackc/pgx/v5 v5.7.3 // indirect 30 | github.com/jackc/puddle/v2 v2.2.2 // indirect 31 | github.com/mattn/go-runewidth v0.0.16 // indirect 32 | github.com/pmezard/go-difflib v1.0.0 // indirect 33 | github.com/rivo/uniseg v0.4.7 // indirect 34 | github.com/yinyin/go-ldap-schema-parser v0.0.0-20190716182935-542aadd3dcb5 // indirect 35 | golang.org/x/crypto v0.37.0 // indirect 36 | golang.org/x/exp v0.0.0-20250506013437-ce4c2cf36ca6 // indirect 37 | golang.org/x/sync v0.14.0 // indirect 38 | golang.org/x/sys v0.32.0 // indirect 39 | golang.org/x/term v0.31.0 // indirect 40 | golang.org/x/text v0.25.0 // indirect 41 | gopkg.in/yaml.v3 v3.0.1 // indirect 42 | ) 43 | -------------------------------------------------------------------------------- /go.sum: -------------------------------------------------------------------------------- 1 | dario.cat/mergo v1.0.0 h1:AGCNq9Evsj31mOgNPcLyXc+4PNABt905YmuqPYYpBWk= 2 | dario.cat/mergo v1.0.0/go.mod h1:uNxQE+84aUszobStD9th8a29P2fMDhsBdgRYvZOxGmk= 3 | github.com/Azure/go-ansiterm v0.0.0-20210617225240-d185dfc1b5a1 h1:UQHMgLO+TxOElx5B5HZ4hJQsoJ/PvUvKRhJHDQXO8P8= 4 | github.com/Azure/go-ansiterm v0.0.0-20210617225240-d185dfc1b5a1/go.mod h1:xomTg63KZ2rFqZQzSB4Vz2SUXa1BpHTVz9L5PTmPC4E= 5 | github.com/Azure/go-ntlmssp v0.0.0-20221128193559-754e69321358 h1:mFRzDkZVAjdal+s7s0MwaRv9igoPqLRdzOLzw/8Xvq8= 6 | 
github.com/Azure/go-ntlmssp v0.0.0-20221128193559-754e69321358/go.mod h1:chxPXzSsl7ZWRAuOIE23GDNzjWuZquvFlgA8xmpunjU= 7 | github.com/Microsoft/go-winio v0.6.2 h1:F2VQgta7ecxGYO8k3ZZz3RS8fVIXVxONVUPlNERoyfY= 8 | github.com/Microsoft/go-winio v0.6.2/go.mod h1:yd8OoFMLzJbo9gZq8j5qaps8bJ9aShtEA8Ipt1oGCvU= 9 | github.com/alecthomas/assert/v2 v2.11.0 h1:2Q9r3ki8+JYXvGsDyBXwH3LcJ+WK5D0gc5E8vS6K3D0= 10 | github.com/alecthomas/assert/v2 v2.11.0/go.mod h1:Bze95FyfUr7x34QZrjL+XP+0qgp/zg8yS+TtBj1WA3k= 11 | github.com/alecthomas/kong v1.10.0 h1:8K4rGDpT7Iu+jEXCIJUeKqvpwZHbsFRoebLbnzlmrpw= 12 | github.com/alecthomas/kong v1.10.0/go.mod h1:p2vqieVMeTAnaC83txKtXe8FLke2X07aruPWXyMPQrU= 13 | github.com/alecthomas/repr v0.4.0 h1:GhI2A8MACjfegCPVq9f1FLvIBS+DrQ2KQBFZP1iFzXc= 14 | github.com/alecthomas/repr v0.4.0/go.mod h1:Fr0507jx4eOXV7AlPV6AVZLYrLIuIeSOWtW57eE/O/4= 15 | github.com/alexbrainman/sspi v0.0.0-20231016080023-1a75b4708caa h1:LHTHcTQiSGT7VVbI0o4wBRNQIgn917usHWOd6VAffYI= 16 | github.com/alexbrainman/sspi v0.0.0-20231016080023-1a75b4708caa/go.mod h1:cEWa1LVoE5KvSD9ONXsZrj0z6KqySlCCNKHlLzbqAt4= 17 | github.com/cenkalti/backoff/v4 v4.2.1 h1:y4OZtCnogmCPw98Zjyt5a6+QwPLGkiQsYW5oUqylYbM= 18 | github.com/cenkalti/backoff/v4 v4.2.1/go.mod h1:Y3VNntkOUPxTVeUxJ/G5vcM//AlwfmyYozVcomhLiZE= 19 | github.com/containerd/containerd v1.7.27 h1:yFyEyojddO3MIGVER2xJLWoCIn+Up4GaHFquP7hsFII= 20 | github.com/containerd/containerd v1.7.27/go.mod h1:xZmPnl75Vc+BLGt4MIfu6bp+fy03gdHAn9bz+FreFR0= 21 | github.com/containerd/log v0.1.0 h1:TCJt7ioM2cr/tfR8GPbGf9/VRAX8D2B4PjzCpfX540I= 22 | github.com/containerd/log v0.1.0/go.mod h1:VRRf09a7mHDIRezVKTRCrOq78v577GXq3bSa3EhrzVo= 23 | github.com/containerd/platforms v0.2.1 h1:zvwtM3rz2YHPQsF2CHYM8+KtB5dvhISiXh5ZpSBQv6A= 24 | github.com/containerd/platforms v0.2.1/go.mod h1:XHCb+2/hzowdiut9rkudds9bE5yJ7npe7dG/wG+uFPw= 25 | github.com/cpuguy83/dockercfg v0.3.2 h1:DlJTyZGBDlXqUZ2Dk2Q3xHs/FtnooJJVaad2S9GKorA= 26 | github.com/cpuguy83/dockercfg v0.3.2/go.mod 
h1:sugsbF4//dDlL/i+S+rtpIWp+5h0BHJHfjj5/jFyUJc= 27 | github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= 28 | github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= 29 | github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= 30 | github.com/distribution/reference v0.6.0 h1:0IXCQ5g4/QMHHkarYzh5l+u8T3t73zM5QvfrDyIgxBk= 31 | github.com/distribution/reference v0.6.0/go.mod h1:BbU0aIcezP1/5jX/8MP0YiH4SdvB5Y4f/wlDRiLyi3E= 32 | github.com/djthorpe/go-errors v1.0.3 h1:GZeMPkC1mx2vteXLI/gvxZS0Ee9zxzwD1mcYyKU5jD0= 33 | github.com/djthorpe/go-errors v1.0.3/go.mod h1:HtfrZnMd6HsX75Mtbv9Qcnn0BqOrrFArvCaj3RMnZhY= 34 | github.com/djthorpe/go-pg v1.0.6 h1:v/ZcMhtgULa301LPYyUEo7fJJwuseOaAdpCyKid0qbU= 35 | github.com/djthorpe/go-pg v1.0.6/go.mod h1:XHl/w8+66Hs746nOYd+gdjqPImNuLVZ5UsXLI47rb4c= 36 | github.com/djthorpe/go-tablewriter v0.0.11 h1:CimrEsiAG/KN2C8bTDC85RsZTsP2s5a7m7dqhaGFTv0= 37 | github.com/djthorpe/go-tablewriter v0.0.11/go.mod h1:ednj4tB4GHpenQL6NtDrbQW9VXyDdbIVTSH2693B+lI= 38 | github.com/docker/docker v27.1.1+incompatible h1:hO/M4MtV36kzKldqnA37IWhebRA+LnqqcqDja6kVaKY= 39 | github.com/docker/docker v27.1.1+incompatible/go.mod h1:eEKB0N0r5NX/I1kEveEz05bcu8tLC/8azJZsviup8Sk= 40 | github.com/docker/go-connections v0.5.0 h1:USnMq7hx7gwdVZq1L49hLXaFtUdTADjXGp+uj1Br63c= 41 | github.com/docker/go-connections v0.5.0/go.mod h1:ov60Kzw0kKElRwhNs9UlUHAE/F9Fe6GLaXnqyDdmEXc= 42 | github.com/docker/go-units v0.5.0 h1:69rxXcBk27SvSaaxTtLh/8llcHD8vYHT7WSdRZ/jvr4= 43 | github.com/docker/go-units v0.5.0/go.mod h1:fgPhTUdO+D/Jk86RDLlptpiXQzgHJF7gydDDbaIK4Dk= 44 | github.com/felixge/httpsnoop v1.0.4 h1:NFTV2Zj1bL4mc9sqWACXbQFVBBg2W3GPvqp8/ESS2Wg= 45 | github.com/felixge/httpsnoop v1.0.4/go.mod h1:m8KPJKqk1gH5J9DgRY2ASl2lWCfGKXixSwevea8zH2U= 46 | github.com/go-asn1-ber/asn1-ber v1.5.8-0.20250403174932-29230038a667 h1:BP4M0CvQ4S3TGls2FvczZtj5Re/2ZzkV9VwqPHH/3Bo= 47 | 
github.com/go-asn1-ber/asn1-ber v1.5.8-0.20250403174932-29230038a667/go.mod h1:hEBeB/ic+5LoWskz+yKT7vGhhPYkProFKoKdwZRWMe0= 48 | github.com/go-audio/audio v1.0.0 h1:zS9vebldgbQqktK4H0lUqWrG8P0NxCJVqcj7ZpNnwd4= 49 | github.com/go-audio/audio v1.0.0/go.mod h1:6uAu0+H2lHkwdGsAY+j2wHPNPpPoeg5AaEFh9FlA+Zs= 50 | github.com/go-audio/riff v1.0.0 h1:d8iCGbDvox9BfLagY94fBynxSPHO80LmZCaOsmKxokA= 51 | github.com/go-audio/riff v1.0.0/go.mod h1:l3cQwc85y79NQFCRB7TiPoNiaijp6q8Z0Uv38rVG498= 52 | github.com/go-audio/wav v1.1.0 h1:jQgLtbqBzY7G+BM8fXF7AHUk1uHUviWS4X39d5rsL2g= 53 | github.com/go-audio/wav v1.1.0/go.mod h1:mpe9qfwbScEbkd8uybLuIpTgHyrISw/OTuvjUW2iGtE= 54 | github.com/go-ldap/ldap/v3 v3.4.11 h1:4k0Yxweg+a3OyBLjdYn5OKglv18JNvfDykSoI8bW0gU= 55 | github.com/go-ldap/ldap/v3 v3.4.11/go.mod h1:bY7t0FLK8OAVpp/vV6sSlpz3EQDGcQwc8pF0ujLgKvM= 56 | github.com/go-ldap/ldif v0.0.0-20180918085934-3491d58cdb60/go.mod h1:blBiFTfuR1Jrw4xZ7t3xuNObLzzBG+ce+5W/bEYwJq0= 57 | github.com/go-logr/logr v1.4.2 h1:6pFjapn8bFcIbiKo3XT4j/BhANplGihG6tvd+8rYgrY= 58 | github.com/go-logr/logr v1.4.2/go.mod h1:9T104GzyrTigFIr8wt5mBrctHMim0Nb2HLGrmQ40KvY= 59 | github.com/go-logr/stdr v1.2.2 h1:hSWxHoqTgW2S2qGc0LTAI563KZ5YKYRhT3MFKZMbjag= 60 | github.com/go-logr/stdr v1.2.2/go.mod h1:mMo/vtBO5dYbehREoey6XUKy/eSumjCCveDpRre4VKE= 61 | github.com/go-ole/go-ole v1.2.6 h1:/Fpf6oFPoeFik9ty7siob0G6Ke8QvQEuVcuChpwXzpY= 62 | github.com/go-ole/go-ole v1.2.6/go.mod h1:pprOEPIfldk/42T2oK7lQ4v4JSDwmV0As9GaiUsvbm0= 63 | github.com/gogo/protobuf v1.3.2 h1:Ov1cvc58UF3b5XjBnZv7+opcTcQFZebYjWzi34vdm4Q= 64 | github.com/gogo/protobuf v1.3.2/go.mod h1:P1XiOD3dCwIKUDQYPy72D8LYyHL2YPYrpS2s69NZV8Q= 65 | github.com/golang/freetype v0.0.0-20170609003504-e2365dfdc4a0 h1:DACJavvAHhabrF08vX0COfcOBJRhZ8lUbR+ZWIs0Y5g= 66 | github.com/golang/freetype v0.0.0-20170609003504-e2365dfdc4a0/go.mod h1:E/TSTwGwJL78qG/PmXZO1EjYhfJinVAhrmmHX6Z8B9k= 67 | github.com/google/uuid v1.6.0 h1:NIvaJDMOsjHA8n1jAhLSgzrAzy1Hgr+hNrb57e+94F0= 68 | 
github.com/google/uuid v1.6.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= 69 | github.com/hashicorp/go-uuid v1.0.3 h1:2gKiV6YVmrJ1i2CKKa9obLvRieoRGviZFL26PcT/Co8= 70 | github.com/hashicorp/go-uuid v1.0.3/go.mod h1:6SBZvOh/SIDV7/2o3Jml5SYk/TvGqwFJ/bN7x4byOro= 71 | github.com/hexops/gotextdiff v1.0.3 h1:gitA9+qJrrTCsiCl7+kh75nPqQt1cx4ZkudSTLoUqJM= 72 | github.com/hexops/gotextdiff v1.0.3/go.mod h1:pSWU5MAI3yDq+fZBTazCSJysOMbxWL1BSow5/V2vxeg= 73 | github.com/jackc/pgpassfile v1.0.0 h1:/6Hmqy13Ss2zCq62VdNG8tM1wchn8zjSGOBJ6icpsIM= 74 | github.com/jackc/pgpassfile v1.0.0/go.mod h1:CEx0iS5ambNFdcRtxPj5JhEz+xB6uRky5eyVu/W2HEg= 75 | github.com/jackc/pgservicefile v0.0.0-20240606120523-5a60cdf6a761 h1:iCEnooe7UlwOQYpKFhBabPMi4aNAfoODPEFNiAnClxo= 76 | github.com/jackc/pgservicefile v0.0.0-20240606120523-5a60cdf6a761/go.mod h1:5TJZWKEWniPve33vlWYSoGYefn3gLQRzjfDlhSJ9ZKM= 77 | github.com/jackc/pgx/v5 v5.7.3 h1:PO1wNKj/bTAwxSJnO1Z4Ai8j4magtqg2SLNjEDzcXQo= 78 | github.com/jackc/pgx/v5 v5.7.3/go.mod h1:ncY89UGWxg82EykZUwSpUKEfccBGGYq1xjrOpsbsfGQ= 79 | github.com/jackc/puddle/v2 v2.2.2 h1:PR8nw+E/1w0GLuRFSmiioY6UooMp6KJv0/61nB7icHo= 80 | github.com/jackc/puddle/v2 v2.2.2/go.mod h1:vriiEXHvEE654aYKXXjOvZM39qJ0q+azkZFrfEOc3H4= 81 | github.com/jcmturner/aescts/v2 v2.0.0 h1:9YKLH6ey7H4eDBXW8khjYslgyqG2xZikXP0EQFKrle8= 82 | github.com/jcmturner/aescts/v2 v2.0.0/go.mod h1:AiaICIRyfYg35RUkr8yESTqvSy7csK90qZ5xfvvsoNs= 83 | github.com/jcmturner/dnsutils/v2 v2.0.0 h1:lltnkeZGL0wILNvrNiVCR6Ro5PGU/SeBvVO/8c/iPbo= 84 | github.com/jcmturner/dnsutils/v2 v2.0.0/go.mod h1:b0TnjGOvI/n42bZa+hmXL+kFJZsFT7G4t3HTlQ184QM= 85 | github.com/jcmturner/gofork v1.7.6 h1:QH0l3hzAU1tfT3rZCnW5zXl+orbkNMMRGJfdJjHVETg= 86 | github.com/jcmturner/gofork v1.7.6/go.mod h1:1622LH6i/EZqLloHfE7IeZ0uEJwMSUyQ/nDd82IeqRo= 87 | github.com/jcmturner/goidentity/v6 v6.0.1 h1:VKnZd2oEIMorCTsFBnJWbExfNN7yZr3EhJAxwOkZg6o= 88 | github.com/jcmturner/goidentity/v6 v6.0.1/go.mod 
h1:X1YW3bgtvwAXju7V3LCIMpY0Gbxyjn/mY9zx4tFonSg= 89 | github.com/jcmturner/gokrb5/v8 v8.4.4 h1:x1Sv4HaTpepFkXbt2IkL29DXRf8sOfZXo8eRKh687T8= 90 | github.com/jcmturner/gokrb5/v8 v8.4.4/go.mod h1:1btQEpgT6k+unzCwX1KdWMEwPPkkgBtP+F6aCACiMrs= 91 | github.com/jcmturner/rpc/v2 v2.0.3 h1:7FXXj8Ti1IaVFpSAziCZWNzbNuZmnvw/i6CqLNdWfZY= 92 | github.com/jcmturner/rpc/v2 v2.0.3/go.mod h1:VUJYCIDm3PVOEHw8sgt091/20OJjskO/YJki3ELg/Hc= 93 | github.com/klauspost/compress v1.17.4 h1:Ej5ixsIri7BrIjBkRZLTo6ghwrEtHFk7ijlczPW4fZ4= 94 | github.com/klauspost/compress v1.17.4/go.mod h1:/dCuZOvVtNoHsyb+cuJD3itjs3NbnF6KH9zAO4BDxPM= 95 | github.com/kr/pretty v0.3.1 h1:flRD4NNwYAUpkphVc1HcthR4KEIFJ65n8Mw5qdRn3LE= 96 | github.com/kr/pretty v0.3.1/go.mod h1:hoEshYVHaxMs3cyo3Yncou5ZscifuDolrwPKZanG3xk= 97 | github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY= 98 | github.com/kr/text v0.2.0/go.mod h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE= 99 | github.com/llgcode/draw2d v0.0.0-20240627062922-0ed1ff131195 h1:Vdz2cBh5Fw2MYHWi3ED2PraDQaWEUhNCr1XFHrP4N5A= 100 | github.com/llgcode/draw2d v0.0.0-20240627062922-0ed1ff131195/go.mod h1:1Vk0LDW6jG5cGc2D9RQUxHaE0vYhTvIwSo9mOL6K4/U= 101 | github.com/lufia/plan9stats v0.0.0-20211012122336-39d0f177ccd0 h1:6E+4a0GO5zZEnZ81pIr0yLvtUWk2if982qA3F3QD6H4= 102 | github.com/lufia/plan9stats v0.0.0-20211012122336-39d0f177ccd0/go.mod h1:zJYVVT2jmtg6P3p1VtQj7WsuWi/y4VnjVBn7F8KPB3I= 103 | github.com/magiconair/properties v1.8.7 h1:IeQXZAiQcpL9mgcAe1Nu6cX9LLw6ExEHKjN0VQdvPDY= 104 | github.com/magiconair/properties v1.8.7/go.mod h1:Dhd985XPs7jluiymwWYZ0G4Z61jb3vdS329zhj2hYo0= 105 | github.com/mattn/go-runewidth v0.0.16 h1:E5ScNMtiwvlvB5paMFdw9p4kSQzbXFikJ5SQO6TULQc= 106 | github.com/mattn/go-runewidth v0.0.16/go.mod h1:Jdepj2loyihRzMpdS35Xk/zdY8IAYHsh153qUoGf23w= 107 | github.com/moby/docker-image-spec v1.3.1 h1:jMKff3w6PgbfSa69GfNg+zN/XLhfXJGnEx3Nl2EsFP0= 108 | github.com/moby/docker-image-spec v1.3.1/go.mod 
h1:eKmb5VW8vQEh/BAr2yvVNvuiJuY6UIocYsFu/DxxRpo= 109 | github.com/moby/patternmatcher v0.6.0 h1:GmP9lR19aU5GqSSFko+5pRqHi+Ohk1O69aFiKkVGiPk= 110 | github.com/moby/patternmatcher v0.6.0/go.mod h1:hDPoyOpDY7OrrMDLaYoY3hf52gNCR/YOUYxkhApJIxc= 111 | github.com/moby/sys/sequential v0.5.0 h1:OPvI35Lzn9K04PBbCLW0g4LcFAJgHsvXsRyewg5lXtc= 112 | github.com/moby/sys/sequential v0.5.0/go.mod h1:tH2cOOs5V9MlPiXcQzRC+eEyab644PWKGRYaaV5ZZlo= 113 | github.com/moby/sys/user v0.3.0 h1:9ni5DlcW5an3SvRSx4MouotOygvzaXbaSrc/wGDFWPo= 114 | github.com/moby/sys/user v0.3.0/go.mod h1:bG+tYYYJgaMtRKgEmuueC0hJEAZWwtIbZTB+85uoHjs= 115 | github.com/moby/sys/userns v0.1.0 h1:tVLXkFOxVu9A64/yh59slHVv9ahO9UIev4JZusOLG/g= 116 | github.com/moby/sys/userns v0.1.0/go.mod h1:IHUYgu/kao6N8YZlp9Cf444ySSvCmDlmzUcYfDHOl28= 117 | github.com/moby/term v0.5.0 h1:xt8Q1nalod/v7BqbG21f8mQPqH+xAaC9C3N3wfWbVP0= 118 | github.com/moby/term v0.5.0/go.mod h1:8FzsFHVUBGZdbDsJw/ot+X+d5HLUbvklYLJ9uGfcI3Y= 119 | github.com/morikuni/aec v1.0.0 h1:nP9CBfwrvYnBRgY6qfDQkygYDmYwOilePFkwzv4dU8A= 120 | github.com/morikuni/aec v1.0.0/go.mod h1:BbKIizmSmc5MMPqRYbxO4ZU0S0+P200+tUnFx7PXmsc= 121 | github.com/mutablelogic/go-client v1.1.1 h1:0pg1n2WbzIUw3yS/3eiUtzfwbM0WehMrpbv8Kp8NCMw= 122 | github.com/mutablelogic/go-client v1.1.1/go.mod h1:guZo2r4fp4y20/TgAefR5gWBMiRmLY+asC4FImrKiv8= 123 | github.com/mutablelogic/go-media v1.7.5 h1:SQQ9wBIHPZDXMwIb0XNdzbEhPIdGC1ptSwYaxt+wer4= 124 | github.com/mutablelogic/go-media v1.7.5/go.mod h1:PSUhoVDrsZaUNsz9N6eIEWTK4wT7rdTHfFiFwMi4FSg= 125 | github.com/mutablelogic/go-server v1.5.14 h1:CYQl77oHYoHRZ/dBQ/S3xgMlNOnh9rZUnqlSNpYGaUI= 126 | github.com/mutablelogic/go-server v1.5.14/go.mod h1:HCX8WZtE3RXR4i+npBvCdILfnBelomDIe8/B68O/MA4= 127 | github.com/opencontainers/go-digest v1.0.0 h1:apOUWs51W5PlhuyGyz9FCeeBIOUDA/6nW8Oi/yOhh5U= 128 | github.com/opencontainers/go-digest v1.0.0/go.mod h1:0JzlMkj0TRzQZfJkVvzbP0HBR3IKzErnv2BNG4W4MAM= 129 | github.com/opencontainers/image-spec v1.1.0 
h1:8SG7/vwALn54lVB/0yZ/MMwhFrPYtpEHQb2IpWsCzug= 130 | github.com/opencontainers/image-spec v1.1.0/go.mod h1:W4s4sFTMaBeK1BQLXbG4AdM2szdn85PY75RI83NrTrM= 131 | github.com/pkg/errors v0.9.1 h1:FEBLx1zS214owpjy7qsBeixbURkuhQAwrK5UwLGTwt4= 132 | github.com/pkg/errors v0.9.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= 133 | github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= 134 | github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= 135 | github.com/power-devops/perfstat v0.0.0-20210106213030-5aafc221ea8c h1:ncq/mPwQF4JjgDlrVEn3C11VoGHZN7m8qihwgMEtzYw= 136 | github.com/power-devops/perfstat v0.0.0-20210106213030-5aafc221ea8c/go.mod h1:OmDBASR4679mdNQnz2pUhc2G8CO2JrUAVFDRBDP/hJE= 137 | github.com/rivo/uniseg v0.2.0/go.mod h1:J6wj4VEh+S6ZtnVlnTBMWIodfgj8LQOQFoIToxlJtxc= 138 | github.com/rivo/uniseg v0.4.7 h1:WUdvkW8uEhrYfLC4ZzdpI2ztxP1I582+49Oc5Mq64VQ= 139 | github.com/rivo/uniseg v0.4.7/go.mod h1:FN3SvrM+Zdj16jyLfmOkMNblXMcoc8DfTHruCPUcx88= 140 | github.com/rogpeppe/go-internal v1.14.1 h1:UQB4HGPB6osV0SQTLymcB4TgvyWu6ZyliaW0tI/otEQ= 141 | github.com/rogpeppe/go-internal v1.14.1/go.mod h1:MaRKkUm5W0goXpeCfT7UZI6fk/L7L7so1lCWt35ZSgc= 142 | github.com/shirou/gopsutil/v3 v3.23.12 h1:z90NtUkp3bMtmICZKpC4+WaknU1eXtp5vtbQ11DgpE4= 143 | github.com/shirou/gopsutil/v3 v3.23.12/go.mod h1:1FrWgea594Jp7qmjHUUPlJDTPgcsb9mGnXDxavtikzM= 144 | github.com/shoenig/go-m1cpu v0.1.6 h1:nxdKQNcEB6vzgA2E2bvzKIYRuNj7XNJ4S/aRSwKzFtM= 145 | github.com/shoenig/go-m1cpu v0.1.6/go.mod h1:1JJMcUBvfNwpq05QDQVAnx3gUHr9IYF7GNg9SUEw2VQ= 146 | github.com/sirupsen/logrus v1.9.3 h1:dueUQJ1C2q9oE3F7wvmSGAaVtTmUizReu6fjN8uqzbQ= 147 | github.com/sirupsen/logrus v1.9.3/go.mod h1:naHLuLoDiP4jHNo9R0sCBMtWGeIprob74mVsIT4qYEQ= 148 | github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= 149 | github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI= 150 | 
github.com/stretchr/testify v1.7.0/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= 151 | github.com/stretchr/testify v1.10.0 h1:Xv5erBjTwe/5IxqUQTdXv5kgmIvbHo3QQyRwhJsOfJA= 152 | github.com/stretchr/testify v1.10.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY= 153 | github.com/testcontainers/testcontainers-go v0.35.0 h1:uADsZpTKFAtp8SLK+hMwSaa+X+JiERHtd4sQAFmXeMo= 154 | github.com/testcontainers/testcontainers-go v0.35.0/go.mod h1:oEVBj5zrfJTrgjwONs1SsRbnBtH9OKl+IGl3UMcr2B4= 155 | github.com/tklauser/go-sysconf v0.3.12 h1:0QaGUFOdQaIVdPgfITYzaTegZvdCjmYO52cSFAEVmqU= 156 | github.com/tklauser/go-sysconf v0.3.12/go.mod h1:Ho14jnntGE1fpdOqQEEaiKRpvIavV0hSfmBq8nJbHYI= 157 | github.com/tklauser/numcpus v0.6.1 h1:ng9scYS7az0Bk4OZLvrNXNSAO2Pxr1XXRAPyjhIx+Fk= 158 | github.com/tklauser/numcpus v0.6.1/go.mod h1:1XfjsgE2zo8GVw7POkMbHENHzVg3GzmoZ9fESEdAacY= 159 | github.com/yinyin/go-ldap-schema-parser v0.0.0-20190716182935-542aadd3dcb5 h1:siJ/5leB7JENBScgD/qG8JAGiS/2Q76qxCPK81icczU= 160 | github.com/yinyin/go-ldap-schema-parser v0.0.0-20190716182935-542aadd3dcb5/go.mod h1:Hb9db5nLRb/cT+dBKUrukgT3Z9mbtrpF3o2g8+sw7ic= 161 | github.com/yusufpapurcu/wmi v1.2.3 h1:E1ctvB7uKFMOJw3fdOW32DwGE9I7t++CRUEMKvFoFiw= 162 | github.com/yusufpapurcu/wmi v1.2.3/go.mod h1:SBZ9tNy3G9/m5Oi98Zks0QjeHVDvuK0qfxQmPyzfmi0= 163 | go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.49.0 h1:jq9TW8u3so/bN+JPT166wjOI6/vQPF6Xe7nMNIltagk= 164 | go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.49.0/go.mod h1:p8pYQP+m5XfbZm9fxtSKAbM6oIllS7s2AfxrChvc7iw= 165 | go.opentelemetry.io/otel v1.24.0 h1:0LAOdjNmQeSTzGBzduGe/rU4tZhMwL5rWgtp9Ku5Jfo= 166 | go.opentelemetry.io/otel v1.24.0/go.mod h1:W7b9Ozg4nkF5tWI5zsXkaKKDjdVjpD4oAt9Qi/MArHo= 167 | go.opentelemetry.io/otel/metric v1.24.0 h1:6EhoGWWK28x1fbpA4tYTOWBkPefTDQnb8WSGXlc88kI= 168 | go.opentelemetry.io/otel/metric v1.24.0/go.mod h1:VYhLe1rFfxuTXLgj4CBiyz+9WYBA8pNGJgDcSFRKBco= 169 | go.opentelemetry.io/otel/trace v1.24.0 
h1:CsKnnL4dUAr/0llH9FKuc698G04IrpWV0MQA/Y1YELI= 170 | go.opentelemetry.io/otel/trace v1.24.0/go.mod h1:HPc3Xr/cOApsBI154IU0OI0HJexz+aw5uPdbs3UCjNU= 171 | golang.org/x/crypto v0.37.0 h1:kJNSjF/Xp7kU0iB2Z+9viTPMW4EqqsrywMXLJOOsXSE= 172 | golang.org/x/crypto v0.37.0/go.mod h1:vg+k43peMZ0pUMhYmVAWysMK35e6ioLh3wB8ZCAfbVc= 173 | golang.org/x/exp v0.0.0-20250506013437-ce4c2cf36ca6 h1:y5zboxd6LQAqYIhHnB48p0ByQ/GnQx2BE33L8BOHQkI= 174 | golang.org/x/exp v0.0.0-20250506013437-ce4c2cf36ca6/go.mod h1:U6Lno4MTRCDY+Ba7aCcauB9T60gsv5s4ralQzP72ZoQ= 175 | golang.org/x/image v0.27.0 h1:C8gA4oWU/tKkdCfYT6T2u4faJu3MeNS5O8UPWlPF61w= 176 | golang.org/x/image v0.27.0/go.mod h1:xbdrClrAUway1MUTEZDq9mz/UpRwYAkFFNUslZtcB+g= 177 | golang.org/x/net v0.39.0 h1:ZCu7HMWDxpXpaiKdhzIfaltL9Lp31x/3fCP11bc6/fY= 178 | golang.org/x/net v0.39.0/go.mod h1:X7NRbYVEA+ewNkCNyJ513WmMdQ3BineSwVtN2zD/d+E= 179 | golang.org/x/sync v0.14.0 h1:woo0S4Yywslg6hp4eUFjTVOyKt0RookbpAHG4c1HmhQ= 180 | golang.org/x/sync v0.14.0/go.mod h1:1dzgHSNfp02xaA81J2MS99Qcpr2w7fw1gpm99rleRqA= 181 | golang.org/x/sys v0.32.0 h1:s77OFDvIQeibCmezSnk/q6iAfkdiQaJi4VzroCFrN20= 182 | golang.org/x/sys v0.32.0/go.mod h1:BJP2sWEmIv4KK5OTEluFJCKSidICx8ciO85XgH3Ak8k= 183 | golang.org/x/term v0.31.0 h1:erwDkOK1Msy6offm1mOgvspSkslFnIGsFnxOKoufg3o= 184 | golang.org/x/term v0.31.0/go.mod h1:R4BeIy7D95HzImkxGkTW1UQTtP54tio2RyHz7PwK0aw= 185 | golang.org/x/text v0.25.0 h1:qVyWApTSYLk/drJRO5mDlNYskwQznZmkpV2c8q9zls4= 186 | golang.org/x/text v0.25.0/go.mod h1:WEdwpYrmk1qmdHvhkSTNPm3app7v4rsT8F2UD6+VHIA= 187 | gopkg.in/asn1-ber.v1 v1.0.0-20181015200546-f715ec2f112d/go.mod h1:cuepJuh7vyXfUyUwEgHQXw849cJrilpS5NeIjOWESAw= 188 | gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= 189 | gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c h1:Hei/4ADfdWqJk1ZMxUNpqntNwaWcugrBjAiHlqqRiVk= 190 | gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c/go.mod h1:JHkPIbrfpd72SG/EVd6muEfDQjcINNoR0C8j2r3qZ4Q= 191 | 
gopkg.in/ldap.v2 v2.5.1/go.mod h1:oI0cpe/D7HRtBQl8aTg+ZmzFUAvu4lsv3eLXMLGFxWk= 192 | gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= 193 | gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= 194 | gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= 195 | -------------------------------------------------------------------------------- /opt.go: -------------------------------------------------------------------------------- 1 | package whisper 2 | 3 | import ( 4 | // Namespace imports 5 | . "github.com/djthorpe/go-errors" 6 | ) 7 | 8 | /////////////////////////////////////////////////////////////////////////////// 9 | // TYPES 10 | 11 | type opts struct { 12 | MaxConcurrent int 13 | logfn LogFn 14 | debug bool 15 | gpu int 16 | } 17 | 18 | type Opt func(*opts) error 19 | type LogFn func(string) 20 | 21 | /////////////////////////////////////////////////////////////////////////////// 22 | // PUBLIC METHODS 23 | 24 | // Set maximum number of concurrent tasks 25 | func OptMaxConcurrent(v int) Opt { 26 | return func(o *opts) error { 27 | if v < 1 { 28 | return ErrBadParameter.With("max concurrent must be greater than zero") 29 | } 30 | o.MaxConcurrent = v 31 | return nil 32 | } 33 | } 34 | 35 | // Set logging function 36 | func OptLog(fn LogFn) Opt { 37 | return func(o *opts) error { 38 | o.logfn = fn 39 | return nil 40 | } 41 | } 42 | 43 | // Set debugging 44 | func OptDebug() Opt { 45 | return func(o *opts) error { 46 | o.debug = true 47 | return nil 48 | } 49 | } 50 | 51 | // Disable GPU acceleration 52 | func OptNoGPU() Opt { 53 | return func(o *opts) error { 54 | o.gpu = -1 55 | return nil 56 | } 57 | } 58 | -------------------------------------------------------------------------------- /pkg/api/logging.go: -------------------------------------------------------------------------------- 1 | package api 2 | 3 | import ( 4 | "log" 5 | "net/http" 6 | "sync/atomic" 
var (
	// req is the number of requests numbered so far; it is only
	// touched through nextReq.
	req int32
)

// wrapLogging returns a handler that logs the request number, method and
// URL, calls fn, and then logs how long fn took, truncated to milliseconds.
func wrapLogging(fn http.HandlerFunc) http.HandlerFunc {
	return func(w http.ResponseWriter, r *http.Request) {
		id, started := nextReq(), time.Now()
		log.Printf("R%d %s %s", id, r.Method, r.URL)

		fn(w, r)

		elapsed := time.Since(started).Truncate(time.Millisecond)
		log.Printf("R%d Took %v", id, elapsed)
	}
}

// nextReq atomically increments the request counter and returns the new value.
func nextReq() int32 {
	return atomic.AddInt32(&req, 1)
}
r *http.Request, service *whisper.Whisper) { 51 | // Get query and body 52 | var query queryDownloadModel 53 | var req reqDownloadModel 54 | if err := httprequest.Query(r.URL.Query(), &query); err != nil { 55 | httpresponse.Error(w, httpresponse.ErrBadRequest, err.Error()) 56 | return 57 | } 58 | if err := httprequest.Read(r, &req); err != nil { 59 | httpresponse.Error(w, httpresponse.ErrBadRequest, err.Error()) 60 | return 61 | } 62 | 63 | // Validate the request 64 | if err := req.Validate(); err != nil { 65 | httpresponse.Error(w, httpresponse.ErrBadRequest, err.Error()) 66 | return 67 | } 68 | 69 | // Create a text stream 70 | var stream *httpresponse.TextStream 71 | if query.Stream { 72 | if stream = httpresponse.NewTextStream(w); stream == nil { 73 | httpresponse.Error(w, httpresponse.ErrInternalError, "Cannot create text stream") 74 | return 75 | } 76 | defer stream.Close() 77 | } 78 | 79 | // Download the model 80 | t := time.Now() 81 | model, err := service.DownloadModel(ctx, req.Name(), func(curBytes, totalBytes uint64) { 82 | if time.Since(t) > time.Second && stream != nil { 83 | t = time.Now() 84 | stream.Write("progress", respDownloadModelStatus{ 85 | Status: fmt.Sprint("downloading ", req.Name()), 86 | Total: totalBytes, 87 | Completed: curBytes, 88 | }) 89 | } 90 | }) 91 | if err != nil { 92 | if stream != nil { 93 | stream.Write("error", err.Error()) 94 | } else { 95 | httpresponse.Error(w, httpresponse.ErrGatewayError, err.Error()) 96 | } 97 | return 98 | } 99 | 100 | // Return the model information 101 | if query.Stream { 102 | stream.Write("ok", model) 103 | } else { 104 | httpresponse.JSON(w, http.StatusCreated, 2, model) 105 | } 106 | } 107 | 108 | func GetModelById(ctx context.Context, w http.ResponseWriter, service *whisper.Whisper, id string) { 109 | model := service.GetModelById(id) 110 | if model == nil { 111 | httpresponse.Error(w, httpresponse.ErrNotFound, id) 112 | return 113 | } 114 | httpresponse.JSON(w, http.StatusOK, 2, model) 115 | 
} 116 | 117 | func DeleteModelById(ctx context.Context, w http.ResponseWriter, service *whisper.Whisper, id string) { 118 | model := service.GetModelById(id) 119 | if model == nil { 120 | httpresponse.Error(w, httpresponse.ErrNotFound, id) 121 | return 122 | } 123 | if err := service.DeleteModelById(model.Id); err != nil { 124 | httpresponse.Error(w, httpresponse.ErrInternalError, err.Error()) 125 | return 126 | } 127 | httpresponse.Empty(w, http.StatusOK) 128 | } 129 | 130 | /////////////////////////////////////////////////////////////////////////////// 131 | // PRIVATE METHODS 132 | 133 | // Validate the request 134 | func (r reqDownloadModel) Validate() error { 135 | if r.Path == "" { 136 | return errors.New("missing path") 137 | } 138 | return nil 139 | } 140 | 141 | // Return the model name 142 | func (r reqDownloadModel) Name() string { 143 | return filepath.Base(r.Path) 144 | } 145 | 146 | // Return the model path 147 | func (r reqDownloadModel) DestPath() string { 148 | return filepath.Dir(r.Path) 149 | } 150 | -------------------------------------------------------------------------------- /pkg/api/register.go: -------------------------------------------------------------------------------- 1 | package api 2 | 3 | import ( 4 | "net/http" 5 | "path/filepath" 6 | 7 | // Packages 8 | "github.com/mutablelogic/go-server/pkg/httpresponse" 9 | "github.com/mutablelogic/go-whisper" 10 | ) 11 | 12 | ///////////////////////////////////////////////////////////////////////////// 13 | // PUBLIC METHODS 14 | 15 | func RegisterEndpoints(base string, whisper *whisper.Whisper, mux *http.ServeMux) *http.ServeMux { 16 | // Create a new router 17 | if mux == nil { 18 | mux = http.NewServeMux() 19 | } 20 | 21 | // Health: GET /v1/health 22 | // returns an empty OK response 23 | mux.HandleFunc(joinPath(base, "health"), func(w http.ResponseWriter, r *http.Request) { 24 | defer r.Body.Close() 25 | 26 | switch r.Method { 27 | case http.MethodGet: 28 | httpresponse.Empty(w, 
http.StatusOK) 29 | default: 30 | httpresponse.Error(w, httpresponse.Err(http.StatusMethodNotAllowed), r.Method) 31 | } 32 | }) 33 | 34 | // List Models: GET /v1/models 35 | // returns available models 36 | // Download Model: POST /v1/models?stream={bool} 37 | // downloads a model from the server 38 | // if stream is true then progress is streamed back to the client 39 | mux.HandleFunc(joinPath(base, "models"), func(w http.ResponseWriter, r *http.Request) { 40 | defer r.Body.Close() 41 | 42 | switch r.Method { 43 | case http.MethodGet: 44 | ListModels(r.Context(), w, whisper) 45 | case http.MethodPost: 46 | DownloadModel(r.Context(), w, r, whisper) 47 | default: 48 | httpresponse.Error(w, httpresponse.Err(http.StatusMethodNotAllowed), r.Method) 49 | } 50 | }) 51 | 52 | // Get: GET /v1/models/{id} 53 | // returns an existing model 54 | // Delete: DELETE /v1/models/{id} 55 | // deletes an existing model 56 | mux.HandleFunc(joinPath(base, "models/{id}"), func(w http.ResponseWriter, r *http.Request) { 57 | defer r.Body.Close() 58 | 59 | id := r.PathValue("id") 60 | switch r.Method { 61 | case http.MethodGet: 62 | GetModelById(r.Context(), w, whisper, id) 63 | case http.MethodDelete: 64 | DeleteModelById(r.Context(), w, whisper, id) 65 | default: 66 | httpresponse.Error(w, httpresponse.Err(http.StatusMethodNotAllowed), r.Method) 67 | } 68 | }) 69 | 70 | // Translate: POST /v1/audio/translations 71 | // Translates audio into english or another language - language parameter should be set to the 72 | // destination language of the audio. Will default to english if not set. 
73 | mux.HandleFunc(joinPath(base, "audio/translations"), func(w http.ResponseWriter, r *http.Request) { 74 | defer r.Body.Close() 75 | 76 | switch r.Method { 77 | case http.MethodPost: 78 | TranscribeFile(r.Context(), whisper, w, r, Translate) 79 | default: 80 | httpresponse.Error(w, httpresponse.Err(http.StatusMethodNotAllowed), r.Method) 81 | } 82 | }) 83 | 84 | // Transcribe: POST /v1/audio/transcriptions 85 | // Transcribes audio into the input language - language parameter should be set to the source 86 | // language of the audio 87 | mux.HandleFunc(joinPath(base, "audio/transcriptions"), func(w http.ResponseWriter, r *http.Request) { 88 | defer r.Body.Close() 89 | 90 | switch r.Method { 91 | case http.MethodPost: 92 | TranscribeFile(r.Context(), whisper, w, r, Transcribe) 93 | default: 94 | httpresponse.Error(w, httpresponse.Err(http.StatusMethodNotAllowed), r.Method) 95 | } 96 | }) 97 | 98 | // Diarize: POST /v1/audio/diarize 99 | // Transcribes audio into the input language - language parameter should be set to the source 100 | // language of the audio. Output speaker parts. 
/////////////////////////////////////////////////////////////////////////////
// PRIVATE METHODS

// joinPath joins base and rel into one cleaned, slash-separated path for use
// as a ServeMux pattern. filepath.Join uses the operating system's separator,
// so the result is converted with filepath.ToSlash; without that the
// registered patterns would contain backslashes on Windows.
func joinPath(base, rel string) string {
	return filepath.ToSlash(filepath.Join(base, rel))
}

///////////////////////////////////////////////////////////////////////////////
// TYPES

// reqTranscribe is the request body read by TranscribeFile.
type reqTranscribe struct {
	File        *multipart.FileHeader `json:"file"`
	Model       string                `json:"model"`
	Language    *string               `json:"language"`
	Temperature *float32              `json:"temperature"`
	SegmentSize *time.Duration        `json:"segment_size"`
	ResponseFmt *string               `json:"response_format"`
}

// queryTranscribe holds the query parameters read by TranscribeFile.
type queryTranscribe struct {
	Stream bool `json:"stream"`
}

// TaskType selects what TranscribeFile does with the audio.
type TaskType int

// ResponseFormat names the output format of a transcription.
type ResponseFormat string

///////////////////////////////////////////////////////////////////////////////
// GLOBALS

const (
	minSegmentSize     = 5 * time.Second
	maxSegmentSize     = 10 * time.Minute
	defaultSegmentSize = 5 * time.Minute
)

const (
	_          TaskType = iota
	Transcribe          // Transcribe audio
	Translate           // Translate audio into English
	Diarize             // Diarize audio
)

const (
	FormatJson        ResponseFormat = "json"
	FormatText        ResponseFormat = "text"
	FormatSrt         ResponseFormat = "srt"
	FormatVerboseJson ResponseFormat = "verbose_json"
	FormatVtt         ResponseFormat = "vtt"
)
| 88 | // Get the model 89 | model := service.GetModelById(req.Model) 90 | if model == nil { 91 | httpresponse.Error(w, httpresponse.ErrNotFound, req.Model) 92 | return 93 | } 94 | 95 | // Open file 96 | f, err := req.File.Open() 97 | if err != nil { 98 | httpresponse.Error(w, httpresponse.ErrInternalError, err.Error()) 99 | return 100 | } 101 | defer f.Close() 102 | 103 | // Create a segmenter - read segments based on requested segment size 104 | segmenter, err := segmenter.NewReader(f, req.SegmentDur(), whisper.SampleRate) 105 | if err != nil { 106 | httpresponse.Error(w, httpresponse.ErrBadRequest, err.Error()) 107 | return 108 | } 109 | 110 | // Create a text stream 111 | var stream *httpresponse.TextStream 112 | if query.Stream { 113 | if stream = httpresponse.NewTextStream(w); stream == nil { 114 | httpresponse.Error(w, httpresponse.ErrInternalError, "Cannot create text stream") 115 | return 116 | } 117 | defer stream.Close() 118 | } 119 | 120 | // Get context for the model, perform transcription 121 | var result *schema.Transcription 122 | if err := service.WithModel(model, func(taskctx *task.Context) error { 123 | result = taskctx.Result() 124 | 125 | switch t { 126 | case Translate: 127 | // Check model 128 | if !taskctx.CanTranslate() { 129 | return ErrBadParameter.With("model is not multilingual, cannot translate") 130 | } 131 | taskctx.SetTranslate(true) 132 | taskctx.SetDiarize(false) 133 | result.Task = "translate" 134 | 135 | // Set language to EN 136 | if err := taskctx.SetLanguage("en"); err != nil { 137 | return err 138 | } 139 | case Diarize: 140 | taskctx.SetTranslate(false) 141 | taskctx.SetDiarize(true) 142 | result.Task = "diarize" 143 | 144 | // Set language 145 | if req.Language != nil { 146 | if err := taskctx.SetLanguage(*req.Language); err != nil { 147 | return err 148 | } 149 | } 150 | default: 151 | // Transcribe 152 | taskctx.SetTranslate(false) 153 | taskctx.SetDiarize(false) 154 | result.Task = "transribe" 155 | 156 | // Set 
language 157 | if req.Language != nil { 158 | if err := taskctx.SetLanguage(*req.Language); err != nil { 159 | return err 160 | } 161 | } 162 | } 163 | 164 | // TODO: Set temperature, etc 165 | 166 | // Output the header 167 | result.Language = taskctx.Language() 168 | if stream != nil { 169 | stream.Write("task", taskctx.Result()) 170 | } 171 | 172 | // Read samples and transcribe them 173 | if err := segmenter.DecodeFloat32(ctx, func(ts time.Duration, buf []float32) error { 174 | // Perform the transcription, return any errors 175 | return taskctx.Transcribe(ctx, ts, buf, func(segment *schema.Segment) { 176 | // Segment callback 177 | if stream == nil { 178 | return 179 | } 180 | var buf bytes.Buffer 181 | switch req.ResponseFormat() { 182 | case FormatVerboseJson, FormatJson: 183 | stream.Write("segment", segment) 184 | return 185 | case FormatSrt: 186 | task.WriteSegmentSrt(&buf, segment) 187 | case FormatVtt: 188 | task.WriteSegmentVtt(&buf, segment) 189 | case FormatText: 190 | task.WriteSegmentText(&buf, segment) 191 | } 192 | stream.Write("segment", buf.String()) 193 | }) 194 | }); err != nil { 195 | return err 196 | } 197 | 198 | // Set the language and duration 199 | result.Language = taskctx.Language() 200 | 201 | // Return success 202 | return nil 203 | }); err != nil { 204 | if stream != nil { 205 | stream.Write("error", err.Error()) 206 | } else { 207 | httpresponse.Error(w, httpresponse.ErrInternalError, err.Error()) 208 | } 209 | return 210 | } 211 | 212 | // Return transcription if not streaming 213 | if stream == nil { 214 | httpresponse.JSON(w, http.StatusOK, 2, result) 215 | } else { 216 | stream.Write("ok") 217 | } 218 | } 219 | 220 | /* 221 | func TranscribeStream(ctx context.Context, service *whisper.Whisper, w http.ResponseWriter, r *http.Request, modelId string) { 222 | var query queryTranscribe 223 | if err := httprequest.Query(&query, r.URL.Query()); err != nil { 224 | httpresponse.Error(w, http.StatusBadRequest, err.Error()) 225 | return 
226 | } 227 | 228 | // Get the model 229 | model := service.GetModelById(modelId) 230 | if model == nil { 231 | httpresponse.Error(w, http.StatusNotFound, "model not found") 232 | return 233 | } 234 | 235 | // Create a segmenter - read segments based on 10 second segment size 236 | segmenter, err := segmenter.New(r.Body, 10*time.Second, whisper.SampleRate) 237 | if err != nil { 238 | httpresponse.Error(w, http.StatusBadRequest, err.Error()) 239 | return 240 | } 241 | 242 | // Create a text stream 243 | var stream *httpresponse.TextStream 244 | if query.Stream { 245 | if stream = httpresponse.NewTextStream(w); stream == nil { 246 | httpresponse.Error(w, http.StatusInternalServerError, "Cannot create text stream") 247 | return 248 | } 249 | defer stream.Close() 250 | } 251 | 252 | // Get context for the model, perform transcription 253 | var result *schema.Transcription 254 | if err := service.WithModel(model, func(task *task.Context) error { 255 | // Set parameters for translation, default to auto 256 | task.SetTranslate(false) 257 | if err := task.SetLanguage("auto"); err != nil { 258 | return err 259 | } 260 | 261 | // TODO: Set temperature, etc 262 | 263 | // Create response 264 | result = task.Result() 265 | result.Task = "transcribe" 266 | result.Language = task.Language() 267 | 268 | // Output the header 269 | if stream != nil { 270 | stream.Write("task", result) 271 | } 272 | 273 | // Read samples and transcribe them 274 | if err := segmenter.Decode(ctx, func(ts time.Duration, buf []float32) error { 275 | // Perform the transcription, output segments in realtime, return any errors 276 | return task.Transcribe(ctx, ts, buf, func(segment *schema.Segment) { 277 | if stream != nil { 278 | stream.Write("segment", segment) 279 | } 280 | }) 281 | }); err != nil { 282 | return err 283 | } 284 | 285 | // Set the language 286 | result.Language = taskctx.Language() 287 | 288 | // Return success 289 | return nil 290 | }); err != nil { 291 | if stream != nil { 292 | 
stream.Write("error", err.Error()) 293 | } else { 294 | httpresponse.Error(w, http.StatusInternalServerError, err.Error()) 295 | } 296 | return 297 | } 298 | 299 | // Return streaming ok 300 | if stream != nil { 301 | stream.Write("ok") 302 | return 303 | } 304 | 305 | // Rrturn result based on response format 306 | switch req.ResponseFormat() { 307 | case FormatJson, FormatVerboseJson: 308 | httpresponse.JSON(w, result, http.StatusOK, 0) 309 | case FormatText: 310 | httpresponse.Text(w, "", http.StatusOK) 311 | for _, seg := range result.Segments { 312 | task.WriteSegmentText(w, seg) 313 | } 314 | w.Write([]byte("\n")) 315 | case FormatSrt: 316 | httpresponse.Text(w, "", http.StatusOK, "Content-Type", "application/x-subrip") 317 | for _, seg := range result.Segments { 318 | task.WriteSegmentSrt(w, seg) 319 | } 320 | case FormatVtt: 321 | httpresponse.Text(w, "WEBVTT\n\n", http.StatusOK, "Content-Type", "text/vtt") 322 | for _, seg := range result.Segments { 323 | task.WriteSegmentVtt(w, seg) 324 | } 325 | } 326 | } 327 | */ 328 | 329 | /////////////////////////////////////////////////////////////////////////////// 330 | // PRIVATE METHODS 331 | 332 | func (r reqTranscribe) Validate() error { 333 | if r.Model == "" { 334 | return fmt.Errorf("model is required") 335 | } 336 | if r.File == nil { 337 | return fmt.Errorf("file is required") 338 | } 339 | if r.ResponseFmt != nil { 340 | switch *r.ResponseFmt { 341 | case "json", "text", "srt", "verbose_json", "vtt": 342 | break 343 | default: 344 | return fmt.Errorf("response_format must be one of: json, text, srt, verbose_json, vtt") 345 | } 346 | } 347 | return nil 348 | } 349 | func (r reqTranscribe) ResponseFormat() ResponseFormat { 350 | if r.ResponseFmt == nil { 351 | return FormatJson 352 | } 353 | switch strings.ToLower(*r.ResponseFmt) { 354 | case "json": 355 | return FormatJson 356 | case "text": 357 | return FormatText 358 | case "srt": 359 | return FormatSrt 360 | case "verbose_json": 361 | return 
FormatVerboseJson 362 | case "vtt": 363 | return FormatVtt 364 | } 365 | return FormatJson 366 | } 367 | 368 | func (r reqTranscribe) OutputSegments() bool { 369 | // We want to output segments if the response format is "srt", "verbose_json", "vtt" 370 | switch r.ResponseFormat() { 371 | case FormatSrt, FormatVerboseJson, FormatVtt: 372 | return true 373 | default: 374 | return false 375 | } 376 | } 377 | 378 | func (r reqTranscribe) SegmentDur() time.Duration { 379 | if r.SegmentSize == nil { 380 | return defaultSegmentSize 381 | } 382 | if *r.SegmentSize < minSegmentSize { 383 | return minSegmentSize 384 | } 385 | if *r.SegmentSize > maxSegmentSize { 386 | return maxSegmentSize 387 | } 388 | return *r.SegmentSize 389 | } 390 | -------------------------------------------------------------------------------- /pkg/client/client.go: -------------------------------------------------------------------------------- 1 | package client 2 | 3 | import ( 4 | "context" 5 | "errors" 6 | "io" 7 | "net/url" 8 | "os" 9 | "path/filepath" 10 | 11 | // Packages 12 | "github.com/mutablelogic/go-client" 13 | "github.com/mutablelogic/go-client/pkg/multipart" 14 | "github.com/mutablelogic/go-server/pkg/types" 15 | "github.com/mutablelogic/go-whisper/pkg/schema" 16 | ) 17 | 18 | /////////////////////////////////////////////////////////////////////////////// 19 | // TYPES 20 | 21 | type Client struct { 22 | *client.Client 23 | } 24 | 25 | /////////////////////////////////////////////////////////////////////////////// 26 | // LIFECYCLE 27 | 28 | // New creates a new client, with the endpoint of the whisper service 29 | // ie, http://localhost:8080/v1 30 | func New(endpoint string, opts ...client.ClientOpt) (*Client, error) { 31 | if client, err := client.New(append(opts, client.OptEndpoint(endpoint))...); err != nil { 32 | return nil, err 33 | } else { 34 | return &Client{Client: client}, nil 35 | } 36 | } 37 | 38 | 
/////////////////////////////////////////////////////////////////////////////// 39 | // PING 40 | 41 | func (c *Client) Ping(ctx context.Context) error { 42 | return c.DoWithContext(ctx, client.MethodGet, nil, client.OptPath("health")) 43 | } 44 | 45 | /////////////////////////////////////////////////////////////////////////////// 46 | // MODELS 47 | 48 | func (c *Client) ListModels(ctx context.Context) ([]schema.Model, error) { 49 | var models struct { 50 | Models []schema.Model `json:"models"` 51 | } 52 | if err := c.DoWithContext(ctx, client.MethodGet, &models, client.OptPath("models")); err != nil { 53 | return nil, err 54 | } 55 | // Return success 56 | return models.Models, nil 57 | } 58 | 59 | func (c *Client) DeleteModel(ctx context.Context, model string) error { 60 | return c.DoWithContext(ctx, client.MethodDelete, nil, client.OptPath("models", model)) 61 | } 62 | 63 | func (c *Client) DownloadModel(ctx context.Context, path string, fn func(status string, cur, total int64)) (schema.Model, error) { 64 | var req struct { 65 | Path string `json:"path"` 66 | } 67 | type resp struct { 68 | schema.Model 69 | Status string `json:"status"` 70 | Total int64 `json:"total,omitempty"` 71 | Completed int64 `json:"completed,omitempty"` 72 | } 73 | 74 | // stream=true for progress reports 75 | query := url.Values{} 76 | if fn != nil { 77 | query.Set("stream", "true") 78 | } 79 | 80 | // Download the model 81 | req.Path = path 82 | 83 | var r resp 84 | if payload, err := client.NewJSONRequest(req); err != nil { 85 | return schema.Model{}, err 86 | } else if err := c.DoWithContext(ctx, payload, &r, 87 | client.OptPath("models"), 88 | client.OptQuery(query), 89 | client.OptNoTimeout(), 90 | client.OptTextStreamCallback(func(evt client.TextStreamEvent) error { 91 | switch evt.Event { 92 | case "progress": 93 | var r resp 94 | if err := evt.Json(&r); err != nil { 95 | return err 96 | } else { 97 | fn(r.Status, r.Completed, r.Total) 98 | } 99 | case "error": 100 | var errstr 
string 101 | if evt.Event == "error" { 102 | if err := evt.Json(&errstr); err != nil { 103 | return err 104 | } else { 105 | return errors.New(errstr) 106 | } 107 | } 108 | case "ok": 109 | if err := evt.Json(&r); err != nil { 110 | return err 111 | } 112 | } 113 | return nil 114 | }), 115 | ); err != nil { 116 | return schema.Model{}, err 117 | } 118 | 119 | // Return success 120 | return r.Model, nil 121 | } 122 | 123 | func (c *Client) Transcribe(ctx context.Context, model string, r io.Reader, opt ...Opt) (*schema.Transcription, error) { 124 | var request struct { 125 | File multipart.File `json:"file"` 126 | Model string `json:"model"` 127 | opts 128 | } 129 | var response schema.Transcription 130 | 131 | // Get the name from the io.Reader 132 | name := "" 133 | if f, ok := r.(*os.File); ok { 134 | name = filepath.Base(f.Name()) 135 | } 136 | 137 | // Create the request 138 | request.Model = model 139 | request.File = multipart.File{ 140 | Path: name, 141 | Body: r, 142 | } 143 | for _, o := range opt { 144 | if err := o(&request.opts); err != nil { 145 | return nil, err 146 | } 147 | } 148 | 149 | // Request->Response 150 | if payload, err := client.NewMultipartRequest(request, types.ContentTypeFormData); err != nil { 151 | return nil, err 152 | } else if err := c.DoWithContext(ctx, payload, &response, client.OptPath("audio/transcriptions"), client.OptNoTimeout()); err != nil { 153 | return nil, err 154 | } 155 | 156 | // Return success 157 | return &response, nil 158 | } 159 | 160 | func (c *Client) Translate(ctx context.Context, model string, r io.Reader, opt ...Opt) (*schema.Transcription, error) { 161 | var request struct { 162 | File multipart.File `json:"file"` 163 | Model string `json:"model"` 164 | opts 165 | } 166 | var response schema.Transcription 167 | 168 | // Get the name from the io.Reader 169 | name := "" 170 | if f, ok := r.(*os.File); ok { 171 | name = filepath.Base(f.Name()) 172 | } 173 | 174 | // Create the request 175 | request.Model = 
// opts holds the optional fields which accompany a transcription or
// translation request. Every field is tagged omitempty.
type opts struct {
	Language    string        `json:"language,omitempty"`
	SegmentSize time.Duration `json:"segment_size,omitempty"`
	ResponseFmt string        `json:"response_format,omitempty"`
}

// Opt applies a single setting to the request options.
type Opt func(*opts) error

///////////////////////////////////////////////////////////////////////////////
// PUBLIC METHODS

// OptLanguage sets the language field of the request.
func OptLanguage(language string) Opt {
	return optSetter(func(dst *opts) { dst.Language = language })
}

// OptSegmentSize sets the segment size field of the request.
func OptSegmentSize(v time.Duration) Opt {
	return optSetter(func(dst *opts) { dst.SegmentSize = v })
}

// OptResponseFormat sets the response format field of the request.
func OptResponseFormat(v string) Opt {
	return optSetter(func(dst *opts) { dst.ResponseFmt = v })
}

///////////////////////////////////////////////////////////////////////////////
// PRIVATE METHODS

// optSetter wraps an assignment which cannot fail as an Opt.
func optSetter(assign func(*opts)) Opt {
	return func(dst *opts) error {
		assign(dst)
		return nil
	}
}
"github.com/mutablelogic/go-whisper/pkg/task" 10 | 11 | // Namespace imports 12 | . "github.com/djthorpe/go-errors" 13 | ) 14 | 15 | ////////////////////////////////////////////////////////////////////////////// 16 | // TYPES 17 | 18 | // ContextPool is a pool of context objects 19 | type ContextPool struct { 20 | // Pool of context objects 21 | *Pool 22 | 23 | // Base path for models 24 | path string 25 | 26 | // GPU flags 27 | gpu int 28 | } 29 | 30 | ////////////////////////////////////////////////////////////////////////////// 31 | // LIFECYCLE 32 | 33 | // Create a new context pool of context objects, up to 'max' items 34 | // Set the path for the model storage 35 | // If GPU is -1 then disable, if 0 then use default, if >0 then enable 36 | // and use the specified device 37 | func NewContextPool(path string, max int, gpu int) *ContextPool { 38 | pool := new(ContextPool) 39 | pool.Pool = NewPool(max, func() any { 40 | return task.New() 41 | }) 42 | pool.path = path 43 | pool.gpu = gpu 44 | 45 | // Return success 46 | return pool 47 | } 48 | 49 | // Close the pool and release all resources 50 | func (m *ContextPool) Close() error { 51 | return m.Pool.Close() 52 | } 53 | 54 | ////////////////////////////////////////////////////////////////////////////// 55 | // STRINGIFY 56 | 57 | func (m *ContextPool) MarshalJSON() ([]byte, error) { 58 | return json.Marshal(struct { 59 | Gpu int `json:"gpu"` 60 | N int `json:"n"` 61 | Max int `json:"max"` 62 | }{ 63 | Gpu: m.gpu, 64 | N: m.N(), 65 | Max: m.max, 66 | }) 67 | } 68 | 69 | func (m *ContextPool) String() string { 70 | data, err := json.MarshalIndent(m, "", " ") 71 | if err != nil { 72 | return err.Error() 73 | } 74 | return string(data) 75 | } 76 | 77 | ////////////////////////////////////////////////////////////////////////////// 78 | // PUBLIC METHODS 79 | 80 | // Get a context from the pool, for a model 81 | func (m *ContextPool) Get(model *schema.Model) (*task.Context, error) { 82 | // Check parameters 83 | if 
model == nil { 84 | return nil, ErrBadParameter 85 | } 86 | 87 | // Get a context from the pool 88 | t, ok := m.Pool.Get().(*task.Context) 89 | if !ok || t == nil { 90 | return nil, ErrChannelBlocked.With("unable to get a context from the pool, try again later") 91 | } 92 | 93 | // If the model matches, return it, or else release the resources 94 | if t.Is(model) { 95 | return t, nil 96 | } else if err := t.Close(); err != nil { 97 | return nil, err 98 | } 99 | 100 | // Initialise the context 101 | if err := t.Init(m.path, model, m.gpu); err != nil { 102 | return nil, err 103 | } 104 | 105 | // Return the context 106 | return t, nil 107 | } 108 | 109 | // Put a context back into the pool 110 | func (m *ContextPool) Put(ctx *task.Context) { 111 | m.Pool.Put(ctx) 112 | } 113 | 114 | // Drain the pool of all contexts for a model, freeing resources 115 | func (m *ContextPool) Drain(model *schema.Model) error { 116 | fmt.Println("TODO: DRAIN", model.Id) 117 | return nil 118 | } 119 | -------------------------------------------------------------------------------- /pkg/pool/contextpool_test.go: -------------------------------------------------------------------------------- 1 | package pool_test 2 | 3 | import ( 4 | "testing" 5 | 6 | // Packages 7 | pool "github.com/mutablelogic/go-whisper/pkg/pool" 8 | schema "github.com/mutablelogic/go-whisper/pkg/schema" 9 | ) 10 | 11 | func Test_contextpool_001(t *testing.T) { 12 | var pool = pool.NewContextPool(t.TempDir(), 2, 0) 13 | 14 | model1, err := pool.Get(&schema.Model{ 15 | Id: "model1", 16 | }) 17 | if err != nil { 18 | t.Error(err) 19 | } 20 | if model1 == nil { 21 | t.Error("Expected model1") 22 | } 23 | t.Log("Got model1", model1) 24 | 25 | model2, err := pool.Get(&schema.Model{ 26 | Id: "model2", 27 | }) 28 | if err != nil { 29 | t.Error(err) 30 | } 31 | if model2 == nil { 32 | t.Error("Expected model2") 33 | } 34 | t.Log("Got model2", model2) 35 | 36 | pool.Put(model1) 37 | 38 | model3, err := pool.Get(&schema.Model{ 
////////////////////////////////////////////////////////////////////////////////
// TYPES

// Pool is a pool of context objects, up to a maximum number.
// This acts as a cache so we don't need to reload models.
// If the maximum number of objects is already handed out, Get returns nil.
type Pool struct {
	sync.RWMutex

	pool  []any   // idle items waiting to be reused
	fn    NewFunc // creates a new item when no idle item is available
	n     int     // items handed out by Get and not yet returned by Put
	max   int     // upper bound on n
	empty bool    // set by Close; no new items are created afterwards
}

// Create a new object to place in the pool
type NewFunc func() any

////////////////////////////////////////////////////////////////////////////////
// LIFECYCLE

// Create a new pool of context objects, up to 'max' objects. Returns nil if
// max is less than one or if fn is nil.
func NewPool(max int, fn NewFunc) *Pool {
	// Max needs to be one or more, and we need a way to create items
	if max <= 0 || fn == nil {
		return nil
	}

	// Create pool
	pool := new(Pool)
	pool.max = max
	pool.fn = fn

	// Return success
	return pool
}

// Close stops the pool from creating new items and releases all idle items.
// Idle items which implement io.Closer are closed, and any errors are joined
// into the returned error.
func (m *Pool) Close() error {
	// Detach the idle items while holding the lock, close them outside it
	m.Lock()
	m.empty = true
	idle := m.pool
	m.pool = nil
	m.Unlock()

	var result error
	for _, item := range idle {
		if closer, ok := item.(io.Closer); ok {
			result = errors.Join(result, closer.Close())
		}
	}

	// Return any error
	return result
}

////////////////////////////////////////////////////////////////////////////////
// PUBLIC METHODS

// Returns an item, or nil if the maximum number of contexts has been reached.
// Idle items are reused in the order they were returned. Every item handed
// out is counted, whether it is reused or newly created, so that the maximum
// still holds after items have been put back.
func (m *Pool) Get() any {
	m.Lock()
	defer m.Unlock()

	var item any
	if len(m.pool) > 0 {
		// Reuse the oldest idle item, and drop the reference from the
		// backing array so it can be collected later
		item = m.pool[0]
		m.pool[0] = nil
		m.pool = m.pool[1:]
	} else if !m.atCapacity() {
		// Create a new item
		item = m.fn()
	}
	if item != nil {
		m.n++
	}
	return item
}

// Puts the context back in the pool. A nil context is ignored.
func (m *Pool) Put(ctx any) {
	if ctx == nil {
		return
	}

	m.Lock()
	defer m.Unlock()

	m.pool = append(m.pool, ctx)
	// Never go below zero, even if a foreign item is put into the pool
	if m.n > 0 {
		m.n--
	}
}

// Return the number of contexts currently handed out by the pool
func (m *Pool) N() int {
	m.RLock()
	defer m.RUnlock()
	return m.n
}

////////////////////////////////////////////////////////////////////////////////
// PRIVATE METHODS

// Return true if no new item may be created. The caller must hold the lock.
func (m *Pool) atCapacity() bool {
	return m.n >= m.max || m.empty
}

// Set pool in drain mode, no more contexts will be added
func (m *Pool) setEmpty(v bool) {
	m.Lock()
	defer m.Unlock()
	m.empty = v
}
< 100; i++ { 30 | t.Log("Getting pool", i) 31 | item, _ := pool.Get().(*Item) 32 | if item == nil { 33 | t.Log("Pool is full") 34 | break 35 | } 36 | t.Log("Got item", item) 37 | items = append(items, item) 38 | } 39 | 40 | for i := 0; i < len(items); i++ { 41 | t.Log("Putting item", i) 42 | pool.Put(items[i]) 43 | } 44 | 45 | t.Log("Closing the pool") 46 | pool.Close() 47 | } 48 | 49 | func Test_basepool_002(t *testing.T) { 50 | t.Log("Creating pool with max=100") 51 | var pool = pool.NewPool(100, func() any { 52 | return &Item{t, false} 53 | }) 54 | 55 | var wg sync.WaitGroup 56 | for i := 0; i < 120; i++ { 57 | wg.Add(1) 58 | go func(i int) { 59 | defer wg.Done() 60 | t.Log("Getting pool", i) 61 | item, _ := pool.Get().(*Item) 62 | if item == nil { 63 | t.Log("Pool is full") 64 | } 65 | t.Log("Got item", item) 66 | defer pool.Put(item) 67 | }(i) 68 | } 69 | 70 | wg.Wait() 71 | 72 | t.Log("Closing the pool") 73 | pool.Close() 74 | } 75 | -------------------------------------------------------------------------------- /pkg/schema/model.go: -------------------------------------------------------------------------------- 1 | package schema 2 | 3 | import ( 4 | "encoding/json" 5 | ) 6 | 7 | ////////////////////////////////////////////////////////////////////////////// 8 | // TYPES 9 | 10 | type Model struct { 11 | Id string `json:"id" writer:",width:28,wrap"` 12 | Object string `json:"object,omitempty" writer:"-"` 13 | Path string `json:"path,omitempty" writer:",width:40,wrap"` 14 | Created int64 `json:"created,omitempty"` 15 | OwnedBy string `json:"owned_by,omitempty"` 16 | } 17 | 18 | ////////////////////////////////////////////////////////////////////////////// 19 | // STRINGIFY 20 | 21 | func (m *Model) String() string { 22 | data, err := json.MarshalIndent(m, "", " ") 23 | if err != nil { 24 | return err.Error() 25 | } 26 | return string(data) 27 | } 28 | -------------------------------------------------------------------------------- /pkg/schema/segment.go: 
// Timestamp is a duration which is represented in JSON as a number of
// seconds.
type Timestamp time.Duration

// MarshalJSON encodes the timestamp as a floating point number of seconds.
func (t Timestamp) MarshalJSON() ([]byte, error) {
	// We convert durations into float64 seconds
	return json.Marshal(time.Duration(t).Seconds())
}

// UnmarshalJSON decodes a timestamp from a JSON number of seconds, which is
// the form written by MarshalJSON. Without it, a fractional value cannot be
// decoded into the integer-based type, and a whole number would be read as
// nanoseconds.
func (t *Timestamp) UnmarshalJSON(data []byte) error {
	var secs float64
	if err := json.Unmarshal(data, &secs); err != nil {
		return err
	}

	// Round to the nearest nanosecond so a marshalled value decodes to the
	// same timestamp
	ns := secs * float64(time.Second)
	if ns < 0 {
		ns -= 0.5
	} else {
		ns += 0.5
	}
	*t = Timestamp(ns)
	return nil
}
whisper.NewClient(modelUrl); client == nil { 61 | return nil, ErrInternalAppError 62 | } else { 63 | store.client = client 64 | } 65 | 66 | // Return success 67 | return store, nil 68 | } 69 | 70 | ////////////////////////////////////////////////////////////////////////////// 71 | // STRINGIFY 72 | 73 | func (s *Store) MarshalJSON() ([]byte, error) { 74 | modelNames := func() []string { 75 | result := make([]string, len(s.models)) 76 | for i, model := range s.models { 77 | result[i] = model.Id 78 | } 79 | return result 80 | } 81 | return json.Marshal(struct { 82 | Path string `json:"path"` 83 | Ext string `json:"ext,omitempty"` 84 | Models []string `json:"models"` 85 | }{ 86 | Path: s.path, 87 | Ext: s.ext, 88 | Models: modelNames(), 89 | }) 90 | } 91 | 92 | func (s *Store) String() string { 93 | data, err := json.MarshalIndent(s, "", " ") 94 | if err != nil { 95 | return err.Error() 96 | } 97 | return string(data) 98 | } 99 | 100 | ////////////////////////////////////////////////////////////////////////////// 101 | // PUBLIC METHODS 102 | 103 | // Return the models 104 | func (s *Store) List() []*schema.Model { 105 | s.RLock() 106 | defer s.RUnlock() 107 | return s.models 108 | } 109 | 110 | // Rescan models directory 111 | func (s *Store) Rescan() error { 112 | s.Lock() 113 | defer s.Unlock() 114 | if models, err := listModels(s.path, s.ext); err != nil { 115 | return err 116 | } else { 117 | s.models = models 118 | } 119 | return nil 120 | } 121 | 122 | // Return a model by its Id 123 | func (s *Store) ById(id string) *schema.Model { 124 | s.RLock() 125 | defer s.RUnlock() 126 | 127 | for _, model := range s.models { 128 | if model.Id == id { 129 | return model 130 | } 131 | } 132 | return nil 133 | } 134 | 135 | // Return a model by path 136 | func (s *Store) ByPath(path string) *schema.Model { 137 | s.RLock() 138 | defer s.RUnlock() 139 | for _, model := range s.models { 140 | if model.Path == path { 141 | return model 142 | } 143 | } 144 | return nil 145 | } 
146 | 147 | // Delete a model by its Id 148 | func (s *Store) Delete(id string) error { 149 | model := s.ById(id) 150 | if model == nil { 151 | return ErrNotFound.Withf("%q", id) 152 | } 153 | 154 | // Lock the store 155 | s.Lock() 156 | defer s.Unlock() 157 | 158 | // Delete the model 159 | path := filepath.Join(s.path, model.Path) 160 | if err := os.Remove(path); err != nil { 161 | return err 162 | } 163 | 164 | // Rescan the models directory 165 | if models, err := listModels(s.path, s.ext); err != nil { 166 | return err 167 | } else { 168 | s.models = models 169 | } 170 | 171 | // Return success 172 | return nil 173 | } 174 | 175 | // Download a model to the models directory. If the model already exists, it will be returned 176 | // without downloading. The destination directory is relative to the models directory. 177 | // 178 | // A function can be provided to track the progress of the download. If no Content-Length is 179 | // provided by the server, the total bytes will be unknown and is set to zero. 
180 | func (s *Store) Download(ctx context.Context, path string, fn func(curBytes, totalBytes uint64)) (*schema.Model, error) { 181 | // abspath should be contained within the models directory 182 | abspath := filepath.Clean(filepath.Join(s.path, path)) 183 | if !strings.HasPrefix(abspath, s.path) { 184 | return nil, ErrBadParameter.With(path) 185 | } 186 | 187 | // Get the model by path relative to the models directory 188 | relpath, err := filepath.Rel(s.path, abspath) 189 | if err != nil { 190 | return nil, err 191 | } 192 | model := s.ByPath(relpath) 193 | if model != nil { 194 | return model, nil 195 | } 196 | 197 | // File extension should match the store extension 198 | if s.ext != "" && filepath.Ext(abspath) != s.ext { 199 | return nil, ErrBadParameter.Withf("Bad file extension: %q", filepath.Base(abspath)) 200 | } 201 | 202 | // Create the destination directory if it's not empty 203 | absdir := filepath.Dir(abspath) 204 | if info, err := os.Stat(absdir); errors.Is(err, os.ErrNotExist) { 205 | if err := os.MkdirAll(absdir, 0755); err != nil { 206 | return nil, err 207 | } 208 | } else if err != nil { 209 | return nil, err 210 | } else if !info.IsDir() { 211 | return nil, ErrBadParameter.With(path) 212 | } 213 | 214 | // Create the destination file 215 | f, err := os.Create(abspath) 216 | if err != nil { 217 | return nil, err 218 | } 219 | defer f.Close() 220 | 221 | // Download the model, with callback. 
If an error occurs, the model is deleted again 222 | if _, err := s.client.Get(ctx, &writer{Writer: f, fn: fn}, filepath.Base(abspath)); err != nil { 223 | return nil, errors.Join(toError(err), os.Remove(f.Name())) 224 | } 225 | 226 | // Rescan the models directory 227 | if err := s.Rescan(); err != nil { 228 | return nil, err 229 | } 230 | 231 | // Get a model by path 232 | model = s.ByPath(relpath) 233 | if model == nil { 234 | return nil, ErrNotFound.With(relpath) 235 | } 236 | 237 | // Return success 238 | return model, nil 239 | } 240 | 241 | ////////////////////////////////////////////////////////////////////////////// 242 | // PRIVATE METHODS 243 | 244 | // Convert 404 errors to ErrNotFound 245 | func toError(err error) error { 246 | if err == nil { 247 | return nil 248 | } 249 | switch err := err.(type) { 250 | case *whisper.HTTPError: 251 | if err.Code == http.StatusNotFound { 252 | return ErrNotFound.With(err.Message) 253 | } 254 | } 255 | return err 256 | } 257 | 258 | func listModels(path, ext string) ([]*schema.Model, error) { 259 | result := make([]*schema.Model, 0, 100) 260 | 261 | // Walk filesystem 262 | return result, fs.WalkDir(os.DirFS(path), ".", func(path string, d fs.DirEntry, err error) error { 263 | if err != nil { 264 | return err 265 | } 266 | if d.IsDir() { 267 | return nil 268 | } 269 | 270 | // Ignore hidden files or files without a .bin extension 271 | if strings.HasPrefix(d.Name(), ".") { 272 | return nil 273 | } 274 | if ext != "" && filepath.Ext(d.Name()) != ext { 275 | return nil 276 | } 277 | 278 | // Ignore files we can't get information on 279 | info, err := d.Info() 280 | if err != nil { 281 | return nil 282 | } 283 | 284 | // Ignore non-regular files 285 | if !d.Type().IsRegular() { 286 | return nil 287 | } 288 | 289 | // Ignore files less than 8MB 290 | if info.Size() < 8*1024*1024 { 291 | return nil 292 | } 293 | 294 | // Get model information 295 | model := new(schema.Model) 296 | model.Object = "model" 297 | model.Path = 
// writer wraps an io.Writer and keeps track of how many bytes have been
// written so far, optionally reporting progress through a callback.
type writer struct {
	io.Writer

	// Current and total bytes
	curBytes, totalBytes uint64

	// Callback function, invoked after every successful write (may be nil)
	fn func(curBytes, totalBytes uint64)
}

// Write passes p to the wrapped writer, accumulates the number of bytes
// actually written and reports progress to the callback. Any error from the
// wrapped writer is returned to the caller, so a failed or short write is
// never reported as success.
func (w *writer) Write(p []byte) (int, error) {
	n, err := w.Writer.Write(p)
	if n > 0 {
		// Count partial writes too, so curBytes reflects what was written
		w.curBytes += uint64(n)
	}
	if err != nil {
		return n, err
	}
	if w.fn != nil {
		w.fn(w.curBytes, w.totalBytes)
	}
	return n, nil
}

// Header records the total number of bytes expected from the Content-Length
// header, if present. It returns an error when the header value is not a
// valid unsigned integer, and leaves the total unchanged when the header is
// absent.
func (w *writer) Header(h http.Header) error {
	contentLength := h.Get("Content-Length")
	if contentLength == "" {
		return nil
	}
	total, err := strconv.ParseUint(contentLength, 10, 64)
	if err != nil {
		return err
	}
	w.totalBytes = total
	return nil
}
whisper.Whisper_init_from_file_with_params(filepath.Join(path, model.Path), params) 73 | if ctx == nil { 74 | return ErrInternalAppError.With("whisper_init") 75 | } 76 | 77 | // Set resources 78 | m.whisper = ctx 79 | m.model = model.Id 80 | 81 | // Return success 82 | return nil 83 | } 84 | 85 | // Close the context and release all resources. The context 86 | // itself can be re-used by calling Init again 87 | func (ctx *Context) Close() error { 88 | // Do nothing if nil 89 | if ctx == nil { 90 | return nil 91 | } 92 | 93 | // Release resources 94 | if ctx.whisper != nil { 95 | whisper.Whisper_free(ctx.whisper) 96 | } 97 | ctx.whisper = nil 98 | ctx.model = "" 99 | 100 | // Return success 101 | return nil 102 | } 103 | 104 | ////////////////////////////////////////////////////////////////////////////// 105 | // STRINGIFY 106 | 107 | func (ctx *Context) MarshalJSON() ([]byte, error) { 108 | type j struct { 109 | Model string `json:"model"` 110 | Params whisper.FullParams `json:"params"` 111 | Context string `json:"context"` 112 | } 113 | return json.Marshal(j{ 114 | Model: ctx.model, 115 | Params: ctx.params, 116 | Context: fmt.Sprintf("%p", ctx.whisper), 117 | }) 118 | } 119 | 120 | func (ctx *Context) String() string { 121 | data, err := json.MarshalIndent(ctx, "", " ") 122 | if err != nil { 123 | return err.Error() 124 | } 125 | return string(data) 126 | } 127 | 128 | ////////////////////////////////////////////////////////////////////////////// 129 | // PUBLIC METHODS 130 | 131 | // Context has a loaded model that matches the argument 132 | func (ctx *Context) Is(model *schema.Model) bool { 133 | if ctx.model == "" { 134 | return false 135 | } 136 | if model == nil { 137 | return false 138 | } 139 | return ctx.model == model.Id 140 | } 141 | 142 | // Reset task context for re-use 143 | func (task *Context) CopyParams() { 144 | task.params = whisper.DefaultFullParams(whisper.SAMPLING_GREEDY) 145 | task.params.SetLanguage("auto") 146 | task.result = 
new(schema.Transcription) 147 | } 148 | 149 | // Model is multilingual and can translate 150 | func (task *Context) CanTranslate() bool { 151 | return whisper.Whisper_is_multilingual(task.whisper) 152 | } 153 | 154 | // Transcribe samples. The samples should be 16KHz float32 samples in 155 | // a single channel. Appends the transcription to the result, and includes 156 | // segment data if the new segment function is not nil 157 | func (task *Context) Transcribe(ctx context.Context, ts time.Duration, samples []float32, fn NewSegmentFunc) error { 158 | // Set the 'abort' function 159 | task.params.SetAbortCallback(task.whisper, func() bool { 160 | select { 161 | case <-ctx.Done(): 162 | return true 163 | default: 164 | return false 165 | } 166 | }) 167 | 168 | // Set the new segment function 169 | if fn != nil { 170 | task.params.SetSegmentCallback(task.whisper, func(new_segments int) { 171 | num_segments := task.whisper.NumSegments() 172 | offset := len(task.result.Segments) 173 | for i := num_segments - new_segments; i < num_segments; i++ { 174 | fn(newSegment(ts, int32(offset), task.whisper.Segment(i))) 175 | } 176 | }) 177 | } 178 | 179 | // TODO: Set the initial prompt tokens from any previous transcription call 180 | 181 | // Perform the transcription 182 | if err := whisper.Whisper_full(task.whisper, task.params, samples); err != nil { 183 | if ctx.Err() != nil { 184 | return ctx.Err() 185 | } else { 186 | return err 187 | } 188 | } 189 | 190 | // Remove the callbacks 191 | task.params.SetAbortCallback(task.whisper, nil) 192 | task.params.SetSegmentCallback(task.whisper, nil) 193 | 194 | // Append the transcription 195 | task.appendResult(ts, fn != nil) 196 | 197 | // Return success 198 | return nil 199 | } 200 | 201 | // Set the language. For transcription, this is the language of the 202 | // audio samples. For translation, this is the language to translate 203 | // to. 
If you set this to "auto" then the language will be detected 204 | func (ctx *Context) SetLanguage(v string) error { 205 | if v == "" || v == "auto" { 206 | ctx.params.SetLanguage("auto") 207 | return nil 208 | } 209 | id := whisper.Whisper_lang_id(v) 210 | if id == -1 { 211 | return ErrBadParameter.Withf("invalid language: %q", v) 212 | } 213 | ctx.params.SetLanguage(v) 214 | return nil 215 | } 216 | 217 | func (ctx *Context) Language() string { 218 | return ctx.params.Language() 219 | } 220 | 221 | // Set translate to true or false 222 | func (ctx *Context) SetTranslate(v bool) { 223 | ctx.params.SetTranslate(v) 224 | } 225 | 226 | // Return the translate flag 227 | func (ctx *Context) Translate() bool { 228 | return ctx.params.Translate() 229 | } 230 | 231 | // Set diarize flag 232 | func (ctx *Context) SetDiarize(v bool) { 233 | ctx.params.SetDiarize(v) 234 | } 235 | 236 | // Return the diarize flag 237 | func (ctx *Context) Diarize() bool { 238 | return ctx.params.Diarize() 239 | } 240 | 241 | // Return the transcription result 242 | func (ctx *Context) Result() *schema.Transcription { 243 | return ctx.result 244 | } 245 | 246 | ////////////////////////////////////////////////////////////////////////////// 247 | // PRIVATE METHODS 248 | 249 | func (ctx *Context) appendResult(ts time.Duration, segments bool) { 250 | offset := len(ctx.result.Segments) 251 | 252 | // Append text 253 | for i := 0; i < ctx.whisper.NumSegments(); i++ { 254 | seg := ctx.whisper.Segment(i) 255 | ctx.result.Text += seg.Text 256 | } 257 | if segments { 258 | // Append segments 259 | for i := 0; i < ctx.whisper.NumSegments(); i++ { 260 | ctx.result.Segments = append(ctx.result.Segments, newSegment(ts, int32(offset), ctx.whisper.Segment(i))) 261 | } 262 | } 263 | } 264 | -------------------------------------------------------------------------------- /pkg/task/transcription.go: -------------------------------------------------------------------------------- 1 | package task 2 | 3 | import 
// tsToSrt formats a duration as an SRT timestamp, "HH:MM:SS,mmm".
func tsToSrt(ts time.Duration) string {
	return tsFormat(ts, ',')
}

// tsToVtt formats a duration as a WebVTT timestamp, "HH:MM:SS.mmm".
func tsToVtt(ts time.Duration) string {
	return tsFormat(ts, '.')
}

// tsFormat splits a duration into hours, minutes, seconds and milliseconds
// and joins the seconds and milliseconds with sep. Negative durations are
// clamped to zero, since a signed component would produce a malformed
// timestamp. Hours are not wrapped, so values of 100 hours or more are
// written with more than two digits.
func tsFormat(ts time.Duration, sep rune) string {
	if ts < 0 {
		ts = 0
	}
	hours := int64(ts / time.Hour)
	minutes := int64(ts % time.Hour / time.Minute)
	seconds := int64(ts % time.Minute / time.Second)
	milliseconds := int64(ts % time.Second / time.Millisecond)
	return fmt.Sprintf("%02d:%02d:%02d%c%03d", hours, minutes, seconds, sep, milliseconds)
}
/samples/en-audiobook.mp3: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mutablelogic/go-whisper/678cd8e86b7118bca6a67058640f028388e7221e/samples/en-audiobook.mp3 -------------------------------------------------------------------------------- /samples/en-office.mp3: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mutablelogic/go-whisper/678cd8e86b7118bca6a67058640f028388e7221e/samples/en-office.mp3 -------------------------------------------------------------------------------- /samples/jfk.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mutablelogic/go-whisper/678cd8e86b7118bca6a67058640f028388e7221e/samples/jfk.wav -------------------------------------------------------------------------------- /sys/pkg-config/main.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "flag" 5 | "fmt" 6 | "os" 7 | "path/filepath" 8 | ) 9 | 10 | var ( 11 | flagDir = flag.String("dir", "${PKG_CONFIG_PATH}", "Destination directory") 12 | flagPrefix = flag.String("prefix", "", "Prefix for the package") 13 | flagVersion = flag.String("version", "", "Version for the package") 14 | flagDescription = flag.String("description", "", "Description for the package") 15 | flagCompileFlags = flag.String("cflags", "", "Compiler flag") 16 | flagLinkerFlags = flag.String("libs", "", "Linker flags") 17 | ) 18 | 19 | func main() { 20 | flag.Parse() 21 | if flag.NArg() != 1 { 22 | fmt.Fprintln(os.Stderr, "Missing filename") 23 | os.Exit(-1) 24 | } 25 | dest := filepath.Join(os.ExpandEnv(*flagDir), flag.Arg(0)) 26 | 27 | var prefix string 28 | if *flagPrefix != "" { 29 | var err error 30 | prefix, err = filepath.Abs(*flagPrefix) 31 | if err != nil { 32 | fmt.Fprintln(os.Stderr, err) 33 | os.Exit(-1) 34 | } 35 | } 36 | 37 | w, 
err := os.Create(dest) 38 | if err != nil { 39 | fmt.Fprintln(os.Stderr, err) 40 | os.Exit(-1) 41 | } 42 | defer w.Close() 43 | 44 | // Write the package 45 | if prefix != "" { 46 | fmt.Fprintf(w, "prefix=%s\n\n", prefix) 47 | } 48 | fmt.Fprintf(w, "Name: %s\n", filepath.Base(dest)) 49 | if *flagDescription != "" { 50 | fmt.Fprintf(w, "Description: %s\n", *flagDescription) 51 | } else { 52 | fmt.Fprintf(w, "Description: No description\n") 53 | } 54 | if *flagVersion != "" { 55 | fmt.Fprintf(w, "Version: %s\n", *flagVersion) 56 | } 57 | if *flagCompileFlags != "" { 58 | fmt.Fprintf(w, "Cflags: %s\n", *flagCompileFlags) 59 | } 60 | if *flagLinkerFlags != "" { 61 | fmt.Fprintf(w, "Libs: %s\n", *flagLinkerFlags) 62 | } 63 | } 64 | -------------------------------------------------------------------------------- /sys/whisper/alignment_aheads_preset.go: -------------------------------------------------------------------------------- 1 | package whisper 2 | 3 | /* 4 | #cgo pkg-config: libwhisper 5 | #include 6 | */ 7 | import "C" 8 | 9 | type AlignmentAheadsPreset C.enum_whisper_alignment_heads_preset 10 | 11 | const ( 12 | AlignmentAheadsPresetNone = C.WHISPER_AHEADS_NONE 13 | AlignmentAheadsPresetNTopMost = C.WHISPER_AHEADS_N_TOP_MOST 14 | AlignmentAheadsPresetCustom = C.WHISPER_AHEADS_CUSTOM 15 | AlignmentAheadsPresetTinyEn = C.WHISPER_AHEADS_TINY_EN 16 | AlignmentAheadsPresetTiny = C.WHISPER_AHEADS_TINY 17 | AlignmentAheadsPresetBaseEn = C.WHISPER_AHEADS_BASE_EN 18 | AlignmentAheadsPresetBase = C.WHISPER_AHEADS_BASE 19 | AlignmentAheadsPresetSmallEn = C.WHISPER_AHEADS_SMALL_EN 20 | AlignmentAheadsPresetSmall = C.WHISPER_AHEADS_SMALL 21 | AlignmentAheadsPresetMediumEn = C.WHISPER_AHEADS_MEDIUM_EN 22 | AlignmentAheadsPresetMedium = C.WHISPER_AHEADS_MEDIUM 23 | AlignmentAheadsPresetLargeV1 = C.WHISPER_AHEADS_LARGE_V1 24 | AlignmentAheadsPresetLargeV2 = C.WHISPER_AHEADS_LARGE_V2 25 | AlignmentAheadsPresetLargeV3 = C.WHISPER_AHEADS_LARGE_V3 26 | ) 27 | 
-------------------------------------------------------------------------------- /sys/whisper/contextparams.go: -------------------------------------------------------------------------------- 1 | package whisper 2 | 3 | import "encoding/json" 4 | 5 | /////////////////////////////////////////////////////////////////////////////// 6 | // CGO 7 | 8 | /* 9 | #cgo pkg-config: libwhisper 10 | #include 11 | */ 12 | import "C" 13 | 14 | /////////////////////////////////////////////////////////////////////////////// 15 | // TYPES 16 | 17 | type ( 18 | ContextParams C.struct_whisper_context_params 19 | ) 20 | 21 | /////////////////////////////////////////////////////////////////////////////// 22 | // LIFECYCLE 23 | 24 | func DefaultContextParams() ContextParams { 25 | return (ContextParams)(C.whisper_context_default_params()) 26 | } 27 | 28 | /////////////////////////////////////////////////////////////////////////////// 29 | // STRINGIFY 30 | 31 | func (ctx ContextParams) MarshalJSON() ([]byte, error) { 32 | type j struct { 33 | UseGpu bool `json:"use_gpu"` 34 | GpuDevice int `json:"gpu_device"` 35 | FlashAttn bool `json:"flash_attn"` 36 | TokenTimestamps bool `json:"dtw_token_timestamps"` 37 | } 38 | return json.Marshal(j{ 39 | UseGpu: bool(ctx.use_gpu), 40 | GpuDevice: int(ctx.gpu_device), 41 | FlashAttn: bool(ctx.flash_attn), 42 | TokenTimestamps: bool(ctx.dtw_token_timestamps), 43 | }) 44 | } 45 | 46 | func (ctx ContextParams) String() string { 47 | str, err := json.MarshalIndent(ctx, "", " ") 48 | if err != nil { 49 | return err.Error() 50 | } 51 | return string(str) 52 | } 53 | 54 | /////////////////////////////////////////////////////////////////////////////// 55 | // PUBLIC METHODS 56 | 57 | func (ctx *ContextParams) UseGpu() bool { 58 | return bool(ctx.use_gpu) 59 | } 60 | 61 | func (ctx *ContextParams) SetUseGpu(v bool) { 62 | ctx.use_gpu = (C.bool)(v) 63 | } 64 | 65 | func (ctx *ContextParams) GpuDevice() int { 66 | return int(ctx.gpu_device) 67 | } 68 | 69 | 
func (ctx *ContextParams) SetGpuDevice(v int) { 70 | ctx.gpu_device = (C.int)(v) 71 | } 72 | 73 | func (ctx *ContextParams) FlashAttn() bool { 74 | return bool(ctx.flash_attn) 75 | } 76 | 77 | func (ctx *ContextParams) SetFlashAttn(v bool) { 78 | ctx.flash_attn = (C.bool)(v) 79 | } 80 | 81 | func (ctx *ContextParams) TokenTimestamps() bool { 82 | return bool(ctx.dtw_token_timestamps) 83 | } 84 | 85 | func (ctx *ContextParams) SetTokenTimestamps(v bool) { 86 | ctx.dtw_token_timestamps = (C.bool)(v) 87 | } 88 | 89 | func (ctx *ContextParams) DTWAheadsPreset() AlignmentAheadsPreset { 90 | return AlignmentAheadsPreset(ctx.dtw_aheads_preset) 91 | } 92 | 93 | func (ctx *ContextParams) SetDTWAheadsPreset(v AlignmentAheadsPreset) { 94 | ctx.dtw_aheads_preset = (C.enum_whisper_alignment_heads_preset)(v) 95 | } 96 | 97 | func (ctx *ContextParams) DTWNTop() int { 98 | return int(ctx.dtw_n_top) 99 | } 100 | 101 | func (ctx *ContextParams) SetDTWNTop(nTop int) { 102 | ctx.dtw_n_top = (C.int)(nTop) 103 | } 104 | 105 | func (ctx *ContextParams) DTWMemSize() uintptr { 106 | return uintptr(ctx.dtw_mem_size) 107 | } 108 | 109 | func (ctx *ContextParams) SetDTWMemSize(memSize uintptr) { 110 | ctx.dtw_mem_size = (C.size_t)(memSize) 111 | } 112 | -------------------------------------------------------------------------------- /sys/whisper/contextparams_test.go: -------------------------------------------------------------------------------- 1 | package whisper_test 2 | 3 | import ( 4 | "testing" 5 | 6 | // Packages 7 | "github.com/mutablelogic/go-whisper/sys/whisper" 8 | ) 9 | 10 | func Test_contextparams_00(t *testing.T) { 11 | params := whisper.DefaultContextParams() 12 | t.Log(params) 13 | } 14 | -------------------------------------------------------------------------------- /sys/whisper/error.go: -------------------------------------------------------------------------------- 1 | package whisper 2 | 3 | import "errors" 4 | 5 | var ( 6 | ErrTranscriptionFailed = 
// Called with the progress value reported by whisper during processing
// (presumably a percentage - TODO confirm against whisper.h)
type ProgressCallback func(progress int)

// Called with the number of new segments that have been produced
type SegmentCallback func(segment int)

// If it returns true, the computation is aborted
type AbortCallback func() bool
/////////////////////////////////////////////////////////////////////////////// 55 | // GLOBALS 56 | 57 | const ( 58 | SAMPLING_GREEDY SamplingStrategy = C.WHISPER_SAMPLING_GREEDY // similar to OpenAI's GreedyDecoder 59 | SAMPLING_BEAM_SEARCH SamplingStrategy = C.WHISPER_SAMPLING_BEAM_SEARCH // similar to OpenAI's BeamSearchDecoder 60 | ) 61 | 62 | var ( 63 | // Map a uintptr context to a callback 64 | progressCb = map[uint]ProgressCallback{} 65 | segmentCb = map[uint]SegmentCallback{} 66 | abortCb = map[uint]AbortCallback{} 67 | ) 68 | 69 | /////////////////////////////////////////////////////////////////////////////// 70 | // LIFECYCLE 71 | 72 | func DefaultFullParams(strategy SamplingStrategy) FullParams { 73 | params := (FullParams)(C.whisper_full_default_params((C.enum_whisper_sampling_strategy)(strategy))) 74 | C.set_callbacks((*C.struct_whisper_full_params)(¶ms), C.bool(true)) 75 | 76 | params.entropy_thold = 2.40 77 | params.greedy.best_of = 5 78 | params.beam_search.beam_size = 5 79 | params.audio_ctx = 0 80 | params.split_on_word = C.bool(false) 81 | params.entropy_thold = 2.40 82 | params.logprob_thold = -1.0 83 | params.no_speech_thold = 0.60 84 | params.temperature = 0.0 85 | params.temperature_inc = 0.2 86 | 87 | return params 88 | } 89 | 90 | /////////////////////////////////////////////////////////////////////////////// 91 | // STRINGIFY 92 | 93 | func (ctx FullParams) MarshalJSON() ([]byte, error) { 94 | type j struct { 95 | Strategy SamplingStrategy `json:"strategy"` 96 | NumThreads int `json:"n_threads,omitempty"` 97 | MaxTextCtx int `json:"n_max_text_ctx,omitempty"` // max tokens to use from past text as prompt for the decoder 98 | OffsetMS int `json:"offset_ms,omitempty"` // start offset in ms 99 | DurationMS int `json:"duration_ms,omitempty"` // audio duration to process in ms 100 | Translate bool `json:"translate,omitempty"` 101 | NoContext bool `json:"no_context,omitempty"` // do not use past transcription (if any) as initial prompt for the 
decoder 102 | NoTimestamps bool `json:"no_timestamps,omitempty"` // do not generate timestamps 103 | SingleSegment bool `json:"single_segment,omitempty"` // force single segment output (useful for streaming) 104 | PrintSpecial bool `json:"print_special,omitempty"` // print special tokens (e.g. , , , etc.) 105 | PrintProgress bool `json:"print_progress,omitempty"` // print progress information 106 | PrintRealtime bool `json:"print_realtime,omitempty"` // print results from within whisper.cpp (avoid it, use callback instead) 107 | PrintTimestamps bool `json:"print_timestamps,omitempty"` // print timestamps for each text segment when printing realtime 108 | TokenTimestamps bool `json:"token_timestamps,omitempty"` // enable token-level timestamps 109 | TholdPt int `json:"thold_pt,omitempty"` // timestamp token probability threshold (~0.01) 110 | TholdPtsum int `json:"thold_ptsum,omitempty"` // timestamp token sum probability threshold (~0.01) 111 | MaxLen int `json:"max_len,omitempty"` // max segment length in characters 112 | SplitOnWord bool `json:"split_on_word,omitempty"` // split on word rather than on token (when used with max_len) 113 | MaxTokens int `json:"max_tokens,omitempty"` // max tokens per segment (0 = no limit) 114 | DebugMode bool `json:"debug_mode,omitempty"` // enable debug_mode provides extra info (eg. 
Dump log_mel) 115 | AudioCtx int `json:"audio_ctx,omitempty"` // overwrite the audio context size (0 = use default) 116 | DiarizeEnable bool `json:"tdrz_enable,omitempty"` // enable tinydiarize speaker turn detection 117 | SuppressRegex string `json:"suppress_regex,omitempty"` // A regular expression that matches tokens to suppress 118 | InitialPrompt string `json:"initial_prompt,omitempty"` // tokens to provide to the whisper decoder as initial prompt 119 | PromptTokens []string `json:"prompt_tokens,omitempty"` // use whisper_tokenize() to convert text to tokens 120 | Language string `json:"language,omitempty"` // for auto-detection, set to "" or "auto" 121 | DetectLanguage bool `json:"detect_language,omitempty"` 122 | SuppressBlank bool `json:"suppress_blank,omitempty"` // ref: https://github.com/openai/whisper/blob/f82bc59f5ea234d4b97fb2860842ed38519f7e65/whisper/decoding.py#L89 123 | SuppressNonSpeechTokens bool `json:"suppress_nst,omitempty"` // ref: https://github.com/openai/whisper/blob/7858aa9c08d98f75575035ecd6481f462d66ca27/whisper/tokenizer.py#L224-L253 124 | Temperature float32 `json:"temperature,omitempty"` // initial decoding temperature, ref: https://ai.stackexchange.com/a/32478 125 | MaxInitialTs float32 `json:"max_initial_ts,omitempty"` // ref: https://github.com/openai/whisper/blob/f82bc59f5ea234d4b97fb2860842ed38519f7e65/whisper/decoding.py#L97 126 | LengthPenalty float32 `json:"length_penalty,omitempty"` // ref: https://github.com/openai/whisper/blob/f82bc59f5ea234d4b97fb2860842ed38519f7e65/whisper/transcribe.py#L267 127 | TemperatureInc float32 `json:"temperature_inc,omitempty"` // ref: https://github.com/openai/whisper/blob/f82bc59f5ea234d4b97fb2860842ed38519f7e65/whisper/transcribe.py#L274-L278 128 | EntropyThreshold float32 `json:"entropy_thold,omitempty"` // similar to OpenAI's "compression_ratio_threshold" 129 | LogProbThreshold float32 `json:"logprob_thold,omitempty"` 130 | ProgressCallback uintptr `json:"progress_callback,omitempty"` 131 
| AbortCallback uintptr `json:"abort_callback,omitempty"` 132 | SegmentCallback uintptr `json:"segment_callback,omitempty"` 133 | } 134 | return json.Marshal(j{ 135 | Strategy: SamplingStrategy(ctx.strategy), 136 | NumThreads: int(ctx.n_threads), 137 | MaxTextCtx: int(ctx.n_max_text_ctx), 138 | OffsetMS: int(ctx.offset_ms), 139 | DurationMS: int(ctx.duration_ms), 140 | Translate: bool(ctx.translate), 141 | NoContext: bool(ctx.no_context), 142 | NoTimestamps: bool(ctx.no_timestamps), 143 | SingleSegment: bool(ctx.single_segment), 144 | PrintSpecial: bool(ctx.print_special), 145 | PrintProgress: bool(ctx.print_progress), 146 | PrintRealtime: bool(ctx.print_realtime), 147 | PrintTimestamps: bool(ctx.print_timestamps), 148 | TholdPt: int(ctx.thold_pt), 149 | TholdPtsum: int(ctx.thold_ptsum), 150 | MaxLen: int(ctx.max_len), 151 | SplitOnWord: bool(ctx.split_on_word), 152 | MaxTokens: int(ctx.max_tokens), 153 | DebugMode: bool(ctx.debug_mode), 154 | AudioCtx: int(ctx.audio_ctx), 155 | DiarizeEnable: bool(ctx.tdrz_enable), 156 | SuppressRegex: C.GoString(ctx.suppress_regex), 157 | InitialPrompt: C.GoString(ctx.initial_prompt), 158 | PromptTokens: nil, // TODO 159 | Language: C.GoString(ctx.language), 160 | DetectLanguage: bool(ctx.detect_language), 161 | SuppressBlank: bool(ctx.suppress_blank), 162 | SuppressNonSpeechTokens: bool(ctx.suppress_nst), 163 | Temperature: float32(ctx.temperature), 164 | MaxInitialTs: float32(ctx.max_initial_ts), 165 | LengthPenalty: float32(ctx.length_penalty), 166 | TemperatureInc: float32(ctx.temperature_inc), 167 | EntropyThreshold: float32(ctx.entropy_thold), 168 | LogProbThreshold: float32(ctx.logprob_thold), 169 | ProgressCallback: uintptr(ctx.progress_callback_user_data), 170 | AbortCallback: uintptr(ctx.abort_callback_user_data), 171 | SegmentCallback: uintptr(ctx.new_segment_callback_user_data), 172 | }) 173 | } 174 | 175 | func (ctx FullParams) String() string { 176 | str, err := json.MarshalIndent(ctx, "", " ") 177 | if err != nil { 
178 | return err.Error() 179 | } 180 | return string(str) 181 | } 182 | 183 | func (v SamplingStrategy) MarshalJSON() ([]byte, error) { 184 | return json.Marshal(v.String()) 185 | } 186 | 187 | func (v SamplingStrategy) String() string { 188 | switch v { 189 | case SAMPLING_GREEDY: 190 | return "SAMPLING_GREEDY" 191 | case SAMPLING_BEAM_SEARCH: 192 | return "SAMPLING_BEAM_SEARCH" 193 | default: 194 | return "???" 195 | } 196 | } 197 | 198 | /////////////////////////////////////////////////////////////////////////////// 199 | // PUBLIC METHODS 200 | 201 | func (c *FullParams) SetNumThreads(v int) { 202 | c.n_threads = (C.int)(v) 203 | } 204 | 205 | func (c *FullParams) SetMaxTextCtx(v int) { 206 | c.n_max_text_ctx = (C.int)(v) 207 | } 208 | 209 | func (c *FullParams) SetOffsetMS(v int) { 210 | c.offset_ms = (C.int)(v) 211 | } 212 | 213 | func (c *FullParams) SetDurationMS(v int) { 214 | c.duration_ms = (C.int)(v) 215 | } 216 | 217 | func (c *FullParams) SetNoContext(v bool) { 218 | c.no_context = (C.bool)(v) 219 | } 220 | 221 | func (c *FullParams) SetNoTimestamps(v bool) { 222 | c.no_timestamps = (C.bool)(v) 223 | } 224 | 225 | func (c *FullParams) SetSingleSegment(v bool) { 226 | c.single_segment = (C.bool)(v) 227 | } 228 | 229 | func (c *FullParams) SetPrintSpecial(v bool) { 230 | c.print_special = (C.bool)(v) 231 | } 232 | 233 | func (c *FullParams) SetPrintProgress(v bool) { 234 | c.print_progress = (C.bool)(v) 235 | } 236 | 237 | func (c *FullParams) SetPrintRealtime(v bool) { 238 | c.print_realtime = (C.bool)(v) 239 | } 240 | 241 | func (c *FullParams) SetPrintTimestamps(v bool) { 242 | c.print_timestamps = (C.bool)(v) 243 | } 244 | 245 | func (c *FullParams) SetTokenTimestamps(v bool) { 246 | c.token_timestamps = (C.bool)(v) 247 | } 248 | 249 | func (c *FullParams) SetTranslate(v bool) { 250 | c.translate = (C.bool)(v) 251 | } 252 | 253 | func (c *FullParams) Translate() bool { 254 | return bool(c.translate) 255 | } 256 | 257 | func (c *FullParams) 
SetDiarize(v bool) { 258 | c.tdrz_enable = (C.bool)(v) 259 | } 260 | 261 | func (c *FullParams) Diarize() bool { 262 | return bool(c.tdrz_enable) 263 | } 264 | 265 | func (c *FullParams) SetLanguage(v string) { 266 | v = strings.ToLower(v) 267 | if v == "" || v == "auto" { 268 | c.language = nil 269 | } else if id := Whisper_lang_id(v); id == -1 { 270 | c.language = nil 271 | } else { 272 | c.language = C.whisper_lang_str(C.int(id)) 273 | } 274 | } 275 | 276 | func (c *FullParams) Language() string { 277 | v := C.GoString(c.language) 278 | if v == "" { 279 | return "auto" 280 | } 281 | return v 282 | } 283 | 284 | func (c *FullParams) SetProgressCallback(ctx *Context, cb ProgressCallback) { 285 | key := cbkey(unsafe.Pointer(ctx)) 286 | if cb == nil { 287 | c.progress_callback_user_data = nil 288 | delete(progressCb, key) 289 | } else { 290 | c.progress_callback_user_data = unsafe.Pointer(uintptr(key)) 291 | progressCb[key] = cb 292 | } 293 | } 294 | 295 | func (c *FullParams) SetSegmentCallback(ctx *Context, cb SegmentCallback) { 296 | key := cbkey(unsafe.Pointer(ctx)) 297 | if cb == nil { 298 | c.new_segment_callback_user_data = nil 299 | delete(segmentCb, key) 300 | } else { 301 | c.new_segment_callback_user_data = unsafe.Pointer(uintptr(key)) 302 | segmentCb[key] = cb 303 | } 304 | } 305 | 306 | func (c *FullParams) SetAbortCallback(ctx *Context, cb AbortCallback) { 307 | key := cbkey(unsafe.Pointer(ctx)) 308 | if cb == nil { 309 | c.abort_callback_user_data = nil 310 | delete(abortCb, key) 311 | } else { 312 | c.abort_callback_user_data = unsafe.Pointer(uintptr(key)) 313 | abortCb[key] = cb 314 | } 315 | } 316 | 317 | /////////////////////////////////////////////////////////////////////////////// 318 | // PRIVATE METHODS 319 | 320 | func cbkey(ptr unsafe.Pointer) uint { 321 | return uint(uintptr(ptr)) 322 | } 323 | 324 | //export whisper_progress_cb_ex 325 | func whisper_progress_cb_ex(ctx *C.struct_whisper_context, state *C.struct_whisper_state, progress 
C.int, user_data unsafe.Pointer) { 326 | key := cbkey(user_data) 327 | if cb, ok := progressCb[key]; ok { 328 | cb(int(progress)) 329 | } 330 | } 331 | 332 | //export whisper_segment_cb_ex 333 | func whisper_segment_cb_ex(ctx *C.struct_whisper_context, state *C.struct_whisper_state, n C.int, user_data unsafe.Pointer) { 334 | key := cbkey(user_data) 335 | if cb, ok := segmentCb[key]; ok { 336 | cb(int(n)) 337 | } 338 | } 339 | 340 | //export whisper_abort_cb_ex 341 | func whisper_abort_cb_ex(user_data unsafe.Pointer) C.bool { 342 | key := cbkey(user_data) 343 | if cb, ok := abortCb[key]; ok { 344 | return C.bool(cb()) 345 | } 346 | return C.bool(false) 347 | } 348 | -------------------------------------------------------------------------------- /sys/whisper/fullparams_test.go: -------------------------------------------------------------------------------- 1 | package whisper_test 2 | 3 | import ( 4 | "testing" 5 | 6 | // Packages 7 | "github.com/mutablelogic/go-whisper/sys/whisper" 8 | ) 9 | 10 | func Test_fullparams_00(t *testing.T) { 11 | var params = whisper.DefaultFullParams(whisper.SAMPLING_GREEDY) 12 | t.Log(params) 13 | } 14 | -------------------------------------------------------------------------------- /sys/whisper/generate.go: -------------------------------------------------------------------------------- 1 | package whisper 2 | 3 | /////////////////////////////////////////////////////////////////////////////// 4 | // CGO 5 | 6 | /* 7 | #cgo pkg-config: libwhisper 8 | #cgo linux pkg-config: libwhisper-linux 9 | #cgo darwin pkg-config: libwhisper-darwin 10 | */ 11 | import "C" 12 | 13 | // Generate the whisper pkg-config files 14 | // Setting the prefix to the base of the repository 15 | //go:generate go run ../pkg-config --version "0.0.0" --prefix "${PREFIX}" --cflags "-I$DOLLAR{prefix}/include" libwhisper.pc 16 | //go:generate go run ../pkg-config --version "0.0.0" --prefix "${PREFIX}" --cflags "-fopenmp" --libs "-L$DOLLAR{prefix}/lib -lwhisper 
-lggml -lggml-base -lggml-cpu -lgomp -lm -lstdc++" libwhisper-linux.pc 17 | //go:generate go run ../pkg-config --version "0.0.0" --prefix "${PREFIX}" --libs "-L$DOLLAR{prefix}/lib -lwhisper -lggml -lggml-base -lggml-cpu -lggml-blas -lggml-metal -lm -lstdc++ -framework Accelerate -framework Metal -framework Foundation -framework CoreGraphics" libwhisper-darwin.pc 18 | //go:generate go run ../pkg-config --version "0.0.0" --prefix "${PREFIX}" --libs "-L$DOLLAR{prefix}/lib -lggml-cuda" libwhisper-cuda.pc 19 | //go:generate go run ../pkg-config --version "0.0.0" --prefix "${PREFIX}" --libs "-L$DOLLAR{prefix}/lib -lvulkan -lggml-vulkan" libwhisper-vulkan.pc 20 | -------------------------------------------------------------------------------- /sys/whisper/generate_cuda.go: -------------------------------------------------------------------------------- 1 | //go:build cuda 2 | 3 | package whisper 4 | 5 | /////////////////////////////////////////////////////////////////////////////// 6 | // CGO 7 | 8 | /* 9 | #cgo pkg-config: libwhisper-cuda cuda-12.2 cublas-12.2 cudart-12.2 10 | */ 11 | import "C" 12 | -------------------------------------------------------------------------------- /sys/whisper/generate_vulkan.go: -------------------------------------------------------------------------------- 1 | //go:build vulkan 2 | 3 | package whisper 4 | 5 | /////////////////////////////////////////////////////////////////////////////// 6 | // CGO 7 | 8 | /* 9 | #cgo pkg-config: libwhisper-vulkan 10 | */ 11 | import "C" 12 | -------------------------------------------------------------------------------- /sys/whisper/logging.go: -------------------------------------------------------------------------------- 1 | package whisper 2 | 3 | import ( 4 | "unsafe" 5 | ) 6 | 7 | /////////////////////////////////////////////////////////////////////////////// 8 | // CGO 9 | 10 | /* 11 | #cgo pkg-config: libwhisper 12 | #include 13 | #include 14 | 15 | extern void callLog(enum ggml_log_level 
level,char* text, void* user_data); 16 | 17 | // Logging callback 18 | static void whisper_log_cb(enum ggml_log_level level, const char* text, void* user_data) { 19 | callLog(level, (char*)text, user_data); 20 | } 21 | 22 | // Set or unset logging callback 23 | static void whisper_log_set_ex(void* user_data) { 24 | if (user_data) { 25 | whisper_log_set(whisper_log_cb, user_data); 26 | } else { 27 | whisper_log_set(NULL, NULL); 28 | } 29 | } 30 | */ 31 | import "C" 32 | 33 | /////////////////////////////////////////////////////////////////////////////// 34 | // TYPES 35 | 36 | type ( 37 | LogLevel C.enum_ggml_log_level 38 | ) 39 | 40 | /////////////////////////////////////////////////////////////////////////////// 41 | // GLOBALS 42 | 43 | var ( 44 | cbLog func(level LogLevel, text string) 45 | ) 46 | 47 | const ( 48 | LogLevelDebug LogLevel = C.GGML_LOG_LEVEL_DEBUG 49 | LogLevelInfo LogLevel = C.GGML_LOG_LEVEL_INFO 50 | LogLevelWarn LogLevel = C.GGML_LOG_LEVEL_WARN 51 | LogLevelError LogLevel = C.GGML_LOG_LEVEL_ERROR 52 | ) 53 | 54 | /////////////////////////////////////////////////////////////////////////////// 55 | // STRINGIFY 56 | 57 | func (v LogLevel) String() string { 58 | switch v { 59 | case LogLevelDebug: 60 | return "DEBUG" 61 | case LogLevelInfo: 62 | return "INFO" 63 | case LogLevelWarn: 64 | return "WARN" 65 | case LogLevelError: 66 | return "ERROR" 67 | default: 68 | return "LOG" 69 | } 70 | } 71 | 72 | /////////////////////////////////////////////////////////////////////////////// 73 | // PUBLIC METHODS 74 | 75 | // Set logging output 76 | func Whisper_log_set(fn func(level LogLevel, text string)) { 77 | cbLog = fn 78 | if fn == nil { 79 | C.whisper_log_set_ex(nil) 80 | } else { 81 | C.whisper_log_set_ex((unsafe.Pointer)(uintptr(1))) 82 | } 83 | } 84 | 85 | // Call logging output 86 | func Whisper_log(level LogLevel, text string, user_data unsafe.Pointer) { 87 | cStr := C.CString(text) 88 | defer C.free(unsafe.Pointer(cStr)) 89 | 
C.whisper_log_cb((C.enum_ggml_log_level)(level), cStr, user_data) 90 | } 91 | 92 | /////////////////////////////////////////////////////////////////////////////// 93 | // PRIVATE METHODS 94 | 95 | //export callLog 96 | func callLog(level C.enum_ggml_log_level, text *C.char, user_data unsafe.Pointer) { 97 | if cbLog != nil { 98 | cbLog(LogLevel(level), C.GoString(text)) 99 | } 100 | } 101 | -------------------------------------------------------------------------------- /sys/whisper/model.go: -------------------------------------------------------------------------------- 1 | package whisper 2 | 3 | import ( 4 | "context" 5 | "fmt" 6 | "io" 7 | "net/http" 8 | "net/url" 9 | ) 10 | 11 | /////////////////////////////////////////////////////////////////////////////// 12 | // TYPES 13 | 14 | // HTTP client for downloading models, includes the root URL of the models 15 | type client struct { 16 | http.Client 17 | 18 | root *url.URL 19 | } 20 | 21 | // The client interface is used to download models 22 | type Client interface { 23 | // Get a file from the server, writing the response to the writer 24 | // and returning the number of bytes copied 25 | Get(ctx context.Context, w io.Writer, path string) (int64, error) 26 | } 27 | 28 | // If the writer contains a Header method, it can be used to set the 29 | // content type and length of the response, to measure progress 30 | type Writer interface { 31 | io.Writer 32 | 33 | // Returns the header of the response. 
If the return value is 34 | // not nil, then the Get method will end before the response 35 | // data is written 36 | Header(http.Header) error 37 | } 38 | 39 | // Body reader which can be used to read the response body 40 | // and return an error if the context is cancelled early 41 | type reader struct { 42 | io.Reader 43 | ctx context.Context 44 | } 45 | 46 | /////////////////////////////////////////////////////////////////////////////// 47 | // LIFECYCLE 48 | 49 | // Create a new client with the specified root URL for downloading models 50 | func NewClient(abspath string) *client { 51 | url, err := url.Parse(abspath) 52 | if err != nil { 53 | return nil 54 | } 55 | return &client{ 56 | root: url, 57 | } 58 | } 59 | 60 | /////////////////////////////////////////////////////////////////////////////// 61 | // PUBLIC METHODS 62 | 63 | // Get a model from the server, writing the response to the writer 64 | func (c *client) Get(ctx context.Context, w io.Writer, path string) (int64, error) { 65 | // Construct a URL 66 | url := resolveUrl(c.root, path) 67 | if url == nil { 68 | return 0, fmt.Errorf("invalid path: %s", path) 69 | } 70 | 71 | // Make a request 72 | req, err := http.NewRequestWithContext(ctx, http.MethodGet, url.String(), nil) 73 | if err != nil { 74 | return 0, err 75 | } 76 | 77 | // Perform the request 78 | response, err := c.Do(req) 79 | if err != nil { 80 | return 0, err 81 | } 82 | defer response.Body.Close() 83 | 84 | // Unexpected status code 85 | if response.StatusCode != http.StatusOK { 86 | return 0, &HTTPError{response.StatusCode, response.Status} 87 | } 88 | 89 | // Set response header 90 | if writer, ok := w.(Writer); ok { 91 | if err := writer.Header(response.Header); err != nil { 92 | return 0, err 93 | } 94 | } 95 | 96 | // Write the response, cancelling if the context is cancelled or deadline 97 | // is exceeded. 
Return number of bytes copied 98 | return io.Copy(w, &reader{response.Body, ctx}) 99 | } 100 | 101 | /////////////////////////////////////////////////////////////////////////////// 102 | // READER INTERFACE 103 | 104 | func (r *reader) Read(p []byte) (n int, err error) { 105 | select { 106 | case <-r.ctx.Done(): 107 | return 0, r.ctx.Err() 108 | default: 109 | return r.Reader.Read(p) 110 | } 111 | } 112 | 113 | /////////////////////////////////////////////////////////////////////////////// 114 | // PRIVATE METHODS 115 | 116 | func resolveUrl(base *url.URL, path string) *url.URL { 117 | // Check arguments 118 | if base == nil { 119 | return nil 120 | } 121 | if path == "" || path == "/" { 122 | return base 123 | } 124 | 125 | // Construct an absolute URL 126 | query := base.Query() 127 | rel := url.URL{Path: path} 128 | abs := base.ResolveReference(&rel) 129 | abs.RawQuery = query.Encode() 130 | 131 | // Return the absolute URL 132 | return abs 133 | } 134 | -------------------------------------------------------------------------------- /sys/whisper/model_test.go: -------------------------------------------------------------------------------- 1 | package whisper_test 2 | 3 | import ( 4 | "context" 5 | "net/http" 6 | "strconv" 7 | "testing" 8 | "time" 9 | 10 | // Packages 11 | "github.com/mutablelogic/go-whisper/sys/whisper" 12 | "github.com/stretchr/testify/assert" 13 | ) 14 | 15 | const MODEL_URL = "https://huggingface.co/ggerganov/whisper.cpp/resolve/main/?download=true" 16 | const MODEL_TINY = "ggml-tiny-q5_1.bin" 17 | const MODEL_MEDIUM = "ggml-medium-q5_0.bin" // approx 540MB 18 | 19 | func Test_model_001(t *testing.T) { 20 | assert := assert.New(t) 21 | client := whisper.NewClient(MODEL_URL) 22 | assert.NotNil(client) 23 | } 24 | 25 | func Test_model_002(t *testing.T) { 26 | assert := assert.New(t) 27 | client := whisper.NewClient(MODEL_URL) 28 | assert.NotNil(client) 29 | 30 | // Basic writer 31 | w := &writer{t: t} 32 | _, err := 
client.Get(context.Background(), w, MODEL_TINY) 33 | assert.NoError(err) 34 | } 35 | 36 | func Test_model_003(t *testing.T) { 37 | assert := assert.New(t) 38 | client := whisper.NewClient(MODEL_URL) 39 | assert.NotNil(client) 40 | 41 | // Cancel download after 1s 42 | ctx, cancel := context.WithTimeout(context.Background(), 1*time.Second) 43 | defer cancel() 44 | 45 | w := &writer{t: t} 46 | _, err := client.Get(ctx, w, MODEL_MEDIUM) 47 | assert.ErrorIs(context.DeadlineExceeded, err) 48 | } 49 | 50 | //////////////////////////////////////////////////////////////////////////////// 51 | // WRITER 52 | 53 | type writer struct { 54 | t *testing.T 55 | curBytes uint64 56 | totalBytes uint64 57 | } 58 | 59 | // Collect number of bytes written 60 | func (w *writer) Write(p []byte) (n int, err error) { 61 | w.curBytes += uint64(len(p)) 62 | w.t.Log("Written", w.curBytes, " bytes of", w.totalBytes) 63 | return len(p), nil 64 | } 65 | 66 | // Collect total number of bytes 67 | func (w *writer) Header(h http.Header) error { 68 | contentLength := h.Get("Content-Length") 69 | if contentLength != "" { 70 | v, err := strconv.ParseUint(contentLength, 10, 64) 71 | if err != nil { 72 | return err 73 | } 74 | w.totalBytes = v 75 | } 76 | return nil 77 | } 78 | -------------------------------------------------------------------------------- /sys/whisper/token.go: -------------------------------------------------------------------------------- 1 | package whisper 2 | 3 | import ( 4 | "encoding/json" 5 | "time" 6 | ) 7 | 8 | /////////////////////////////////////////////////////////////////////////////// 9 | // CGO 10 | 11 | /* 12 | #cgo pkg-config: libwhisper 13 | #include 14 | */ 15 | import "C" 16 | 17 | /////////////////////////////////////////////////////////////////////////////// 18 | // TYPES 19 | 20 | type ( 21 | TokenData C.struct_whisper_token_data 22 | Token struct { 23 | Id int32 `json:"id"` 24 | Text string `json:"text,omitempty"` 25 | P float32 `json:"p,omitempty"` 26 | T0 
time.Duration `json:"t0,omitempty"` 27 | T1 time.Duration `json:"t1,omitempty"` 28 | Type TokenType `json:"type,omitempty"` 29 | } 30 | Segment struct { 31 | Id int32 `json:"id"` 32 | Text string `json:"text,omitempty"` 33 | T0 time.Duration `json:"t0,omitempty"` 34 | T1 time.Duration `json:"t1,omitempty"` 35 | SpeakerTurn bool `json:"speaker_turn,omitempty"` 36 | Tokens []Token `json:"tokens,omitempty"` 37 | } 38 | TokenType int 39 | ) 40 | 41 | /////////////////////////////////////////////////////////////////////////////// 42 | // GLOBALS 43 | 44 | const ( 45 | _ TokenType = iota 46 | EOT // end of text 47 | SOT // start of transcript 48 | SOLM // start of language model 49 | PREV // start of previous segment 50 | NOSP // no speaker 51 | NOT // no timestamps 52 | BEG // begin 53 | LANG // language (TODO) 54 | ) 55 | 56 | /////////////////////////////////////////////////////////////////////////////// 57 | // STRINGIFY 58 | 59 | func (t Token) String() string { 60 | data, err := json.MarshalIndent(t, "", " ") 61 | if err != nil { 62 | return err.Error() 63 | } 64 | return string(data) 65 | } 66 | 67 | func (s Segment) String() string { 68 | data, err := json.MarshalIndent(s, "", " ") 69 | if err != nil { 70 | return err.Error() 71 | } 72 | return string(data) 73 | } 74 | 75 | func (t TokenType) String() string { 76 | switch t { 77 | case EOT: 78 | return "[EOT]" 79 | case SOT: 80 | return "[SOT]" 81 | case SOLM: 82 | return "[SOLM]" 83 | case PREV: 84 | return "[PREV]" 85 | case NOSP: 86 | return "[NOSP]" 87 | case NOT: 88 | return "[NOT]" 89 | case BEG: 90 | return "[BEG]" 91 | case LANG: 92 | return "[LANG]" 93 | default: 94 | return "[TEXT]" 95 | } 96 | } 97 | 98 | func (t TokenType) MarshalJSON() ([]byte, error) { 99 | return json.Marshal(t.String()) 100 | } 101 | 102 | /////////////////////////////////////////////////////////////////////////////// 103 | // PUBLIC METHODS 104 | 105 | // Return a segment, or nil 106 | func (ctx *Context) Segment(n int) *Segment 
{ 107 | if n < 0 || n >= ctx.NumSegments() { 108 | return nil 109 | } 110 | return &Segment{ 111 | Id: int32(n), 112 | Text: C.GoString(C.whisper_full_get_segment_text((*C.struct_whisper_context)(ctx), C.int(n))), 113 | SpeakerTurn: (bool)(C.whisper_full_get_segment_speaker_turn_next((*C.struct_whisper_context)(ctx), C.int(n))), 114 | Tokens: ctx.Tokens(n), 115 | T0: tsToDuration(C.whisper_full_get_segment_t0((*C.struct_whisper_context)(ctx), C.int(n))), 116 | T1: tsToDuration(C.whisper_full_get_segment_t1((*C.struct_whisper_context)(ctx), C.int(n))), 117 | } 118 | } 119 | 120 | // Return a token from TokenData 121 | func (ctx *Context) Token(data TokenData) Token { 122 | return Token{ 123 | Id: int32(data.id), 124 | Text: C.GoString(C.whisper_token_to_str((*C.struct_whisper_context)(ctx), C.whisper_token(data.id))), 125 | P: float32(data.p), 126 | T0: tsToDuration(data.t0), 127 | T1: tsToDuration(data.t1), 128 | Type: tokenToType(ctx, data.id), 129 | } 130 | } 131 | 132 | // Return tokens for a segment 133 | func (ctx *Context) Tokens(n int) []Token { 134 | if n >= ctx.NumSegments() { 135 | return nil 136 | } 137 | t := int(C.whisper_full_n_tokens((*C.struct_whisper_context)(ctx), C.int(n))) 138 | if t < 0 { 139 | return nil 140 | } 141 | result := make([]Token, t) 142 | for i := 0; i < t; i++ { 143 | data := (TokenData)(C.whisper_full_get_token_data((*C.struct_whisper_context)(ctx), C.int(n), C.int(i))) 144 | result[i] = ctx.Token(data) 145 | } 146 | return result 147 | } 148 | 149 | /////////////////////////////////////////////////////////////////////////////// 150 | // PRIVATE METHODS 151 | 152 | // Convert a int64 timestamp to a time.Duration 153 | func tsToDuration(ts C.int64_t) time.Duration { 154 | if ts == -1 { 155 | return 0 156 | } 157 | return time.Duration(ts) * time.Millisecond * 10 158 | } 159 | 160 | // return a token type from a token 161 | // TODO: Return language tokens 162 | func tokenToType(ctx *Context, token C.int32_t) TokenType { 163 | 
switch { 164 | case token == C.whisper_token_eot((*C.struct_whisper_context)(ctx)): 165 | return EOT 166 | case token == C.whisper_token_sot((*C.struct_whisper_context)(ctx)): 167 | return SOT 168 | case token == C.whisper_token_solm((*C.struct_whisper_context)(ctx)): 169 | return SOLM 170 | case token == C.whisper_token_prev((*C.struct_whisper_context)(ctx)): 171 | return PREV 172 | case token == C.whisper_token_nosp((*C.struct_whisper_context)(ctx)): 173 | return NOSP 174 | case token == C.whisper_token_not((*C.struct_whisper_context)(ctx)): 175 | return NOT 176 | case token == C.whisper_token_beg((*C.struct_whisper_context)(ctx)): 177 | return BEG 178 | default: 179 | return 0 180 | } 181 | } 182 | -------------------------------------------------------------------------------- /sys/whisper/whisper.go: -------------------------------------------------------------------------------- 1 | package whisper 2 | 3 | import "unsafe" 4 | 5 | /////////////////////////////////////////////////////////////////////////////// 6 | // CGO 7 | 8 | /* 9 | #cgo pkg-config: libwhisper 10 | #include 11 | #include 12 | */ 13 | import "C" 14 | 15 | /////////////////////////////////////////////////////////////////////////////// 16 | // TYPES 17 | 18 | type ( 19 | Context C.struct_whisper_context 20 | ) 21 | 22 | /////////////////////////////////////////////////////////////////////////////// 23 | // Constants 24 | 25 | const ( 26 | SampleRate = C.WHISPER_SAMPLE_RATE 27 | ) 28 | 29 | /////////////////////////////////////////////////////////////////////////////// 30 | // LIFECYCLE 31 | 32 | // Create a new context with path to model and context parameters. Returns nil on error. 
33 | func Whisper_init_from_file_with_params(path string, params ContextParams) *Context { 34 | cPath := C.CString(path) 35 | defer C.free(unsafe.Pointer(cPath)) 36 | 37 | return (*Context)(C.whisper_init_from_file_with_params(cPath, (C.struct_whisper_context_params)(params))) 38 | } 39 | 40 | // Create a new context with model data and context parameters. Returns nil on error. 41 | func Whisper_init_from_buffer_with_params(data []byte, params ContextParams) *Context { 42 | return (*Context)(C.whisper_init_from_buffer_with_params(unsafe.Pointer(&data[0]), C.size_t(len(data)), (C.struct_whisper_context_params)(params))) 43 | } 44 | 45 | // Frees all memory allocated by the model. 46 | func Whisper_free(ctx *Context) { 47 | C.whisper_free((*C.struct_whisper_context)(ctx)) 48 | } 49 | 50 | /////////////////////////////////////////////////////////////////////////////// 51 | // PUBLIC FUNCTIONS 52 | 53 | // Return largest language id (i.e. number of available languages - 1) 54 | func Whisper_lang_max_id() int { 55 | return int(C.whisper_lang_max_id()) 56 | } 57 | 58 | // Return the language id of the specified short or full string (e.g. "de" -> 2), 59 | // or -1 if not found 60 | func Whisper_lang_id(str string) int { 61 | cStr := C.CString(str) 62 | defer C.free(unsafe.Pointer(cStr)) 63 | return int(C.whisper_lang_id(cStr)) 64 | } 65 | 66 | // Return the short string of the specified language id (e.g. 2 -> "de"), 67 | // or empty string if not found 68 | func Whisper_lang_str(id int) string { 69 | return C.GoString(C.whisper_lang_str(C.int(id))) 70 | } 71 | 72 | // Return the long string of the specified language name (e.g. 
2 -> "german"), 73 | // or empty string if not found 74 | func Whisper_lang_str_full(id int) string { 75 | return C.GoString(C.whisper_lang_str_full(C.int(id))) 76 | } 77 | 78 | // Return model capabilities 79 | func Whisper_is_multilingual(ctx *Context) bool { 80 | return C.whisper_is_multilingual((*C.struct_whisper_context)(ctx)) != 0 81 | } 82 | 83 | // Run the entire model: PCM -> log mel spectrogram -> encoder -> decoder -> text 84 | // Not thread safe for same context 85 | // Uses the specified decoding strategy to obtain the text. 86 | func Whisper_full(ctx *Context, params FullParams, samples []float32) error { 87 | if C.whisper_full((*C.struct_whisper_context)(ctx), (C.struct_whisper_full_params)(params), (*C.float)(&samples[0]), C.int(len(samples))) != 0 { 88 | return ErrTranscriptionFailed 89 | } 90 | return nil 91 | } 92 | 93 | // Number of generated text segments 94 | // A segment can be a few words, a sentence, or even a paragraph. 95 | func (ctx *Context) NumSegments() int { 96 | return int(C.whisper_full_n_segments((*C.struct_whisper_context)(ctx))) 97 | } 98 | 99 | // Language id associated with the context's default state 100 | func (ctx *Context) DefaultLangId() int { 101 | return int(C.whisper_full_lang_id((*C.struct_whisper_context)(ctx))) 102 | } 103 | 104 | // Get the start time of the specified segment 105 | func (ctx *Context) SegmentT0(n int) int64 { 106 | return int64(C.whisper_full_get_segment_t0((*C.struct_whisper_context)(ctx), C.int(n))) 107 | } 108 | 109 | // Get the end time of the specified segment 110 | func (ctx *Context) SegmentT1(n int) int64 { 111 | return int64(C.whisper_full_get_segment_t1((*C.struct_whisper_context)(ctx), C.int(n))) 112 | } 113 | 114 | // Get whether the next segment is predicted as a speaker turn 115 | func (ctx *Context) SegmentSpeakerTurnNext(n int) bool { 116 | return (bool)(C.whisper_full_get_segment_speaker_turn_next((*C.struct_whisper_context)(ctx), C.int(n))) 117 | } 118 | 119 | // Get the text of 
the specified segment 120 | func (ctx *Context) SegmentText(n int) string { 121 | return C.GoString(C.whisper_full_get_segment_text((*C.struct_whisper_context)(ctx), C.int(n))) 122 | } 123 | 124 | // Get number of tokens in the specified segment 125 | func (ctx *Context) SegmentNumTokens(n int) int { 126 | return int(C.whisper_full_n_tokens((*C.struct_whisper_context)(ctx), C.int(n))) 127 | } 128 | 129 | // Get the token text in the specified segment 130 | func (ctx *Context) SegmentTokenText(n, i int) string { 131 | return C.GoString(C.whisper_full_get_token_text((*C.struct_whisper_context)(ctx), C.int(n), C.int(i))) 132 | } 133 | 134 | // Get the token id in the specified segment 135 | func (ctx *Context) SegmentTokenId(n, i int) int32 { 136 | return int32(C.whisper_full_get_token_id((*C.struct_whisper_context)(ctx), C.int(n), C.int(i))) 137 | } 138 | 139 | // Get the token probability of the specified token in the specified segment 140 | func (ctx *Context) SegmentTokenProb(n, i int) float32 { 141 | return float32(C.whisper_full_get_token_p((*C.struct_whisper_context)(ctx), C.int(n), C.int(i))) 142 | } 143 | 144 | // Get token data for the specified token in the specified segment 145 | // This contains probabilities, timestamps, etc. 
146 | func (ctx *Context) SegmentTokenData(n, i int) TokenData { 147 | return (TokenData)(C.whisper_full_get_token_data((*C.struct_whisper_context)(ctx), C.int(n), C.int(i))) 148 | } 149 | -------------------------------------------------------------------------------- /sys/whisper/whisper_test.go: -------------------------------------------------------------------------------- 1 | package whisper_test 2 | 3 | import ( 4 | "context" 5 | "os" 6 | "path/filepath" 7 | "strings" 8 | "testing" 9 | 10 | // Packages 11 | "github.com/go-audio/wav" 12 | "github.com/mutablelogic/go-whisper/sys/whisper" 13 | "github.com/stretchr/testify/assert" 14 | ) 15 | 16 | const SAMPLE_EN = "../../samples/jfk.wav" 17 | const SAMPLE_FR = "../../samples/OlivierL.wav" 18 | const SAMPLE_DE = "../../samples/de-podcast.wav" 19 | 20 | func Test_whisper_00(t *testing.T) { 21 | assert := assert.New(t) 22 | 23 | // Set logging 24 | whisper.Whisper_log_set(func(level whisper.LogLevel, text string) { 25 | t.Log(level, strings.TrimSpace(text)) 26 | }) 27 | 28 | // Create a file for the model 29 | w, err := os.Create(filepath.Join(t.TempDir(), MODEL_TINY)) 30 | if !assert.NoError(err) { 31 | t.SkipNow() 32 | } 33 | defer w.Close() 34 | 35 | // Read the model 36 | client := whisper.NewClient(MODEL_URL) 37 | if !assert.NotNil(client) { 38 | t.SkipNow() 39 | } 40 | if _, err := client.Get(context.Background(), w, MODEL_TINY); !assert.NoError(err) { 41 | t.SkipNow() 42 | } 43 | 44 | t.Run("InitFromFileWithParams", func(t *testing.T) { 45 | ctx := whisper.Whisper_init_from_file_with_params(w.Name(), whisper.DefaultContextParams()) 46 | assert.NotNil(ctx) 47 | // Free memory 48 | whisper.Whisper_free(ctx) 49 | }) 50 | 51 | t.Run("InitFromMemoryWithParams", func(t *testing.T) { 52 | data, err := os.ReadFile(w.Name()) 53 | if !assert.NoError(err) { 54 | t.SkipNow() 55 | } 56 | 57 | ctx := whisper.Whisper_init_from_buffer_with_params(data, whisper.DefaultContextParams()) 58 | assert.NotNil(ctx) 59 | // Free 
memory 60 | whisper.Whisper_free(ctx) 61 | }) 62 | 63 | } 64 | 65 | func Test_whisper_01(t *testing.T) { 66 | assert := assert.New(t) 67 | 68 | // Set logging 69 | whisper.Whisper_log_set(func(level whisper.LogLevel, text string) { 70 | t.Log(level, strings.TrimSpace(text)) 71 | }) 72 | 73 | // Get maximum language id 74 | max := whisper.Whisper_lang_max_id() 75 | assert.Greater(max, 0) 76 | 77 | for i := 0; i < max; i++ { 78 | short := whisper.Whisper_lang_str(i) 79 | long := whisper.Whisper_lang_str_full(i) 80 | assert.NotEmpty(short) 81 | assert.NotEmpty(long) 82 | assert.Equal(i, whisper.Whisper_lang_id(short)) 83 | assert.Equal(i, whisper.Whisper_lang_id(long)) 84 | t.Logf("Language %d: %s (%s)", i, short, long) 85 | } 86 | } 87 | 88 | func Test_whisper_02(t *testing.T) { 89 | assert := assert.New(t) 90 | 91 | // Set logging 92 | whisper.Whisper_log_set(func(level whisper.LogLevel, text string) { 93 | t.Log(level, strings.TrimSpace(text)) 94 | }) 95 | 96 | // Create a file for the model 97 | w, err := os.Create(filepath.Join(t.TempDir(), MODEL_TINY)) 98 | if !assert.NoError(err) { 99 | t.SkipNow() 100 | } 101 | defer w.Close() 102 | 103 | // Read the model 104 | client := whisper.NewClient(MODEL_URL) 105 | if !assert.NotNil(client) { 106 | t.SkipNow() 107 | } 108 | if _, err := client.Get(context.Background(), w, MODEL_TINY); !assert.NoError(err) { 109 | t.SkipNow() 110 | } 111 | 112 | // Let's run three in parallel 113 | params := whisper.DefaultContextParams() 114 | params.SetUseGpu(false) 115 | 116 | t.Run("Full_en", func(t *testing.T) { 117 | t.Parallel() 118 | 119 | // Create a context 120 | ctx := whisper.Whisper_init_from_file_with_params(w.Name(), params) 121 | if !assert.NotNil(ctx) { 122 | t.SkipNow() 123 | } 124 | 125 | // Load samples 126 | data, err := LoadSamples(SAMPLE_EN) 127 | if !assert.NoError(err) { 128 | t.SkipNow() 129 | } 130 | 131 | // Set parameters 132 | params := whisper.DefaultFullParams(whisper.SAMPLING_GREEDY) 133 | 
params.SetLanguage("auto") 134 | params.SetTranslate(false) 135 | t.Log(params) 136 | 137 | // Run the model 138 | err = whisper.Whisper_full(ctx, params, data) 139 | if !assert.NoError(err) { 140 | t.SkipNow() 141 | } 142 | 143 | // Free memory 144 | whisper.Whisper_free(ctx) 145 | }) 146 | 147 | t.Run("Full_fr", func(t *testing.T) { 148 | t.Parallel() 149 | 150 | // Create a context 151 | ctx := whisper.Whisper_init_from_file_with_params(w.Name(), params) 152 | if !assert.NotNil(ctx) { 153 | t.SkipNow() 154 | } 155 | 156 | // Load samples 157 | data, err := LoadSamples(SAMPLE_FR) 158 | if !assert.NoError(err) { 159 | t.SkipNow() 160 | } 161 | 162 | // Set parameters 163 | params := whisper.DefaultFullParams(whisper.SAMPLING_GREEDY) 164 | params.SetLanguage("auto") 165 | params.SetTranslate(false) 166 | t.Log(params) 167 | 168 | // Run the model 169 | err = whisper.Whisper_full(ctx, params, data) 170 | if !assert.NoError(err) { 171 | t.SkipNow() 172 | } 173 | 174 | // Free memory 175 | whisper.Whisper_free(ctx) 176 | }) 177 | 178 | t.Run("Full_de", func(t *testing.T) { 179 | t.Parallel() 180 | 181 | // Create a context 182 | ctx := whisper.Whisper_init_from_file_with_params(w.Name(), params) 183 | if !assert.NotNil(ctx) { 184 | t.SkipNow() 185 | } 186 | 187 | // Load samples 188 | data, err := LoadSamples(SAMPLE_DE) 189 | if !assert.NoError(err) { 190 | t.SkipNow() 191 | } 192 | 193 | // Set parameters 194 | params := whisper.DefaultFullParams(whisper.SAMPLING_GREEDY) 195 | params.SetLanguage("auto") 196 | params.SetTranslate(false) 197 | t.Log(params) 198 | 199 | // Run the model 200 | err = whisper.Whisper_full(ctx, params, data) 201 | if !assert.NoError(err) { 202 | t.SkipNow() 203 | } 204 | 205 | // Free memory 206 | whisper.Whisper_free(ctx) 207 | }) 208 | 209 | } 210 | 211 | ////////////////////////////////////////////////////////////////////////////// 212 | 213 | func Test_whisper_03(t *testing.T) { 214 | assert := assert.New(t) 215 | 216 | // Set logging 
217 | whisper.Whisper_log_set(func(level whisper.LogLevel, text string) { 218 | t.Log(level, strings.TrimSpace(text)) 219 | }) 220 | 221 | // Create a file for the model 222 | w, err := os.Create(filepath.Join(t.TempDir(), MODEL_TINY)) 223 | if !assert.NoError(err) { 224 | t.SkipNow() 225 | } 226 | defer w.Close() 227 | 228 | // Read the model 229 | client := whisper.NewClient(MODEL_URL) 230 | if !assert.NotNil(client) { 231 | t.SkipNow() 232 | } 233 | if _, err := client.Get(context.Background(), w, MODEL_TINY); !assert.NoError(err) { 234 | t.SkipNow() 235 | } 236 | 237 | // Let's run three in parallel 238 | params := whisper.DefaultContextParams() 239 | params.SetUseGpu(false) 240 | 241 | t.Run("Progress_en", func(t *testing.T) { 242 | t.Parallel() 243 | 244 | // Create a context 245 | ctx := whisper.Whisper_init_from_file_with_params(w.Name(), params) 246 | if !assert.NotNil(ctx) { 247 | t.SkipNow() 248 | } 249 | 250 | // Load samples 251 | data, err := LoadSamples(SAMPLE_EN) 252 | if !assert.NoError(err) { 253 | t.SkipNow() 254 | } 255 | 256 | // Set parameters 257 | params := whisper.DefaultFullParams(whisper.SAMPLING_GREEDY) 258 | params.SetLanguage("auto") 259 | params.SetTranslate(false) 260 | params.SetProgressCallback(ctx, func(progress int) { 261 | t.Logf("Progress: %d%%", progress) 262 | }) 263 | params.SetAbortCallback(ctx, func() bool { 264 | t.Logf("Abort Callback called") 265 | return false 266 | }) 267 | params.SetSegmentCallback(ctx, func(segment int) { 268 | t.Logf("Segment %d", segment) 269 | }) 270 | 271 | t.Log(params) 272 | 273 | // Run the model 274 | err = whisper.Whisper_full(ctx, params, data) 275 | if !assert.NoError(err) { 276 | t.SkipNow() 277 | } 278 | 279 | // Free memory 280 | whisper.Whisper_free(ctx) 281 | }) 282 | 283 | t.Run("Progress_fr", func(t *testing.T) { 284 | t.Parallel() 285 | 286 | // Create a context 287 | ctx := whisper.Whisper_init_from_file_with_params(w.Name(), params) 288 | if !assert.NotNil(ctx) { 289 | 
t.SkipNow() 290 | } 291 | 292 | // Load samples 293 | data, err := LoadSamples(SAMPLE_FR) 294 | if !assert.NoError(err) { 295 | t.SkipNow() 296 | } 297 | 298 | // Set parameters 299 | params := whisper.DefaultFullParams(whisper.SAMPLING_GREEDY) 300 | params.SetLanguage("auto") 301 | params.SetTranslate(false) 302 | params.SetProgressCallback(ctx, func(progress int) { 303 | t.Logf("Progress: %d%%", progress) 304 | }) 305 | params.SetAbortCallback(ctx, func() bool { 306 | t.Logf("Abort Callback called") 307 | return false 308 | }) 309 | params.SetSegmentCallback(ctx, func(segment int) { 310 | t.Logf("Segment %d", segment) 311 | }) 312 | 313 | t.Log(params) 314 | 315 | // Run the model 316 | err = whisper.Whisper_full(ctx, params, data) 317 | if !assert.NoError(err) { 318 | t.SkipNow() 319 | } 320 | 321 | // Free memory 322 | whisper.Whisper_free(ctx) 323 | }) 324 | 325 | t.Run("Progress_de", func(t *testing.T) { 326 | t.Parallel() 327 | 328 | // Create a context 329 | ctx := whisper.Whisper_init_from_file_with_params(w.Name(), params) 330 | if !assert.NotNil(ctx) { 331 | t.SkipNow() 332 | } 333 | 334 | // Load samples 335 | data, err := LoadSamples(SAMPLE_DE) 336 | if !assert.NoError(err) { 337 | t.SkipNow() 338 | } 339 | 340 | // Set parameters 341 | params := whisper.DefaultFullParams(whisper.SAMPLING_GREEDY) 342 | params.SetLanguage("auto") 343 | params.SetTranslate(false) 344 | params.SetProgressCallback(ctx, func(progress int) { 345 | t.Logf("Progress: %d%%", progress) 346 | }) 347 | params.SetAbortCallback(ctx, func() bool { 348 | t.Logf("Abort Callback called") 349 | return false 350 | }) 351 | params.SetSegmentCallback(ctx, func(segment int) { 352 | t.Logf("Segment %d", segment) 353 | }) 354 | 355 | t.Log(params) 356 | 357 | // Run the model 358 | err = whisper.Whisper_full(ctx, params, data) 359 | if !assert.NoError(err) { 360 | t.SkipNow() 361 | } 362 | 363 | // Free memory 364 | whisper.Whisper_free(ctx) 365 | }) 366 | } 367 | 368 | func Test_whisper_04(t 
*testing.T) { 369 | assert := assert.New(t) 370 | 371 | // Set logging 372 | whisper.Whisper_log_set(func(level whisper.LogLevel, text string) { 373 | t.Log(level, strings.TrimSpace(text)) 374 | }) 375 | 376 | // Create a file for the model 377 | w, err := os.Create(filepath.Join(t.TempDir(), MODEL_TINY)) 378 | if !assert.NoError(err) { 379 | t.SkipNow() 380 | } 381 | defer w.Close() 382 | 383 | // Read the model 384 | client := whisper.NewClient(MODEL_URL) 385 | if !assert.NotNil(client) { 386 | t.SkipNow() 387 | } 388 | if _, err := client.Get(context.Background(), w, MODEL_TINY); !assert.NoError(err) { 389 | t.SkipNow() 390 | } 391 | 392 | // Let's run three in parallel 393 | params := whisper.DefaultContextParams() 394 | params.SetUseGpu(false) 395 | 396 | t.Run("Abort_fr", func(t *testing.T) { 397 | // Create a context 398 | ctx := whisper.Whisper_init_from_file_with_params(w.Name(), params) 399 | if !assert.NotNil(ctx) { 400 | t.SkipNow() 401 | } 402 | 403 | // Load samples 404 | data, err := LoadSamples(SAMPLE_FR) 405 | if !assert.NoError(err) { 406 | t.SkipNow() 407 | } 408 | 409 | // Progress counter 410 | var p int 411 | 412 | // Set parameters 413 | params := whisper.DefaultFullParams(whisper.SAMPLING_BEAM_SEARCH) 414 | params.SetLanguage("auto") 415 | params.SetTranslate(false) 416 | params.SetProgressCallback(ctx, func(progress int) { 417 | t.Logf("Progress: %d%%", progress) 418 | p = progress 419 | }) 420 | params.SetAbortCallback(ctx, func() bool { 421 | if p > 20 { 422 | t.Logf("Aborting") 423 | return true 424 | } 425 | return false 426 | }) 427 | 428 | // Run the model 429 | err = whisper.Whisper_full(ctx, params, data) 430 | if !assert.ErrorIs(err, whisper.ErrTranscriptionFailed) { 431 | t.SkipNow() 432 | } 433 | 434 | // Free memory 435 | whisper.Whisper_free(ctx) 436 | }) 437 | } 438 | 439 | func Test_whisper_05(t *testing.T) { 440 | assert := assert.New(t) 441 | 442 | // Set logging 443 | whisper.Whisper_log_set(func(level whisper.LogLevel, 
text string) { 444 | t.Log(level, strings.TrimSpace(text)) 445 | }) 446 | 447 | // Create a file for the model 448 | w, err := os.Create(filepath.Join(t.TempDir(), MODEL_TINY)) 449 | if !assert.NoError(err) { 450 | t.SkipNow() 451 | } 452 | defer w.Close() 453 | 454 | // Read the model 455 | client := whisper.NewClient(MODEL_URL) 456 | if !assert.NotNil(client) { 457 | t.SkipNow() 458 | } 459 | if _, err := client.Get(context.Background(), w, MODEL_TINY); !assert.NoError(err) { 460 | t.SkipNow() 461 | } 462 | 463 | // Let's run three in parallel 464 | params := whisper.DefaultContextParams() 465 | params.SetUseGpu(false) 466 | 467 | t.Run("Tokens_fr", func(t *testing.T) { 468 | // Create a context 469 | ctx := whisper.Whisper_init_from_file_with_params(w.Name(), params) 470 | if !assert.NotNil(ctx) { 471 | t.SkipNow() 472 | } 473 | 474 | // Load samples 475 | data, err := LoadSamples(SAMPLE_FR) 476 | if !assert.NoError(err) { 477 | t.SkipNow() 478 | } 479 | 480 | // Set parameters 481 | params := whisper.DefaultFullParams(whisper.SAMPLING_BEAM_SEARCH) 482 | params.SetLanguage("auto") 483 | params.SetTranslate(false) 484 | params.SetTokenTimestamps(true) 485 | params.SetDiarize(true) 486 | params.SetSegmentCallback(ctx, func(new_segments int) { 487 | num_segments := ctx.NumSegments() 488 | for i := num_segments - new_segments; i < num_segments; i++ { 489 | t.Logf("Segment %d: %v", i, ctx.SegmentText(i)) 490 | } 491 | }) 492 | 493 | // Run the model 494 | err = whisper.Whisper_full(ctx, params, data) 495 | if !assert.NoError(err) { 496 | t.SkipNow() 497 | } 498 | 499 | // Free memory 500 | whisper.Whisper_free(ctx) 501 | }) 502 | 503 | t.Run("Tokens_de", func(t *testing.T) { 504 | // Create a context 505 | ctx := whisper.Whisper_init_from_file_with_params(w.Name(), params) 506 | if !assert.NotNil(ctx) { 507 | t.SkipNow() 508 | } 509 | 510 | // Load samples 511 | data, err := LoadSamples(SAMPLE_DE) 512 | if !assert.NoError(err) { 513 | t.SkipNow() 514 | } 515 | 516 | 
// Set parameters 517 | params := whisper.DefaultFullParams(whisper.SAMPLING_BEAM_SEARCH) 518 | params.SetLanguage("auto") 519 | params.SetTranslate(false) 520 | params.SetTokenTimestamps(true) 521 | params.SetDiarize(true) 522 | params.SetSegmentCallback(ctx, func(new_segments int) { 523 | num_segments := ctx.NumSegments() 524 | for i := num_segments - new_segments; i < num_segments; i++ { 525 | t.Logf("Segment %d: %v", i, ctx.Segment(i)) 526 | } 527 | }) 528 | 529 | // Run the model 530 | err = whisper.Whisper_full(ctx, params, data) 531 | if !assert.NoError(err) { 532 | t.SkipNow() 533 | } 534 | 535 | // Free memory 536 | whisper.Whisper_free(ctx) 537 | }) 538 | } 539 | 540 | ////////////////////////////////////////////////////////////////////////////// 541 | 542 | // Return samples as []float32 543 | func LoadSamples(path string) ([]float32, error) { 544 | fh, err := os.Open(path) 545 | if err != nil { 546 | return nil, err 547 | } 548 | defer fh.Close() 549 | 550 | // Read samples 551 | d := wav.NewDecoder(fh) 552 | if buf, err := d.FullPCMBuffer(); err != nil { 553 | return nil, err 554 | } else { 555 | return buf.AsFloat32Buffer().Data, nil 556 | } 557 | } 558 | -------------------------------------------------------------------------------- /whisper.go: -------------------------------------------------------------------------------- 1 | package whisper 2 | 3 | import ( 4 | "context" 5 | "encoding/json" 6 | "errors" 7 | "fmt" 8 | "runtime" 9 | "strings" 10 | 11 | // Packages 12 | ffmpeg "github.com/mutablelogic/go-media/pkg/ffmpeg" 13 | pool "github.com/mutablelogic/go-whisper/pkg/pool" 14 | schema "github.com/mutablelogic/go-whisper/pkg/schema" 15 | store "github.com/mutablelogic/go-whisper/pkg/store" 16 | task "github.com/mutablelogic/go-whisper/pkg/task" 17 | whisper "github.com/mutablelogic/go-whisper/sys/whisper" 18 | 19 | // Namespace imports 20 | . 
"github.com/djthorpe/go-errors" 21 | ) 22 | 23 | ////////////////////////////////////////////////////////////////////////////// 24 | // TYPES 25 | 26 | // Whisper represents a whisper service for running transcription and translation 27 | type Whisper struct { 28 | pool *pool.ContextPool 29 | store *store.Store 30 | } 31 | 32 | ////////////////////////////////////////////////////////////////////////////// 33 | // GLOBALS 34 | 35 | const ( 36 | // This is the extension of the model files 37 | extModel = ".bin" 38 | 39 | // This is where the model is downloaded from 40 | defaultModelUrl = "https://huggingface.co/ggerganov/whisper.cpp/resolve/main/?download=true" 41 | 42 | // Sample Rate 43 | SampleRate = whisper.SampleRate 44 | ) 45 | 46 | ////////////////////////////////////////////////////////////////////////////// 47 | // LIFECYCLE 48 | 49 | // Create a new whisper service with the path to the models directory 50 | // and optional parameters 51 | func New(path string, opt ...Opt) (*Whisper, error) { 52 | var o opts 53 | 54 | // Set options 55 | o.MaxConcurrent = runtime.NumCPU() 56 | for _, fn := range opt { 57 | if err := fn(&o); err != nil { 58 | return nil, err 59 | } 60 | } 61 | 62 | // Create a new whisper service 63 | w := new(Whisper) 64 | if store, err := store.NewStore(path, extModel, defaultModelUrl); err != nil { 65 | return nil, err 66 | } else { 67 | w.store = store 68 | } 69 | 70 | if pool := pool.NewContextPool(path, o.MaxConcurrent, o.gpu); pool == nil { 71 | return nil, ErrInternalAppError 72 | } else { 73 | w.pool = pool 74 | } 75 | 76 | // Logging 77 | if o.logfn != nil { 78 | whisper.Whisper_log_set(func(level whisper.LogLevel, text string) { 79 | if !o.debug && level > whisper.LogLevelError { 80 | return 81 | } 82 | o.logfn(fmt.Sprintf("[%s] %s", level, strings.TrimSpace(text))) 83 | }) 84 | ffmpeg.SetLogging(o.debug, func(text string) { 85 | o.logfn(text) 86 | }) 87 | } 88 | 89 | // Return success 90 | return w, nil 91 | } 92 | 93 | // 
Release all resources 94 | func (w *Whisper) Close() error { 95 | var result error 96 | 97 | // Release pool resources 98 | if w.pool != nil { 99 | result = errors.Join(result, w.pool.Close()) 100 | } 101 | 102 | // Set all to nil 103 | w.pool = nil 104 | w.store = nil 105 | 106 | // Return any errors 107 | return result 108 | } 109 | 110 | ////////////////////////////////////////////////////////////////////////////// 111 | // STRINGIFY 112 | 113 | func (w *Whisper) MarshalJSON() ([]byte, error) { 114 | return json.Marshal(struct { 115 | Store *store.Store `json:"store"` 116 | Pool *pool.ContextPool `json:"pool"` 117 | }{ 118 | Store: w.store, 119 | Pool: w.pool, 120 | }) 121 | } 122 | 123 | func (w *Whisper) String() string { 124 | data, err := json.MarshalIndent(w, "", " ") 125 | if err != nil { 126 | return err.Error() 127 | } 128 | return string(data) 129 | } 130 | 131 | ////////////////////////////////////////////////////////////////////////////// 132 | // PUBLIC METHODS 133 | 134 | // Return all models in the models directory 135 | func (w *Whisper) ListModels() []*schema.Model { 136 | return w.store.List() 137 | } 138 | 139 | // Get a model by its Id, returns nil if the model does not exist 140 | func (w *Whisper) GetModelById(id string) *schema.Model { 141 | return w.store.ById(id) 142 | } 143 | 144 | // Delete a model by its id 145 | func (w *Whisper) DeleteModelById(id string) error { 146 | model := w.store.ById(id) 147 | if model == nil { 148 | return ErrNotFound.Withf("%q", id) 149 | } 150 | 151 | // Empty the pool of this model 152 | if err := w.pool.Drain(model); err != nil { 153 | return err 154 | } 155 | 156 | // Delete the model 157 | if err := w.store.Delete(model.Id); err != nil { 158 | return err 159 | } 160 | 161 | // Return success 162 | return nil 163 | } 164 | 165 | // Download a model by path, where the directory is the root of the model 166 | // within the models directory. 
The model is returned immediately if it 167 | // already exists in the store 168 | func (w *Whisper) DownloadModel(ctx context.Context, path string, fn func(curBytes, totalBytes uint64)) (*schema.Model, error) { 169 | return w.store.Download(ctx, path, fn) 170 | } 171 | 172 | // Get a task for the specified model, which may load the model or 173 | // return an existing one. The context can then be used to run the Transcribe 174 | // function, and after the context is returned to the pool. 175 | func (w *Whisper) WithModel(model *schema.Model, fn func(task *task.Context) error) error { 176 | if model == nil || fn == nil { 177 | return ErrBadParameter 178 | } 179 | 180 | // Get a context from the pool 181 | task, err := w.pool.Get(model) 182 | if err != nil { 183 | return err 184 | } 185 | defer w.pool.Put(task) 186 | 187 | // Copy parameters 188 | task.CopyParams() 189 | 190 | // Execute the function 191 | return fn(task) 192 | } 193 | -------------------------------------------------------------------------------- /whisper_test.go: -------------------------------------------------------------------------------- 1 | package whisper_test 2 | 3 | import ( 4 | "context" 5 | "errors" 6 | "os" 7 | "testing" 8 | 9 | // Packages 10 | wav "github.com/go-audio/wav" 11 | whisper "github.com/mutablelogic/go-whisper" 12 | task "github.com/mutablelogic/go-whisper/pkg/task" 13 | assert "github.com/stretchr/testify/assert" 14 | 15 | // Namespace imports 16 | . 
// Model and audio fixtures used by the tests in this file. Sample paths are
// relative to the package directory, which is the working directory when the
// tests run under `go test`. The German sample shipped in samples/ is named
// de-podcast.wav.
const (
	MODEL_TINY = "ggml-tiny.en-q5_1.bin"
	SAMPLE_EN  = "samples/jfk.wav"
	SAMPLE_FR  = "samples/OlivierL.wav"
	SAMPLE_DE  = "samples/de-podcast.wav"
)
err := service.DownloadModel(context.Background(), MODEL_TINY, nil) 89 | assert.NoError(err) 90 | t.Log(model) 91 | }) 92 | 93 | t.Run("WithModelContext1", func(t *testing.T) { 94 | model := service.GetModelById(MODEL_TINY) 95 | assert.NotNil(model) 96 | 97 | // Get the model for the first time 98 | assert.NoError(service.WithModel(model, func(ctx *task.Context) error { 99 | assert.NotNil(ctx) 100 | return nil 101 | })) 102 | }) 103 | 104 | t.Run("WithModelContext2", func(t *testing.T) { 105 | model := service.GetModelById(MODEL_TINY) 106 | assert.NotNil(model) 107 | 108 | // Get the model for the second time 109 | assert.NoError(service.WithModel(model, func(ctx *task.Context) error { 110 | assert.NotNil(ctx) 111 | return nil 112 | })) 113 | }) 114 | 115 | t.Run("WithModelContext3", func(t *testing.T) { 116 | model := service.GetModelById(MODEL_TINY) 117 | assert.NotNil(model) 118 | 119 | // Get the model for the third time 120 | assert.NoError(service.WithModel(model, func(ctx *task.Context) error { 121 | assert.NotNil(ctx) 122 | return nil 123 | })) 124 | }) 125 | } 126 | 127 | func Test_whisper_004(t *testing.T) { 128 | assert := assert.New(t) 129 | service, err := whisper.New(t.TempDir(), whisper.OptMaxConcurrent(2), whisper.OptDebug(), whisper.OptLog(func(text string) { 130 | t.Log(text) 131 | })) 132 | if !assert.Nil(err) { 133 | t.SkipNow() 134 | } 135 | defer service.Close() 136 | 137 | t.Run("Download", func(t *testing.T) { 138 | // Download a model 139 | model, err := service.DownloadModel(context.Background(), MODEL_TINY, nil) 140 | assert.NoError(err) 141 | t.Log(model) 142 | }) 143 | 144 | var blocked int 145 | t.Run("InParallel", func(t *testing.T) { 146 | t.Run("WithModelContext1", func(t *testing.T) { 147 | t.Parallel() 148 | model := service.GetModelById(MODEL_TINY) 149 | assert.NotNil(model) 150 | 151 | err := service.WithModel(model, func(ctx *task.Context) error { 152 | assert.NotNil(ctx) 153 | return nil 154 | }) 155 | if errors.Is(err, 
ErrChannelBlocked) { 156 | blocked++ 157 | } else { 158 | assert.NoError(err) 159 | } 160 | }) 161 | 162 | t.Run("WithModelContext2", func(t *testing.T) { 163 | t.Parallel() 164 | model := service.GetModelById(MODEL_TINY) 165 | assert.NotNil(model) 166 | 167 | err := service.WithModel(model, func(ctx *task.Context) error { 168 | assert.NotNil(ctx) 169 | return nil 170 | }) 171 | if errors.Is(err, ErrChannelBlocked) { 172 | blocked++ 173 | } else { 174 | assert.NoError(err) 175 | } 176 | }) 177 | 178 | t.Run("WithModelContext3", func(t *testing.T) { 179 | t.Parallel() 180 | model := service.GetModelById(MODEL_TINY) 181 | assert.NotNil(model) 182 | 183 | err := service.WithModel(model, func(ctx *task.Context) error { 184 | assert.NotNil(ctx) 185 | return nil 186 | }) 187 | if errors.Is(err, ErrChannelBlocked) { 188 | blocked++ 189 | } else { 190 | assert.NoError(err) 191 | } 192 | }) 193 | }) 194 | 195 | // One of these should have been blocked 196 | assert.Equal(1, blocked) 197 | } 198 | 199 | func Test_whisper_005(t *testing.T) { 200 | assert := assert.New(t) 201 | service, err := whisper.New(t.TempDir(), whisper.OptMaxConcurrent(1), whisper.OptDebug(), whisper.OptLog(func(text string) { 202 | t.Log(text) 203 | })) 204 | if !assert.Nil(err) { 205 | t.SkipNow() 206 | } 207 | defer service.Close() 208 | 209 | t.Run("Download", func(t *testing.T) { 210 | // Download a model 211 | model, err := service.DownloadModel(context.Background(), MODEL_TINY, nil) 212 | assert.NoError(err) 213 | t.Log(model) 214 | }) 215 | 216 | t.Run("TranscribeEN", func(t *testing.T) { 217 | model := service.GetModelById(MODEL_TINY) 218 | assert.NotNil(model) 219 | 220 | // Read samples 221 | samples, err := LoadSamples(SAMPLE_EN) 222 | if !assert.NoError(err) { 223 | t.SkipNow() 224 | } 225 | 226 | assert.NoError(service.WithModel(model, func(task *task.Context) error { 227 | t.Log("Transcribing", len(samples), "samples") 228 | return task.Transcribe(context.Background(), 0, samples, nil) 229 
| })) 230 | }) 231 | 232 | t.Run("TranscribeDE", func(t *testing.T) { 233 | model := service.GetModelById(MODEL_TINY) 234 | assert.NotNil(model) 235 | 236 | // Read samples 237 | samples, err := LoadSamples(SAMPLE_DE) 238 | if !assert.NoError(err) { 239 | t.SkipNow() 240 | } 241 | 242 | assert.NoError(service.WithModel(model, func(task *task.Context) error { 243 | t.Log("Transcribing", len(samples), "samples") 244 | return task.Transcribe(context.Background(), 0, samples, nil) 245 | })) 246 | }) 247 | 248 | t.Run("TranscribeFR", func(t *testing.T) { 249 | model := service.GetModelById(MODEL_TINY) 250 | assert.NotNil(model) 251 | 252 | // Read samples 253 | samples, err := LoadSamples(SAMPLE_FR) 254 | if !assert.NoError(err) { 255 | t.SkipNow() 256 | } 257 | 258 | assert.NoError(service.WithModel(model, func(task *task.Context) error { 259 | t.Log("Transcribing", len(samples), "samples") 260 | return task.Transcribe(context.Background(), 0, samples, nil) 261 | })) 262 | }) 263 | } 264 | 265 | func Test_whisper_006(t *testing.T) { 266 | assert := assert.New(t) 267 | service, err := whisper.New(t.TempDir(), whisper.OptNoGPU(), whisper.OptMaxConcurrent(3), whisper.OptLog(func(text string) { 268 | t.Log(text) 269 | })) 270 | if !assert.Nil(err) { 271 | t.SkipNow() 272 | } 273 | 274 | // Run after all the other tests 275 | t.Cleanup(func() { 276 | service.Close() 277 | }) 278 | 279 | // Download a model 280 | model, err := service.DownloadModel(context.Background(), MODEL_TINY, nil) 281 | assert.NoError(err) 282 | 283 | t.Run("Parallel", func(t *testing.T) { 284 | 285 | t.Run("TranscribeEN", func(t *testing.T) { 286 | t.Parallel() 287 | t.Log(service) 288 | 289 | // Read samples 290 | samples, err := LoadSamples(SAMPLE_EN) 291 | if !assert.NoError(err) { 292 | t.SkipNow() 293 | } 294 | 295 | assert.NoError(service.WithModel(model, func(task *task.Context) error { 296 | t.Log("Transcribing", len(samples), "samples") 297 | return task.Transcribe(context.Background(), 0, 
samples, nil) 298 | })) 299 | }) 300 | 301 | t.Run("TranscribeDE", func(t *testing.T) { 302 | t.Parallel() 303 | 304 | model := service.GetModelById(MODEL_TINY) 305 | assert.NotNil(model) 306 | 307 | // Read samples 308 | samples, err := LoadSamples(SAMPLE_DE) 309 | if !assert.NoError(err) { 310 | t.SkipNow() 311 | } 312 | 313 | assert.NoError(service.WithModel(model, func(task *task.Context) error { 314 | t.Log("Transcribing", len(samples), "samples") 315 | return task.Transcribe(context.Background(), 0, samples, nil) 316 | })) 317 | }) 318 | 319 | t.Run("TranscribeFR", func(t *testing.T) { 320 | t.Parallel() 321 | 322 | model := service.GetModelById(MODEL_TINY) 323 | assert.NotNil(model) 324 | 325 | // Read samples 326 | samples, err := LoadSamples(SAMPLE_FR) 327 | if !assert.NoError(err) { 328 | t.SkipNow() 329 | } 330 | 331 | assert.NoError(service.WithModel(model, func(task *task.Context) error { 332 | t.Log("Transcribing", len(samples), "samples") 333 | return task.Transcribe(context.Background(), 0, samples, nil) 334 | })) 335 | }) 336 | }) 337 | } 338 | 339 | ////////////////////////////////////////////////////////////////////////////// 340 | 341 | // Return samples as []float32 342 | func LoadSamples(path string) ([]float32, error) { 343 | fh, err := os.Open(path) 344 | if err != nil { 345 | return nil, err 346 | } 347 | defer fh.Close() 348 | 349 | // Read samples 350 | d := wav.NewDecoder(fh) 351 | if buf, err := d.FullPCMBuffer(); err != nil { 352 | return nil, err 353 | } else { 354 | return buf.AsFloat32Buffer().Data, nil 355 | } 356 | } 357 | --------------------------------------------------------------------------------