├── .github ├── FUNDING.yml ├── dependabot.yml └── workflows │ ├── codeql.yml │ ├── docker.yml │ ├── goreleaser.yml │ └── lint.yml ├── .gitignore ├── .golangci.yml ├── .goreleaser.yaml ├── .hadolint.yaml ├── LICENSE ├── Makefile ├── README.md ├── _example ├── go.mod ├── go.sum └── main.go ├── config └── whisper.go ├── docker └── Dockerfile ├── go.mod ├── go.sum ├── main.go ├── models └── .gitkeep ├── testdata └── jfk.wav ├── third_party └── .gitkeep ├── webhook ├── error.go ├── error_test.go ├── utils.go ├── utils_test.go └── webhook.go ├── whisper ├── audio.go ├── helper.go ├── helper_test.go ├── whisper.go └── whisper_test.go └── youtube └── youtube.go /.github/FUNDING.yml: -------------------------------------------------------------------------------- 1 | # These are supported funding model platforms 2 | 3 | github: # Replace with up to 4 GitHub Sponsors-enabled usernames e.g., [user1, user2] 4 | patreon: # Replace with a single Patreon username 5 | open_collective: # Replace with a single Open Collective username 6 | ko_fi: # Replace with a single Ko-fi username 7 | tidelift: # Replace with a single Tidelift platform-name/package-name e.g., npm/babel 8 | community_bridge: # Replace with a single Community Bridge project-name e.g., cloud-foundry 9 | liberapay: # Replace with a single Liberapay username 10 | issuehunt: # Replace with a single IssueHunt username 11 | otechie: # Replace with a single Otechie username 12 | lfx_crowdfunding: # Replace with a single LFX Crowdfunding project-name e.g., cloud-foundry 13 | custom: ['https://www.paypal.me/appleboy46'] 14 | -------------------------------------------------------------------------------- /.github/dependabot.yml: -------------------------------------------------------------------------------- 1 | version: 2 2 | updates: 3 | - package-ecosystem: github-actions 4 | directory: / 5 | schedule: 6 | interval: weekly 7 | - package-ecosystem: gomod 8 | directory: / 9 | schedule: 10 | interval: weekly 11 | 
-------------------------------------------------------------------------------- /.github/workflows/codeql.yml: -------------------------------------------------------------------------------- 1 | # For most projects, this workflow file will not need changing; you simply need 2 | # to commit it to your repository. 3 | # 4 | # You may wish to alter this file to override the set of languages analyzed, 5 | # or to provide custom queries or build logic. 6 | # 7 | # ******** NOTE ******** 8 | # We have attempted to detect the languages in your repository. Please check 9 | # the `language` matrix defined below to confirm you have the correct set of 10 | # supported CodeQL languages. 11 | # 12 | name: "CodeQL" 13 | 14 | on: 15 | push: 16 | branches: [main] 17 | pull_request: 18 | # The branches below must be a subset of the branches above 19 | branches: [main] 20 | schedule: 21 | - cron: "41 23 * * 6" 22 | 23 | jobs: 24 | analyze: 25 | name: Analyze 26 | runs-on: ubuntu-latest 27 | permissions: 28 | actions: read 29 | contents: read 30 | security-events: write 31 | 32 | strategy: 33 | fail-fast: false 34 | matrix: 35 | language: ["go"] 36 | # CodeQL supports [ 'cpp', 'csharp', 'go', 'java', 'javascript', 'python', 'ruby' ] 37 | # Learn more about CodeQL language support at https://git.io/codeql-language-support 38 | 39 | steps: 40 | - name: Checkout repository 41 | uses: actions/checkout@v4 42 | 43 | # Initializes the CodeQL tools for scanning. 44 | - name: Initialize CodeQL 45 | uses: github/codeql-action/init@v3 46 | with: 47 | languages: ${{ matrix.language }} 48 | # If you wish to specify custom queries, you can do so here or in a config file. 49 | # By default, queries listed here will override any specified in a config file. 50 | # Prefix the list here with "+" to use these queries and those in the config file. 
51 | # queries: ./path/to/local/query, your-org/your-repo/queries@main 52 | 53 | - name: Perform CodeQL Analysis 54 | uses: github/codeql-action/analyze@v3 55 | -------------------------------------------------------------------------------- /.github/workflows/docker.yml: -------------------------------------------------------------------------------- 1 | name: Docker Image 2 | 3 | on: 4 | push: 5 | branches: 6 | - main 7 | tags: 8 | - "v*" 9 | pull_request: 10 | branches: 11 | - "main" 12 | 13 | jobs: 14 | build-docker: 15 | runs-on: ubuntu-latest 16 | steps: 17 | - name: Checkout repository 18 | uses: actions/checkout@v4 19 | with: 20 | fetch-depth: 0 21 | 22 | - name: Set up QEMU 23 | uses: docker/setup-qemu-action@v3 24 | 25 | - name: Set up Docker Buildx 26 | uses: docker/setup-buildx-action@v3 27 | 28 | - name: Login to Docker Hub 29 | uses: docker/login-action@v3 30 | with: 31 | username: ${{ secrets.DOCKERHUB_USERNAME }} 32 | password: ${{ secrets.DOCKERHUB_TOKEN }} 33 | 34 | - name: Login to GitHub Container Registry 35 | uses: docker/login-action@v3 36 | with: 37 | registry: ghcr.io 38 | username: ${{ github.repository_owner }} 39 | password: ${{ secrets.GITHUB_TOKEN }} 40 | 41 | - name: Docker meta 42 | id: docker-meta 43 | uses: docker/metadata-action@v5 44 | with: 45 | images: | 46 | ${{ github.repository }} 47 | ghcr.io/${{ github.repository }} 48 | tags: | 49 | type=raw,value=latest,enable={{is_default_branch}} 50 | type=semver,pattern={{version}} 51 | type=semver,pattern={{major}}.{{minor}} 52 | type=semver,pattern={{major}} 53 | 54 | - name: Build and push 55 | uses: docker/build-push-action@v6 56 | with: 57 | context: . 
58 | platforms: linux/amd64,linux/arm64 59 | file: docker/Dockerfile 60 | push: ${{ github.event_name != 'pull_request' }} 61 | tags: ${{ steps.docker-meta.outputs.tags }} 62 | labels: ${{ steps.docker-meta.outputs.labels }} 63 | -------------------------------------------------------------------------------- /.github/workflows/goreleaser.yml: -------------------------------------------------------------------------------- 1 | name: Goreleaser 2 | 3 | on: 4 | push: 5 | tags: 6 | - "*" 7 | 8 | permissions: 9 | contents: write 10 | 11 | jobs: 12 | goreleaser: 13 | runs-on: ubuntu-latest 14 | steps: 15 | - name: Checkout 16 | uses: actions/checkout@v4 17 | with: 18 | fetch-depth: 0 19 | - name: Setup go 20 | uses: actions/setup-go@v5 21 | with: 22 | go-version: "^1" 23 | 24 | - name: Run GoReleaser 25 | uses: goreleaser/goreleaser-action@v6 26 | with: 27 | # either 'goreleaser' (default) or 'goreleaser-pro' 28 | distribution: goreleaser 29 | version: latest 30 | args: release --clean 31 | env: 32 | GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} 33 | -------------------------------------------------------------------------------- /.github/workflows/lint.yml: -------------------------------------------------------------------------------- 1 | name: Lint and Testing 2 | 3 | on: 4 | push: 5 | pull_request: 6 | 7 | jobs: 8 | lint: 9 | runs-on: ubuntu-latest 10 | container: golang:1.21-alpine 11 | steps: 12 | - name: Checkout repository 13 | uses: actions/checkout@v4 14 | 15 | # - name: setup tool 16 | # run: | 17 | # apk add make git gcc g++ 18 | # make dependency 19 | 20 | # - name: Setup golangci-lint 21 | # uses: golangci/golangci-lint-action@v3 22 | # with: 23 | # version: latest 24 | # args: --verbose --timeout 20m 25 | 26 | - uses: hadolint/hadolint-action@v3.1.0 27 | name: hadolint for Dockerfile 28 | with: 29 | dockerfile: docker/Dockerfile 30 | 31 | testing: 32 | runs-on: ubuntu-latest 33 | container: golang:1.22-alpine 34 | steps: 35 | - name: Checkout repository 36 | 
uses: actions/checkout@v4 37 | 38 | - name: setup tool 39 | run: | 40 | apk add bash make git gcc g++ 41 | 42 | - name: testing 43 | run: | 44 | make dependency 45 | make test 46 | 47 | - name: Upload coverage to Codecov 48 | uses: codecov/codecov-action@v5 49 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # If you prefer the allow list template instead of the deny list, see community template: 2 | # https://github.com/github/gitignore/blob/main/community/Golang/Go.AllowList.gitignore 3 | # 4 | # Binaries for programs and plugins 5 | *.exe 6 | *.exe~ 7 | *.dll 8 | *.so 9 | *.dylib 10 | 11 | # Test binary, built with `go test -c` 12 | *.test 13 | 14 | # Output of the go coverage tool, specifically when used with LiteIDE 15 | *.out 16 | 17 | # Dependency directories (remove the comment below to include it) 18 | # vendor/ 19 | 20 | # Go workspace file 21 | go.work 22 | bin 23 | models/*.bin 24 | third_party/whisper.cpp 25 | .env 26 | dist 27 | testdata/jfk.txt 28 | testdata/jfk.srt 29 | testdata/* 30 | -------------------------------------------------------------------------------- /.golangci.yml: -------------------------------------------------------------------------------- 1 | run: 2 | skip-dirs: 3 | - third_party/whisper.cpp 4 | -------------------------------------------------------------------------------- /.goreleaser.yaml: -------------------------------------------------------------------------------- 1 | builds: 2 | - skip: true 3 | 4 | -------------------------------------------------------------------------------- /.hadolint.yaml: -------------------------------------------------------------------------------- 1 | ignored: 2 | - DL3018 3 | - DL3008 4 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 
| MIT License 2 | 3 | Copyright (c) 2023 Bo-Yi Wu 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | EXECUTABLE := go-whisper 2 | GO ?= go 3 | GOFILES := $(shell find . 
-name "*.go" -type f) 4 | HAS_GO = $(shell hash $(GO) > /dev/null 2>&1 && echo "GO" || echo "NOGO" ) 5 | 6 | ifneq ($(shell uname), Darwin) 7 | EXTLDFLAGS = -extldflags "-static" $(null) 8 | else 9 | EXTLDFLAGS = 10 | endif 11 | 12 | ifeq ($(HAS_GO), GO) 13 | GOPATH ?= $(shell $(GO) env GOPATH) 14 | export PATH := $(GOPATH)/bin:$(PATH) 15 | 16 | CGO_EXTRA_CFLAGS := -DSQLITE_MAX_VARIABLE_NUMBER=32766 17 | CGO_CFLAGS ?= $(shell $(GO) env CGO_CFLAGS) $(CGO_EXTRA_CFLAGS) 18 | endif 19 | 20 | ifeq ($(OS), Windows_NT) 21 | GOFLAGS := -v -buildmode=exe 22 | EXECUTABLE ?= $(EXECUTABLE).exe 23 | else ifeq ($(OS), Windows) 24 | GOFLAGS := -v -buildmode=exe 25 | EXECUTABLE ?= $(EXECUTABLE).exe 26 | else 27 | GOFLAGS := -v 28 | EXECUTABLE ?= $(EXECUTABLE) 29 | endif 30 | 31 | ifneq ($(DRONE_TAG),) 32 | VERSION ?= $(DRONE_TAG) 33 | else 34 | VERSION ?= $(shell git describe --tags --always || git rev-parse --short HEAD) 35 | endif 36 | 37 | TAGS ?= 38 | GOLDFLAGS ?= -X 'main.Version=$(VERSION)' 39 | INCLUDE_PATH := $(abspath third_party/whisper.cpp):$(INCLUDE_PATH) 40 | LIBRARY_PATH := $(abspath third_party/whisper.cpp):$(LIBRARY_PATH) 41 | 42 | ifdef WHISPER_CUBLAS 43 | CGO_CFLAGS += -DGGML_USE_CUBLAS -I/usr/local/cuda/include -I/opt/cuda/include -I$(CUDA_PATH)/targets/$(UNAME_M)-linux/include 44 | CGO_CXXFLAGS += -DGGML_USE_CUBLAS -I/usr/local/cuda/include -I/opt/cuda/include -I$(CUDA_PATH)/targets/$(UNAME_M)-linux/include 45 | EXTLDFLAGS = -extldflags "-lcuda -lcublas -lculibos -lcudart -lcublasLt -lpthread -ldl -lrt -L/usr/local/cuda/lib64 -L/opt/cuda/lib64 -L$(CUDA_PATH)/targets/$(UNAME_M)-linux/lib" 46 | 47 | build: $(EXECUTABLE) 48 | 49 | $(EXECUTABLE): $(GOFILES) 50 | CGO_CXXFLAGS=${CGO_CXXFLAGS} CGO_CFLAGS=${CGO_CFLAGS} C_INCLUDE_PATH=${INCLUDE_PATH} LIBRARY_PATH=${LIBRARY_PATH} $(GO) build -v -tags '$(TAGS)' -ldflags '$(EXTLDFLAGS)-s -w $(GOLDFLAGS)' -o bin/$@ 51 | endif 52 | 53 | all: build 54 | 55 | clone: 56 | @[ -d third_party/whisper.cpp ] || git clone 
https://github.com/appleboy/whisper.cpp.git third_party/whisper.cpp 57 | 58 | # Build the whisper.cpp static library. $(MAKE) is used instead of a literal "make" so command-line flags and the jobserver reach the sub-make. 59 | dependency: clone 60 | @echo Build whisper 61 | @$(MAKE) -C third_party/whisper.cpp libwhisper.a 62 | 63 | test: 64 | @C_INCLUDE_PATH=${INCLUDE_PATH} LIBRARY_PATH=${LIBRARY_PATH} $(GO) test -v -cover -coverprofile coverage.txt ./... && echo "\n==>\033[32m Ok\033[m\n" || exit 1 65 | 66 | install: $(GOFILES) 67 | C_INCLUDE_PATH=${INCLUDE_PATH} LIBRARY_PATH=${LIBRARY_PATH} $(GO) install -v -tags '$(TAGS)' -ldflags '$(EXTLDFLAGS)-s -w $(GOLDFLAGS)' 68 | 69 | build: $(EXECUTABLE) 70 | 71 | $(EXECUTABLE): $(GOFILES) 72 | C_INCLUDE_PATH=${INCLUDE_PATH} LIBRARY_PATH=${LIBRARY_PATH} $(GO) build -v -tags '$(TAGS)' -ldflags '$(EXTLDFLAGS)-s -w $(GOLDFLAGS)' -o bin/$@ 73 | 74 | # Run go clean, then delete the coverage report and the built binary. The build rule writes to bin/$(EXECUTABLE), so that path is removed too. 75 | clean: 76 | $(GO) clean -x -i ./... 77 | rm -rf coverage.txt $(EXECUTABLE) bin/$(EXECUTABLE) $(DIST) 78 | 79 | version: 80 | @echo $(VERSION) 81 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # go-whisper 2 | 3 | Docker Image for Speech-to-Text using [ggerganov/whisper.cpp][1]. 4 | 5 | This Docker image provides a ready-to-use environment for converting speech to text using the [ggerganov/whisper.cpp][1] library. The whisper.cpp library is an open-source project that enables efficient and accurate speech recognition. By utilizing this Docker image, users can easily set up and run the speech-to-text conversion process without worrying about installing dependencies or configuring the system. 6 | 7 | The Docker image includes all necessary components and dependencies, ensuring a seamless experience for users who want to leverage the power of the whisper.cpp library for their speech recognition needs. Simply pull the Docker image, run the container, and start converting your audio files into text with minimal effort. 
8 | 9 | In summary, this Docker image offers a convenient and efficient way to utilize the [ggerganov/whisper.cpp][1] library for speech-to-text conversion, making it an excellent choice for those looking to implement speech recognition in their projects. 10 | 11 | [1]:https://github.com/ggerganov/whisper.cpp 12 | 13 | ## OpenAI's Whisper models converted to ggml format 14 | 15 | See the [Available models][2]. 16 | 17 | | Model | Disk | Mem | SHA | 18 | |------------|---------|-----------|----------------------------------------------| 19 | | tiny | 75 MB | ~390 MB | bd577a113a864445d4c299885e0cb97d4ba92b5f | 20 | | tiny.en | 75 MB | ~390 MB | c78c86eb1a8faa21b369bcd33207cc90d64ae9df | 21 | | base | 142 MB | ~500 MB | 465707469ff3a37a2b9b8d8f89f2f99de7299dac | 22 | | base.en | 142 MB | ~500 MB | 137c40403d78fd54d454da0f9bd998f78703390c | 23 | | small | 466 MB | ~1.0 GB | 55356645c2b361a969dfd0ef2c5a50d530afd8d5 | 24 | | small.en | 466 MB | ~1.0 GB | db8a495a91d927739e50b3fc1cc4c6b8f6c2d022 | 25 | | medium | 1.5 GB | ~2.6 GB | fd9727b6e1217c2f614f9b698455c4ffd82463b4 | 26 | | medium.en | 1.5 GB | ~2.6 GB | 8c30f0e44ce9560643ebd10bbe50cd20eafd3723 | 27 | | large-v1 | 2.9 GB | ~4.7 GB | b1caaf735c4cc1429223d5a74f0f4d0b9b59a299 | 28 | | large | 2.9 GB | ~4.7 GB | 0f4c8e34f21cf1a914c59d8b3ce882345ad349d6 | 29 | 30 | For more information see [ggerganov/whisper.cpp][3]. 31 | 32 | [2]: https://huggingface.co/ggerganov/whisper.cpp/tree/main 33 | [3]: https://github.com/ggerganov/whisper.cpp/tree/master/models 34 | 35 | ## Prepare 36 | 37 | Download the model you want to use and put it in the `models` directory. 38 | 39 | ```sh 40 | curl -LJ https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-small.bin \ 41 | --output models/ggml-small.bin 42 | ``` 43 | 44 | ## Usage 45 | 46 | Please follow these simplified instructions to transcribe the audio file using a Docker container: 47 | 48 | 1. Ensure that you have a `testdata` directory containing the `jfk.wav` file. 
49 | 2. Mount both the `models` and `testdata` directories to the Docker container. 50 | 3. Specify the model using the `--model` flag and the audio file path using the `--audio-path` flag. 51 | 4. The transcript result file will be saved in the same directory as the audio file. 52 | 53 | To transcribe the audio file, execute the command provided below. 54 | 55 | ```sh 56 | docker run \ 57 | -v $PWD/models:/app/models \ 58 | -v $PWD/testdata:/app/testdata \ 59 | ghcr.io/appleboy/go-whisper:latest \ 60 | --model /app/models/ggml-small.bin \ 61 | --audio-path /app/testdata/jfk.wav 62 | ``` 63 | 64 | See the following output: 65 | 66 | ```sh 67 | whisper_init_from_file_no_state: loading model from '/app/models/ggml-small.bin' 68 | whisper_model_load: loading model 69 | whisper_model_load: n_vocab = 51865 70 | whisper_model_load: n_audio_ctx = 1500 71 | whisper_model_load: n_audio_state = 768 72 | whisper_model_load: n_audio_head = 12 73 | whisper_model_load: n_audio_layer = 12 74 | whisper_model_load: n_text_ctx = 448 75 | whisper_model_load: n_text_state = 768 76 | whisper_model_load: n_text_head = 12 77 | whisper_model_load: n_text_layer = 12 78 | whisper_model_load: n_mels = 80 79 | whisper_model_load: ftype = 1 80 | whisper_model_load: qntvr = 0 81 | whisper_model_load: type = 3 82 | whisper_model_load: mem required = 743.00 MB (+ 16.00 MB per decoder) 83 | whisper_model_load: adding 1608 extra tokens 84 | whisper_model_load: model ctx = 464.68 MB 85 | whisper_model_load: model size = 464.44 MB 86 | whisper_init_state: kv self size = 15.75 MB 87 | whisper_init_state: kv cross size = 52.73 MB 88 | 1:46AM INF system_info: n_threads = 8 / 8 | AVX = 0 | AVX2 = 0 | AVX512 = 0 | FMA = 0 | NEON = 1 | ARM_FMA = 1 | F16C = 0 | FP16_VA = 0 | WASM_SIMD = 0 | BLAS = 0 | SSE3 = 0 | VSX = 0 | COREML = 0 | 89 | module=transcript 90 | whisper_full_with_state: auto-detected language: en (p = 0.967331) 91 | 1:46AM INF [ 0s -> 11s] And so my fellow Americans, ask not what your 
country can do for you, ask what you can do for your country. module=transcript 92 | ``` 93 | 94 | command line arguments: 95 | | Options | Description | Default Value | 96 | |-----------------------|------------------------------------------------------------|-------------------| 97 | | --model | Model is the interface to a whisper model | [$PLUGIN_MODEL, $INPUT_MODEL] | 98 | | --audio-path | audio path | [$PLUGIN_AUDIO_PATH, $INPUT_AUDIO_PATH] | 99 | | --output-folder | output folder | [$PLUGIN_OUTPUT_FOLDER, $INPUT_OUTPUT_FOLDER] | 100 | | --output-format | output format, support txt, srt, csv | (default: "txt") [$PLUGIN_OUTPUT_FORMAT, $INPUT_OUTPUT_FORMAT] | 101 | | --output-filename | output filename | [$PLUGIN_OUTPUT_FILENAME, $INPUT_OUTPUT_FILENAME] | 102 | | --language | Set the language to use for speech recognition | (default: "auto") [$PLUGIN_LANGUAGE, $INPUT_LANGUAGE] | 103 | | --threads | Set number of threads to use | (default: 8) [$PLUGIN_THREADS, $INPUT_THREADS] | 104 | | --debug | enable debug mode | (default: false) [$PLUGIN_DEBUG, $INPUT_DEBUG] | 105 | | --speedup | speed up audio by x2 (reduced accuracy) | (default: false) [$PLUGIN_SPEEDUP, $INPUT_SPEEDUP] | 106 | | --translate | translate from source language to english | (default: false) [$PLUGIN_TRANSLATE, $INPUT_TRANSLATE] | 107 | | --print-progress | print progress | (default: true) [$PLUGIN_PRINT_PROGRESS, $INPUT_PRINT_PROGRESS] | 108 | | --print-segment | print segment | (default: false) [$PLUGIN_PRINT_SEGMENT, $INPUT_PRINT_SEGMENT] | 109 | | --webhook-url | webhook url | [$PLUGIN_WEBHOOK_URL, $INPUT_WEBHOOK_URL] | 110 | | --webhook-insecure | webhook insecure | (default: false) [$PLUGIN_WEBHOOK_INSECURE, $INPUT_WEBHOOK_INSECURE] | 111 | | --webhook-headers | webhook headers | [$PLUGIN_WEBHOOK_HEADERS, $INPUT_WEBHOOK_HEADERS] | 112 | | --youtube-url | youtube url | [$PLUGIN_YOUTUBE_URL, $INPUT_YOUTUBE_URL] | 113 | | --youtube-insecure | youtube insecure | (default: false) 
[$PLUGIN_YOUTUBE_INSECURE, $INPUT_YOUTUBE_INSECURE] | 114 | | --youtube-retry-count | youtube retry count | (default: 20) [$PLUGIN_YOUTUBE_RETRY_COUNT, $INPUT_YOUTUBE_RETRY_COUNT] | 115 | | --prompt | initial prompt | [$PLUGIN_PROMPT, $INPUT_PROMPT] | 116 | | --help, -h | show help | | 117 | | --version, -v | print the version | | 118 | -------------------------------------------------------------------------------- /_example/go.mod: -------------------------------------------------------------------------------- 1 | module example 2 | 3 | go 1.20 4 | 5 | require github.com/gin-gonic/gin v1.9.1 6 | 7 | require ( 8 | github.com/bytedance/sonic v1.9.1 // indirect 9 | github.com/chenzhuoyu/base64x v0.0.0-20221115062448-fe3a3abad311 // indirect 10 | github.com/gabriel-vasile/mimetype v1.4.2 // indirect 11 | github.com/gin-contrib/sse v0.1.0 // indirect 12 | github.com/go-playground/locales v0.14.1 // indirect 13 | github.com/go-playground/universal-translator v0.18.1 // indirect 14 | github.com/go-playground/validator/v10 v10.14.0 // indirect 15 | github.com/goccy/go-json v0.10.2 // indirect 16 | github.com/json-iterator/go v1.1.12 // indirect 17 | github.com/klauspost/cpuid/v2 v2.2.4 // indirect 18 | github.com/leodido/go-urn v1.2.4 // indirect 19 | github.com/mattn/go-isatty v0.0.19 // indirect 20 | github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd // indirect 21 | github.com/modern-go/reflect2 v1.0.2 // indirect 22 | github.com/pelletier/go-toml/v2 v2.0.8 // indirect 23 | github.com/twitchyliquid64/golang-asm v0.15.1 // indirect 24 | github.com/ugorji/go/codec v1.2.11 // indirect 25 | golang.org/x/arch v0.3.0 // indirect 26 | golang.org/x/crypto v0.9.0 // indirect 27 | golang.org/x/net v0.10.0 // indirect 28 | golang.org/x/sys v0.8.0 // indirect 29 | golang.org/x/text v0.9.0 // indirect 30 | google.golang.org/protobuf v1.30.0 // indirect 31 | gopkg.in/yaml.v3 v3.0.1 // indirect 32 | ) 33 | 
-------------------------------------------------------------------------------- /_example/go.sum: -------------------------------------------------------------------------------- 1 | github.com/bytedance/sonic v1.5.0/go.mod h1:ED5hyg4y6t3/9Ku1R6dU/4KyJ48DZ4jPhfY1O2AihPM= 2 | github.com/bytedance/sonic v1.9.1 h1:6iJ6NqdoxCDr6mbY8h18oSO+cShGSMRGCEo7F2h0x8s= 3 | github.com/bytedance/sonic v1.9.1/go.mod h1:i736AoUSYt75HyZLoJW9ERYxcy6eaN6h4BZXU064P/U= 4 | github.com/chenzhuoyu/base64x v0.0.0-20211019084208-fb5309c8db06/go.mod h1:DH46F32mSOjUmXrMHnKwZdA8wcEefY7UVqBKYGjpdQY= 5 | github.com/chenzhuoyu/base64x v0.0.0-20221115062448-fe3a3abad311 h1:qSGYFH7+jGhDF8vLC+iwCD4WpbV1EBDSzWkJODFLams= 6 | github.com/chenzhuoyu/base64x v0.0.0-20221115062448-fe3a3abad311/go.mod h1:b583jCggY9gE99b6G5LEC39OIiVsWj+R97kbl5odCEk= 7 | github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= 8 | github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= 9 | github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= 10 | github.com/gabriel-vasile/mimetype v1.4.2 h1:w5qFW6JKBz9Y393Y4q372O9A7cUSequkh1Q7OhCmWKU= 11 | github.com/gabriel-vasile/mimetype v1.4.2/go.mod h1:zApsH/mKG4w07erKIaJPFiX0Tsq9BFQgN3qGY5GnNgA= 12 | github.com/gin-contrib/sse v0.1.0 h1:Y/yl/+YNO8GZSjAhjMsSuLt29uWRFHdHYUb5lYOV9qE= 13 | github.com/gin-contrib/sse v0.1.0/go.mod h1:RHrZQHXnP2xjPF+u1gW/2HnVO7nvIa9PG3Gm+fLHvGI= 14 | github.com/gin-gonic/gin v1.9.1 h1:4idEAncQnU5cB7BeOkPtxjfCSye0AAm1R0RVIqJ+Jmg= 15 | github.com/gin-gonic/gin v1.9.1/go.mod h1:hPrL7YrpYKXt5YId3A/Tnip5kqbEAP+KLuI3SUcPTeU= 16 | github.com/go-playground/assert/v2 v2.2.0 h1:JvknZsQTYeFEAhQwI4qEt9cyV5ONwRHC+lYKSsYSR8s= 17 | github.com/go-playground/locales v0.14.1 h1:EWaQ/wswjilfKLTECiXz7Rh+3BjFhfDFKv/oXslEjJA= 18 | github.com/go-playground/locales v0.14.1/go.mod h1:hxrqLVvrK65+Rwrd5Fc6F2O76J/NuW9t0sjnWqG1slY= 19 | github.com/go-playground/universal-translator v0.18.1 
h1:Bcnm0ZwsGyWbCzImXv+pAJnYK9S473LQFuzCbDbfSFY= 20 | github.com/go-playground/universal-translator v0.18.1/go.mod h1:xekY+UJKNuX9WP91TpwSH2VMlDf28Uj24BCp08ZFTUY= 21 | github.com/go-playground/validator/v10 v10.14.0 h1:vgvQWe3XCz3gIeFDm/HnTIbj6UGmg/+t63MyGU2n5js= 22 | github.com/go-playground/validator/v10 v10.14.0/go.mod h1:9iXMNT7sEkjXb0I+enO7QXmzG6QCsPWY4zveKFVRSyU= 23 | github.com/goccy/go-json v0.10.2 h1:CrxCmQqYDkv1z7lO7Wbh2HN93uovUHgrECaO5ZrCXAU= 24 | github.com/goccy/go-json v0.10.2/go.mod h1:6MelG93GURQebXPDq3khkgXZkazVtN9CRI+MGFi0w8I= 25 | github.com/golang/protobuf v1.5.0/go.mod h1:FsONVRAS9T7sI+LIUmWTfcYkHO4aIWwzhcaSAoJOfIk= 26 | github.com/google/go-cmp v0.5.5 h1:Khx7svrCpmxxtHBq5j2mp/xVjsi8hQMfNLvJFAlrGgU= 27 | github.com/google/go-cmp v0.5.5/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= 28 | github.com/google/gofuzz v1.0.0/go.mod h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg= 29 | github.com/json-iterator/go v1.1.12 h1:PV8peI4a0ysnczrg+LtxykD8LfKY9ML6u2jnxaEnrnM= 30 | github.com/json-iterator/go v1.1.12/go.mod h1:e30LSqwooZae/UwlEbR2852Gd8hjQvJoHmT4TnhNGBo= 31 | github.com/klauspost/cpuid/v2 v2.0.9/go.mod h1:FInQzS24/EEf25PyTYn52gqo7WaD8xa0213Md/qVLRg= 32 | github.com/klauspost/cpuid/v2 v2.2.4 h1:acbojRNwl3o09bUq+yDCtZFc1aiwaAAxtcn8YkZXnvk= 33 | github.com/klauspost/cpuid/v2 v2.2.4/go.mod h1:RVVoqg1df56z8g3pUjL/3lE5UfnlrJX8tyFgg4nqhuY= 34 | github.com/leodido/go-urn v1.2.4 h1:XlAE/cm/ms7TE/VMVoduSpNBoyc2dOxHs5MZSwAN63Q= 35 | github.com/leodido/go-urn v1.2.4/go.mod h1:7ZrI8mTSeBSHl/UaRyKQW1qZeMgak41ANeCNaVckg+4= 36 | github.com/mattn/go-isatty v0.0.19 h1:JITubQf0MOLdlGRuRq+jtsDlekdYPia9ZFsB8h/APPA= 37 | github.com/mattn/go-isatty v0.0.19/go.mod h1:W+V8PltTTMOvKvAeJH7IuucS94S2C6jfK/D7dTCTo3Y= 38 | github.com/modern-go/concurrent v0.0.0-20180228061459-e0a39a4cb421/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q= 39 | github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd h1:TRLaZ9cD/w8PVh93nsPXa1VrQ6jlwL5oN8l14QlcNfg= 40 | 
github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q= 41 | github.com/modern-go/reflect2 v1.0.2 h1:xBagoLtFs94CBntxluKeaWgTMpvLxC4ur3nMaC9Gz0M= 42 | github.com/modern-go/reflect2 v1.0.2/go.mod h1:yWuevngMOJpCy52FWWMvUC8ws7m/LJsjYzDa0/r8luk= 43 | github.com/pelletier/go-toml/v2 v2.0.8 h1:0ctb6s9mE31h0/lhu+J6OPmVeDxJn+kYnJc2jZR9tGQ= 44 | github.com/pelletier/go-toml/v2 v2.0.8/go.mod h1:vuYfssBdrU2XDZ9bYydBu6t+6a6PYNcZljzZR9VXg+4= 45 | github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= 46 | github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= 47 | github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= 48 | github.com/stretchr/objx v0.4.0/go.mod h1:YvHI0jy2hoMjB+UWwv71VJQ9isScKT/TqJzVSSt89Yw= 49 | github.com/stretchr/objx v0.5.0/go.mod h1:Yh+to48EsGEfYuaHDzXPcE3xhTkx73EhmCGUpEOglKo= 50 | github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI= 51 | github.com/stretchr/testify v1.7.0/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= 52 | github.com/stretchr/testify v1.7.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= 53 | github.com/stretchr/testify v1.8.0/go.mod h1:yNjHg4UonilssWZ8iaSj1OCr/vHnekPRkoO+kdMU+MU= 54 | github.com/stretchr/testify v1.8.1/go.mod h1:w2LPCIKwWwSfY2zedu0+kehJoqGctiVI29o6fzry7u4= 55 | github.com/stretchr/testify v1.8.2/go.mod h1:w2LPCIKwWwSfY2zedu0+kehJoqGctiVI29o6fzry7u4= 56 | github.com/stretchr/testify v1.8.3 h1:RP3t2pwF7cMEbC1dqtB6poj3niw/9gnV4Cjg5oW5gtY= 57 | github.com/stretchr/testify v1.8.3/go.mod h1:sz/lmYIOXD/1dqDmKjjqLyZ2RngseejIcXlSw2iwfAo= 58 | github.com/twitchyliquid64/golang-asm v0.15.1 h1:SU5vSMR7hnwNxj24w34ZyCi/FmDZTkS4MhqMhdFk5YI= 59 | github.com/twitchyliquid64/golang-asm v0.15.1/go.mod h1:a1lVb/DtPvCB8fslRZhAngC2+aY1QWCk3Cedj/Gdt08= 60 | github.com/ugorji/go/codec v1.2.11 
h1:BMaWp1Bb6fHwEtbplGBGJ498wD+LKlNSl25MjdZY4dU= 61 | github.com/ugorji/go/codec v1.2.11/go.mod h1:UNopzCgEMSXjBc6AOMqYvWC1ktqTAfzJZUZgYf6w6lg= 62 | golang.org/x/arch v0.0.0-20210923205945-b76863e36670/go.mod h1:5om86z9Hs0C8fWVUuoMHwpExlXzs5Tkyp9hOrfG7pp8= 63 | golang.org/x/arch v0.3.0 h1:02VY4/ZcO/gBOH6PUaoiptASxtXU10jazRCP865E97k= 64 | golang.org/x/arch v0.3.0/go.mod h1:5om86z9Hs0C8fWVUuoMHwpExlXzs5Tkyp9hOrfG7pp8= 65 | golang.org/x/crypto v0.9.0 h1:LF6fAI+IutBocDJ2OT0Q1g8plpYljMZ4+lty+dsqw3g= 66 | golang.org/x/crypto v0.9.0/go.mod h1:yrmDGqONDYtNj3tH8X9dzUun2m2lzPa9ngI6/RUPGR0= 67 | golang.org/x/net v0.10.0 h1:X2//UzNDwYmtCLn7To6G58Wr6f5ahEAQgKNzv9Y951M= 68 | golang.org/x/net v0.10.0/go.mod h1:0qNGK6F8kojg2nk9dLZ2mShWaEBan6FAoqfSigmmuDg= 69 | golang.org/x/sys v0.0.0-20220704084225-05e143d24a9e/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= 70 | golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= 71 | golang.org/x/sys v0.8.0 h1:EBmGv8NaZBZTWvrbjNoL6HVt+IVy3QDQpJs7VRIw3tU= 72 | golang.org/x/sys v0.8.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= 73 | golang.org/x/text v0.9.0 h1:2sjJmO8cDvYveuX97RDLsxlyUxLl+GHoLxBiRdHllBE= 74 | golang.org/x/text v0.9.0/go.mod h1:e1OnstbJyHTd6l/uOt8jFFHp6TRDWZR/bV3emEE/zU8= 75 | golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543 h1:E7g+9GITq07hpfrRu66IVDexMakfv52eLZ2CXBWiKr4= 76 | golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= 77 | google.golang.org/protobuf v1.26.0-rc.1/go.mod h1:jlhhOSvTdKEhbULTjvd4ARK9grFBp09yW+WbY/TyQbw= 78 | google.golang.org/protobuf v1.30.0 h1:kPPoIgf3TsEvrm0PFe15JQ+570QVxYzEvvHqChK+cng= 79 | google.golang.org/protobuf v1.30.0/go.mod h1:HV8QOd/L58Z+nl8r43ehVNZIU/HEI6OcFqwMG9pJV4I= 80 | gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405 h1:yhCVgyC4o1eVCa2tZl7eS0r+SDo693bJlVdllGtEeKM= 81 | gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= 82 
| gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= 83 | gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= 84 | gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= 85 | rsc.io/pdf v0.1.1/go.mod h1:n8OzWcQ6Sp37PL01nO98y4iUCRdTGarVfzxY20ICaU4= 86 | -------------------------------------------------------------------------------- /_example/main.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "log" 5 | "net/http" 6 | 7 | "github.com/gin-gonic/gin" 8 | ) 9 | 10 | // Webhook is the JSON body accepted by the example endpoints. 11 | // NOTE(review): the validator's "required" rule rejects zero values, so a body with "progress": 0 gets a 400 response; confirm that is intended. 12 | type Webhook struct { 13 | Progress int `form:"progress" json:"progress" xml:"progress" binding:"required"` 14 | } 15 | 16 | // main starts a gin server on :8080 with two POST endpoints that echo the received progress value. 17 | func main() { 18 | router := gin.Default() 19 | 20 | // /webhook echoes the progress field back to the caller. 21 | router.POST("/webhook", func(c *gin.Context) { 22 | var json Webhook 23 | if err := c.ShouldBindJSON(&json); err != nil { 24 | c.JSON(http.StatusBadRequest, gin.H{"error": err.Error()}) 25 | return 26 | } 27 | 28 | c.JSON(http.StatusOK, gin.H{"progress": json.Progress}) 29 | }) 30 | 31 | // /webhook2 additionally logs the X-Server-Token and X-Data-Uuid request headers when present. 32 | router.POST("/webhook2", func(c *gin.Context) { 33 | var json Webhook 34 | if err := c.ShouldBindJSON(&json); err != nil { 35 | c.JSON(http.StatusBadRequest, gin.H{"error": err.Error()}) 36 | return 37 | } 38 | 39 | if v, ok := c.Request.Header["X-Server-Token"]; ok { 40 | log.Println("show server token: ", v) 41 | } 42 | 43 | if v, ok := c.Request.Header["X-Data-Uuid"]; ok { 44 | log.Println("show data uuid: ", v) 45 | } 46 | 47 | c.JSON(http.StatusOK, gin.H{"progress": json.Progress}) 48 | }) 49 | 50 | // Listen and serve on 0.0.0.0:8080. Run blocks until the server fails, so report that error and exit non-zero instead of dropping it. 51 | if err := router.Run(":8080"); err != nil { 52 | log.Fatal(err) 53 | } 54 | } 55 | -------------------------------------------------------------------------------- /config/whisper.go: -------------------------------------------------------------------------------- 1 | package config 2 | 3 | import "fmt" 4 | 5 | // Whisper is the configuration for whisper. 
type Whisper struct {
	Model        string  // model handed to the whisper loader
	AudioPath    string  // source audio; converted to WAV before transcription
	Threads      uint    // number of threads to use
	Language     string  // language used for speech recognition
	Debug        bool    // enables debug mode
	SpeedUp      bool    // speed up audio by x2 (reduced accuracy)
	Translate    bool    // translate from the source language to English
	Prompt       string  // initial prompt
	MaxContext   uint    // maximum number of text context tokens to store
	BeamSize     uint    // beam size for beam search
	EntropyThold float64 // entropy threshold for decoder fail

	PrintProgress bool // print progress while processing
	PrintSegment  bool // print each segment as it is produced

	OutputFolder   string   // output folder
	OutputFilename string   // output filename
	OutputFormat   []string // output formats to save
}

// Validate reports whether the mandatory settings are present.
// The audio path is checked first, then the model; the first missing
// value is returned as an error, and nil means both are set.
func (c *Whisper) Validate() error {
	switch {
	case c.AudioPath == "":
		return fmt.Errorf("audio path is required")
	case c.Model == "":
		return fmt.Errorf("model is required")
	default:
		return nil
	}
}

// Webhook holds the webhook target: its URL, whether TLS verification is
// skipped (Insecure), and extra headers.
type Webhook struct {
	URL      string
	Insecure bool
	Headers  []string
}

// Setting groups the configuration of every component of the application.
type Setting struct {
	Whisper Whisper
	Webhook Webhook
	Youtube Youtube
}

// Youtube holds the settings used when the audio comes from a YouTube video.
type Youtube struct {
	URL      string // URL is the YouTube video URL.
	Insecure bool   // Insecure specifies whether to skip SSL verification.
	Debug    bool   // Debug specifies whether to enable debug mode.
	Retry    int    // Retry specifies the number of times to retry on failure.
}
3 | ARG CUDA_VERSION=12.0.0 4 | # Target the CUDA build image 5 | ARG BASE_CUDA_DEV_CONTAINER=nvidia/cuda:${CUDA_VERSION}-devel-ubuntu${UBUNTU_VERSION} 6 | # Target the CUDA runtime image 7 | ARG BASE_CUDA_RUN_CONTAINER=nvidia/cuda:${CUDA_VERSION}-runtime-ubuntu${UBUNTU_VERSION} 8 | 9 | FROM nvidia/cuda:${CUDA_VERSION}-devel-ubuntu${UBUNTU_VERSION} AS build 10 | WORKDIR /app 11 | # Unless otherwise specified, we make a fat build. 12 | ARG CUDA_DOCKER_ARCH=all 13 | # Set nvcc architecture 14 | ENV CUDA_DOCKER_ARCH=${CUDA_DOCKER_ARCH} 15 | # Enable cuBLAS 16 | ENV WHISPER_CUBLAS=1 17 | 18 | #apt-get 19 | RUN apt-get update && \ 20 | apt-get install -y --no-install-recommends build-essential git gcc g++ wget \ 21 | && rm -rf /var/lib/apt/lists/* /var/cache/apt/archives/* 22 | 23 | # install golang 24 | RUN wget --progress=dot:giga https://go.dev/dl/go1.22.10.linux-amd64.tar.gz 25 | RUN rm -rf /usr/local/go && tar -C /usr/local -xzf go1.22.10.linux-amd64.tar.gz 26 | ENV PATH ${PATH}:/usr/local/go/bin 27 | 28 | # Ref: https://stackoverflow.com/a/53464012 29 | ENV CUDA_MAIN_VERSION=12.0 30 | ENV LD_LIBRARY_PATH /usr/local/cuda-${CUDA_MAIN_VERSION}/compat:$LD_LIBRARY_PATH 31 | 32 | COPY ./ . 33 | RUN make dependency && env && make build && \ 34 | mv bin/go-whisper /bin/ && \ 35 | rm -rf bin 36 | 37 | FROM nvidia/cuda:${CUDA_VERSION}-runtime-ubuntu${UBUNTU_VERSION} AS runtime 38 | WORKDIR /app 39 | 40 | LABEL maintainer="Bo-Yi Wu " \ 41 | org.label-schema.name="Speech-to-Text" \ 42 | org.label-schema.vendor="Bo-Yi Wu" \ 43 | org.label-schema.schema-version="1.0" 44 | 45 | LABEL org.opencontainers.image.source=https://github.com/appleboy/go-whisper 46 | LABEL org.opencontainers.image.description="Speech-to-Text." 
47 | LABEL org.opencontainers.image.licenses=MIT 48 | 49 | RUN apt-get update && \ 50 | apt-get install -y --no-install-recommends curl ffmpeg \ 51 | && rm -rf /var/lib/apt/lists/* /var/cache/apt/archives/* 52 | 53 | COPY --from=build /bin/go-whisper /bin/go-whisper 54 | ENTRYPOINT ["/bin/go-whisper"] -------------------------------------------------------------------------------- /go.mod: -------------------------------------------------------------------------------- 1 | module github.com/appleboy/go-whisper 2 | 3 | go 1.22.0 4 | 5 | require ( 6 | github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc 7 | github.com/ggerganov/whisper.cpp/bindings/go v0.0.0-20230606002726-57543c169e27 8 | github.com/go-audio/wav v1.1.0 9 | github.com/joho/godotenv v1.5.1 10 | github.com/kkdai/youtube/v2 v2.10.2 11 | github.com/mattn/go-isatty v0.0.20 12 | github.com/rs/zerolog v1.33.0 13 | github.com/urfave/cli/v2 v2.27.5 14 | golang.org/x/net v0.35.0 15 | ) 16 | 17 | require ( 18 | github.com/VividCortex/ewma v1.2.0 // indirect 19 | github.com/acarl005/stripansi v0.0.0-20180116102854-5a71ef0e047d // indirect 20 | github.com/bitly/go-simplejson v0.5.1 // indirect 21 | github.com/cpuguy83/go-md2man/v2 v2.0.6 // indirect 22 | github.com/dlclark/regexp2 v1.11.4 // indirect 23 | github.com/dop251/goja v0.0.0-20241024094426-79f3a7efcdbd // indirect 24 | github.com/go-audio/audio v1.0.0 // indirect 25 | github.com/go-audio/riff v1.0.0 // indirect 26 | github.com/go-sourcemap/sourcemap v2.1.4+incompatible // indirect 27 | github.com/google/pprof v0.0.0-20241210010833-40e02aabc2ad // indirect 28 | github.com/mattn/go-colorable v0.1.13 // indirect 29 | github.com/mattn/go-runewidth v0.0.16 // indirect 30 | github.com/rivo/uniseg v0.4.7 // indirect 31 | github.com/russross/blackfriday/v2 v2.1.0 // indirect 32 | github.com/vbauerster/mpb/v5 v5.4.0 // indirect 33 | github.com/xrash/smetrics v0.0.0-20240521201337-686a1a2994c1 // indirect 34 | golang.org/x/sys v0.30.0 // indirect 35 | 
golang.org/x/text v0.22.0 // indirect 36 | ) 37 | 38 | replace github.com/ggerganov/whisper.cpp/bindings/go => github.com/appleboy/whisper.cpp/bindings/go v0.0.0-20240124072204-1dd0f53753ab 39 | -------------------------------------------------------------------------------- /go.sum: -------------------------------------------------------------------------------- 1 | github.com/Masterminds/semver/v3 v3.2.1 h1:RN9w6+7QoMeJVGyfmbcgs28Br8cvmnucEXnY0rYXWg0= 2 | github.com/Masterminds/semver/v3 v3.2.1/go.mod h1:qvl/7zhW3nngYb5+80sSMF+FG2BjYrf8m9wsX0PNOMQ= 3 | github.com/VividCortex/ewma v1.1.1/go.mod h1:2Tkkvm3sRDVXaiyucHiACn4cqf7DpdyLvmxzcbUokwA= 4 | github.com/VividCortex/ewma v1.2.0 h1:f58SaIzcDXrSy3kWaHNvuJgJ3Nmz59Zji6XoJR/q1ow= 5 | github.com/VividCortex/ewma v1.2.0/go.mod h1:nz4BbCtbLyFDeC9SUHbtcT5644juEuWfUAUnGx7j5l4= 6 | github.com/acarl005/stripansi v0.0.0-20180116102854-5a71ef0e047d h1:licZJFw2RwpHMqeKTCYkitsPqHNxTmd4SNR5r94FGM8= 7 | github.com/acarl005/stripansi v0.0.0-20180116102854-5a71ef0e047d/go.mod h1:asat636LX7Bqt5lYEZ27JNDcqxfjdBQuJ/MM4CN/Lzo= 8 | github.com/appleboy/whisper.cpp/bindings/go v0.0.0-20240124072204-1dd0f53753ab h1:XN2Jr6lLtsIHKLMTxMCchXW6zOYGpJ4S2rali9h2qmg= 9 | github.com/appleboy/whisper.cpp/bindings/go v0.0.0-20240124072204-1dd0f53753ab/go.mod h1:QIjZ9OktHFG7p+/m3sMvrAJKKdWrr1fZIK0rM6HZlyo= 10 | github.com/bitly/go-simplejson v0.5.1 h1:xgwPbetQScXt1gh9BmoJ6j9JMr3TElvuIyjR8pgdoow= 11 | github.com/bitly/go-simplejson v0.5.1/go.mod h1:YOPVLzCfwK14b4Sff3oP1AmGhI9T9Vsg84etUnlyp+Q= 12 | github.com/coreos/go-systemd/v22 v22.5.0/go.mod h1:Y58oyj3AT4RCenI/lSvhwexgC+NSVTIJ3seZv2GcEnc= 13 | github.com/cpuguy83/go-md2man/v2 v2.0.6 h1:XJtiaUW6dEEqVuZiMTn1ldk455QWwEIsMIJlo5vtkx0= 14 | github.com/cpuguy83/go-md2man/v2 v2.0.6/go.mod h1:oOW0eioCTA6cOiMLiUPZOpcVxMig6NIQQ7OS05n1F4g= 15 | github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc h1:U9qPSI2PIWSS1VwoXQT9A3Wy9MM3WgvqSxFWenqJduM= 16 | github.com/davecgh/go-spew 
v1.1.2-0.20180830191138-d8f796af33cc/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= 17 | github.com/dlclark/regexp2 v1.11.4 h1:rPYF9/LECdNymJufQKmri9gV604RvvABwgOA8un7yAo= 18 | github.com/dlclark/regexp2 v1.11.4/go.mod h1:DHkYz0B9wPfa6wondMfaivmHpzrQ3v9q8cnmRbL6yW8= 19 | github.com/dop251/goja v0.0.0-20241024094426-79f3a7efcdbd h1:QMSNEh9uQkDjyPwu/J541GgSH+4hw+0skJDIj9HJ3mE= 20 | github.com/dop251/goja v0.0.0-20241024094426-79f3a7efcdbd/go.mod h1:MxLav0peU43GgvwVgNbLAj1s/bSGboKkhuULvq/7hx4= 21 | github.com/go-audio/audio v1.0.0 h1:zS9vebldgbQqktK4H0lUqWrG8P0NxCJVqcj7ZpNnwd4= 22 | github.com/go-audio/audio v1.0.0/go.mod h1:6uAu0+H2lHkwdGsAY+j2wHPNPpPoeg5AaEFh9FlA+Zs= 23 | github.com/go-audio/riff v1.0.0 h1:d8iCGbDvox9BfLagY94fBynxSPHO80LmZCaOsmKxokA= 24 | github.com/go-audio/riff v1.0.0/go.mod h1:l3cQwc85y79NQFCRB7TiPoNiaijp6q8Z0Uv38rVG498= 25 | github.com/go-audio/wav v1.1.0 h1:jQgLtbqBzY7G+BM8fXF7AHUk1uHUviWS4X39d5rsL2g= 26 | github.com/go-audio/wav v1.1.0/go.mod h1:mpe9qfwbScEbkd8uybLuIpTgHyrISw/OTuvjUW2iGtE= 27 | github.com/go-sourcemap/sourcemap v2.1.4+incompatible h1:a+iTbH5auLKxaNwQFg0B+TCYl6lbukKPc7b5x0n1s6Q= 28 | github.com/go-sourcemap/sourcemap v2.1.4+incompatible/go.mod h1:F8jJfvm2KbVjc5NqelyYJmf/v5J0dwNLS2mL4sNA1Jg= 29 | github.com/godbus/dbus/v5 v5.0.4/go.mod h1:xhWf0FNVPg57R7Z0UbKHbJfkEywrmjJnf7w5xrFpKfA= 30 | github.com/google/pprof v0.0.0-20241210010833-40e02aabc2ad h1:a6HEuzUHeKH6hwfN/ZoQgRgVIWFJljSWa/zetS2WTvg= 31 | github.com/google/pprof v0.0.0-20241210010833-40e02aabc2ad/go.mod h1:vavhavw2zAxS5dIdcRluK6cSGGPlZynqzFM8NdvU144= 32 | github.com/joho/godotenv v1.5.1 h1:7eLL/+HRGLY0ldzfGMeQkb7vMd0as4CfYvUVzLqw0N0= 33 | github.com/joho/godotenv v1.5.1/go.mod h1:f4LDr5Voq0i2e/R5DDNOoa2zzDfwtkZa6DnEwAbqwq4= 34 | github.com/kkdai/youtube/v2 v2.10.2 h1:e3JslUDiKEfjMzxFyrOh3O59C/aLfKNZyrcav00MZV0= 35 | github.com/kkdai/youtube/v2 v2.10.2/go.mod h1:4y1MIg7f1o5/kQfkr7nwXFtv8PGSoe4kChOB9/iMA88= 36 | github.com/mattn/go-colorable v0.1.13 
h1:fFA4WZxdEF4tXPZVKMLwD8oUnCTTo08duU7wxecdEvA= 37 | github.com/mattn/go-colorable v0.1.13/go.mod h1:7S9/ev0klgBDR4GtXTXX8a3vIGJpMovkB8vQcUbaXHg= 38 | github.com/mattn/go-isatty v0.0.16/go.mod h1:kYGgaQfpe5nmfYZH+SKPsOc2e4SrIfOl2e/yFXSvRLM= 39 | github.com/mattn/go-isatty v0.0.19/go.mod h1:W+V8PltTTMOvKvAeJH7IuucS94S2C6jfK/D7dTCTo3Y= 40 | github.com/mattn/go-isatty v0.0.20 h1:xfD0iDuEKnDkl03q4limB+vH+GxLEtL/jb4xVJSWWEY= 41 | github.com/mattn/go-isatty v0.0.20/go.mod h1:W+V8PltTTMOvKvAeJH7IuucS94S2C6jfK/D7dTCTo3Y= 42 | github.com/mattn/go-runewidth v0.0.9/go.mod h1:H031xJmbD/WCDINGzjvQ9THkh0rPKHF+m2gUSrubnMI= 43 | github.com/mattn/go-runewidth v0.0.16 h1:E5ScNMtiwvlvB5paMFdw9p4kSQzbXFikJ5SQO6TULQc= 44 | github.com/mattn/go-runewidth v0.0.16/go.mod h1:Jdepj2loyihRzMpdS35Xk/zdY8IAYHsh153qUoGf23w= 45 | github.com/pkg/errors v0.9.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= 46 | github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2 h1:Jamvg5psRIccs7FGNTlIRMkT8wgtp5eCXdBlqhYGL6U= 47 | github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= 48 | github.com/rivo/uniseg v0.2.0/go.mod h1:J6wj4VEh+S6ZtnVlnTBMWIodfgj8LQOQFoIToxlJtxc= 49 | github.com/rivo/uniseg v0.4.7 h1:WUdvkW8uEhrYfLC4ZzdpI2ztxP1I582+49Oc5Mq64VQ= 50 | github.com/rivo/uniseg v0.4.7/go.mod h1:FN3SvrM+Zdj16jyLfmOkMNblXMcoc8DfTHruCPUcx88= 51 | github.com/rs/xid v1.5.0/go.mod h1:trrq9SKmegXys3aeAKXMUTdJsYXVwGY3RLcfgqegfbg= 52 | github.com/rs/zerolog v1.33.0 h1:1cU2KZkvPxNyfgEmhHAz/1A9Bz+llsdYzklWFzgp0r8= 53 | github.com/rs/zerolog v1.33.0/go.mod h1:/7mN4D5sKwJLZQ2b/znpjC3/GQWY/xaDXUM0kKWRHss= 54 | github.com/russross/blackfriday/v2 v2.1.0 h1:JIOH55/0cWyOuilr9/qlrm0BSXldqnqwMsf35Ld67mk= 55 | github.com/russross/blackfriday/v2 v2.1.0/go.mod h1:+Rmxgy9KzJVeS9/2gXHxylqXiyQDYRxCVz55jmeOWTM= 56 | github.com/stretchr/testify v1.10.0 h1:Xv5erBjTwe/5IxqUQTdXv5kgmIvbHo3QQyRwhJsOfJA= 57 | github.com/stretchr/testify v1.10.0/go.mod 
h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY= 58 | github.com/urfave/cli/v2 v2.27.5 h1:WoHEJLdsXr6dDWoJgMq/CboDmyY/8HMMH1fTECbih+w= 59 | github.com/urfave/cli/v2 v2.27.5/go.mod h1:3Sevf16NykTbInEnD0yKkjDAeZDS0A6bzhBH5hrMvTQ= 60 | github.com/vbauerster/mpb/v5 v5.4.0 h1:n8JPunifvQvh6P1D1HAl2Ur9YcmKT1tpoUuiea5mlmg= 61 | github.com/vbauerster/mpb/v5 v5.4.0/go.mod h1:fi4wVo7BVQ22QcvFObm+VwliQXlV1eBT8JDaKXR4JGI= 62 | github.com/xrash/smetrics v0.0.0-20240521201337-686a1a2994c1 h1:gEOO8jv9F4OT7lGCjxCBTO/36wtF6j2nSip77qHd4x4= 63 | github.com/xrash/smetrics v0.0.0-20240521201337-686a1a2994c1/go.mod h1:Ohn+xnUBiLI6FVj/9LpzZWtj1/D6lUovWYBkxHVV3aM= 64 | golang.org/x/net v0.35.0 h1:T5GQRQb2y08kTAByq9L4/bz8cipCdA8FbRTXewonqY8= 65 | golang.org/x/net v0.35.0/go.mod h1:EglIi67kWsHKlRzzVMUD93VMSWGFOMSZgxFjparz1Qk= 66 | golang.org/x/sys v0.0.0-20201218084310-7d0127a74742/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= 67 | golang.org/x/sys v0.0.0-20220811171246-fbc7d0a398ab/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= 68 | golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= 69 | golang.org/x/sys v0.12.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= 70 | golang.org/x/sys v0.30.0 h1:QjkSwP/36a20jFYWkSue1YwXzLmsV5Gfq7Eiy72C1uc= 71 | golang.org/x/sys v0.30.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= 72 | golang.org/x/text v0.22.0 h1:bofq7m3/HAFvbF51jz3Q9wLg3jkvSPuiZu/pD1XwgtM= 73 | golang.org/x/text v0.22.0/go.mod h1:YRoo4H8PVmsu+E3Ou7cqLVH8oXWIHVoX0jqUWALQhfY= 74 | gopkg.in/yaml.v2 v2.4.0 h1:D8xgwECY7CYvx+Y2n4sBz93Jn9JRvxdiyyo8CTfuKaY= 75 | gopkg.in/yaml.v2 v2.4.0/go.mod h1:RDklbk79AGWmwhnvt/jBztapEOGDOx6ZbXqjP6csGnQ= 76 | gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= 77 | gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= 78 | -------------------------------------------------------------------------------- /main.go: 
-------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "os" 5 | "runtime" 6 | "strconv" 7 | "time" 8 | 9 | "github.com/appleboy/go-whisper/config" 10 | "github.com/appleboy/go-whisper/webhook" 11 | "github.com/appleboy/go-whisper/whisper" 12 | "github.com/appleboy/go-whisper/youtube" 13 | 14 | "github.com/davecgh/go-spew/spew" 15 | _ "github.com/joho/godotenv/autoload" 16 | "github.com/mattn/go-isatty" 17 | "github.com/rs/zerolog" 18 | "github.com/rs/zerolog/log" 19 | "github.com/urfave/cli/v2" 20 | ) 21 | 22 | // Version set at compile-time 23 | var ( 24 | Version string 25 | ) 26 | 27 | func main() { 28 | isTerm := isatty.IsTerminal(os.Stdout.Fd()) 29 | zerolog.SetGlobalLevel(zerolog.InfoLevel) 30 | log.Logger = log.Output( 31 | zerolog.ConsoleWriter{ 32 | Out: os.Stderr, 33 | NoColor: !isTerm, 34 | }, 35 | ) 36 | zerolog.CallerMarshalFunc = func(pc uintptr, file string, line int) string { 37 | short := file 38 | for i := len(file) - 1; i > 0; i-- { 39 | if file[i] == '/' { 40 | short = file[i+1:] 41 | break 42 | } 43 | } 44 | file = short 45 | return file + ":" + strconv.Itoa(line) 46 | } 47 | 48 | app := cli.NewApp() 49 | app.Name = "Speech-to-Text Using Whisper API" 50 | app.Usage = "Speech-to-Text." 
51 | app.Copyright = "Copyright (c) " + strconv.Itoa(time.Now().Year()) + " Bo-Yi Wu" 52 | app.Authors = []*cli.Author{ 53 | { 54 | Name: "Bo-Yi Wu", 55 | Email: "appleboy.tw@gmail.com", 56 | }, 57 | } 58 | app.Action = run 59 | app.Version = Version 60 | app.Flags = []cli.Flag{ 61 | &cli.StringFlag{ 62 | Name: "model", 63 | Usage: "Model is the interface to a whisper model", 64 | EnvVars: []string{"PLUGIN_MODEL", "INPUT_MODEL"}, 65 | }, 66 | &cli.StringFlag{ 67 | Name: "audio-path", 68 | Usage: "audio path", 69 | EnvVars: []string{"PLUGIN_AUDIO_PATH", "INPUT_AUDIO_PATH"}, 70 | }, 71 | &cli.StringFlag{ 72 | Name: "output-folder", 73 | Usage: "output folder", 74 | EnvVars: []string{"PLUGIN_OUTPUT_FOLDER", "INPUT_OUTPUT_FOLDER"}, 75 | }, 76 | &cli.StringSliceFlag{ 77 | Name: "output-format", 78 | Usage: "output format, support txt, srt, csv", 79 | EnvVars: []string{"PLUGIN_OUTPUT_FORMAT", "INPUT_OUTPUT_FORMAT"}, 80 | Value: cli.NewStringSlice("txt"), 81 | }, 82 | &cli.StringFlag{ 83 | Name: "output-filename", 84 | Usage: "output filename", 85 | EnvVars: []string{"PLUGIN_OUTPUT_FILENAME", "INPUT_OUTPUT_FILENAME"}, 86 | }, 87 | &cli.StringFlag{ 88 | Name: "language", 89 | Usage: "Set the language to use for speech recognition", 90 | EnvVars: []string{"PLUGIN_LANGUAGE", "INPUT_LANGUAGE"}, 91 | Value: "auto", 92 | }, 93 | &cli.UintFlag{ 94 | Name: "threads", 95 | Usage: "Set number of threads to use", 96 | EnvVars: []string{"PLUGIN_THREADS", "INPUT_THREADS"}, 97 | Value: uint(runtime.NumCPU()), 98 | }, 99 | &cli.BoolFlag{ 100 | Name: "debug", 101 | Usage: "enable debug mode", 102 | EnvVars: []string{"PLUGIN_DEBUG", "INPUT_DEBUG"}, 103 | }, 104 | &cli.BoolFlag{ 105 | Name: "speedup", 106 | Usage: "speed up audio by x2 (reduced accuracy)", 107 | EnvVars: []string{"PLUGIN_SPEEDUP", "INPUT_SPEEDUP"}, 108 | }, 109 | &cli.BoolFlag{ 110 | Name: "translate", 111 | Usage: "translate from source language to english", 112 | EnvVars: []string{"PLUGIN_TRANSLATE", "INPUT_TRANSLATE"}, 
113 | }, 114 | &cli.BoolFlag{ 115 | Name: "print-progress", 116 | Usage: "print progress", 117 | EnvVars: []string{"PLUGIN_PRINT_PROGRESS", "INPUT_PRINT_PROGRESS"}, 118 | Value: true, 119 | }, 120 | &cli.BoolFlag{ 121 | Name: "print-segment", 122 | Usage: "print segment", 123 | EnvVars: []string{"PLUGIN_PRINT_SEGMENT", "INPUT_PRINT_SEGMENT"}, 124 | }, 125 | &cli.StringFlag{ 126 | Name: "webhook-url", 127 | Usage: "webhook url", 128 | EnvVars: []string{"PLUGIN_WEBHOOK_URL", "INPUT_WEBHOOK_URL"}, 129 | }, 130 | &cli.BoolFlag{ 131 | Name: "webhook-insecure", 132 | Usage: "webhook insecure", 133 | EnvVars: []string{"PLUGIN_WEBHOOK_INSECURE", "INPUT_WEBHOOK_INSECURE"}, 134 | }, 135 | &cli.StringSliceFlag{ 136 | Name: "webhook-headers", 137 | Usage: "webhook headers", 138 | EnvVars: []string{"PLUGIN_WEBHOOK_HEADERS", "INPUT_WEBHOOK_HEADERS"}, 139 | }, 140 | &cli.StringFlag{ 141 | Name: "youtube-url", 142 | Usage: "youtube url", 143 | EnvVars: []string{"PLUGIN_YOUTUBE_URL", "INPUT_YOUTUBE_URL"}, 144 | }, 145 | &cli.BoolFlag{ 146 | Name: "youtube-insecure", 147 | Usage: "youtube insecure", 148 | EnvVars: []string{"PLUGIN_YOUTUBE_INSECURE", "INPUT_YOUTUBE_INSECURE"}, 149 | }, 150 | &cli.IntFlag{ 151 | Name: "youtube-retry-count", 152 | Usage: "youtube retry count", 153 | EnvVars: []string{"PLUGIN_YOUTUBE_RETRY_COUNT", "INPUT_YOUTUBE_RETRY_COUNT"}, 154 | Value: 20, 155 | }, 156 | &cli.StringFlag{ 157 | Name: "prompt", 158 | Usage: "initial prompt", 159 | EnvVars: []string{"PLUGIN_PROMPT", "INPUT_PROMPT"}, 160 | }, 161 | &cli.UintFlag{ 162 | Name: "max-context", 163 | Usage: "maximum number of text context tokens to store", 164 | EnvVars: []string{"PLUGIN_MAX_CONTEXT", "INPUT_MAX_CONTEXT"}, 165 | Value: 32, 166 | }, 167 | &cli.UintFlag{ 168 | Name: "beam-size", 169 | Usage: "beam size for beam search", 170 | EnvVars: []string{"PLUGIN_BEAM_SIZE", "INPUT_BEAM_SIZE"}, 171 | Value: 5, 172 | }, 173 | &cli.Float64Flag{ 174 | Name: "entropy-thold", 175 | Usage: "entropy threshold 
for decoder fail", 176 | EnvVars: []string{"PLUGIN_ENTROPY_THOLD", "INPUT_ENTROPY_THOLD"}, 177 | Value: 2.4, 178 | }, 179 | } 180 | 181 | if err := app.Run(os.Args); err != nil { 182 | log.Fatal().Err(err).Msg("can't run app") 183 | } 184 | } 185 | 186 | func run(c *cli.Context) error { 187 | cfg := config.Setting{ 188 | Whisper: config.Whisper{ 189 | Model: c.String("model"), 190 | AudioPath: c.String("audio-path"), 191 | Threads: c.Uint("threads"), 192 | Language: c.String("language"), 193 | Debug: c.Bool("debug"), 194 | SpeedUp: c.Bool("speedup"), 195 | Translate: c.Bool("translate"), 196 | Prompt: c.String("prompt"), 197 | MaxContext: c.Uint("max-context"), 198 | BeamSize: c.Uint("beam-size"), 199 | EntropyThold: c.Float64("entropy-thold"), 200 | 201 | PrintProgress: c.Bool("print-progress"), 202 | PrintSegment: c.Bool("print-segment"), 203 | 204 | OutputFolder: c.String("output-folder"), 205 | OutputFilename: c.String("output-filename"), 206 | OutputFormat: c.StringSlice("output-format"), 207 | }, 208 | 209 | Webhook: config.Webhook{ 210 | URL: c.String("webhook-url"), 211 | Insecure: c.Bool("webhook-insecure"), 212 | Headers: c.StringSlice("webhook-headers"), 213 | }, 214 | 215 | Youtube: config.Youtube{ 216 | URL: c.String("youtube-url"), 217 | Insecure: c.Bool("youtube-insecure"), 218 | Debug: c.Bool("debug"), 219 | Retry: c.Int("youtube-retry-count"), 220 | }, 221 | } 222 | 223 | if cfg.Whisper.Debug { 224 | zerolog.SetGlobalLevel(zerolog.DebugLevel) 225 | log.Logger = log.With().Caller().Logger() 226 | } 227 | 228 | if cfg.Whisper.Debug { 229 | spew.Dump(cfg) 230 | } 231 | 232 | yt, err := youtube.New(&cfg.Youtube) 233 | if err != nil { 234 | return err 235 | } 236 | if yt != nil && cfg.Youtube.URL != "" { 237 | videoPath, err := yt.Download(c.Context) 238 | if err != nil { 239 | return err 240 | } 241 | cfg.Whisper.AudioPath = videoPath 242 | if cfg.Whisper.OutputFilename == "" { 243 | cfg.Whisper.OutputFilename = yt.Filename() 244 | } 245 | } 246 | 247 
| e, err := whisper.New( 248 | &cfg.Whisper, 249 | webhook.NewClient( 250 | cfg.Webhook.URL, 251 | cfg.Webhook.Insecure, 252 | webhook.ToHeaders(cfg.Webhook.Headers), 253 | ), 254 | ) 255 | if err != nil { 256 | return err 257 | } 258 | 259 | if err := e.Transcript(); err != nil { 260 | return err 261 | } 262 | defer e.Close() 263 | 264 | for _, ext := range cfg.Whisper.OutputFormat { 265 | if err := e.Save(ext); err != nil { 266 | return err 267 | } 268 | } 269 | 270 | return nil 271 | } 272 | -------------------------------------------------------------------------------- /models/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/appleboy/go-whisper/a091a113fae7e3e510a094a064adc2e6b2c8ee99/models/.gitkeep -------------------------------------------------------------------------------- /testdata/jfk.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/appleboy/go-whisper/a091a113fae7e3e510a094a064adc2e6b2c8ee99/testdata/jfk.wav -------------------------------------------------------------------------------- /third_party/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/appleboy/go-whisper/a091a113fae7e3e510a094a064adc2e6b2c8ee99/third_party/.gitkeep -------------------------------------------------------------------------------- /webhook/error.go: -------------------------------------------------------------------------------- 1 | package webhook 2 | 3 | import "fmt" 4 | 5 | // RequestError provides informations about generic request errors. 
type RequestError struct {
	HTTPStatusCode int   // HTTP status associated with the failure
	Err            error // underlying cause; may be nil
}

// Error formats the status code and, when a cause is present, its message.
// A nil Err yields only the status part instead of a "%!s(<nil>)" artefact.
func (e *RequestError) Error() string {
	if e.Err == nil {
		return fmt.Sprintf("error, status code: %d", e.HTTPStatusCode)
	}

	return fmt.Sprintf("error, status code: %d, message: %s", e.HTTPStatusCode, e.Err)
}

// Unwrap returns the underlying cause so errors.Is and errors.As can inspect it.
func (e *RequestError) Unwrap() error {
	return e.Err
}

func TestRequestError_Error(t *testing.T) {
	type fields struct {
		HTTPStatusCode int
		Err            error
	}
	tests := []struct {
		name   string
		fields fields
		want   string
	}{
		{
			name: "test error message",
			fields: fields{
				HTTPStatusCode: 404,
				Err:            errors.New("not found"),
			},
			want: "error, status code: 404, message: not found",
		},
		{
			name: "nil cause",
			fields: fields{
				HTTPStatusCode: 500,
			},
			want: "error, status code: 500",
		},
	}
	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			e := &RequestError{
				HTTPStatusCode: tt.fields.HTTPStatusCode,
				Err:            tt.fields.Err,
			}
			if got := e.Error(); got != tt.want {
				t.Errorf("RequestError.Error() = %v, want %v", got, tt.want)
			}
		})
	}
}

// ToHeaders converts a slice of strings to a map of strings.
func ToHeaders(headers []string) map[string]string {
	h := make(map[string]string, len(headers))
	for _, header := range headers {
		// Split on the first "=" only, so values that themselves contain "="
		// (base64 tokens, query strings, ...) are kept intact.
		key, value, found := strings.Cut(header, "=")
		if !found || key == "" {
			// No separator or no header name: nothing usable to send.
			continue
		}

		h[key] = value
	}

	return h
}

func TestToHeaders(t *testing.T) {
	type args struct {
		headers []string
	}
	tests := []struct {
		name string
		args args
		want map[string]string
	}{
		{
			name: "empty header",
			args: args{
				headers: []string{},
			},
			want: map[string]string{},
		},
		{
			name: "single header",
			args: args{
				headers: []string{"X-Drone-Token=1234"},
			},
			want: map[string]string{
				"X-Drone-Token": "1234",
			},
		},
		{
			name: "multiple headers",
			args: args{
				headers: []string{
					"X-Drone-Token=1234",
					"X-UUID=foobar",
				},
			},
			want: map[string]string{
				"X-Drone-Token": "1234",
				"X-UUID":        "foobar",
			},
		},
		{
			name: "value containing equals sign",
			args: args{
				headers: []string{"Authorization=Bearer abc=="},
			},
			want: map[string]string{
				"Authorization": "Bearer abc==",
			},
		},
	}
	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			if got := ToHeaders(tt.args.headers); !reflect.DeepEqual(got, tt.want) {
				t.Errorf("ToHeaders() = %v, want %v", got, tt.want)
			}
		})
	}
}

// Client represents a webhook client that sends HTTP requests to a specified URL with custom headers.
15 | type Client struct { 16 | url string 17 | httpClient *http.Client 18 | headers map[string]string 19 | } 20 | 21 | func (c *Client) build(ctx context.Context, request any) (*http.Request, error) { 22 | if request == nil { 23 | return http.NewRequestWithContext(ctx, http.MethodPost, c.url, nil) 24 | } 25 | 26 | var reqBytes []byte 27 | reqBytes, err := json.Marshal(request) 28 | if err != nil { 29 | return nil, err 30 | } 31 | 32 | return http.NewRequestWithContext( 33 | ctx, 34 | http.MethodPost, 35 | c.url, 36 | bytes.NewBuffer(reqBytes), 37 | ) 38 | } 39 | 40 | func (c *Client) Send(ctx context.Context, payload any) error { 41 | req, err := c.build(ctx, payload) 42 | if err != nil { 43 | return &RequestError{ 44 | HTTPStatusCode: http.StatusInternalServerError, 45 | Err: fmt.Errorf("build request with error: %s", err.Error()), 46 | } 47 | } 48 | 49 | req.Header.Set("Accept", "application/json; charset=utf-8") 50 | req.Header.Set("Content-Type", "application/json; charset=utf-8") 51 | 52 | // Add headers to request 53 | for k, v := range c.headers { 54 | req.Header.Set(k, v) 55 | } 56 | 57 | res, err := c.httpClient.Do(req) 58 | if err != nil { 59 | return &RequestError{ 60 | HTTPStatusCode: http.StatusInternalServerError, 61 | Err: fmt.Errorf("request failed with error: %s", err.Error()), 62 | } 63 | } 64 | defer res.Body.Close() 65 | 66 | if isFailureStatusCode(res) { 67 | return &RequestError{ 68 | HTTPStatusCode: res.StatusCode, 69 | Err: fmt.Errorf("request failed with status code: %d", res.StatusCode), 70 | } 71 | } 72 | 73 | return nil 74 | } 75 | 76 | func isFailureStatusCode(resp *http.Response) bool { 77 | return resp.StatusCode < http.StatusOK || resp.StatusCode >= http.StatusBadRequest 78 | } 79 | 80 | // NewClient creates a new webhook client with the given URL, insecure flag and headers. 81 | // It returns a pointer to a Client struct. 82 | // If the URL is empty or invalid, it returns nil. 
83 | // If the URL scheme is not http or https, it returns nil. 84 | // If insecure is true, it sets the client to skip TLS verification. 85 | // The client has a default timeout of 5 seconds. 86 | func NewClient(s string, insecure bool, headers map[string]string) *Client { 87 | if s == "" { 88 | return nil 89 | } 90 | 91 | u, err := url.Parse(s) 92 | if err != nil { 93 | return nil 94 | } 95 | 96 | if u.Scheme != "http" && u.Scheme != "https" { 97 | return nil 98 | } 99 | 100 | client := http.DefaultClient 101 | client.Timeout = 5 * time.Second 102 | 103 | if insecure { 104 | client.Transport = &http.Transport{ 105 | TLSClientConfig: &tls.Config{InsecureSkipVerify: true}, 106 | } 107 | } 108 | 109 | return &Client{ 110 | url: s, 111 | httpClient: client, 112 | headers: headers, 113 | } 114 | } 115 | -------------------------------------------------------------------------------- /whisper/audio.go: -------------------------------------------------------------------------------- 1 | package whisper 2 | 3 | import ( 4 | "fmt" 5 | "os" 6 | "os/exec" 7 | ) 8 | 9 | // sh executes shell command. 10 | func sh(c string) (string, error) { 11 | cmd := exec.Command("/bin/sh", "-c", c) 12 | cmd.Env = os.Environ() 13 | o, err := cmd.CombinedOutput() 14 | return string(o), err 15 | } 16 | 17 | // AudioToWav converts audio to wav for transcribe. 18 | func audioToWav(src, dst string) error { 19 | out, err := sh(fmt.Sprintf("ffmpeg -i %s -format s16le -ar 16000 -ac 1 -acodec pcm_s16le %s", src, dst)) 20 | if err != nil { 21 | return fmt.Errorf("error: %w out: %s", err, out) 22 | } 23 | 24 | return nil 25 | } 26 | 27 | // CutSilences cuts silences from audio. 
func cutSilences(src, dst string) error {
	// Runs "vmh cut-silences <src> <dst>". The tool is started directly with
	// an argument vector instead of a shell string, so file names containing
	// spaces or shell metacharacters are passed through verbatim and can
	// never be executed as commands. With Env left nil the child inherits
	// the current process environment.
	out, err := exec.Command("vmh", "cut-silences", src, dst).CombinedOutput()
	if err != nil {
		return fmt.Errorf("error: %w out: %s", err, out)
	}
	return nil
}

// srtTimestamp converts time.Duration to srt timestamp.
// The result has the form "HH:MM:SS,mmm".
func srtTimestamp(t time.Duration) string {
	hours := t / time.Hour
	minutes := (t % time.Hour) / time.Minute
	seconds := (t % time.Minute) / time.Second
	millis := (t % time.Second) / time.Millisecond

	return fmt.Sprintf("%02d:%02d:%02d,%03d", hours, minutes, seconds, millis)
}

func TestSrtTimestamp(t *testing.T) {
	type args struct {
		t time.Duration
	}
	tests := []struct {
		name string
		args args
		want string
	}{
		{
			name: "test 1",
			args: args{
				t: time.Duration(1*time.Hour + 2*time.Minute + 3*time.Second + 4*time.Millisecond),
			},
			want: "01:02:03,004",
		},
		{
			name: "test 2",
			args: args{
				t: time.Duration(10*time.Hour + 20*time.Minute + 30*time.Second + 40*time.Millisecond),
			},
			want: "10:20:30,040",
		},
	}
	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			if got := srtTimestamp(tt.args.t); got != tt.want {
				t.Errorf("srtTimestamp() = %v, want %v", got, tt.want)
			}
		})
	}
}
2 | 3 | import ( 4 | "context" 5 | "fmt" 6 | "os" 7 | "path" 8 | "path/filepath" 9 | "strings" 10 | "time" 11 | 12 | "github.com/appleboy/go-whisper/config" 13 | "github.com/appleboy/go-whisper/webhook" 14 | 15 | "github.com/ggerganov/whisper.cpp/bindings/go/pkg/whisper" 16 | "github.com/go-audio/wav" 17 | "github.com/rs/zerolog/log" 18 | ) 19 | 20 | type OutputFormat string 21 | 22 | func (f OutputFormat) String() string { 23 | return string(f) 24 | } 25 | 26 | var ( 27 | FormatTxt OutputFormat = "txt" 28 | FormatSrt OutputFormat = "srt" 29 | FormatCSV OutputFormat = "csv" 30 | ) 31 | 32 | type request struct { 33 | Progress int `json:"progress"` 34 | } 35 | 36 | // New for creating a new whisper engine. 37 | func New(cfg *config.Whisper, webhook *webhook.Client) (*Engine, error) { 38 | if err := cfg.Validate(); err != nil { 39 | return nil, err 40 | } 41 | 42 | return &Engine{ 43 | cfg: cfg, 44 | webhook: webhook, 45 | }, nil 46 | } 47 | 48 | // Engine is the whisper engine. 49 | type Engine struct { 50 | cfg *config.Whisper 51 | webhook *webhook.Client 52 | ctx whisper.Context 53 | model whisper.Model 54 | segments []whisper.Segment 55 | progress int 56 | } 57 | 58 | // Transcribe converts audio to text. 
59 | func (e *Engine) Transcript() error { 60 | var data []float32 61 | var err error 62 | 63 | dir, err := os.MkdirTemp("", "whisper") 64 | if err != nil { 65 | return err 66 | } 67 | defer os.RemoveAll(dir) 68 | 69 | convertedPath := filepath.Join(dir, "converted.wav") 70 | sourcePath := e.cfg.AudioPath 71 | outputPath := "" 72 | 73 | log.Debug().Msg("start convert audio to wav") 74 | if err := audioToWav(sourcePath, convertedPath); err != nil { 75 | return err 76 | } 77 | outputPath = convertedPath 78 | 79 | // Open the WAV file 80 | fh, err := os.Open(outputPath) 81 | if err != nil { 82 | return err 83 | } 84 | defer fh.Close() 85 | 86 | // Load the model 87 | e.model, err = whisper.New(e.cfg.Model) 88 | if err != nil { 89 | return err 90 | } 91 | defer e.model.Close() 92 | 93 | // Decode the WAV file - load the full buffer 94 | dec := wav.NewDecoder(fh) 95 | if buf, err := dec.FullPCMBuffer(); err != nil { 96 | return err 97 | } else if dec.SampleRate != whisper.SampleRate { 98 | return fmt.Errorf("unsupported sample rate: %d", dec.SampleRate) 99 | } else if dec.NumChans != 1 { 100 | return fmt.Errorf("unsupported number of channels: %d", dec.NumChans) 101 | } else { 102 | data = buf.AsFloat32Buffer().Data 103 | } 104 | 105 | e.ctx, err = e.model.NewContext() 106 | if err != nil { 107 | return err 108 | } 109 | 110 | e.ctx.SetThreads(e.cfg.Threads) 111 | e.ctx.SetSpeedup(e.cfg.SpeedUp) 112 | e.ctx.SetTranslate(e.cfg.Translate) 113 | e.ctx.SetPrompt(e.cfg.Prompt) 114 | e.ctx.SetMaxContext(int(e.cfg.MaxContext)) 115 | 116 | log.Info().Msgf("%s", e.ctx.SystemInfo()) 117 | 118 | if e.cfg.Language != "" { 119 | _ = e.ctx.SetLanguage(e.cfg.Language) 120 | } 121 | 122 | if e.cfg.BeamSize > 0 { 123 | e.ctx.SetBeamSize(int(e.cfg.BeamSize)) 124 | } 125 | 126 | if e.cfg.EntropyThold > 0 { 127 | e.ctx.SetEntropyThold(float32(e.cfg.EntropyThold)) 128 | } 129 | 130 | log.Debug().Msg("start transcribe process") 131 | e.ctx.ResetTimings() 132 | if err := e.ctx.Process(data, 
e.cbSegment(), e.cbProgress()); err != nil { 133 | return err 134 | } 135 | e.ctx.PrintTimings() 136 | 137 | return nil 138 | } 139 | 140 | // cbSegment is a method of the Engine struct that returns a function. 141 | // The function takes a segment whisper.Segment as input and returns nothing. 142 | // It appends the given segment to the segments field of the Engine struct. 143 | // If the PrintSegment field in the configuration is true, it prints the segment. 144 | // The segment is printed with the start and end time truncated to milliseconds. 145 | func (e *Engine) cbSegment() func(segment whisper.Segment) { 146 | return func(segment whisper.Segment) { 147 | e.segments = append(e.segments, segment) 148 | if !e.cfg.PrintSegment { 149 | return 150 | } 151 | log.Info().Msgf( 152 | "[%6s -> %6s] %s", 153 | segment.Start.Truncate(time.Millisecond), 154 | segment.End.Truncate(time.Millisecond), 155 | segment.Text, 156 | ) 157 | } 158 | } 159 | 160 | // cbProgress is a method of the Engine struct that returns a function. 161 | // The function takes a progress int as input and returns nothing. 162 | // It sets the progress field of the Engine struct to the given progress int. 163 | // If the PrintProgress field in the configuration is true, it prints the progress. 164 | func (e *Engine) cbProgress() func(progress int) { 165 | return func(progress int) { 166 | // If the progress is greater than 100, set it to 100. 
167 | if progress > 100 { 168 | progress = 100 169 | } 170 | 171 | if e.progress == progress { 172 | return 173 | } 174 | e.progress = progress 175 | if e.cfg.PrintProgress { 176 | log.Info().Msgf("current progress: %d%%", progress) 177 | } 178 | 179 | // send webhook 180 | if e.webhook != nil { 181 | if err := e.webhook.Send(context.Background(), &request{ 182 | Progress: progress, 183 | }); err != nil { 184 | log.Error().Err(err).Msg("send webhook error") 185 | } 186 | } 187 | } 188 | } 189 | 190 | // getOutputPath is a method of the Engine struct that takes a format string as input. 191 | // It returns the output path for the converted audio file based on the given format. 192 | func (e *Engine) getOutputPath(format string) string { 193 | // Get the file extension of the audio file from the configuration. 194 | ext := filepath.Ext(e.cfg.AudioPath) 195 | // Get the base name of the audio file from the configuration. 196 | filename := filepath.Base(e.cfg.AudioPath) 197 | // If the OutputFilename field in the configuration is not empty, 198 | if e.cfg.OutputFilename != "" { 199 | filename = e.cfg.OutputFilename 200 | } 201 | // Get the directory path of the audio file from the configuration. 202 | folder := filepath.Dir(e.cfg.AudioPath) 203 | // If the OutputFolder field in the configuration is not empty, 204 | // use it as the folder for the output file. 205 | if e.cfg.OutputFolder != "" { 206 | folder = e.cfg.OutputFolder 207 | } 208 | 209 | // Join the folder path, the base name of the audio file without its extension, 210 | // and the new format to create the output path for the converted audio file. 211 | return path.Join(folder, strings.TrimSuffix(filename, ext)+"."+format) 212 | } 213 | 214 | // Save saves the text to a file. 215 | // It takes a format string as input and returns an error. 216 | // It gets the output path for the converted audio file based on the given format. 
217 | func (e *Engine) Save(format string) error { 218 | outputPath := e.getOutputPath(format) 219 | log.Info(). 220 | Str("output-path", outputPath). 221 | Str("output-format", format). 222 | Msg("save text to file") 223 | text := "" 224 | switch OutputFormat(format) { 225 | case FormatSrt: 226 | for i, segment := range e.segments { 227 | text += fmt.Sprintf("%d\n", i+1) 228 | text += fmt.Sprintf("%s --> %s\n", srtTimestamp(segment.Start), srtTimestamp(segment.End)) 229 | text += segment.Text + "\n\n" 230 | 231 | } 232 | case FormatTxt: 233 | for _, segment := range e.segments { 234 | text += segment.Text 235 | } 236 | case FormatCSV: 237 | text = "start,end,text\n" 238 | for _, segment := range e.segments { 239 | text += fmt.Sprintf("%s,%s,\"%s\"\n", segment.Start, segment.End, segment.Text) 240 | } 241 | } 242 | 243 | if err := os.WriteFile(outputPath, []byte(text), 0o644); err != nil { 244 | return err 245 | } 246 | 247 | return nil 248 | } 249 | 250 | // Close closes the engine. 251 | func (e *Engine) Close() error { 252 | if e.ctx == nil { 253 | return nil 254 | } 255 | 256 | return e.model.Close() 257 | } 258 | -------------------------------------------------------------------------------- /whisper/whisper_test.go: -------------------------------------------------------------------------------- 1 | package whisper 2 | 3 | import ( 4 | "testing" 5 | 6 | "github.com/appleboy/go-whisper/config" 7 | "github.com/ggerganov/whisper.cpp/bindings/go/pkg/whisper" 8 | ) 9 | 10 | func TestEngine_getOutputPath(t *testing.T) { 11 | type fields struct { 12 | cfg *config.Whisper 13 | ctx whisper.Context 14 | model whisper.Model 15 | segments []whisper.Segment 16 | } 17 | type args struct { 18 | format string 19 | } 20 | tests := []struct { 21 | name string 22 | fields fields 23 | args args 24 | want string 25 | }{ 26 | { 27 | name: "change wav to txt", 28 | fields: fields{ 29 | cfg: &config.Whisper{ 30 | AudioPath: "/test/1234/foo.wav", 31 | }, 32 | }, 33 | args: args{ 34 
| format: "txt", 35 | }, 36 | want: "/test/1234/foo.txt", 37 | }, 38 | { 39 | name: "change output folder", 40 | fields: fields{ 41 | cfg: &config.Whisper{ 42 | AudioPath: "/test/1234/foo.wav", 43 | OutputFolder: "/foo/bar", 44 | }, 45 | }, 46 | args: args{ 47 | format: "txt", 48 | }, 49 | want: "/foo/bar/foo.txt", 50 | }, 51 | } 52 | for _, tt := range tests { 53 | t.Run(tt.name, func(t *testing.T) { 54 | e := &Engine{ 55 | cfg: tt.fields.cfg, 56 | ctx: tt.fields.ctx, 57 | model: tt.fields.model, 58 | segments: tt.fields.segments, 59 | } 60 | if got := e.getOutputPath(tt.args.format); got != tt.want { 61 | t.Errorf("Engine.getOutputPath() = %v, want %v", got, tt.want) 62 | } 63 | }) 64 | } 65 | } 66 | -------------------------------------------------------------------------------- /youtube/youtube.go: -------------------------------------------------------------------------------- 1 | package youtube 2 | 3 | import ( 4 | "context" 5 | "crypto/tls" 6 | "errors" 7 | "fmt" 8 | "net" 9 | "net/http" 10 | "net/url" 11 | "os" 12 | "path" 13 | "strconv" 14 | "time" 15 | 16 | "github.com/appleboy/go-whisper/config" 17 | 18 | "github.com/kkdai/youtube/v2" 19 | ytdl "github.com/kkdai/youtube/v2/downloader" 20 | "golang.org/x/net/http/httpproxy" 21 | ) 22 | 23 | // Engine is the youtube engine. 24 | type Engine struct { 25 | cfg *config.Youtube 26 | video *youtube.Video 27 | } 28 | 29 | // Filename returns a sanitized filename. 30 | func (e *Engine) Filename() string { 31 | if e.video == nil { 32 | return "" 33 | } 34 | 35 | return ytdl.SanitizeFilename(e.video.Title) 36 | } 37 | 38 | // Download downloads youtube video. 
39 | func (e *Engine) Download(ctx context.Context) (string, error) { 40 | proxyFunc := httpproxy.FromEnvironment().ProxyFunc() 41 | httpTransport := &http.Transport{ 42 | Proxy: func(r *http.Request) (uri *url.URL, err error) { 43 | return proxyFunc(r.URL) 44 | }, 45 | IdleConnTimeout: 60 * time.Second, 46 | TLSHandshakeTimeout: 10 * time.Second, 47 | ExpectContinueTimeout: 1 * time.Second, 48 | ForceAttemptHTTP2: true, 49 | DialContext: (&net.Dialer{ 50 | Timeout: 30 * time.Second, 51 | KeepAlive: 30 * time.Second, 52 | }).DialContext, 53 | } 54 | 55 | if e.cfg.Insecure { 56 | httpTransport.TLSClientConfig = &tls.Config{ 57 | InsecureSkipVerify: true, 58 | } 59 | } 60 | 61 | for i := 0; i < e.cfg.Retry; i++ { 62 | output, err := e.download(ctx, httpTransport) 63 | if err != nil { 64 | return "", err 65 | } 66 | if output != "" { 67 | return output, nil 68 | } 69 | time.Sleep(1 * time.Second) 70 | } 71 | 72 | return "", errors.New("youtube video can't download") 73 | } 74 | 75 | func (e *Engine) download(ctx context.Context, trans http.RoundTripper) (string, error) { 76 | folder, err := os.MkdirTemp("", "youtube") 77 | if err != nil { 78 | panic(err) 79 | } 80 | 81 | downloader := &ytdl.Downloader{} 82 | downloader.HTTPClient = &http.Client{Transport: trans} 83 | 84 | e.video, err = downloader.GetVideo(e.cfg.URL) 85 | if err != nil { 86 | panic(err) 87 | } 88 | 89 | mimetype := "audio/mp4" 90 | outputQuality := "tiny" 91 | 92 | formats := e.video.Formats 93 | if mimetype != "" { 94 | formats = formats.Type(mimetype) 95 | } 96 | if len(formats) == 0 { 97 | return "", errors.New("no formats found") 98 | } 99 | 100 | var format *youtube.Format 101 | itag, _ := strconv.Atoi(outputQuality) 102 | switch { 103 | case itag > 0: 104 | // When an itag is specified, do not filter format with mime-type 105 | formats = e.video.Formats.Itag(itag) 106 | if len(formats) == 0 { 107 | return "", fmt.Errorf("unable to find format with itag %d", itag) 108 | } 109 | 110 | case 
outputQuality != "": 111 | formats = formats.Quality(outputQuality) 112 | if len(formats) == 0 { 113 | return "", fmt.Errorf("unable to find format with quality %s", outputQuality) 114 | } 115 | 116 | default: 117 | // select the first format 118 | formats.Sort() 119 | format = &formats[0] 120 | } 121 | 122 | outputFile := path.Join(folder, "video.mp4") 123 | 124 | if err := downloader.Download(ctx, e.video, format, outputFile); err != nil { 125 | return "", err 126 | } 127 | if isFileExistsAndNotEmpty(outputFile) { 128 | return outputFile, nil 129 | } 130 | 131 | return "", errors.New("download file is empty") 132 | } 133 | 134 | // New for creating a new youtube engine. 135 | func New(cfg *config.Youtube) (*Engine, error) { 136 | return &Engine{ 137 | cfg: cfg, 138 | }, nil 139 | } 140 | 141 | // isFileExistsAndNotEmpty check file not zero byte file 142 | func isFileExistsAndNotEmpty(name string) bool { 143 | fileInfo, err := os.Stat(name) 144 | if err != nil { 145 | return false 146 | } 147 | return fileInfo.Size() > 0 148 | } 149 | --------------------------------------------------------------------------------