├── models ├── .gitignore ├── requirements-openvino.txt ├── requirements-coreml.txt ├── for-tests-ggml-base.bin ├── for-tests-ggml-tiny.bin ├── for-tests-ggml-base.en.bin ├── for-tests-ggml-large.bin ├── for-tests-ggml-medium.bin ├── for-tests-ggml-small.bin ├── for-tests-ggml-tiny.en.bin ├── for-tests-ggml-medium.en.bin ├── for-tests-ggml-small.en.bin ├── generate-coreml-model.sh ├── generate-coreml-interface.sh └── download-ggml-model.cmd ├── samples ├── .gitignore ├── jfk.wav └── README.md ├── spm-headers ├── ggml.h └── whisper.h ├── bindings ├── go │ ├── .gitignore │ ├── samples │ │ └── jfk.wav │ ├── pkg │ │ └── whisper │ │ │ ├── doc.go │ │ │ ├── consts.go │ │ │ └── context_test.go │ ├── doc.go │ ├── go.mod │ ├── examples │ │ ├── go-whisper │ │ │ ├── color.go │ │ │ └── main.go │ │ └── go-model-download │ │ │ └── context.go │ └── LICENSE ├── javascript │ ├── .gitignore │ ├── package.json │ ├── package-tmpl.json │ └── CMakeLists.txt ├── java │ ├── settings.gradle │ ├── gradle │ │ └── wrapper │ │ │ ├── gradle-wrapper.jar │ │ │ └── gradle-wrapper.properties │ ├── src │ │ ├── main │ │ │ └── java │ │ │ │ └── io │ │ │ │ └── github │ │ │ │ └── ggerganov │ │ │ │ └── whispercpp │ │ │ │ ├── ggml │ │ │ │ ├── GgmlTensor.java │ │ │ │ └── GgmlType.java │ │ │ │ ├── model │ │ │ │ ├── WhisperState.java │ │ │ │ ├── EModel.java │ │ │ │ ├── WhisperTokenData.java │ │ │ │ ├── WhisperModel.java │ │ │ │ └── WhisperModelLoader.java │ │ │ │ ├── params │ │ │ │ ├── WhisperFilters.java │ │ │ │ ├── WhisperSamplingStrategy.java │ │ │ │ ├── WhisperHParams.java │ │ │ │ ├── GreedyParams.java │ │ │ │ ├── BeamSearchParams.java │ │ │ │ ├── CBool.java │ │ │ │ └── WhisperContextParams.java │ │ │ │ ├── callbacks │ │ │ │ ├── WhisperProgressCallback.java │ │ │ │ ├── WhisperEncoderBeginCallback.java │ │ │ │ ├── WhisperNewSegmentCallback.java │ │ │ │ └── WhisperLogitsFilterCallback.java │ │ │ │ ├── bean │ │ │ │ └── WhisperSegment.java │ │ │ │ └── WhisperContext.java │ │ └── test │ │ │ └── java │ │ │ └── 
io │ │ │ └── github │ │ │ └── ggerganov │ │ │ └── whispercpp │ │ │ └── WhisperJnaLibraryTest.java │ └── gradle.properties ├── ruby │ └── ext │ │ ├── .gitignore │ │ ├── ruby_whisper.h │ │ └── extconf.rb └── CMakeLists.txt ├── examples ├── whisper.android │ ├── app │ │ ├── .gitignore │ │ ├── src │ │ │ ├── main │ │ │ │ ├── res │ │ │ │ │ ├── values │ │ │ │ │ │ ├── strings.xml │ │ │ │ │ │ └── themes.xml │ │ │ │ │ ├── mipmap-anydpi │ │ │ │ │ │ └── ic_launcher.xml │ │ │ │ │ ├── xml │ │ │ │ │ │ ├── backup_rules.xml │ │ │ │ │ │ └── data_extraction_rules.xml │ │ │ │ │ └── drawable │ │ │ │ │ │ └── ic_launcher_foreground.xml │ │ │ │ ├── java │ │ │ │ │ └── com │ │ │ │ │ │ └── whispercppdemo │ │ │ │ │ │ ├── ui │ │ │ │ │ │ └── theme │ │ │ │ │ │ │ ├── Color.kt │ │ │ │ │ │ │ └── Type.kt │ │ │ │ │ │ └── MainActivity.kt │ │ │ │ └── AndroidManifest.xml │ │ │ ├── test │ │ │ │ └── java │ │ │ │ │ └── com │ │ │ │ │ └── whispercppdemo │ │ │ │ │ └── ExampleUnitTest.kt │ │ │ └── androidTest │ │ │ │ └── java │ │ │ │ └── com │ │ │ │ └── whispercppdemo │ │ │ │ └── ExampleInstrumentedTest.kt │ │ └── proguard-rules.pro │ ├── lib │ │ ├── .gitignore │ │ └── src │ │ │ └── main │ │ │ └── AndroidManifest.xml │ ├── .idea │ │ ├── .name │ │ ├── .gitignore │ │ ├── compiler.xml │ │ ├── vcs.xml │ │ ├── misc.xml │ │ └── gradle.xml │ ├── gradle │ │ └── wrapper │ │ │ ├── gradle-wrapper.jar │ │ │ └── gradle-wrapper.properties │ ├── .gitignore │ ├── build.gradle │ ├── settings.gradle │ └── gradle.properties ├── whisper.android.java │ ├── app │ │ ├── .gitignore │ │ ├── src │ │ │ ├── main │ │ │ │ ├── res │ │ │ │ │ ├── values │ │ │ │ │ │ ├── strings.xml │ │ │ │ │ │ ├── colors.xml │ │ │ │ │ │ └── themes.xml │ │ │ │ │ ├── mipmap-hdpi │ │ │ │ │ │ ├── ic_launcher.png │ │ │ │ │ │ └── ic_launcher_round.png │ │ │ │ │ ├── mipmap-mdpi │ │ │ │ │ │ ├── ic_launcher.png │ │ │ │ │ │ └── ic_launcher_round.png │ │ │ │ │ ├── mipmap-xhdpi │ │ │ │ │ │ ├── ic_launcher.png │ │ │ │ │ │ └── ic_launcher_round.png │ │ │ │ │ ├── 
mipmap-xxhdpi │ │ │ │ │ │ ├── ic_launcher.png │ │ │ │ │ │ └── ic_launcher_round.png │ │ │ │ │ ├── mipmap-xxxhdpi │ │ │ │ │ │ ├── ic_launcher.png │ │ │ │ │ │ └── ic_launcher_round.png │ │ │ │ │ ├── mipmap-anydpi-v26 │ │ │ │ │ │ ├── ic_launcher.xml │ │ │ │ │ │ └── ic_launcher_round.xml │ │ │ │ │ ├── values-night │ │ │ │ │ │ └── themes.xml │ │ │ │ │ └── drawable-v24 │ │ │ │ │ │ └── ic_launcher_foreground.xml │ │ │ │ ├── java │ │ │ │ │ └── com │ │ │ │ │ │ ├── litongjava │ │ │ │ │ │ └── whisper │ │ │ │ │ │ │ └── android │ │ │ │ │ │ │ └── java │ │ │ │ │ │ │ ├── app │ │ │ │ │ │ │ └── App.java │ │ │ │ │ │ │ ├── bean │ │ │ │ │ │ │ └── WhisperSegment.java │ │ │ │ │ │ │ └── task │ │ │ │ │ │ │ ├── LoadModelTask.java │ │ │ │ │ │ │ └── TranscriptionTask.java │ │ │ │ │ │ └── whispercpp │ │ │ │ │ │ └── java │ │ │ │ │ │ └── whisper │ │ │ │ │ │ ├── WhisperCpuConfig.java │ │ │ │ │ │ └── WhisperUtils.java │ │ │ │ ├── AndroidManifest.xml │ │ │ │ └── jni │ │ │ │ │ └── whisper │ │ │ │ │ └── CMakeLists.txt │ │ │ ├── test │ │ │ │ └── java │ │ │ │ │ └── com │ │ │ │ │ └── litongjava │ │ │ │ │ └── whisper │ │ │ │ │ └── android │ │ │ │ │ └── java │ │ │ │ │ └── ExampleUnitTest.java │ │ │ └── androidTest │ │ │ │ └── java │ │ │ │ └── com │ │ │ │ └── litongjava │ │ │ │ └── whisper │ │ │ │ └── android │ │ │ │ └── java │ │ │ │ └── ExampleInstrumentedTest.java │ │ ├── proguard-rules.pro │ │ └── build.gradle │ ├── settings.gradle │ ├── README_files │ │ └── 1.jpg │ ├── gradle │ │ └── wrapper │ │ │ ├── gradle-wrapper.jar │ │ │ └── gradle-wrapper.properties │ ├── .gitignore │ ├── build.gradle │ ├── gradle.properties │ └── README.md ├── talk │ ├── .gitignore │ ├── speak.bat │ ├── CMakeLists.txt │ ├── speak.ps1 │ ├── gpt-2.h │ ├── README.md │ └── speak ├── talk-llama │ ├── .gitignore │ ├── speak.bat │ ├── speak.ps1 │ ├── CMakeLists.txt │ ├── unicode-data.h │ ├── unicode.h │ ├── prompts │ │ └── talk-alpaca.txt │ └── speak ├── addon.node │ ├── .gitignore │ ├── package.json │ ├── __test__ │ │ └── 
whisper.spec.js │ ├── index.js │ ├── README.md │ └── CMakeLists.txt ├── whisper.swiftui │ ├── .gitignore │ ├── whisper.swiftui.demo │ │ ├── Resources │ │ │ ├── models │ │ │ │ └── .gitignore │ │ │ └── samples │ │ │ │ └── .gitignore │ │ ├── Supporting files │ │ │ ├── Assets.xcassets │ │ │ │ ├── Contents.json │ │ │ │ ├── AccentColor.colorset │ │ │ │ │ └── Contents.json │ │ │ │ └── AppIcon.appiconset │ │ │ │ │ └── Contents.json │ │ │ ├── Preview Content │ │ │ │ └── Preview Assets.xcassets │ │ │ │ │ └── Contents.json │ │ │ └── WhisperCppDemo.entitlements │ │ ├── WhisperCppDemoApp.swift │ │ ├── Utils │ │ │ ├── RiffWaveUtils.swift │ │ │ └── Recorder.swift │ │ └── UI │ │ │ └── ContentView.swift │ ├── whisper.swiftui.xcodeproj │ │ ├── .gitignore │ │ └── project.xcworkspace │ │ │ ├── .gitignore │ │ │ └── xcshareddata │ │ │ └── IDEWorkspaceChecks.plist │ └── README.md ├── quantize │ ├── README.md │ └── CMakeLists.txt ├── command │ ├── commands.txt │ └── CMakeLists.txt ├── whisper.objc │ ├── whisper.objc │ │ ├── Assets.xcassets │ │ │ ├── Contents.json │ │ │ ├── AccentColor.colorset │ │ │ │ └── Contents.json │ │ │ └── AppIcon.appiconset │ │ │ │ └── Contents.json │ │ ├── AppDelegate.h │ │ ├── SceneDelegate.h │ │ ├── main.m │ │ ├── Info.plist │ │ ├── ViewController.h │ │ ├── AppDelegate.m │ │ └── Base.lproj │ │ │ └── LaunchScreen.storyboard │ └── whisper.objc.xcodeproj │ │ └── project.xcworkspace │ │ ├── contents.xcworkspacedata │ │ └── xcshareddata │ │ └── IDEWorkspaceChecks.plist ├── bench │ └── CMakeLists.txt ├── main │ └── CMakeLists.txt ├── wchess │ ├── wchess.wasm │ │ ├── chessboardjs-1.0.0 │ │ │ ├── img │ │ │ │ └── chesspieces │ │ │ │ │ └── wikipedia │ │ │ │ │ ├── bB.png │ │ │ │ │ ├── bK.png │ │ │ │ │ ├── bN.png │ │ │ │ │ ├── bP.png │ │ │ │ │ ├── bQ.png │ │ │ │ │ ├── bR.png │ │ │ │ │ ├── wB.png │ │ │ │ │ ├── wK.png │ │ │ │ │ ├── wN.png │ │ │ │ │ ├── wP.png │ │ │ │ │ ├── wQ.png │ │ │ │ │ └── wR.png │ │ │ ├── css │ │ │ │ ├── chessboard-1.0.0.min.css │ │ │ │ └── 
chessboard-1.0.0.css │ │ │ └── js │ │ │ │ └── chessboard-1.0.0 │ │ │ │ ├── package.json │ │ │ │ ├── LICENSE.md │ │ │ │ └── CHANGELOG.md │ │ └── CMakeLists.txt │ ├── wchess.cmd │ │ └── CMakeLists.txt │ ├── CMakeLists.txt │ ├── libwchess │ │ ├── CMakeLists.txt │ │ ├── Chessboard.h │ │ └── WChess.h │ └── README.md ├── python │ └── test_whisper_processor.py ├── sycl │ ├── ls-sycl-device.cpp │ ├── CMakeLists.txt │ ├── run-whisper.sh │ ├── build.sh │ └── README.md ├── lsp │ └── CMakeLists.txt ├── stream │ └── CMakeLists.txt ├── server │ └── CMakeLists.txt ├── common-ggml.h ├── stream.wasm │ ├── README.md │ └── CMakeLists.txt ├── bench.wasm │ ├── README.md │ └── CMakeLists.txt ├── talk.wasm │ ├── gpt-2.h │ └── CMakeLists.txt ├── command.wasm │ ├── README.md │ └── CMakeLists.txt ├── grammar-parser.h ├── common-sdl.h ├── whisper.wasm │ ├── CMakeLists.txt │ └── README.md └── generate-karaoke.sh ├── tests ├── .gitignore ├── en-2-ref.txt ├── es-0-ref.txt └── test-whisper.js ├── scripts ├── sync-ggml.last ├── sha-all.sh ├── convert-all.sh ├── gen-authors.sh ├── sync-llama.sh ├── quantize-all.sh └── deploy-wasm.sh ├── README_sycl.md ├── .gitmodules ├── ggml-cuda ├── argsort.cuh ├── sumrows.cuh ├── acc.cuh ├── pad.cuh ├── alibi.cuh ├── rope.cuh ├── arange.cuh ├── clamp.cuh ├── concat.cuh ├── im2col.cuh ├── pool2d.cuh ├── scale.cuh ├── getrows.cuh ├── softmax.cuh ├── upscale.cuh ├── diagmask.cuh ├── tsembd.cuh ├── quantize.cuh ├── cpy.cuh ├── norm.cuh ├── binbcast.cuh ├── mmvq.cuh ├── convert.cuh ├── mmq.cuh ├── dmmv.cuh ├── unary.cuh ├── scale.cu ├── clamp.cu ├── arange.cu ├── sumrows.cu ├── quantize.cu └── diagmask.cu ├── grammars ├── colors.gbnf └── chess.gbnf ├── .devops ├── main.Dockerfile ├── cublas.Dockerfile └── main-cuda.Dockerfile ├── .github └── workflows │ ├── bindings-go.yml │ ├── bindings-ruby.yml │ ├── examples.yml │ └── docker.yml ├── cmake ├── DefaultTargetOptions.cmake └── GitVars.cmake ├── coreml └── whisper-encoder.h ├── openvino └── whisper-openvino-encoder.h 
├── ggml-vulkan.h ├── .gitignore ├── LICENSE ├── ggml-kompute.h ├── ggml-opencl.h └── ggml-cuda.h /models/.gitignore: -------------------------------------------------------------------------------- 1 | *.bin 2 | -------------------------------------------------------------------------------- /samples/.gitignore: -------------------------------------------------------------------------------- 1 | * 2 | -------------------------------------------------------------------------------- /spm-headers/ggml.h: -------------------------------------------------------------------------------- 1 | ../ggml.h -------------------------------------------------------------------------------- /spm-headers/whisper.h: -------------------------------------------------------------------------------- 1 | ../whisper.h -------------------------------------------------------------------------------- /bindings/go/.gitignore: -------------------------------------------------------------------------------- 1 | build 2 | models 3 | -------------------------------------------------------------------------------- /bindings/javascript/.gitignore: -------------------------------------------------------------------------------- 1 | publish.log 2 | -------------------------------------------------------------------------------- /examples/whisper.android/app/.gitignore: -------------------------------------------------------------------------------- 1 | /build -------------------------------------------------------------------------------- /examples/whisper.android/lib/.gitignore: -------------------------------------------------------------------------------- 1 | /build -------------------------------------------------------------------------------- /examples/whisper.android.java/app/.gitignore: -------------------------------------------------------------------------------- 1 | /build -------------------------------------------------------------------------------- 
/examples/whisper.android/.idea/.name: -------------------------------------------------------------------------------- 1 | WhisperCppDemo -------------------------------------------------------------------------------- /tests/.gitignore: -------------------------------------------------------------------------------- 1 | *.wav 2 | *.ogg 3 | *.wav.txt 4 | -------------------------------------------------------------------------------- /examples/talk/.gitignore: -------------------------------------------------------------------------------- 1 | audio.mp3 2 | to_speak.txt 3 | -------------------------------------------------------------------------------- /examples/talk-llama/.gitignore: -------------------------------------------------------------------------------- 1 | audio.mp3 2 | to_speak.txt 3 | -------------------------------------------------------------------------------- /bindings/java/settings.gradle: -------------------------------------------------------------------------------- 1 | rootProject.name = "whispercpp" 2 | -------------------------------------------------------------------------------- /examples/addon.node/.gitignore: -------------------------------------------------------------------------------- 1 | .idea 2 | node_modules 3 | build 4 | -------------------------------------------------------------------------------- /examples/whisper.swiftui/.gitignore: -------------------------------------------------------------------------------- 1 | xcuserdata 2 | xcshareddata 3 | -------------------------------------------------------------------------------- /scripts/sync-ggml.last: -------------------------------------------------------------------------------- 1 | 4151b0420d7a8f4c3c1b420afa0f62ca441b9cd8 2 | -------------------------------------------------------------------------------- /examples/whisper.swiftui/whisper.swiftui.demo/Resources/models/.gitignore: -------------------------------------------------------------------------------- 1 | 
-------------------------------------------------------------------------------- /examples/whisper.swiftui/whisper.swiftui.demo/Resources/samples/.gitignore: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /examples/whisper.swiftui/whisper.swiftui.xcodeproj/.gitignore: -------------------------------------------------------------------------------- 1 | xcuserdata/ 2 | -------------------------------------------------------------------------------- /models/requirements-openvino.txt: -------------------------------------------------------------------------------- 1 | openvino-dev[pytorch,onnx] 2 | openai-whisper -------------------------------------------------------------------------------- /README_sycl.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/royshil/whisper.cpp/master/README_sycl.md -------------------------------------------------------------------------------- /samples/jfk.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/royshil/whisper.cpp/master/samples/jfk.wav -------------------------------------------------------------------------------- /examples/talk/speak.bat: -------------------------------------------------------------------------------- 1 | @powershell -ExecutionPolicy Bypass -F examples\talk\speak.ps1 %1 %2 2 | -------------------------------------------------------------------------------- /models/requirements-coreml.txt: -------------------------------------------------------------------------------- 1 | torch 2 | coremltools 3 | openai-whisper 4 | ane_transformers 5 | -------------------------------------------------------------------------------- /examples/whisper.android/.idea/.gitignore: -------------------------------------------------------------------------------- 1 | # Default 
ignored files 2 | /shelf/ 3 | /workspace.xml 4 | -------------------------------------------------------------------------------- /examples/whisper.android.java/settings.gradle: -------------------------------------------------------------------------------- 1 | include ':app' 2 | rootProject.name = "whisper.android.java" -------------------------------------------------------------------------------- /bindings/go/samples/jfk.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/royshil/whisper.cpp/master/bindings/go/samples/jfk.wav -------------------------------------------------------------------------------- /examples/talk-llama/speak.bat: -------------------------------------------------------------------------------- 1 | @powershell -ExecutionPolicy Bypass -F examples\talk-llama\speak.ps1 %1 %2 2 | -------------------------------------------------------------------------------- /examples/whisper.swiftui/whisper.swiftui.xcodeproj/project.xcworkspace/.gitignore: -------------------------------------------------------------------------------- 1 | contents.xcworkspacedata 2 | -------------------------------------------------------------------------------- /examples/quantize/README.md: -------------------------------------------------------------------------------- 1 | # quantize 2 | 3 | Tool for integer quantization of Whisper `ggml` model files 4 | -------------------------------------------------------------------------------- /models/for-tests-ggml-base.bin: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/royshil/whisper.cpp/master/models/for-tests-ggml-base.bin -------------------------------------------------------------------------------- /models/for-tests-ggml-tiny.bin: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/royshil/whisper.cpp/master/models/for-tests-ggml-tiny.bin -------------------------------------------------------------------------------- /.gitmodules: -------------------------------------------------------------------------------- 1 | [submodule "bindings/ios"] 2 | path = bindings/ios 3 | url = https://github.com/ggerganov/whisper.spm 4 | -------------------------------------------------------------------------------- /models/for-tests-ggml-base.en.bin: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/royshil/whisper.cpp/master/models/for-tests-ggml-base.en.bin -------------------------------------------------------------------------------- /models/for-tests-ggml-large.bin: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/royshil/whisper.cpp/master/models/for-tests-ggml-large.bin -------------------------------------------------------------------------------- /models/for-tests-ggml-medium.bin: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/royshil/whisper.cpp/master/models/for-tests-ggml-medium.bin -------------------------------------------------------------------------------- /models/for-tests-ggml-small.bin: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/royshil/whisper.cpp/master/models/for-tests-ggml-small.bin -------------------------------------------------------------------------------- /models/for-tests-ggml-tiny.en.bin: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/royshil/whisper.cpp/master/models/for-tests-ggml-tiny.en.bin -------------------------------------------------------------------------------- /models/for-tests-ggml-medium.en.bin: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/royshil/whisper.cpp/master/models/for-tests-ggml-medium.en.bin -------------------------------------------------------------------------------- /models/for-tests-ggml-small.en.bin: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/royshil/whisper.cpp/master/models/for-tests-ggml-small.en.bin -------------------------------------------------------------------------------- /bindings/go/pkg/whisper/doc.go: -------------------------------------------------------------------------------- 1 | /* 2 | This is the higher-level speech-to-text whisper.cpp API for go 3 | */ 4 | package whisper 5 | -------------------------------------------------------------------------------- /examples/command/commands.txt: -------------------------------------------------------------------------------- 1 | enable 2 | disable 3 | cat 4 | dog 5 | apple 6 | red 7 | blue 8 | green 9 | lightblue 10 | -------------------------------------------------------------------------------- /ggml-cuda/argsort.cuh: -------------------------------------------------------------------------------- 1 | #include "common.cuh" 2 | 3 | void ggml_cuda_op_argsort(ggml_backend_cuda_context & ctx, ggml_tensor * dst); 4 | -------------------------------------------------------------------------------- /ggml-cuda/sumrows.cuh: -------------------------------------------------------------------------------- 1 | #include "common.cuh" 2 | 3 | void ggml_cuda_op_sum_rows(ggml_backend_cuda_context & ctx, ggml_tensor * dst); 4 | -------------------------------------------------------------------------------- /examples/whisper.android/app/src/main/res/values/strings.xml: -------------------------------------------------------------------------------- 1 | 2 | WhisperCppDemo 3 | 
-------------------------------------------------------------------------------- /bindings/java/gradle/wrapper/gradle-wrapper.jar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/royshil/whisper.cpp/master/bindings/java/gradle/wrapper/gradle-wrapper.jar -------------------------------------------------------------------------------- /examples/whisper.android.java/README_files/1.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/royshil/whisper.cpp/master/examples/whisper.android.java/README_files/1.jpg -------------------------------------------------------------------------------- /examples/whisper.android.java/app/src/main/res/values/strings.xml: -------------------------------------------------------------------------------- 1 | 2 | whisper.android.java 3 | -------------------------------------------------------------------------------- /examples/whisper.objc/whisper.objc/Assets.xcassets/Contents.json: -------------------------------------------------------------------------------- 1 | { 2 | "info" : { 3 | "author" : "xcode", 4 | "version" : 1 5 | } 6 | } 7 | -------------------------------------------------------------------------------- /ggml-cuda/acc.cuh: -------------------------------------------------------------------------------- 1 | #include "common.cuh" 2 | 3 | #define CUDA_ACC_BLOCK_SIZE 256 4 | 5 | void ggml_cuda_op_acc(ggml_backend_cuda_context & ctx, ggml_tensor * dst); 6 | -------------------------------------------------------------------------------- /ggml-cuda/pad.cuh: -------------------------------------------------------------------------------- 1 | #include "common.cuh" 2 | 3 | #define CUDA_PAD_BLOCK_SIZE 256 4 | 5 | void ggml_cuda_op_pad(ggml_backend_cuda_context & ctx, ggml_tensor * dst); 6 | -------------------------------------------------------------------------------- 
/bindings/java/src/main/java/io/github/ggerganov/whispercpp/ggml/GgmlTensor.java: -------------------------------------------------------------------------------- 1 | package io.github.ggerganov.whispercpp.ggml; 2 | 3 | public class GgmlTensor { 4 | } 5 | -------------------------------------------------------------------------------- /bindings/ruby/ext/.gitignore: -------------------------------------------------------------------------------- 1 | Makefile 2 | ggml.c 3 | ggml.h 4 | ggml-alloc.c 5 | ggml-alloc.h 6 | whisper.bundle 7 | whisper.cpp 8 | whisper.h 9 | dr_wav.h 10 | -------------------------------------------------------------------------------- /examples/whisper.android/gradle/wrapper/gradle-wrapper.jar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/royshil/whisper.cpp/master/examples/whisper.android/gradle/wrapper/gradle-wrapper.jar -------------------------------------------------------------------------------- /ggml-cuda/alibi.cuh: -------------------------------------------------------------------------------- 1 | #include "common.cuh" 2 | 3 | #define CUDA_ALIBI_BLOCK_SIZE 32 4 | 5 | void ggml_cuda_op_alibi(ggml_backend_cuda_context & ctx, ggml_tensor * dst); 6 | -------------------------------------------------------------------------------- /ggml-cuda/rope.cuh: -------------------------------------------------------------------------------- 1 | #include "common.cuh" 2 | 3 | #define CUDA_ROPE_BLOCK_SIZE 256 4 | 5 | void ggml_cuda_op_rope(ggml_backend_cuda_context & ctx, ggml_tensor * dst); 6 | -------------------------------------------------------------------------------- /ggml-cuda/arange.cuh: -------------------------------------------------------------------------------- 1 | #include "common.cuh" 2 | 3 | #define CUDA_ARANGE_BLOCK_SIZE 256 4 | 5 | void ggml_cuda_op_arange(ggml_backend_cuda_context & ctx, ggml_tensor * dst); 6 | 
-------------------------------------------------------------------------------- /ggml-cuda/clamp.cuh: -------------------------------------------------------------------------------- 1 | #include "common.cuh" 2 | 3 | #define CUDA_CLAMP_BLOCK_SIZE 256 4 | 5 | void ggml_cuda_op_clamp(ggml_backend_cuda_context & ctx, ggml_tensor * dst); 6 | -------------------------------------------------------------------------------- /ggml-cuda/concat.cuh: -------------------------------------------------------------------------------- 1 | #include "common.cuh" 2 | 3 | #define CUDA_CONCAT_BLOCK_SIZE 256 4 | 5 | void ggml_cuda_op_concat(ggml_backend_cuda_context & ctx, ggml_tensor * dst); 6 | -------------------------------------------------------------------------------- /ggml-cuda/im2col.cuh: -------------------------------------------------------------------------------- 1 | #include "common.cuh" 2 | 3 | #define CUDA_IM2COL_BLOCK_SIZE 256 4 | 5 | void ggml_cuda_op_im2col(ggml_backend_cuda_context & ctx, ggml_tensor * dst); 6 | -------------------------------------------------------------------------------- /ggml-cuda/pool2d.cuh: -------------------------------------------------------------------------------- 1 | #include "common.cuh" 2 | 3 | #define CUDA_POOL2D_BLOCK_SIZE 256 4 | 5 | void ggml_cuda_op_pool2d(ggml_backend_cuda_context & ctx, ggml_tensor * dst); 6 | -------------------------------------------------------------------------------- /ggml-cuda/scale.cuh: -------------------------------------------------------------------------------- 1 | #include "common.cuh" 2 | 3 | #define CUDA_SCALE_BLOCK_SIZE 256 4 | 5 | void ggml_cuda_op_scale(ggml_backend_cuda_context & ctx, ggml_tensor * dst); 6 | -------------------------------------------------------------------------------- /bindings/go/doc.go: -------------------------------------------------------------------------------- 1 | /* 2 | github.com/ggerganov/whisper.cpp/bindings/go 3 | provides a speech-to-text service bindings 
for the Go programming language. 4 | */ 5 | package whisper 6 | -------------------------------------------------------------------------------- /bindings/java/src/main/java/io/github/ggerganov/whispercpp/model/WhisperState.java: -------------------------------------------------------------------------------- 1 | package io.github.ggerganov.whispercpp.model; 2 | 3 | public class WhisperState { 4 | } 5 | -------------------------------------------------------------------------------- /examples/whisper.android.java/gradle/wrapper/gradle-wrapper.jar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/royshil/whisper.cpp/master/examples/whisper.android.java/gradle/wrapper/gradle-wrapper.jar -------------------------------------------------------------------------------- /examples/whisper.swiftui/whisper.swiftui.demo/Supporting files/Assets.xcassets/Contents.json: -------------------------------------------------------------------------------- 1 | { 2 | "info" : { 3 | "author" : "xcode", 4 | "version" : 1 5 | } 6 | } 7 | -------------------------------------------------------------------------------- /ggml-cuda/getrows.cuh: -------------------------------------------------------------------------------- 1 | #include "common.cuh" 2 | 3 | #define CUDA_GET_ROWS_BLOCK_SIZE 256 4 | 5 | void ggml_cuda_op_get_rows(ggml_backend_cuda_context & ctx, ggml_tensor * dst); 6 | -------------------------------------------------------------------------------- /ggml-cuda/softmax.cuh: -------------------------------------------------------------------------------- 1 | #include "common.cuh" 2 | 3 | #define CUDA_SOFT_MAX_BLOCK_SIZE 1024 4 | 5 | void ggml_cuda_op_soft_max(ggml_backend_cuda_context & ctx, ggml_tensor * dst); 6 | -------------------------------------------------------------------------------- /ggml-cuda/upscale.cuh: -------------------------------------------------------------------------------- 1 | #include 
"common.cuh" 2 | 3 | #define CUDA_UPSCALE_BLOCK_SIZE 256 4 | 5 | void ggml_cuda_op_upscale(ggml_backend_cuda_context & ctx, ggml_tensor * dst); 6 | -------------------------------------------------------------------------------- /scripts/sha-all.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Compute the SHA1 of all model files in ./models/ggml-*.bin 4 | 5 | for f in ./models/ggml-*.bin; do 6 | shasum "$f" -a 1 7 | done 8 | -------------------------------------------------------------------------------- /examples/whisper.android/lib/src/main/AndroidManifest.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | -------------------------------------------------------------------------------- /ggml-cuda/diagmask.cuh: -------------------------------------------------------------------------------- 1 | #include "common.cuh" 2 | 3 | #define CUDA_DIAG_MASK_INF_BLOCK_SIZE 32 4 | 5 | void ggml_cuda_op_diag_mask_inf(ggml_backend_cuda_context & ctx, ggml_tensor * dst); 6 | -------------------------------------------------------------------------------- /examples/whisper.swiftui/whisper.swiftui.demo/Supporting files/Preview Content/Preview Assets.xcassets/Contents.json: -------------------------------------------------------------------------------- 1 | { 2 | "info" : { 3 | "author" : "xcode", 4 | "version" : 1 5 | } 6 | } 7 | -------------------------------------------------------------------------------- /ggml-cuda/tsembd.cuh: -------------------------------------------------------------------------------- 1 | #include "common.cuh" 2 | 3 | #define CUDA_TIMESTEP_EMBEDDING_BLOCK_SIZE 256 4 | 5 | void ggml_cuda_op_timestep_embedding(ggml_backend_cuda_context & ctx, ggml_tensor * dst); 6 | -------------------------------------------------------------------------------- /examples/whisper.android.java/app/src/main/res/mipmap-hdpi/ic_launcher.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/royshil/whisper.cpp/master/examples/whisper.android.java/app/src/main/res/mipmap-hdpi/ic_launcher.png -------------------------------------------------------------------------------- /examples/whisper.android.java/app/src/main/res/mipmap-mdpi/ic_launcher.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/royshil/whisper.cpp/master/examples/whisper.android.java/app/src/main/res/mipmap-mdpi/ic_launcher.png -------------------------------------------------------------------------------- /examples/whisper.android.java/app/src/main/res/mipmap-xhdpi/ic_launcher.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/royshil/whisper.cpp/master/examples/whisper.android.java/app/src/main/res/mipmap-xhdpi/ic_launcher.png -------------------------------------------------------------------------------- /examples/whisper.android.java/app/src/main/res/mipmap-xxhdpi/ic_launcher.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/royshil/whisper.cpp/master/examples/whisper.android.java/app/src/main/res/mipmap-xxhdpi/ic_launcher.png -------------------------------------------------------------------------------- /examples/whisper.android.java/app/src/main/res/mipmap-xxxhdpi/ic_launcher.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/royshil/whisper.cpp/master/examples/whisper.android.java/app/src/main/res/mipmap-xxxhdpi/ic_launcher.png -------------------------------------------------------------------------------- /examples/bench/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | set(TARGET bench) 2 | add_executable(${TARGET} bench.cpp) 3 | 4 | 
include(DefaultTargetOptions) 5 | 6 | target_link_libraries(${TARGET} PRIVATE whisper ${CMAKE_THREAD_LIBS_INIT}) 7 | -------------------------------------------------------------------------------- /examples/main/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | set(TARGET main) 2 | add_executable(${TARGET} main.cpp) 3 | 4 | include(DefaultTargetOptions) 5 | 6 | target_link_libraries(${TARGET} PRIVATE common whisper ${CMAKE_THREAD_LIBS_INIT}) 7 | -------------------------------------------------------------------------------- /examples/wchess/wchess.wasm/chessboardjs-1.0.0/img/chesspieces/wikipedia/bB.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/royshil/whisper.cpp/master/examples/wchess/wchess.wasm/chessboardjs-1.0.0/img/chesspieces/wikipedia/bB.png -------------------------------------------------------------------------------- /examples/wchess/wchess.wasm/chessboardjs-1.0.0/img/chesspieces/wikipedia/bK.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/royshil/whisper.cpp/master/examples/wchess/wchess.wasm/chessboardjs-1.0.0/img/chesspieces/wikipedia/bK.png -------------------------------------------------------------------------------- /examples/wchess/wchess.wasm/chessboardjs-1.0.0/img/chesspieces/wikipedia/bN.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/royshil/whisper.cpp/master/examples/wchess/wchess.wasm/chessboardjs-1.0.0/img/chesspieces/wikipedia/bN.png -------------------------------------------------------------------------------- /examples/wchess/wchess.wasm/chessboardjs-1.0.0/img/chesspieces/wikipedia/bP.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/royshil/whisper.cpp/master/examples/wchess/wchess.wasm/chessboardjs-1.0.0/img/chesspieces/wikipedia/bP.png -------------------------------------------------------------------------------- /examples/wchess/wchess.wasm/chessboardjs-1.0.0/img/chesspieces/wikipedia/bQ.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/royshil/whisper.cpp/master/examples/wchess/wchess.wasm/chessboardjs-1.0.0/img/chesspieces/wikipedia/bQ.png -------------------------------------------------------------------------------- /examples/wchess/wchess.wasm/chessboardjs-1.0.0/img/chesspieces/wikipedia/bR.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/royshil/whisper.cpp/master/examples/wchess/wchess.wasm/chessboardjs-1.0.0/img/chesspieces/wikipedia/bR.png -------------------------------------------------------------------------------- /examples/wchess/wchess.wasm/chessboardjs-1.0.0/img/chesspieces/wikipedia/wB.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/royshil/whisper.cpp/master/examples/wchess/wchess.wasm/chessboardjs-1.0.0/img/chesspieces/wikipedia/wB.png -------------------------------------------------------------------------------- /examples/wchess/wchess.wasm/chessboardjs-1.0.0/img/chesspieces/wikipedia/wK.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/royshil/whisper.cpp/master/examples/wchess/wchess.wasm/chessboardjs-1.0.0/img/chesspieces/wikipedia/wK.png -------------------------------------------------------------------------------- /examples/wchess/wchess.wasm/chessboardjs-1.0.0/img/chesspieces/wikipedia/wN.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/royshil/whisper.cpp/master/examples/wchess/wchess.wasm/chessboardjs-1.0.0/img/chesspieces/wikipedia/wN.png -------------------------------------------------------------------------------- /examples/wchess/wchess.wasm/chessboardjs-1.0.0/img/chesspieces/wikipedia/wP.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/royshil/whisper.cpp/master/examples/wchess/wchess.wasm/chessboardjs-1.0.0/img/chesspieces/wikipedia/wP.png -------------------------------------------------------------------------------- /examples/wchess/wchess.wasm/chessboardjs-1.0.0/img/chesspieces/wikipedia/wQ.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/royshil/whisper.cpp/master/examples/wchess/wchess.wasm/chessboardjs-1.0.0/img/chesspieces/wikipedia/wQ.png -------------------------------------------------------------------------------- /examples/wchess/wchess.wasm/chessboardjs-1.0.0/img/chesspieces/wikipedia/wR.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/royshil/whisper.cpp/master/examples/wchess/wchess.wasm/chessboardjs-1.0.0/img/chesspieces/wikipedia/wR.png -------------------------------------------------------------------------------- /examples/whisper.android.java/app/src/main/res/mipmap-hdpi/ic_launcher_round.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/royshil/whisper.cpp/master/examples/whisper.android.java/app/src/main/res/mipmap-hdpi/ic_launcher_round.png -------------------------------------------------------------------------------- /examples/whisper.android.java/app/src/main/res/mipmap-mdpi/ic_launcher_round.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/royshil/whisper.cpp/master/examples/whisper.android.java/app/src/main/res/mipmap-mdpi/ic_launcher_round.png -------------------------------------------------------------------------------- /examples/whisper.android.java/app/src/main/res/mipmap-xhdpi/ic_launcher_round.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/royshil/whisper.cpp/master/examples/whisper.android.java/app/src/main/res/mipmap-xhdpi/ic_launcher_round.png -------------------------------------------------------------------------------- /examples/whisper.android.java/app/src/main/res/mipmap-xxhdpi/ic_launcher_round.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/royshil/whisper.cpp/master/examples/whisper.android.java/app/src/main/res/mipmap-xxhdpi/ic_launcher_round.png -------------------------------------------------------------------------------- /examples/whisper.android.java/app/src/main/res/mipmap-xxxhdpi/ic_launcher_round.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/royshil/whisper.cpp/master/examples/whisper.android.java/app/src/main/res/mipmap-xxxhdpi/ic_launcher_round.png -------------------------------------------------------------------------------- /examples/quantize/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | set(TARGET quantize) 2 | add_executable(${TARGET} quantize.cpp) 3 | 4 | include(DefaultTargetOptions) 5 | 6 | target_link_libraries(${TARGET} PRIVATE common whisper ${CMAKE_THREAD_LIBS_INIT}) 7 | -------------------------------------------------------------------------------- /examples/whisper.android/.idea/compiler.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 
-------------------------------------------------------------------------------- /examples/whisper.android/app/src/main/res/values/themes.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 16 | -------------------------------------------------------------------------------- /examples/whisper.android.java/app/src/main/res/values-night/themes.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 16 | -------------------------------------------------------------------------------- /examples/whisper.android/.idea/gradle.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 20 | 21 | -------------------------------------------------------------------------------- /bindings/go/pkg/whisper/consts.go: -------------------------------------------------------------------------------- 1 | package whisper 2 | 3 | import ( 4 | "errors" 5 | 6 | // Bindings 7 | whisper "github.com/ggerganov/whisper.cpp/bindings/go" 8 | ) 9 | 10 | /////////////////////////////////////////////////////////////////////////////// 11 | // ERRORS 12 | 13 | var ( 14 | ErrUnableToLoadModel = errors.New("unable to load model") 15 | ErrInternalAppError = errors.New("internal application error") 16 | ErrProcessingFailed = errors.New("processing failed") 17 | ErrUnsupportedLanguage = errors.New("unsupported language") 18 | ErrModelNotMultilingual = errors.New("model is not multilingual") 19 | ) 20 | 21 | /////////////////////////////////////////////////////////////////////////////// 22 | // CONSTANTS 23 | 24 | // SampleRate is the sample rate of the audio data. 25 | const SampleRate = whisper.SampleRate 26 | 27 | // SampleBits is the number of bytes per sample. 
28 | const SampleBits = whisper.SampleBits 29 | -------------------------------------------------------------------------------- /tests/en-2-ref.txt: -------------------------------------------------------------------------------- 1 | This is the Micro Machine Man presenting the most midget miniature motorcade of Micro Machines. Each one has dramatic details, terrific trim, precision paint jobs, plus incredible Micro Machine Pocket Playsets. There's a police station, fire station, restaurant, service station, and more. Perfect pocket portables to take anyplace. And there are many miniature playsets to play with, and each one comes with its own special edition Micro Machine vehicle and fun, fantastic features that miraculously move. Raise the boat lift at the airport marina, man the gun turret at the army base, clean your car at the car wash, raise the toll bridge. And these playsets fit together to form a Micro Machine world. Micro Machine Pocket Playsets, so tremendously tiny, so perfectly precise, so dazzlingly detailed, you'll want to pocket them all. Micro Machines are Micro Machine Pocket Playsets sold separately from Galoob. The smaller they are, the better they are. 
-------------------------------------------------------------------------------- /examples/whisper.objc/whisper.objc/Info.plist: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | NSMicrophoneUsageDescription 6 | This app requires microphone access in order to transcribe speech 7 | UIApplicationSceneManifest 8 | 9 | UIApplicationSupportsMultipleScenes 10 | 11 | UISceneConfigurations 12 | 13 | UIWindowSceneSessionRoleApplication 14 | 15 | 16 | UISceneConfigurationName 17 | Default Configuration 18 | UISceneDelegateClassName 19 | SceneDelegate 20 | UISceneStoryboardFile 21 | Main 22 | 23 | 24 | 25 | 26 | 27 | 28 | -------------------------------------------------------------------------------- /examples/wchess/wchess.wasm/chessboardjs-1.0.0/js/chessboard-1.0.0/package.json: -------------------------------------------------------------------------------- 1 | { 2 | "author": "Chris Oakman (http://chrisoakman.com/)", 3 | "name": "@chrisoakman/chessboardjs", 4 | "description": "JavaScript chessboard widget", 5 | "homepage": "https://chessboardjs.com", 6 | "license": "MIT", 7 | "version": "1.0.0", 8 | "repository": { 9 | "type": "git", 10 | "url": "git://github.com/oakmac/chessboardjs.git" 11 | }, 12 | "files": ["dist/"], 13 | "dependencies": { 14 | "jquery": ">=3.4.1" 15 | }, 16 | "devDependencies": { 17 | "csso": "3.5.1", 18 | "fs-plus": "3.1.1", 19 | "kidif": "1.1.0", 20 | "mustache": "2.3.0", 21 | "standard": "10.0.2", 22 | "uglify-js": "3.6.0" 23 | }, 24 | "scripts": { 25 | "build": "standard lib/chessboard.js && node scripts/build.js", 26 | "standard": "standard --fix lib/*.js website/js/*.js", 27 | "website": "node scripts/website.js" 28 | } 29 | } 30 | -------------------------------------------------------------------------------- /examples/whisper.android.java/app/src/androidTest/java/com/litongjava/whisper/android/java/ExampleInstrumentedTest.java: 
-------------------------------------------------------------------------------- 1 | package com.litongjava.whisper.android.java; 2 | 3 | import android.content.Context; 4 | 5 | import androidx.test.platform.app.InstrumentationRegistry; 6 | import androidx.test.ext.junit.runners.AndroidJUnit4; 7 | 8 | import org.junit.Test; 9 | import org.junit.runner.RunWith; 10 | 11 | import static org.junit.Assert.*; 12 | 13 | /** 14 | * Instrumented test, which will execute on an Android device. 15 | * 16 | * @see Testing documentation 17 | */ 18 | @RunWith(AndroidJUnit4.class) 19 | public class ExampleInstrumentedTest { 20 | @Test 21 | public void useAppContext() { 22 | // Context of the app under test. 23 | Context appContext = InstrumentationRegistry.getInstrumentation().getTargetContext(); 24 | assertEquals("com.litongjava.whisper.android.java", appContext.getPackageName()); 25 | } 26 | } -------------------------------------------------------------------------------- /bindings/java/src/main/java/io/github/ggerganov/whispercpp/callbacks/WhisperEncoderBeginCallback.java: -------------------------------------------------------------------------------- 1 | package io.github.ggerganov.whispercpp.callbacks; 2 | 3 | import com.sun.jna.Callback; 4 | import com.sun.jna.Pointer; 5 | import io.github.ggerganov.whispercpp.WhisperContext; 6 | import io.github.ggerganov.whispercpp.model.WhisperState; 7 | 8 | /** 9 | * Callback before the encoder starts. 10 | * If not null, called before the encoder starts. 11 | * If it returns false, the computation is aborted. 12 | */ 13 | public interface WhisperEncoderBeginCallback extends Callback { 14 | 15 | /** 16 | * Callback method before the encoder starts. 17 | * 18 | * @param ctx The whisper context. 19 | * @param state The whisper state. 20 | * @param user_data User data. 21 | * @return True if the computation should proceed, false otherwise. 
22 | */ 23 | boolean callback(Pointer ctx, Pointer state, Pointer user_data); 24 | } 25 | -------------------------------------------------------------------------------- /bindings/java/src/main/java/io/github/ggerganov/whispercpp/callbacks/WhisperNewSegmentCallback.java: -------------------------------------------------------------------------------- 1 | package io.github.ggerganov.whispercpp.callbacks; 2 | 3 | import com.sun.jna.Callback; 4 | import com.sun.jna.Pointer; 5 | import io.github.ggerganov.whispercpp.WhisperContext; 6 | import io.github.ggerganov.whispercpp.model.WhisperState; 7 | 8 | /** 9 | * Callback for the text segment. 10 | * Called on every newly generated text segment. 11 | * Use the whisper_full_...() functions to obtain the text segments. 12 | */ 13 | public interface WhisperNewSegmentCallback extends Callback { 14 | 15 | /** 16 | * Callback method for the text segment. 17 | * 18 | * @param ctx The whisper context. 19 | * @param state The whisper state. 20 | * @param n_new The number of newly generated text segments. 21 | * @param user_data User data. 22 | */ 23 | void callback(Pointer ctx, Pointer state, int n_new, Pointer user_data); 24 | } 25 | -------------------------------------------------------------------------------- /examples/whisper.objc/whisper.objc/ViewController.h: -------------------------------------------------------------------------------- 1 | // 2 | // ViewController.h 3 | // whisper.objc 4 | // 5 | // Created by Georgi Gerganov on 23.10.22. 
6 | // 7 | 8 | #import 9 | 10 | #import 11 | #import 12 | 13 | #define NUM_BUFFERS 3 14 | #define MAX_AUDIO_SEC 30 15 | #define SAMPLE_RATE 16000 16 | 17 | struct whisper_context; 18 | 19 | typedef struct 20 | { 21 | int ggwaveId; 22 | bool isCapturing; 23 | bool isTranscribing; 24 | bool isRealtime; 25 | UILabel * labelReceived; 26 | 27 | AudioQueueRef queue; 28 | AudioStreamBasicDescription dataFormat; 29 | AudioQueueBufferRef buffers[NUM_BUFFERS]; 30 | 31 | int n_samples; 32 | int16_t * audioBufferI16; 33 | float * audioBufferF32; 34 | 35 | struct whisper_context * ctx; 36 | 37 | void * vc; 38 | } StateInp; 39 | 40 | @interface ViewController : UIViewController 41 | { 42 | StateInp stateInp; 43 | } 44 | 45 | @end 46 | -------------------------------------------------------------------------------- /examples/whisper.android.java/app/src/main/java/com/whispercpp/java/whisper/WhisperUtils.java: -------------------------------------------------------------------------------- 1 | package com.whispercpp.java.whisper; 2 | 3 | import android.os.Build; 4 | import android.util.Log; 5 | 6 | import androidx.annotation.RequiresApi; 7 | 8 | import java.io.File; 9 | import java.nio.file.Path; 10 | 11 | public class WhisperUtils { 12 | private static final String LOG_TAG = "LibWhisper"; 13 | 14 | 15 | public static boolean isArmEabiV7a() { 16 | return Build.SUPPORTED_ABIS[0].equals("armeabi-v7a"); 17 | } 18 | 19 | public static boolean isArmEabiV8a() { 20 | return Build.SUPPORTED_ABIS[0].equals("arm64-v8a"); 21 | } 22 | 23 | @RequiresApi(api = Build.VERSION_CODES.O) 24 | public static String cpuInfo() { 25 | try { 26 | Path path = new File("/proc/cpuinfo").toPath(); 27 | return new String(java.nio.file.Files.readAllBytes(path)); 28 | } catch (Exception e) { 29 | Log.w(LOG_TAG, "Couldn't read /proc/cpuinfo", e); 30 | return null; 31 | } 32 | 33 | } 34 | } -------------------------------------------------------------------------------- /scripts/quantize-all.sh: 
-------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | printf "Usage: $0 " 4 | 5 | if [ $# -ne 1 ]; then 6 | printf "\nError: Invalid number of arguments\n" 7 | exit 1 8 | fi 9 | 10 | qtype0="q5_0" 11 | qtype1="q5_1" 12 | upload="$1" 13 | declare -a filedex 14 | 15 | cd `dirname $0` 16 | cd ../ 17 | 18 | for i in `ls ./models | grep ^ggml-.*.bin | grep -v "\-q"`; do 19 | m="models/$i" 20 | if [ -f "$m" ]; then 21 | if [ "${m##*.}" == "bin" ]; then 22 | ./quantize "${m}" "${m::${#m}-4}-${qtype1}.bin" ${qtype1}; 23 | ./quantize "${m}" "${m::${#m}-4}-${qtype0}.bin" ${qtype0}; 24 | filedex+=( "${m::${#m}-4}-${qtype1}.bin" "${m::${#m}-4}-${qtype0}.bin" ) 25 | fi 26 | fi 27 | done 28 | 29 | 30 | 31 | if [ "$upload" == "1" ]; then 32 | for i in ${!filedex[@]}; do 33 | if [ "${filedex[$i]:9:8}" != "for-test" ]; then 34 | scp ${filedex[$i]} root@linode0:/mnt/Data/ggml/ggml-model-${filedex[$i]:9} 35 | fi 36 | done 37 | fi 38 | -------------------------------------------------------------------------------- /examples/talk-llama/unicode.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include 5 | #include 6 | 7 | #define CODEPOINT_TYPE_UNIDENTIFIED 0 8 | #define CODEPOINT_TYPE_DIGIT 1 9 | #define CODEPOINT_TYPE_LETTER 2 10 | #define CODEPOINT_TYPE_WHITESPACE 3 11 | #define CODEPOINT_TYPE_ACCENT_MARK 4 12 | #define CODEPOINT_TYPE_PUNCTUATION 5 13 | #define CODEPOINT_TYPE_SYMBOL 6 14 | #define CODEPOINT_TYPE_CONTROL 7 15 | 16 | std::string unicode_cpt_to_utf8(uint32_t cp); 17 | std::vector unicode_cpts_from_utf8(const std::string & utf8); 18 | 19 | std::vector unicode_cpts_normalize_nfd(const std::vector & cpts); 20 | 21 | int unicode_cpt_type(uint32_t cp); 22 | int unicode_cpt_type(const std::string & utf8); 23 | 24 | std::string unicode_byte_to_utf8(uint8_t byte); 25 | uint8_t unicode_utf8_to_byte(const std::string & utf8); 26 | 27 | // simple tolower 
that only implements one-to-one mapping, not one-to-many 28 | char32_t unicode_tolower(char32_t cp); 29 | -------------------------------------------------------------------------------- /bindings/java/src/main/java/io/github/ggerganov/whispercpp/params/WhisperContextParams.java: -------------------------------------------------------------------------------- 1 | package io.github.ggerganov.whispercpp.params; 2 | 3 | import com.sun.jna.*; 4 | 5 | import java.util.Arrays; 6 | import java.util.List; 7 | 8 | /** 9 | * Parameters for the whisper_init_from_file_with_params() function. 10 | * If you change the order or add new parameters, make sure to update the default values in whisper.cpp: 11 | * whisper_context_default_params() 12 | */ 13 | public class WhisperContextParams extends Structure { 14 | 15 | public WhisperContextParams(Pointer p) { 16 | super(p); 17 | } 18 | 19 | /** Use GPU for inference Number (default = true) */ 20 | public CBool use_gpu; 21 | 22 | /** Use GPU for inference Number (default = true) */ 23 | public void useGpu(boolean enable) { 24 | use_gpu = enable ? CBool.TRUE : CBool.FALSE; 25 | } 26 | 27 | @Override 28 | protected List getFieldOrder() { 29 | return Arrays.asList("use_gpu"); 30 | } 31 | } 32 | -------------------------------------------------------------------------------- /examples/grammar-parser.h: -------------------------------------------------------------------------------- 1 | // Implements a parser for an extended Backus-Naur form (BNF), producing the 2 | // binary context-free grammar format specified by whisper.h. Supports character 3 | // ranges, grouping, and repetition operators. 
As an example, a grammar for 4 | // arithmetic might look like: 5 | // 6 | // root ::= expr 7 | // expr ::= term ([-+*/] term)* 8 | // term ::= num | "(" space expr ")" space 9 | // num ::= [0-9]+ space 10 | // space ::= [ \t\n]* 11 | 12 | #pragma once 13 | #include "whisper.h" 14 | #include 15 | #include 16 | #include 17 | #include 18 | 19 | namespace grammar_parser { 20 | struct parse_state { 21 | std::map symbol_ids; 22 | std::vector> rules; 23 | 24 | std::vector c_rules() const; 25 | }; 26 | 27 | parse_state parse(const char * src); 28 | void print_grammar(FILE * file, const parse_state & state); 29 | } 30 | -------------------------------------------------------------------------------- /openvino/whisper-openvino-encoder.h: -------------------------------------------------------------------------------- 1 | // Wrapper of the OpenVINO Whisper Encoder model 2 | // 3 | 4 | #if __cplusplus 5 | extern "C" { 6 | #endif 7 | 8 | struct whisper_openvino_context; 9 | 10 | // initialize openvino encoder, given path to model xml, device ("CPU", "GPU", etc.), and 11 | // path to cache_dir. Returns null upon failure. 12 | struct whisper_openvino_context * whisper_openvino_init(const char * path_model, 13 | const char * device, 14 | const char * cache_dir); 15 | 16 | // clean up a ctx previously returned from whisper_openvino_init() 17 | void whisper_openvino_free(struct whisper_openvino_context * ctx); 18 | 19 | struct ggml_tensor; 20 | 21 | // Perform encode using OpenVINO. 
22 | // Returns 1 on success 23 | // Returns 0 on failure 24 | int whisper_openvino_encode( 25 | whisper_openvino_context* ctx, 26 | ggml_tensor* mel, 27 | ggml_tensor* out); 28 | 29 | #if __cplusplus 30 | } 31 | #endif 32 | -------------------------------------------------------------------------------- /examples/addon.node/index.js: -------------------------------------------------------------------------------- 1 | const path = require("path"); 2 | const { whisper } = require(path.join( 3 | __dirname, 4 | "../../build/Release/whisper-addon" 5 | )); 6 | const { promisify } = require("util"); 7 | 8 | const whisperAsync = promisify(whisper); 9 | 10 | const whisperParams = { 11 | language: "en", 12 | model: path.join(__dirname, "../../models/ggml-base.en.bin"), 13 | fname_inp: "../../samples/jfk.wav", 14 | use_gpu: true, 15 | }; 16 | 17 | const arguments = process.argv.slice(2); 18 | const params = Object.fromEntries( 19 | arguments.reduce((pre, item) => { 20 | if (item.startsWith("--")) { 21 | return [...pre, item.slice(2).split("=")]; 22 | } 23 | return pre; 24 | }, []) 25 | ); 26 | 27 | for (const key in params) { 28 | if (whisperParams.hasOwnProperty(key)) { 29 | whisperParams[key] = params[key]; 30 | } 31 | } 32 | 33 | console.log("whisperParams =", whisperParams); 34 | 35 | whisperAsync(whisperParams).then((result) => { 36 | console.log(`Result from whisper: ${result}`); 37 | }); 38 | -------------------------------------------------------------------------------- /bindings/java/src/main/java/io/github/ggerganov/whispercpp/callbacks/WhisperLogitsFilterCallback.java: -------------------------------------------------------------------------------- 1 | package io.github.ggerganov.whispercpp.callbacks; 2 | 3 | import com.sun.jna.Callback; 4 | import com.sun.jna.Pointer; 5 | import io.github.ggerganov.whispercpp.model.WhisperTokenData; 6 | 7 | /** 8 | * Callback to filter logits. 9 | * Can be used to modify the logits before sampling. 
10 | * If not null, called after applying temperature to logits. 11 | */ 12 | public interface WhisperLogitsFilterCallback extends Callback { 13 | 14 | /** 15 | * Callback method to filter logits. 16 | * 17 | * @param ctx The whisper context. 18 | * @param state The whisper state. 19 | * @param tokens The array of whisper_token_data. 20 | * @param n_tokens The number of tokens. 21 | * @param logits The array of logits. 22 | * @param user_data User data. 23 | */ 24 | void callback(Pointer ctx, Pointer state, WhisperTokenData[] tokens, int n_tokens, float[] logits, Pointer user_data); 25 | } 26 | -------------------------------------------------------------------------------- /ggml-vulkan.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include "ggml.h" 4 | #include "ggml-backend.h" 5 | 6 | #ifdef __cplusplus 7 | extern "C" { 8 | #endif 9 | 10 | #define GGML_VK_NAME "Vulkan" 11 | #define GGML_VK_MAX_DEVICES 16 12 | 13 | GGML_API void ggml_vk_instance_init(void); 14 | 15 | // backend API 16 | GGML_API GGML_CALL ggml_backend_t ggml_backend_vk_init(size_t dev_num); 17 | 18 | GGML_API GGML_CALL bool ggml_backend_is_vk(ggml_backend_t backend); 19 | GGML_API GGML_CALL int ggml_backend_vk_get_device_count(void); 20 | GGML_API GGML_CALL void ggml_backend_vk_get_device_description(int device, char * description, size_t description_size); 21 | GGML_API GGML_CALL void ggml_backend_vk_get_device_memory(int device, size_t * free, size_t * total); 22 | 23 | GGML_API GGML_CALL ggml_backend_buffer_type_t ggml_backend_vk_buffer_type(size_t dev_num); 24 | // pinned host buffer for use with the CPU backend for faster copies between CPU and GPU 25 | GGML_API GGML_CALL ggml_backend_buffer_type_t ggml_backend_vk_host_buffer_type(void); 26 | 27 | #ifdef __cplusplus 28 | } 29 | #endif 30 | -------------------------------------------------------------------------------- /grammars/chess.gbnf: 
-------------------------------------------------------------------------------- 1 | # - bishop to c3 2 | # - rook to d4 3 | # - knight to e5 4 | # - d4 d5 knight to c3 5 | # - c3 queen to d4 king b1 6 | # - pawn to a1 bishop to b2 knight to c3 7 | # 8 | # The prompt (--prompt) is the initial phrase that the user has to say. 9 | # This is used to prime Whisper with how the user is expected to speak. 10 | # 11 | # Provide long context (--context) with sample moves to help Whisper decode the correct sequence. 12 | # Longer context is better, but it slightly increases the processing time. 13 | # 14 | # example: 15 | # 16 | # ./command -m ./models/ggml-tiny.en.bin -t 8 --grammar ./grammars/chess.gbnf --prompt "rook to b4, f3," --context "d4 d5 knight to c3, pawn to a1, bishop to b2 king e8," --grammar-penalty 100 17 | # 18 | 19 | root ::= init move move? move? "." 20 | prompt ::= init "." 21 | 22 | # leading space is very important! 23 | init ::= " rook to b4, f3" 24 | 25 | move ::= ", " ((piece | pawn | king) " " "to "?)? 
[a-h] [1-8] 26 | 27 | piece ::= "bishop" | "rook" | "knight" | "queen" 28 | king ::= "king" 29 | pawn ::= "pawn" 30 | -------------------------------------------------------------------------------- /bindings/go/examples/go-whisper/main.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "flag" 5 | "fmt" 6 | "os" 7 | "path/filepath" 8 | 9 | // Packages 10 | whisper "github.com/ggerganov/whisper.cpp/bindings/go/pkg/whisper" 11 | ) 12 | 13 | func main() { 14 | flags, err := NewFlags(filepath.Base(os.Args[0]), os.Args[1:]) 15 | if err == flag.ErrHelp { 16 | os.Exit(0) 17 | } else if err != nil { 18 | fmt.Fprintln(os.Stderr, err) 19 | os.Exit(1) 20 | } else if flags.GetModel() == "" { 21 | fmt.Fprintln(os.Stderr, "Use -model flag to specify which model file to use") 22 | os.Exit(1) 23 | } else if flags.NArg() == 0 { 24 | fmt.Fprintln(os.Stderr, "No input files specified") 25 | os.Exit(1) 26 | } 27 | 28 | // Load model 29 | model, err := whisper.New(flags.GetModel()) 30 | if err != nil { 31 | fmt.Fprintln(os.Stderr, err) 32 | os.Exit(1) 33 | } 34 | defer model.Close() 35 | 36 | // Process files 37 | for _, filename := range flags.Args() { 38 | if err := Process(model, filename, flags); err != nil { 39 | fmt.Fprintln(os.Stderr, err) 40 | continue 41 | } 42 | } 43 | } 44 | -------------------------------------------------------------------------------- /examples/wchess/libwchess/Chessboard.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include 3 | #include 4 | #include 5 | 6 | // just basic validation 7 | // fixme: missing en passant, castling, promotion, etc. 
8 | struct State; 9 | class Piece; 10 | class Chessboard { 11 | public: 12 | Chessboard(); 13 | ~Chessboard(); 14 | std::string process(const std::string& command); 15 | std::string stringifyBoard(); 16 | const std::string& grammar() { return m_grammar; } 17 | const std::string& prompt() { return m_prompt; } 18 | void setPrompt(const std::string& prompt); 19 | private: 20 | bool parseCommand(const std::string& command, Piece*& piece, char& pos_to); 21 | bool move(Piece& piece, char pos); 22 | void flagUpdates(char pos_from, char pos_to); 23 | void updatePins(Piece& piece); 24 | void detectChecks(); 25 | void setGrammar(); 26 | 27 | std::unique_ptr m_state; 28 | std::set m_allowedInCheck; 29 | bool m_inCheck = false; 30 | int m_moveCounter = 0; 31 | std::string m_grammar; 32 | std::string m_prompt; 33 | }; 34 | -------------------------------------------------------------------------------- /examples/talk-llama/prompts/talk-alpaca.txt: -------------------------------------------------------------------------------- 1 | Below is an instruction that describes a task. Write a response that appropriately completes the request. 2 | 3 | ### Instruction: 4 | 5 | Write a text transcript of a never ending dialog, where {0} interacts with an AI assistant named {1}. 6 | {1} is helpful, kind, honest, friendly, good at writing and never fails to answer {0}’s requests immediately and with details and precision. 7 | There are no annotations like (30 seconds passed...) or (to himself), just what {0} and {1} say aloud to each other. 8 | The transcript only includes text, it does not include markup like HTML and Markdown. 9 | {1} responds with short and concise answers. 10 | 11 | ### Response: 12 | 13 | {0}{4} Hello, {1}! 14 | {1}{4} Hello {0}! How may I help you today? 15 | {0}{4} What time is it? 16 | {1}{4} It is {2} o'clock. 17 | {0}{4} What year is it? 18 | {1}{4} We are in {3}. 19 | {0}{4} What is a cat? 20 | {1}{4} A cat is a domestic species of small carnivorous mammal. 
It is the only domesticated species in the family Felidae. 21 | {0}{4} Name a color. 22 | {1}{4} Blue 23 | {0}{4} 24 | -------------------------------------------------------------------------------- /ggml-cuda/unary.cuh: -------------------------------------------------------------------------------- 1 | #include "common.cuh" 2 | 3 | #define CUDA_GELU_BLOCK_SIZE 256 4 | #define CUDA_SILU_BLOCK_SIZE 256 5 | #define CUDA_TANH_BLOCK_SIZE 256 6 | #define CUDA_RELU_BLOCK_SIZE 256 7 | #define CUDA_HARDSIGMOID_BLOCK_SIZE 256 8 | #define CUDA_HARDSWISH_BLOCK_SIZE 256 9 | #define CUDA_SQR_BLOCK_SIZE 256 10 | 11 | void ggml_cuda_op_gelu(ggml_backend_cuda_context & ctx, ggml_tensor * dst); 12 | 13 | void ggml_cuda_op_silu(ggml_backend_cuda_context & ctx, ggml_tensor * dst); 14 | 15 | void ggml_cuda_op_gelu_quick(ggml_backend_cuda_context & ctx, ggml_tensor * dst); 16 | 17 | void ggml_cuda_op_tanh(ggml_backend_cuda_context & ctx, ggml_tensor * dst); 18 | 19 | void ggml_cuda_op_relu(ggml_backend_cuda_context & ctx, ggml_tensor * dst); 20 | 21 | void ggml_cuda_op_hardsigmoid(ggml_backend_cuda_context & ctx, ggml_tensor * dst); 22 | 23 | void ggml_cuda_op_hardswish(ggml_backend_cuda_context & ctx, ggml_tensor * dst); 24 | 25 | void ggml_cuda_op_leaky_relu(ggml_backend_cuda_context & ctx, ggml_tensor * dst); 26 | 27 | void ggml_cuda_op_sqr(ggml_backend_cuda_context & ctx, ggml_tensor * dst); 28 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *.o 2 | *.a 3 | .cache/ 4 | .coreml/ 5 | .test/ 6 | .vs/ 7 | .vscode/ 8 | .DS_Store 9 | .vimspector.json 10 | 11 | build/ 12 | build-coreml/ 13 | build-em/ 14 | build-debug/ 15 | build-release/ 16 | build-rwdi/ 17 | build-static/ 18 | build-cublas/ 19 | build-no-accel/ 20 | build-sanitize-addr/ 21 | build-sanitize-thread/ 22 | 23 | # SPM 24 | .build/ 25 | .swiftpm 26 | *.metallib 27 | 28 | /main 29 | /stream 30 
| /command 31 | /talk 32 | /talk-llama 33 | /bench 34 | /quantize 35 | /server 36 | /lsp 37 | 38 | arm_neon.h 39 | sync.sh 40 | libwhisper.a 41 | libwhisper.so 42 | compile_commands.json 43 | 44 | examples/arm_neon.h 45 | examples/whisper.objc/whisper.objc.xcodeproj/xcshareddata 46 | examples/whisper.objc/whisper.objc.xcodeproj/xcuserdata/ 47 | examples/whisper.objc/whisper.objc.xcodeproj/project.xcworkspace/xcuserdata 48 | 49 | extra/bench-gg.txt 50 | 51 | models/*.mlmodel 52 | models/*.mlmodelc 53 | models/*.mlpackage 54 | bindings/java/.gradle/ 55 | bindings/java/.idea/ 56 | .idea/ 57 | 58 | benchmark_results.csv 59 | cmake-build-debug/ 60 | .cxx/ 61 | .gradle/ 62 | local.properties 63 | -------------------------------------------------------------------------------- /bindings/java/src/main/java/io/github/ggerganov/whispercpp/bean/WhisperSegment.java: -------------------------------------------------------------------------------- 1 | package io.github.ggerganov.whispercpp.bean; 2 | 3 | /** 4 | * Created by litonglinux@qq.com on 10/21/2023_7:48 AM 5 | */ 6 | public class WhisperSegment { 7 | private long start, end; 8 | private String sentence; 9 | 10 | public WhisperSegment() { 11 | } 12 | 13 | public WhisperSegment(long start, long end, String sentence) { 14 | this.start = start; 15 | this.end = end; 16 | this.sentence = sentence; 17 | } 18 | 19 | public long getStart() { 20 | return start; 21 | } 22 | 23 | public long getEnd() { 24 | return end; 25 | } 26 | 27 | public String getSentence() { 28 | return sentence; 29 | } 30 | 31 | public void setStart(long start) { 32 | this.start = start; 33 | } 34 | 35 | public void setEnd(long end) { 36 | this.end = end; 37 | } 38 | 39 | public void setSentence(String sentence) { 40 | this.sentence = sentence; 41 | } 42 | 43 | @Override 44 | public String toString() { 45 | return "[" + start + " --> " + end + "]:" + sentence; 46 | } 47 | } 48 | -------------------------------------------------------------------------------- 
/examples/whisper.android.java/app/src/main/java/com/litongjava/whisper/android/java/bean/WhisperSegment.java: -------------------------------------------------------------------------------- 1 | package com.litongjava.whisper.android.java.bean; 2 | 3 | /** 4 | * Created by litonglinux@qq.com on 10/21/2023_7:48 AM 5 | */ 6 | public class WhisperSegment { 7 | private long start, end; 8 | private String sentence; 9 | 10 | public WhisperSegment() { 11 | } 12 | 13 | public WhisperSegment(long start, long end, String sentence) { 14 | this.start = start; 15 | this.end = end; 16 | this.sentence = sentence; 17 | } 18 | 19 | public long getStart() { 20 | return start; 21 | } 22 | 23 | public long getEnd() { 24 | return end; 25 | } 26 | 27 | public String getSentence() { 28 | return sentence; 29 | } 30 | 31 | public void setStart(long start) { 32 | this.start = start; 33 | } 34 | 35 | public void setEnd(long end) { 36 | this.end = end; 37 | } 38 | 39 | public void setSentence(String sentence) { 40 | this.sentence = sentence; 41 | } 42 | 43 | @Override 44 | public String toString() { 45 | return "["+start+" --> "+end+"]:"+sentence; 46 | } 47 | } 48 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2023-2024 The ggml authors 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 
14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /bindings/go/LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2022 David Thorpe 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /examples/wchess/README.md: -------------------------------------------------------------------------------- 1 | # wchess 2 | 3 | Voice-controlled chess using Whisper 4 | 5 | Online demo: https://whisper.ggerganov.com/wchess/ 6 | 7 | https://github.com/ggerganov/whisper.cpp/assets/1991296/c2b2f03c-9684-49f3-8106-357d2d4e67fa 8 | 9 | ## Command-line tool 10 | 11 | ```bash 12 | mkdir build && cd build 13 | cmake -DWHISPER_SDL2=1 .. 14 | make -j 15 | 16 | ./bin/wchess -m ../models/ggml-base.en.bin 17 | 18 | Move: start 19 | 20 | a b c d e f g h 21 | r n b q k b n r 8 22 | p p p p p p p p 7 23 | . * . * . * . * 6 24 | * . * . * . * . 5 25 | . * . * . * . * 4 26 | * . * . * . * . 3 27 | P P P P P P P P 2 28 | R N B Q K B N R 1 29 | 30 | White's turn 31 | [(l)isten/(p)ause/(q)uit]: 32 | ``` 33 | 34 | ## TODO 35 | 36 | - Fix bugs in the chess moves logic 37 | - Improve web-browser audio capture - sometimes it does not record the voice properly 38 | - Add support for more languages by making the generated grammar string multilingual 39 | - Explore ways to improve the dynamic grammar to be narrower 40 | 41 | PRs welcome! 42 | 43 | ## Thanks 44 | 45 | - [chessboardjs](https://chessboardjs.com) for the neat chessboard JS library used in this demo 46 | -------------------------------------------------------------------------------- /examples/addon.node/README.md: -------------------------------------------------------------------------------- 1 | # addon 2 | 3 | This is an addon demo that can **perform whisper model reasoning in `node` and `electron` environments**, based on [cmake-js](https://github.com/cmake-js/cmake-js). 4 | It can be used as a reference for using the whisper.cpp project in other node projects. 5 | 6 | ## Install 7 | 8 | ```shell 9 | npm install 10 | ``` 11 | 12 | ## Compile 13 | 14 | Make sure it is in the project root directory and compiled with make-js. 
15 | 16 | ```shell 17 | npx cmake-js compile -T whisper-addon -B Release 18 | ``` 19 | 20 | For Electron addon and cmake-js options, you can see [cmake-js](https://github.com/cmake-js/cmake-js) and make very few configuration changes. 21 | 22 | > Such as appointing special cmake path: 23 | > ```shell 24 | > npx cmake-js compile -c 'xxx/cmake' -T whisper-addon -B Release 25 | > ``` 26 | 27 | ## Run 28 | 29 | ```shell 30 | cd examples/addon.node 31 | 32 | node index.js --language='language' --model='model-path' --fname_inp='file-path' 33 | ``` 34 | 35 | Because this is a simple Demo, only the above parameters are set in the node environment. 36 | 37 | Other parameters can also be specified in the node environment. 38 | -------------------------------------------------------------------------------- /bindings/javascript/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | set(TARGET libwhisper) 2 | 3 | add_executable(${TARGET} 4 | emscripten.cpp 5 | ) 6 | 7 | target_link_libraries(${TARGET} PRIVATE 8 | whisper 9 | ) 10 | 11 | unset(EXTRA_FLAGS) 12 | 13 | if (WHISPER_WASM_SINGLE_FILE) 14 | set(EXTRA_FLAGS "-s SINGLE_FILE=1") 15 | message(STATUS "Embedding WASM inside whisper.js") 16 | 17 | add_custom_command( 18 | TARGET ${TARGET} POST_BUILD 19 | COMMAND ${CMAKE_COMMAND} -E copy 20 | ${CMAKE_BINARY_DIR}/bin/libwhisper.js 21 | ${CMAKE_CURRENT_SOURCE_DIR}/whisper.js 22 | ) 23 | 24 | add_custom_command( 25 | TARGET ${TARGET} POST_BUILD 26 | COMMAND ${CMAKE_COMMAND} -E copy 27 | ${CMAKE_BINARY_DIR}/bin/libwhisper.worker.js 28 | ${CMAKE_CURRENT_SOURCE_DIR}/libwhisper.worker.js 29 | ) 30 | endif() 31 | 32 | set_target_properties(${TARGET} PROPERTIES LINK_FLAGS " \ 33 | --bind \ 34 | -s MODULARIZE=1 \ 35 | -s EXPORT_NAME=\"'whisper_factory'\" \ 36 | -s FORCE_FILESYSTEM=1 \ 37 | -s USE_PTHREADS=1 \ 38 | -s PTHREAD_POOL_SIZE=8 \ 39 | -s ALLOW_MEMORY_GROWTH=1 \ 40 | ${EXTRA_FLAGS} \ 41 | ") 42 | 
-------------------------------------------------------------------------------- /examples/wchess/wchess.wasm/chessboardjs-1.0.0/js/chessboard-1.0.0/LICENSE.md: -------------------------------------------------------------------------------- 1 | Copyright 2019 Chris Oakman 2 | 3 | Permission is hereby granted, free of charge, to any person obtaining 4 | a copy of this software and associated documentation files (the 5 | "Software"), to deal in the Software without restriction, including 6 | without limitation the rights to use, copy, modify, merge, publish, 7 | distribute, sublicense, and/or sell copies of the Software, and to 8 | permit persons to whom the Software is furnished to do so, subject to 9 | the following conditions: 10 | 11 | The above copyright notice and this permission notice shall be 12 | included in all copies or substantial portions of the Software. 13 | 14 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 15 | EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 16 | MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 17 | NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE 18 | LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION 19 | OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION 20 | WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 21 | -------------------------------------------------------------------------------- /examples/whisper.android.java/gradle.properties: -------------------------------------------------------------------------------- 1 | # Project-wide Gradle settings. 2 | # IDE (e.g. Android Studio) users: 3 | # Gradle settings configured through the IDE *will override* 4 | # any settings specified in this file. 5 | # For more details on how to configure your build environment visit 6 | # http://www.gradle.org/docs/current/userguide/build_environment.html 7 | # Specifies the JVM arguments used for the daemon process. 
8 | # The setting is particularly useful for tweaking memory settings. 9 | org.gradle.jvmargs=-Xmx2048m -Dfile.encoding=UTF-8 10 | # When configured, Gradle will run in incubating parallel mode. 11 | # This option should only be used with decoupled projects. More details, visit 12 | # http://www.gradle.org/docs/current/userguide/multi_project_builds.html#sec:decoupled_projects 13 | # org.gradle.parallel=true 14 | # AndroidX package structure to make it clearer which packages are bundled with the 15 | # Android operating system, and which are packaged with your app"s APK 16 | # https://developer.android.com/topic/libraries/support-library/androidx-rn 17 | android.useAndroidX=true 18 | # Automatically convert third-party libraries to use AndroidX 19 | android.enableJetifier=true -------------------------------------------------------------------------------- /examples/whisper.android/app/src/main/java/com/whispercppdemo/ui/theme/Type.kt: -------------------------------------------------------------------------------- 1 | package com.whispercppdemo.ui.theme 2 | 3 | import androidx.compose.material3.Typography 4 | import androidx.compose.ui.text.TextStyle 5 | import androidx.compose.ui.text.font.FontFamily 6 | import androidx.compose.ui.text.font.FontWeight 7 | import androidx.compose.ui.unit.sp 8 | 9 | // Set of Material typography styles to start with 10 | val Typography = Typography( 11 | bodyLarge = TextStyle( 12 | fontFamily = FontFamily.Default, 13 | fontWeight = FontWeight.Normal, 14 | fontSize = 16.sp, 15 | lineHeight = 24.sp, 16 | letterSpacing = 0.5.sp 17 | ) 18 | /* Other default text styles to override 19 | titleLarge = TextStyle( 20 | fontFamily = FontFamily.Default, 21 | fontWeight = FontWeight.Normal, 22 | fontSize = 22.sp, 23 | lineHeight = 28.sp, 24 | letterSpacing = 0.sp 25 | ), 26 | labelSmall = TextStyle( 27 | fontFamily = FontFamily.Default, 28 | fontWeight = FontWeight.Medium, 29 | fontSize = 11.sp, 30 | lineHeight = 16.sp, 31 | letterSpacing = 
0.5.sp 32 | ) 33 | */ 34 | ) -------------------------------------------------------------------------------- /ggml-cuda/scale.cu: -------------------------------------------------------------------------------- 1 | #include "scale.cuh" 2 | 3 | static __global__ void scale_f32(const float * x, float * dst, const float scale, const int k) { 4 | const int i = blockDim.x*blockIdx.x + threadIdx.x; 5 | 6 | if (i >= k) { 7 | return; 8 | } 9 | 10 | dst[i] = scale * x[i]; 11 | } 12 | 13 | static void scale_f32_cuda(const float * x, float * dst, const float scale, const int k, cudaStream_t stream) { 14 | const int num_blocks = (k + CUDA_SCALE_BLOCK_SIZE - 1) / CUDA_SCALE_BLOCK_SIZE; 15 | scale_f32<<>>(x, dst, scale, k); 16 | } 17 | 18 | void ggml_cuda_op_scale(ggml_backend_cuda_context & ctx, ggml_tensor * dst) { 19 | const ggml_tensor * src0 = dst->src[0]; 20 | const float * src0_d = (const float *)src0->data; 21 | float * dst_d = (float *)dst->data; 22 | cudaStream_t stream = ctx.stream(); 23 | 24 | GGML_ASSERT(src0->type == GGML_TYPE_F32); 25 | GGML_ASSERT( dst->type == GGML_TYPE_F32); 26 | 27 | float scale; 28 | memcpy(&scale, dst->op_params, sizeof(float)); 29 | 30 | scale_f32_cuda(src0_d, dst_d, scale, ggml_nelements(src0), stream); 31 | CUDA_CHECK(cudaGetLastError()); 32 | } 33 | -------------------------------------------------------------------------------- /examples/common-sdl.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include 5 | 6 | #include 7 | #include 8 | #include 9 | #include 10 | 11 | // 12 | // SDL Audio capture 13 | // 14 | 15 | class audio_async { 16 | public: 17 | audio_async(int len_ms); 18 | ~audio_async(); 19 | 20 | bool init(int capture_id, int sample_rate); 21 | 22 | // start capturing audio via the provided SDL callback 23 | // keep last len_ms seconds of audio in a circular buffer 24 | bool resume(); 25 | bool pause(); 26 | bool clear(); 27 | 28 | // callback to be 
called by SDL 29 | void callback(uint8_t * stream, int len); 30 | 31 | // get audio data from the circular buffer 32 | void get(int ms, std::vector & audio); 33 | 34 | private: 35 | SDL_AudioDeviceID m_dev_id_in = 0; 36 | 37 | int m_len_ms = 0; 38 | int m_sample_rate = 0; 39 | 40 | std::atomic_bool m_running; 41 | std::mutex m_mutex; 42 | 43 | std::vector m_audio; 44 | size_t m_audio_pos = 0; 45 | size_t m_audio_len = 0; 46 | }; 47 | 48 | // Return false if need to quit 49 | bool sdl_poll_events(); 50 | -------------------------------------------------------------------------------- /examples/whisper.android.java/app/src/main/java/com/litongjava/whisper/android/java/task/LoadModelTask.java: -------------------------------------------------------------------------------- 1 | package com.litongjava.whisper.android.java.task; 2 | 3 | import android.content.Context; 4 | import android.os.Build; 5 | import android.os.Handler; 6 | import android.widget.TextView; 7 | 8 | import com.blankj.utilcode.util.ThreadUtils; 9 | import com.litongjava.jfinal.aop.Aop; 10 | import com.litongjava.whisper.android.java.services.WhisperService; 11 | 12 | import java.io.File; 13 | 14 | public class LoadModelTask extends ThreadUtils.Task { 15 | private final TextView tv; 16 | public LoadModelTask(TextView tv) { 17 | this.tv = tv; 18 | } 19 | 20 | @Override 21 | public Object doInBackground() { 22 | if (Build.VERSION.SDK_INT >= Build.VERSION_CODES.O) { 23 | Aop.get(WhisperService.class).loadModel(tv); 24 | }else{ 25 | Aop.get(Handler.class).post(()->{ 26 | tv.append("not supported android devices"); 27 | }); 28 | 29 | } 30 | return null; 31 | } 32 | 33 | @Override 34 | public void onSuccess(Object result) { 35 | } 36 | 37 | @Override 38 | public void onCancel() { 39 | } 40 | 41 | @Override 42 | public void onFail(Throwable t) { 43 | } 44 | } -------------------------------------------------------------------------------- /ggml-kompute.h: 
-------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include "ggml.h" 4 | #include "ggml-backend.h" 5 | 6 | #include 7 | #include 8 | #include 9 | 10 | #ifdef __cplusplus 11 | extern "C" { 12 | #endif 13 | 14 | struct ggml_vk_device { 15 | int index; 16 | int type; // same as VkPhysicalDeviceType 17 | size_t heapSize; 18 | const char * name; 19 | const char * vendor; 20 | int subgroupSize; 21 | uint64_t bufferAlignment; 22 | uint64_t maxAlloc; 23 | }; 24 | 25 | struct ggml_vk_device * ggml_vk_available_devices(size_t memoryRequired, size_t * count); 26 | bool ggml_vk_get_device(struct ggml_vk_device * device, size_t memoryRequired, const char * name); 27 | bool ggml_vk_has_vulkan(void); 28 | bool ggml_vk_has_device(void); 29 | struct ggml_vk_device ggml_vk_current_device(void); 30 | 31 | // 32 | // backend API 33 | // 34 | 35 | // forward declaration 36 | typedef struct ggml_backend * ggml_backend_t; 37 | 38 | GGML_API ggml_backend_t ggml_backend_kompute_init(int device); 39 | 40 | GGML_API bool ggml_backend_is_kompute(ggml_backend_t backend); 41 | 42 | GGML_API ggml_backend_buffer_type_t ggml_backend_kompute_buffer_type(int device); 43 | 44 | #ifdef __cplusplus 45 | } 46 | #endif 47 | -------------------------------------------------------------------------------- /examples/wchess/wchess.wasm/chessboardjs-1.0.0/js/chessboard-1.0.0/CHANGELOG.md: -------------------------------------------------------------------------------- 1 | # chessboard.js Change Log 2 | 3 | All notable changes to this project will be documented in this file. 4 | 5 | ## [1.0.0] - 2019-06-11 6 | - Orientation methods now return current orientation. 
[Issue #64] 7 | - Drop support for IE8 8 | - Do not check for `window.JSON` (Error #1004) 9 | - Rename `ChessBoard` to `Chessboard` (`ChessBoard` is still supported, however) 10 | - id query selectors are now supported as the first argument to `Chessboard()` 11 | - Remove Error #1002 12 | - Format code according to [StandardJS] 13 | - Bump minimum jQuery version to 1.8.3 14 | - Throttle piece drag functions 15 | 16 | ## [0.3.0] - 2013-08-10 17 | - Added `appearSpeed` animation config property 18 | - Added `onSnapbackEnd` event 19 | - Added `onMoveEnd` event 20 | 21 | ## [0.2.0] - 2013-08-05 22 | - Added `onMouseoverSquare` and `onMouseoutSquare` events 23 | - Added `onSnapEnd` event 24 | - Added square code as CSS class on the squares 25 | - Added [chess.js] integration examples 26 | 27 | ## [0.1.0] - 2013-05-21 28 | - Initial release 29 | 30 | [chess.js]:https://github.com/jhlywa/chess.js 31 | [Issue #64]:https://github.com/oakmac/chessboardjs/issues/64 32 | [StandardJS]:https://standardjs.com/ 33 | -------------------------------------------------------------------------------- /examples/whisper.android.java/app/src/main/java/com/litongjava/whisper/android/java/task/TranscriptionTask.java: -------------------------------------------------------------------------------- 1 | package com.litongjava.whisper.android.java.task; 2 | 3 | import android.content.Context; 4 | import android.os.Build; 5 | import android.widget.TextView; 6 | 7 | import com.blankj.utilcode.util.ThreadUtils; 8 | import com.litongjava.jfinal.aop.Aop; 9 | import com.litongjava.whisper.android.java.services.WhisperService; 10 | 11 | import java.io.File; 12 | 13 | public class TranscriptionTask extends ThreadUtils.Task { 14 | private final TextView tv; 15 | private final File sampleFile; 16 | 17 | public TranscriptionTask(TextView tv, File sampleFile) { 18 | this.tv = tv; 19 | this.sampleFile = sampleFile; 20 | 21 | } 22 | 23 | @Override 24 | public Object doInBackground() { 25 | if 
(Build.VERSION.SDK_INT >= Build.VERSION_CODES.O) { 26 | Aop.get(WhisperService.class).transcribeSample(tv, sampleFile); 27 | }else{ 28 | tv.append("not supported android devices"); 29 | } 30 | return null; 31 | } 32 | 33 | @Override 34 | public void onSuccess(Object result) { 35 | } 36 | 37 | @Override 38 | public void onCancel() { 39 | } 40 | 41 | @Override 42 | public void onFail(Throwable t) { 43 | } 44 | } -------------------------------------------------------------------------------- /examples/whisper.swiftui/whisper.swiftui.demo/Utils/Recorder.swift: -------------------------------------------------------------------------------- 1 | import Foundation 2 | import AVFoundation 3 | 4 | actor Recorder { 5 | private var recorder: AVAudioRecorder? 6 | 7 | enum RecorderError: Error { 8 | case couldNotStartRecording 9 | } 10 | 11 | func startRecording(toOutputFile url: URL, delegate: AVAudioRecorderDelegate?) throws { 12 | let recordSettings: [String : Any] = [ 13 | AVFormatIDKey: Int(kAudioFormatLinearPCM), 14 | AVSampleRateKey: 16000.0, 15 | AVNumberOfChannelsKey: 1, 16 | AVEncoderAudioQualityKey: AVAudioQuality.high.rawValue 17 | ] 18 | #if !os(macOS) 19 | let session = AVAudioSession.sharedInstance() 20 | try session.setCategory(.playAndRecord, mode: .default) 21 | #endif 22 | let recorder = try AVAudioRecorder(url: url, settings: recordSettings) 23 | recorder.delegate = delegate 24 | if recorder.record() == false { 25 | print("Could not start recording") 26 | throw RecorderError.couldNotStartRecording 27 | } 28 | self.recorder = recorder 29 | } 30 | 31 | func stopRecording() { 32 | recorder?.stop() 33 | recorder = nil 34 | } 35 | } 36 | -------------------------------------------------------------------------------- /examples/wchess/wchess.wasm/chessboardjs-1.0.0/css/chessboard-1.0.0.css: -------------------------------------------------------------------------------- 1 | /*! 
chessboard.js v1.0.0 | (c) 2019 Chris Oakman | MIT License chessboardjs.com/license */ 2 | 3 | .clearfix-7da63 { 4 | clear: both; 5 | } 6 | 7 | .board-b72b1 { 8 | border: 2px solid #404040; 9 | box-sizing: content-box; 10 | } 11 | 12 | .square-55d63 { 13 | float: left; 14 | position: relative; 15 | 16 | /* disable any native browser highlighting */ 17 | -webkit-touch-callout: none; 18 | -webkit-user-select: none; 19 | -khtml-user-select: none; 20 | -moz-user-select: none; 21 | -ms-user-select: none; 22 | user-select: none; 23 | } 24 | 25 | .white-1e1d7 { 26 | background-color: #f0d9b5; 27 | color: #b58863; 28 | } 29 | 30 | .black-3c85d { 31 | background-color: #b58863; 32 | color: #f0d9b5; 33 | } 34 | 35 | .highlight1-32417, .highlight2-9c5d2 { 36 | box-shadow: inset 0 0 3px 3px yellow; 37 | } 38 | 39 | .notation-322f9 { 40 | cursor: default; 41 | font-family: "Helvetica Neue", Helvetica, Arial, sans-serif; 42 | font-size: 14px; 43 | position: absolute; 44 | } 45 | 46 | .alpha-d2270 { 47 | bottom: 1px; 48 | right: 3px; 49 | } 50 | 51 | .numeric-fc462 { 52 | top: 2px; 53 | left: 2px; 54 | } 55 | -------------------------------------------------------------------------------- /scripts/deploy-wasm.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # 3 | # This is a helper script to deploy all WebAssembly examples to my node 4 | # Run from the build directory: 5 | # 6 | # cd build-em 7 | # ../scripts/deploy-wasm.sh 8 | # 9 | 10 | # check if emcmake is available 11 | if ! command -v emcmake &> /dev/null 12 | then 13 | echo "Error: emscripten environment is not set up" 14 | exit 15 | fi 16 | 17 | emcmake cmake .. && make -j 18 | if [ $? 
-ne 0 ]; then 19 | echo "Error: build failed" 20 | exit 21 | fi 22 | 23 | # copy all wasm files to the node 24 | scp bin/whisper.wasm/* root@linode0:/var/www/html/whisper/ && scp bin/libmain.worker.js root@linode0:/var/www/html/whisper/ 25 | scp bin/stream.wasm/* root@linode0:/var/www/html/whisper/stream/ && scp bin/libstream.worker.js root@linode0:/var/www/html/whisper/stream/ 26 | scp bin/command.wasm/* root@linode0:/var/www/html/whisper/command/ && scp bin/libcommand.worker.js root@linode0:/var/www/html/whisper/command/ 27 | scp bin/talk.wasm/* root@linode0:/var/www/html/whisper/talk/ && scp bin/libtalk.worker.js root@linode0:/var/www/html/whisper/talk/ 28 | scp bin/bench.wasm/* root@linode0:/var/www/html/whisper/bench/ && scp bin/libbench.worker.js root@linode0:/var/www/html/whisper/bench/ 29 | 30 | echo "Done" 31 | exit 32 | -------------------------------------------------------------------------------- /examples/whisper.android/app/src/main/AndroidManifest.xml: -------------------------------------------------------------------------------- 1 | 2 | 4 | 5 | 6 | 7 | 16 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 29 | 30 | 31 | 32 | -------------------------------------------------------------------------------- /examples/whisper.android.java/README.md: -------------------------------------------------------------------------------- 1 | A sample Android app using java code and [whisper.cpp](https://github.com/ggerganov/whisper.cpp/) to do voice-to-text transcriptions. 2 | 3 | To use: 4 | 5 | 1. Select a model from the [whisper.cpp repository](https://github.com/ggerganov/whisper.cpp/tree/master/models).[^1] 6 | 2. Copy the model to the "app/src/main/assets/models" folder. 7 | 3. Select a sample audio file (for example, [jfk.wav](https://github.com/ggerganov/whisper.cpp/raw/master/samples/jfk.wav)). 8 | 4. Copy the sample to the "app/src/main/assets/samples" folder. 9 | 5. Modify the modelFilePath in the WhisperService.java 10 | 6. 
Modify the sampleFilePath in the WhisperService.java 11 | 7. Select the "release" active build variant, and use Android Studio to run and deploy to your device. 12 | [^1]: I recommend the tiny or base models for running on an Android device. 13 | 14 | PS: 15 | 1. Do not move this android project folder individually to other folders, because this android project folder depends on the files of the whole project. 16 | 2. The cpp code is compiled during the build process 17 | 3. If you want to import a compiled cpp project in your Android project, please refer to the https://github.com/litongjava/whisper.cpp.android.java.demo 18 | 19 | ![](README_files/1.jpg) 20 | 21 | -------------------------------------------------------------------------------- /ggml-cuda/clamp.cu: -------------------------------------------------------------------------------- 1 | #include "clamp.cuh" 2 | 3 | static __global__ void clamp_f32(const float * x, float * dst, const float min, const float max, const int k) { 4 | const int i = blockDim.x*blockIdx.x + threadIdx.x; 5 | 6 | if (i >= k) { 7 | return; 8 | } 9 | 10 | dst[i] = x[i] < min ? min : (x[i] > max ? 
max : x[i]); 11 | } 12 | 13 | static void clamp_f32_cuda(const float * x, float * dst, const float min, const float max, const int k, cudaStream_t stream) { 14 | const int num_blocks = (k + CUDA_CLAMP_BLOCK_SIZE - 1) / CUDA_CLAMP_BLOCK_SIZE; 15 | clamp_f32<<>>(x, dst, min, max, k); 16 | } 17 | 18 | 19 | void ggml_cuda_op_clamp(ggml_backend_cuda_context & ctx, ggml_tensor * dst) { 20 | const ggml_tensor * src0 = dst->src[0]; 21 | const float * src0_d = (const float *)src0->data; 22 | float * dst_d = (float *)dst->data; 23 | cudaStream_t stream = ctx.stream(); 24 | 25 | GGML_ASSERT(src0->type == GGML_TYPE_F32); 26 | GGML_ASSERT( dst->type == GGML_TYPE_F32); 27 | 28 | float min; 29 | float max; 30 | memcpy(&min, dst->op_params, sizeof(float)); 31 | memcpy(&max, (float *) dst->op_params + 1, sizeof(float)); 32 | 33 | clamp_f32_cuda(src0_d, dst_d, min, max, ggml_nelements(src0), stream); 34 | CUDA_CHECK(cudaGetLastError()); 35 | } 36 | -------------------------------------------------------------------------------- /ggml-cuda/arange.cu: -------------------------------------------------------------------------------- 1 | #include "arange.cuh" 2 | 3 | static __global__ void arange_f32(float * dst, const int ne0, const float start, const float step) { 4 | // blockIDx.x: idx of ne0 / BLOCK_SIZE 5 | int nidx = threadIdx.x + blockIdx.x * blockDim.x; 6 | if (nidx >= ne0) { 7 | return; 8 | } 9 | dst[nidx] = start + step * nidx; 10 | } 11 | 12 | static void arange_f32_cuda(float * dst, const int ne0, const float start, const float step, cudaStream_t stream) { 13 | int num_blocks = (ne0 + CUDA_ARANGE_BLOCK_SIZE - 1) / CUDA_ARANGE_BLOCK_SIZE; 14 | arange_f32<<>>(dst, ne0, start, step); 15 | } 16 | 17 | void ggml_cuda_op_arange(ggml_backend_cuda_context & ctx, ggml_tensor * dst) { 18 | float * dst_d = (float *)dst->data; 19 | cudaStream_t stream = ctx.stream(); 20 | 21 | GGML_ASSERT(dst->type == GGML_TYPE_F32); 22 | 23 | float start; 24 | float stop; 25 | float step; 26 | 
memcpy(&start, (float *)dst->op_params + 0, sizeof(float)); 27 | memcpy(&stop, (float *)dst->op_params + 1, sizeof(float)); 28 | memcpy(&step, (float *)dst->op_params + 2, sizeof(float)); 29 | 30 | int64_t steps = (int64_t)ceil((stop - start) / step); 31 | GGML_ASSERT(ggml_nelements(dst) == steps); 32 | 33 | arange_f32_cuda(dst_d, dst->ne[0], start, step, stream); 34 | } 35 | -------------------------------------------------------------------------------- /ggml-cuda/sumrows.cu: -------------------------------------------------------------------------------- 1 | #include "sumrows.cuh" 2 | 3 | static __global__ void k_sum_rows_f32(const float * x, float * dst, const int ncols) { 4 | const int row = blockIdx.x; 5 | const int col = threadIdx.x; 6 | 7 | float sum = 0.0f; 8 | for (int i = col; i < ncols; i += blockDim.x) { 9 | sum += x[row * ncols + i]; 10 | } 11 | 12 | sum = warp_reduce_sum(sum); 13 | 14 | if (col == 0) { 15 | dst[row] = sum; 16 | } 17 | } 18 | 19 | static void sum_rows_f32_cuda(const float * x, float * dst, const int ncols, const int nrows, cudaStream_t stream) { 20 | const dim3 block_dims(WARP_SIZE, 1, 1); 21 | const dim3 block_nums(nrows, 1, 1); 22 | k_sum_rows_f32<<>>(x, dst, ncols); 23 | } 24 | 25 | void ggml_cuda_op_sum_rows(ggml_backend_cuda_context & ctx, ggml_tensor * dst) { 26 | const ggml_tensor * src0 = dst->src[0]; 27 | const float * src0_d = (const float *)src0->data; 28 | float * dst_d = (float *)dst->data; 29 | cudaStream_t stream = ctx.stream(); 30 | 31 | GGML_ASSERT(src0->type == GGML_TYPE_F32); 32 | GGML_ASSERT( dst->type == GGML_TYPE_F32); 33 | GGML_ASSERT(ggml_is_contiguous(src0)); 34 | 35 | 36 | const int64_t ncols = src0->ne[0]; 37 | const int64_t nrows = ggml_nrows(src0); 38 | 39 | sum_rows_f32_cuda(src0_d, dst_d, ncols, nrows, stream); 40 | } 41 | -------------------------------------------------------------------------------- /.github/workflows/examples.yml: 
name: Examples Tests
on:
  push:
    paths:
      - examples/addon.node/**
      - whisper.h
  pull_request:
    paths:
      - examples/addon.node/**
      - whisper.h

jobs:
  addon_node-ubuntu-latest:
    runs-on: ubuntu-latest
    strategy:
      matrix:
        node-version: [ 16.x, 18.x ]
    steps:
      - name: Clone
        # checkout@v1 runs on a deprecated Node runner; v4 is the supported release
        uses: actions/checkout@v4

      - name: Dependencies
        run: |
          sudo apt-get update
          sudo apt-get install build-essential
          sudo apt-get install cmake
          sudo apt-get install libsdl2-dev

      - name: Use Node.js ${{ matrix.node-version }}
        # the `cache` input below is not supported by setup-node@v1 (it was
        # silently ignored); v4 honors it
        uses: actions/setup-node@v4
        with:
          node-version: ${{ matrix.node-version }}
          cache: 'npm'
          # NOTE(review): with the lockfile living in examples/addon.node, npm
          # caching may also need `cache-dependency-path` — confirm on CI:
          # cache-dependency-path: examples/addon.node/package-lock.json

      - name: Install package.json dependencies
        working-directory: ./examples/addon.node
        run: npm install

      - name: Compile addon.node
        run: npx cmake-js compile -T whisper-addon -B Release

      - name: Download test model
        run: |
          bash ./models/download-ggml-model.sh base.en
      - name: Test
        run: |
          cd examples/addon.node
          npm run test
#
# libbench
#

# Emscripten-compiled benchmark core; the JS/WASM output is post-processed below.
set(TARGET libbench)

add_executable(${TARGET}
    emscripten.cpp
    )

include(DefaultTargetOptions)

target_link_libraries(${TARGET} PRIVATE
    whisper
    )

unset(EXTRA_FLAGS)

if (WHISPER_WASM_SINGLE_FILE)
    # Embed the .wasm binary directly inside bench.js so a single file can be served.
    set(EXTRA_FLAGS "-s SINGLE_FILE=1")
    message(STATUS "Embedding WASM inside bench.js")

    add_custom_command(
        TARGET ${TARGET} POST_BUILD
        COMMAND ${CMAKE_COMMAND} -E copy
        ${CMAKE_BINARY_DIR}/bin/libbench.js
        ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/bench.wasm/bench.js
        )
endif()

# Emscripten linker flags: pthreads enabled, 2000MB fixed heap, filesystem
# support, and the runtime methods the page's JS glue calls.
set_target_properties(${TARGET} PROPERTIES LINK_FLAGS " \
    --bind \
    -s USE_PTHREADS=1 \
    -s PTHREAD_POOL_SIZE_STRICT=0 \
    -s INITIAL_MEMORY=2000MB \
    -s TOTAL_MEMORY=2000MB \
    -s FORCE_FILESYSTEM=1 \
    -s EXPORTED_RUNTIME_METHODS=\"['print', 'printErr', 'ccall', 'cwrap']\" \
    ${EXTRA_FLAGS} \
    ")

#
# bench.wasm
#

# Static page assets: the HTML template and shared helpers are copied into the
# runtime output directory.
set(TARGET bench.wasm)

configure_file(${CMAKE_CURRENT_SOURCE_DIR}/index-tmpl.html ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/${TARGET}/index.html @ONLY)
configure_file(${CMAKE_CURRENT_SOURCE_DIR}/../helpers.js   ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/${TARGET}/helpers.js @ONLY)
#
# libstream
#

# Emscripten-compiled streaming-transcription core.
set(TARGET libstream)

add_executable(${TARGET}
    emscripten.cpp
    )

include(DefaultTargetOptions)

target_link_libraries(${TARGET} PRIVATE
    whisper
    )

unset(EXTRA_FLAGS)

if (WHISPER_WASM_SINGLE_FILE)
    # Embed the .wasm binary directly inside stream.js so a single file can be served.
    set(EXTRA_FLAGS "-s SINGLE_FILE=1")
    message(STATUS "Embedding WASM inside stream.js")

    add_custom_command(
        TARGET ${TARGET} POST_BUILD
        COMMAND ${CMAKE_COMMAND} -E copy
        ${CMAKE_BINARY_DIR}/bin/libstream.js
        ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/stream.wasm/stream.js
        )
endif()

# Emscripten linker flags: fixed pool of 8 pthreads and a 1024MB heap
# (smaller than bench/whisper.wasm since streaming uses the smaller models).
set_target_properties(${TARGET} PROPERTIES LINK_FLAGS " \
    --bind \
    -s USE_PTHREADS=1 \
    -s PTHREAD_POOL_SIZE=8 \
    -s INITIAL_MEMORY=1024MB \
    -s TOTAL_MEMORY=1024MB \
    -s FORCE_FILESYSTEM=1 \
    -s EXPORTED_RUNTIME_METHODS=\"['print', 'printErr', 'ccall', 'cwrap']\" \
    ${EXTRA_FLAGS} \
    ")

#
# stream.wasm
#

# Static page assets copied into the runtime output directory.
set(TARGET stream.wasm)

configure_file(${CMAKE_CURRENT_SOURCE_DIR}/index-tmpl.html ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/${TARGET}/index.html @ONLY)
configure_file(${CMAKE_CURRENT_SOURCE_DIR}/../helpers.js   ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/${TARGET}/helpers.js @ONLY)
ARG UBUNTU_VERSION=22.04
# This needs to generally match the container host's environment.
ARG CUDA_VERSION=12.3.1
# Target the CUDA build image
ARG BASE_CUDA_DEV_CONTAINER=nvidia/cuda:${CUDA_VERSION}-devel-ubuntu${UBUNTU_VERSION}
# Target the CUDA runtime image
ARG BASE_CUDA_RUN_CONTAINER=nvidia/cuda:${CUDA_VERSION}-runtime-ubuntu${UBUNTU_VERSION}

FROM ${BASE_CUDA_DEV_CONTAINER} AS build
WORKDIR /app

# Unless otherwise specified, we make a fat build.
ARG CUDA_DOCKER_ARCH=all
# Set nvcc architecture
ENV CUDA_DOCKER_ARCH=${CUDA_DOCKER_ARCH}
# Enable cuBLAS
ENV WHISPER_CUBLAS=1

RUN apt-get update && \
    apt-get install -y build-essential \
    && rm -rf /var/lib/apt/lists/* /var/cache/apt/archives/*

# Ref: https://stackoverflow.com/a/53464012
ENV CUDA_MAIN_VERSION=12.3
# key=value form; the legacy whitespace-separated ENV form is deprecated
ENV LD_LIBRARY_PATH=/usr/local/cuda-${CUDA_MAIN_VERSION}/compat:$LD_LIBRARY_PATH

# COPY may only reference paths inside the build context; ".." escapes it and
# fails the build, so copy the context root (the repository) instead.
COPY . .
RUN make

FROM ${BASE_CUDA_RUN_CONTAINER} AS runtime
ENV CUDA_MAIN_VERSION=12.3
ENV LD_LIBRARY_PATH=/usr/local/cuda-${CUDA_MAIN_VERSION}/compat:$LD_LIBRARY_PATH
WORKDIR /app

RUN apt-get update && \
    apt-get install -y curl ffmpeg \
    && rm -rf /var/lib/apt/lists/* /var/cache/apt/archives/*

COPY --from=build /app /app
ENTRYPOINT [ "bash", "-c" ]
#
# libcommand
#

# Emscripten-compiled voice-command core.
set(TARGET libcommand)

add_executable(${TARGET}
    emscripten.cpp
    )

include(DefaultTargetOptions)

target_link_libraries(${TARGET} PRIVATE
    common
    whisper
    )

unset(EXTRA_FLAGS)

if (WHISPER_WASM_SINGLE_FILE)
    # Embed the .wasm binary directly inside command.js so a single file can be served.
    set(EXTRA_FLAGS "-s SINGLE_FILE=1")
    message(STATUS "Embedding WASM inside command.js")

    add_custom_command(
        TARGET ${TARGET} POST_BUILD
        COMMAND ${CMAKE_COMMAND} -E copy
        ${CMAKE_BINARY_DIR}/bin/libcommand.js
        ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/command.wasm/command.js
        )
endif()

# Emscripten linker flags: fixed pool of 8 pthreads and a 1024MB heap.
set_target_properties(${TARGET} PROPERTIES LINK_FLAGS " \
    --bind \
    -s USE_PTHREADS=1 \
    -s PTHREAD_POOL_SIZE=8 \
    -s INITIAL_MEMORY=1024MB \
    -s TOTAL_MEMORY=1024MB \
    -s FORCE_FILESYSTEM=1 \
    -s EXPORTED_RUNTIME_METHODS=\"['print', 'printErr', 'ccall', 'cwrap']\" \
    ${EXTRA_FLAGS} \
    ")

#
# command.wasm
#

# Static page assets copied into the runtime output directory.
set(TARGET command.wasm)

configure_file(${CMAKE_CURRENT_SOURCE_DIR}/index-tmpl.html ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/${TARGET}/index.html @ONLY)
configure_file(${CMAKE_CURRENT_SOURCE_DIR}/../helpers.js   ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/${TARGET}/helpers.js @ONLY)
# Node.js N-API addon exposing whisper to JavaScript (built with cmake-js).
set(TARGET whisper-addon)

# Base settings
#==================================================================
# env var supported by cmake-js
add_definitions(-DNAPI_VERSION=4)
include_directories(${CMAKE_JS_INC})
#==================================================================

# Shared library with the ".node" suffix Node's loader expects (no "lib" prefix).
add_library(${TARGET} SHARED ${CMAKE_JS_SRC} addon.cpp)
set_target_properties(${TARGET} PROPERTIES PREFIX "" SUFFIX ".node")

include(DefaultTargetOptions)

# Include N-API wrappers
#==================================================================
# Ask the local node installation where node-addon-api's headers live; the
# REPLACE calls strip the trailing newline and quotes from node's output.
execute_process(COMMAND node -p "require('node-addon-api').include"
        WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}
        OUTPUT_VARIABLE NODE_ADDON_API_DIR
        )
string(REPLACE "\n" "" NODE_ADDON_API_DIR ${NODE_ADDON_API_DIR})
string(REPLACE "\"" "" NODE_ADDON_API_DIR ${NODE_ADDON_API_DIR})
target_include_directories(${TARGET} PRIVATE ${NODE_ADDON_API_DIR})
#==================================================================

target_link_libraries(${TARGET} ${CMAKE_JS_LIB} common whisper ${CMAKE_THREAD_LIBS_INIT})

if(MSVC AND CMAKE_JS_NODELIB_DEF AND CMAKE_JS_NODELIB_TARGET)
    # Generate node.lib
    execute_process(COMMAND ${CMAKE_AR} /def:${CMAKE_JS_NODELIB_DEF} /out:${CMAKE_JS_NODELIB_TARGET} ${CMAKE_STATIC_LINKER_FLAGS})
endif()
#!/bin/bash

# Simple tool to record audio from the microphone and generate a karaoke video
# Usage:
#
#  cd whisper.cpp
#  make
#
#  ./examples/generate-karaoke.sh [model] [step_ms]
#
#  Press Ctrl+C to stop recording
#

executable="./main"
# honor the optional [model] argument advertised in the usage above
# (previously hard-coded to base.en even though the argument was documented)
model="${1:-base.en}"
model_path="models/ggml-$model.bin"
# NOTE(review): the documented [step_ms] argument ($2) is not used anywhere in
# this script — TODO confirm whether it was meant to be forwarded to ./main

# require sox and ffmpeg to be installed
if ! command -v sox &> /dev/null
then
    echo "sox could not be found"
    exit 1
fi

if ! command -v ffmpeg &> /dev/null
then
    echo "ffmpeg could not be found"
    exit 2
fi

if [ ! -f "$executable" ]; then
    echo "'$executable' does not exist. Please build it first."
    exit 3
fi

if [ ! -f "$model_path" ]; then
    echo "'$model_path' does not exist. Please download it first."
    exit 4
fi

# record some raw audio
sox -d rec.wav

# resample to 16kHz
ffmpeg -y -i ./rec.wav -ar 16000 -ac 1 -c:a pcm_s16le ./rec16.wav > /dev/null 2>&1

# run Whisper
echo "Processing ..."
# use the configured executable/model instead of hard-coded paths, so the
# checks above actually guard the binary and model that get used
"$executable" -m "$model_path" rec16.wav -owts > /dev/null 2>&1

# generate Karaoke video
echo "Generating video ..."
source rec16.wav.wts > /dev/null 2>&1

# play the video
echo "Playing ./rec16.wav.mp4 ..."
ffplay -loglevel 0 -autoexit ./rec16.wav.mp4

echo "Done"
exit 0
#!/bin/sh

# Converts a Whisper (or HuggingFace H5) model to a compiled Core ML encoder
# (models/ggml-<model>-encoder.mlmodelc). Requires python3 with coremltools
# and Xcode's `xcrun coremlc` on the PATH.
# NOTE(review): the <model> placeholders in the usage strings below appear to
# have been stripped by angle-bracket-eating extraction — confirm against the
# upstream file before relying on them.

# Usage: ./generate-coreml-model.sh
if [ $# -eq 0 ]; then
  echo "No model name supplied"
  echo "Usage for Whisper models: ./generate-coreml-model.sh "
  echo "Usage for HuggingFace models: ./generate-coreml-model.sh -h5 "
  exit 1
elif [ "$1" = "-h5" ] && [ $# != 3 ]; then
  echo "No model name and model path supplied for a HuggingFace model"
  echo "Usage for HuggingFace models: ./generate-coreml-model.sh -h5 "
  exit 1
fi

mname="$1"

# run from the repository root so the models/ paths below resolve
wd=$(dirname "$0")
cd "$wd/../" || exit

if [ "$mname" = "-h5" ]; then
  # HuggingFace flavor: -h5 <model-name> <model-path>
  mname="$2"
  mpath="$3"
  echo "$mpath"
  python3 models/convert-h5-to-coreml.py --model-name "$mname" --model-path "$mpath" --encoder-only True
else
  # plain Whisper model name (tiny, base.en, ...)
  python3 models/convert-whisper-to-coreml.py --model "$mname" --encoder-only True --optimize-ane True
fi

# compile the generated .mlpackage and install it under the ggml naming scheme
xcrun coremlc compile models/coreml-encoder-"${mname}".mlpackage models/
rm -rf models/ggml-"${mname}"-encoder.mlmodelc
mv -v models/coreml-encoder-"${mname}".mlmodelc models/ggml-"${mname}"-encoder.mlmodelc

# TODO: decoder (sometime in the future maybe)
#xcrun coremlc compile models/whisper-decoder-${mname}.mlpackage models/
#rm -rf models/ggml-${mname}-decoder.mlmodelc
#mv -v models/coreml_decoder_${mname}.mlmodelc models/ggml-${mname}-decoder.mlmodelc
2 | See also: [whisper.objc](https://github.com/ggerganov/whisper.cpp/tree/master/examples/whisper.objc). 3 | 4 | **Usage**: 5 | 6 | 1. Select a model from the [whisper.cpp repository](https://github.com/ggerganov/whisper.cpp/tree/master/models).[^1] 7 | 2. Add the model to `whisper.swiftui.demo/Resources/models` **via Xcode**. 8 | 3. Select a sample audio file (for example, [jfk.wav](https://github.com/ggerganov/whisper.cpp/raw/master/samples/jfk.wav)). 9 | 4. Add the sample audio file to `whisper.swiftui.demo/Resources/samples` **via Xcode**. 10 | 5. Select the "Release" [^2] build configuration under "Run", then deploy and run to your device. 11 | 12 | **Note:** Pay attention to the folder path: `whisper.swiftui.demo/Resources/models` is the appropriate directory to place resources whilst `whisper.swiftui.demo/Models` is related to actual code. 13 | 14 | [^1]: I recommend the tiny, base or small models for running on an iOS device. 15 | 16 | [^2]: The `Release` build can boost performance of transcription. In this project, it also added `-O3 -DNDEBUG` to `Other C Flags`, but adding flags to app proj is not ideal in real world (applies to all C/C++ files), consider splitting xcodeproj in workspace in your own project. 
17 | 18 | ![image](https://user-images.githubusercontent.com/1991296/212539216-0aef65e4-f882-480a-8358-0f816838fd52.png) 19 | -------------------------------------------------------------------------------- /bindings/go/pkg/whisper/context_test.go: -------------------------------------------------------------------------------- 1 | package whisper_test 2 | 3 | import ( 4 | "os" 5 | "testing" 6 | 7 | // Packages 8 | whisper "github.com/ggerganov/whisper.cpp/bindings/go/pkg/whisper" 9 | assert "github.com/stretchr/testify/assert" 10 | ) 11 | 12 | const ( 13 | ModelPath = "../../models/ggml-tiny.bin" 14 | SamplePath = "../../samples/jfk.wav" 15 | ) 16 | 17 | func Test_Whisper_000(t *testing.T) { 18 | assert := assert.New(t) 19 | if _, err := os.Stat(ModelPath); os.IsNotExist(err) { 20 | t.Skip("Skipping test, model not found:", ModelPath) 21 | } 22 | if _, err := os.Stat(SamplePath); os.IsNotExist(err) { 23 | t.Skip("Skipping test, sample not found:", SamplePath) 24 | } 25 | 26 | // Load model 27 | model, err := whisper.New(ModelPath) 28 | assert.NoError(err) 29 | assert.NotNil(model) 30 | assert.NoError(model.Close()) 31 | 32 | t.Log("languages=", model.Languages()) 33 | } 34 | 35 | func Test_Whisper_001(t *testing.T) { 36 | assert := assert.New(t) 37 | if _, err := os.Stat(ModelPath); os.IsNotExist(err) { 38 | t.Skip("Skipping test, model not found:", ModelPath) 39 | } 40 | if _, err := os.Stat(SamplePath); os.IsNotExist(err) { 41 | t.Skip("Skipping test, sample not found:", SamplePath) 42 | } 43 | 44 | // Load model 45 | model, err := whisper.New(ModelPath) 46 | assert.NoError(err) 47 | assert.NotNil(model) 48 | defer model.Close() 49 | 50 | // Get context for decoding 51 | ctx, err := model.NewContext() 52 | assert.NoError(err) 53 | assert.NotNil(ctx) 54 | 55 | } 56 | -------------------------------------------------------------------------------- /examples/whisper.objc/whisper.objc/AppDelegate.m: 
-------------------------------------------------------------------------------- 1 | // 2 | // AppDelegate.m 3 | // whisper.objc 4 | // 5 | // Created by Georgi Gerganov on 23.10.22. 6 | // 7 | 8 | #import "AppDelegate.h" 9 | 10 | @interface AppDelegate () 11 | 12 | @end 13 | 14 | @implementation AppDelegate 15 | 16 | 17 | - (BOOL)application:(UIApplication *)application didFinishLaunchingWithOptions:(NSDictionary *)launchOptions { 18 | // Override point for customization after application launch. 19 | return YES; 20 | } 21 | 22 | 23 | #pragma mark - UISceneSession lifecycle 24 | 25 | 26 | - (UISceneConfiguration *)application:(UIApplication *)application configurationForConnectingSceneSession:(UISceneSession *)connectingSceneSession options:(UISceneConnectionOptions *)options { 27 | // Called when a new scene session is being created. 28 | // Use this method to select a configuration to create the new scene with. 29 | return [[UISceneConfiguration alloc] initWithName:@"Default Configuration" sessionRole:connectingSceneSession.role]; 30 | } 31 | 32 | 33 | - (void)application:(UIApplication *)application didDiscardSceneSessions:(NSSet *)sceneSessions { 34 | // Called when the user discards a scene session. 35 | // If any sessions were discarded while the application was not running, this will be called shortly after application:didFinishLaunchingWithOptions. 36 | // Use this method to release any resources that were specific to the discarded scenes, as they will not return. 
#pragma once

// OpenCL backend for ggml: offloads mul/add and matrix multiplication to an
// OpenCL device. Commented-out declarations are retained as markers of the
// backend API surface that is not (yet) implemented for OpenCL.

#include "ggml.h"
#include "ggml-backend.h"

#ifdef __cplusplus
extern "C" {
#endif

// One-time initialization of the OpenCL platform/device/queue state.
GGML_API void ggml_cl_init(void);

// Element-wise operations on the OpenCL device: dst = src0 (op) src1.
GGML_API void ggml_cl_mul(const struct ggml_tensor * src0, const struct ggml_tensor * src1, struct ggml_tensor * dst);
GGML_API void ggml_cl_add(const struct ggml_tensor * src0, const struct ggml_tensor * src1, struct ggml_tensor * dst);
// Returns whether this src0 x src1 -> dst matmul can be handled by OpenCL.
GGML_API bool   ggml_cl_can_mul_mat(const struct ggml_tensor * src0, const struct ggml_tensor * src1, const struct ggml_tensor * dst);
// Size in bytes of the scratch workspace (wdata) ggml_cl_mul_mat needs for these operands.
GGML_API size_t ggml_cl_mul_mat_get_wsize(const struct ggml_tensor * src0, const struct ggml_tensor * src1, struct ggml_tensor * dst);
// Matrix multiplication on the OpenCL device; wdata/wsize is caller-provided scratch.
GGML_API void   ggml_cl_mul_mat(const struct ggml_tensor * src0, const struct ggml_tensor * src1, struct ggml_tensor * dst, void * wdata, size_t wsize);

// GGML_API void * ggml_cl_host_malloc(size_t size);
// GGML_API void   ggml_cl_host_free(void * ptr);

// Release device-side data associated with the tensor.
GGML_API void ggml_cl_free_data(const struct ggml_tensor* tensor);

// Upload host data and bind it to the tensor as device-resident storage.
GGML_API void ggml_cl_transform_tensor(void * data, struct ggml_tensor * tensor);

// backend API

// GGML_API ggml_backend_t ggml_backend_opencl_init(void);

// GGML_API bool ggml_backend_is_opencl(ggml_backend_t backend);

// Buffer type for allocating tensors in OpenCL device memory.
GGML_API ggml_backend_buffer_type_t ggml_backend_opencl_buffer_type(void);
// GGML_API ggml_backend_buffer_type_t ggml_backend_opencl_host_buffer_type(void);

#ifdef __cplusplus
}
#endif
package io.github.ggerganov.whispercpp;

import com.sun.jna.Structure;
import com.sun.jna.ptr.PointerByReference;
import io.github.ggerganov.whispercpp.ggml.GgmlType;
import io.github.ggerganov.whispercpp.WhisperModel;
import io.github.ggerganov.whispercpp.params.WhisperContextParams;

import java.util.List;

/**
 * JNA mapping of the native {@code whisper_context} struct.
 * <p>
 * Field declaration order defines the native memory layout, so it must mirror
 * the C struct exactly — do not reorder fields.
 * NOTE(review): {@code getFieldOrder()} is commented out below; JNA requires it
 * before this Structure can be used by value — confirm intended usage.
 */
public class WhisperContext extends Structure {
    // timing counters filled in by the native library (microseconds)
    int t_load_us = 0;
    int t_start_us = 0;

    /** weight type (FP32 / FP16 / QX) */
    GgmlType wtype = GgmlType.GGML_TYPE_F16;
    /** intermediate type (FP32 or FP16) */
    GgmlType itype = GgmlType.GGML_TYPE_F16;

    // Opaque native pointers; the Java side never dereferences them directly.
    // WhisperModel model;
    public PointerByReference model;
    // whisper_vocab vocab;
    // whisper_state * state = nullptr;
    public PointerByReference vocab;
    public PointerByReference state;

    /** populated by whisper_init_from_file_with_params() */
    String path_model;
    WhisperContextParams params;

    // public static class ByReference extends WhisperContext implements Structure.ByReference {
    // }
    //
    // public static class ByValue extends WhisperContext implements Structure.ByValue {
    // }
    //
    // @Override
    // protected List getFieldOrder() {
    //     return List.of("t_load_us", "t_start_us", "wtype", "itype", "model", "vocab", "state", "path_model");
    // }
}
coreml/whisper-decoder-impl.m 25 | sed -i '' 's/whisper_decoder_impl\.h/whisper-decoder-impl.h/g' coreml/whisper-decoder-impl.m 26 | sed -i '' 's/whisper_decoder_impl\.m/whisper-decoder-impl.m/g' coreml/whisper-decoder-impl.m 27 | sed -i '' 's/whisper_decoder_impl\.h/whisper-decoder-impl.h/g' coreml/whisper-decoder-impl.h 28 | 29 | rm -rfv models/whisper-encoder-impl.mlpackage models/whisper-decoder-impl.mlpackage 30 | -------------------------------------------------------------------------------- /ggml-cuda.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include "ggml.h" 4 | #include "ggml-backend.h" 5 | 6 | #ifdef GGML_USE_HIPBLAS 7 | #define GGML_CUDA_NAME "ROCm" 8 | #define GGML_CUBLAS_NAME "hipBLAS" 9 | #else 10 | #define GGML_CUDA_NAME "CUDA" 11 | #define GGML_CUBLAS_NAME "cuBLAS" 12 | #endif 13 | 14 | #ifdef __cplusplus 15 | extern "C" { 16 | #endif 17 | 18 | #define GGML_CUDA_MAX_DEVICES 16 19 | 20 | // backend API 21 | GGML_API GGML_CALL ggml_backend_t ggml_backend_cuda_init(int device); 22 | 23 | GGML_API GGML_CALL bool ggml_backend_is_cuda(ggml_backend_t backend); 24 | 25 | // device buffer 26 | GGML_API GGML_CALL ggml_backend_buffer_type_t ggml_backend_cuda_buffer_type(int device); 27 | 28 | // split tensor buffer that splits matrices by rows across multiple devices 29 | GGML_API GGML_CALL ggml_backend_buffer_type_t ggml_backend_cuda_split_buffer_type(const float * tensor_split); 30 | 31 | // pinned host buffer for use with the CPU backend for faster copies between CPU and GPU 32 | GGML_API GGML_CALL ggml_backend_buffer_type_t ggml_backend_cuda_host_buffer_type(void); 33 | 34 | GGML_API GGML_CALL int ggml_backend_cuda_get_device_count(void); 35 | GGML_API GGML_CALL void ggml_backend_cuda_get_device_description(int device, char * description, size_t description_size); 36 | GGML_API GGML_CALL void ggml_backend_cuda_get_device_memory(int device, size_t * free, size_t * total); 37 | 38 | 
GGML_API GGML_CALL bool ggml_backend_cuda_register_host_buffer(void * buffer, size_t size); 39 | GGML_API GGML_CALL void ggml_backend_cuda_unregister_host_buffer(void * buffer); 40 | 41 | #ifdef __cplusplus 42 | } 43 | #endif 44 | -------------------------------------------------------------------------------- /ggml-cuda/quantize.cu: -------------------------------------------------------------------------------- 1 | #include "quantize.cuh" 2 | 3 | static __global__ void quantize_q8_1(const float * __restrict__ x, void * __restrict__ vy, const int64_t kx, const int64_t kx_padded) { 4 | const int64_t ix = (int64_t)blockDim.x*blockIdx.x + threadIdx.x; 5 | 6 | if (ix >= kx_padded) { 7 | return; 8 | } 9 | 10 | const int64_t iy = (int64_t)blockDim.y*blockIdx.y + threadIdx.y; 11 | 12 | const int64_t i_padded = (int64_t)iy*kx_padded + ix; 13 | 14 | block_q8_1 * y = (block_q8_1 *) vy; 15 | 16 | const int64_t ib = i_padded / QK8_1; // block index 17 | const int64_t iqs = i_padded % QK8_1; // quant index 18 | 19 | const float xi = ix < kx ? x[iy*kx + ix] : 0.0f; 20 | float amax = fabsf(xi); 21 | float sum = xi; 22 | 23 | amax = warp_reduce_max(amax); 24 | sum = warp_reduce_sum(sum); 25 | 26 | const float d = amax / 127; 27 | const int8_t q = amax == 0.0f ? 
0 : roundf(xi / d); 28 | 29 | y[ib].qs[iqs] = q; 30 | 31 | if (iqs > 0) { 32 | return; 33 | } 34 | 35 | reinterpret_cast(y[ib].ds.x) = d; 36 | reinterpret_cast(y[ib].ds.y) = sum; 37 | } 38 | 39 | void quantize_row_q8_1_cuda(const float * x, void * vy, const int64_t kx, const int64_t ky, const int64_t kx_padded, cudaStream_t stream) { 40 | const int64_t block_num_x = (kx_padded + CUDA_QUANTIZE_BLOCK_SIZE - 1) / CUDA_QUANTIZE_BLOCK_SIZE; 41 | const dim3 num_blocks(block_num_x, ky, 1); 42 | const dim3 block_size(CUDA_QUANTIZE_BLOCK_SIZE, 1, 1); 43 | quantize_q8_1<<>>(x, vy, kx, kx_padded); 44 | } 45 | 46 | -------------------------------------------------------------------------------- /examples/talk/README.md: -------------------------------------------------------------------------------- 1 | # talk 2 | 3 | Talk with an Artificial Intelligence in your terminal 4 | 5 | [Demo Talk](https://user-images.githubusercontent.com/1991296/206805012-48e71cc2-588d-4745-8798-c1c70ea3b40d.mp4) 6 | 7 | Web version: [examples/talk.wasm](/examples/talk.wasm) 8 | 9 | ## Building 10 | 11 | The `talk` tool depends on SDL2 library to capture audio from the microphone. 
You can build it like this: 12 | 13 | ```bash 14 | # Install SDL2 15 | # On Debian based linux distributions: 16 | sudo apt-get install libsdl2-dev 17 | 18 | # On Fedora Linux: 19 | sudo dnf install SDL2 SDL2-devel 20 | 21 | # Install SDL2 on Mac OS 22 | brew install sdl2 23 | 24 | # Build the "talk" executable 25 | make talk 26 | 27 | # Run it 28 | ./talk -p Santa 29 | ``` 30 | 31 | ## GPT-2 32 | 33 | To run this, you will need a ggml GPT-2 model: [instructions](https://github.com/ggerganov/ggml/tree/master/examples/gpt-2#downloading-and-converting-the-original-models) 34 | 35 | Alternatively, you can simply download the smallest ggml GPT-2 117M model (240 MB) like this: 36 | 37 | ``` 38 | wget --quiet --show-progress -O models/ggml-gpt-2-117M.bin https://huggingface.co/ggerganov/ggml/resolve/main/ggml-model-gpt-2-117M.bin 39 | ``` 40 | 41 | ## TTS 42 | 43 | For best experience, this example needs a TTS tool to convert the generated text responses to voice. 44 | You can use any TTS engine that you would like - simply edit the [speak](speak) script to your needs. 45 | By default, it is configured to use MacOS's `say` or `espeak` or Windows SpeechSynthesizer, but you can use whatever you wish. 
46 | -------------------------------------------------------------------------------- /bindings/ruby/ext/extconf.rb: -------------------------------------------------------------------------------- 1 | require 'mkmf' 2 | system("cp #{File.join(File.dirname(__FILE__),'..','..','..','whisper.cpp')} .") 3 | system("cp #{File.join(File.dirname(__FILE__),'..','..','..','whisper.h')} .") 4 | system("cp #{File.join(File.dirname(__FILE__),'..','..','..','ggml.h')} .") 5 | system("cp #{File.join(File.dirname(__FILE__),'..','..','..','ggml.c')} .") 6 | system("cp #{File.join(File.dirname(__FILE__),'..','..','..','ggml-impl.h')} .") 7 | system("cp #{File.join(File.dirname(__FILE__),'..','..','..','ggml-alloc.h')} .") 8 | system("cp #{File.join(File.dirname(__FILE__),'..','..','..','ggml-alloc.c')} .") 9 | system("cp #{File.join(File.dirname(__FILE__),'..','..','..','ggml-backend-impl.h')} .") 10 | system("cp #{File.join(File.dirname(__FILE__),'..','..','..','ggml-backend.h')} .") 11 | system("cp #{File.join(File.dirname(__FILE__),'..','..','..','ggml-backend.c')} .") 12 | system("cp #{File.join(File.dirname(__FILE__),'..','..','..','ggml-common.h')} .") 13 | system("cp #{File.join(File.dirname(__FILE__),'..','..','..','ggml-quants.h')} .") 14 | system("cp #{File.join(File.dirname(__FILE__),'..','..','..','ggml-quants.c')} .") 15 | system("cp #{File.join(File.dirname(__FILE__),'..','..','..','examples','dr_wav.h')} .") 16 | 17 | 18 | # need to use c++ compiler flags 19 | $CXXFLAGS << ' -std=c++11' 20 | # Set to true when building binary gems 21 | if enable_config('static-stdlib', false) 22 | $LDFLAGS << ' -static-libgcc -static-libstdc++' 23 | end 24 | 25 | if enable_config('march-tune-native', false) 26 | $CFLAGS << ' -march=native -mtune=native' 27 | $CXXFLAGS << ' -march=native -mtune=native' 28 | end 29 | 30 | create_makefile('whisper') 31 | -------------------------------------------------------------------------------- /examples/talk/speak: 
-------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Usage: 4 | # speak 5 | 6 | function installed() { command -v $1 >/dev/null 2>&1; } 7 | 8 | if installed espeak; then 9 | espeak -v en-us+m$1 -s 225 -p 50 -a 200 -g 5 -k 5 -f $2 10 | 11 | elif installed piper && installed aplay; then 12 | cat $2 | piper --model ~/en_US-lessac-medium.onnx --output-raw | aplay -q -r 22050 -f S16_LE -t raw - 13 | 14 | # for Mac 15 | elif installed say; then 16 | say -f $2 17 | 18 | # Eleven Labs 19 | elif installed python3 && \ 20 | python3 -c 'import importlib.util; exit(not importlib.util.find_spec("elevenlabs"))' && \ 21 | installed ffplay; then 22 | # It's possible to use the API for free with limited number of characters. 23 | # To increase this limit register to https://beta.elevenlabs.io to get an api key 24 | # and paste it after 'ELEVEN_API_KEY=' 25 | # Keep the line commented to use the free version without api key 26 | #export ELEVEN_API_KEY=your_api_key 27 | wd=$(dirname $0) 28 | script=$wd/eleven-labs.py 29 | python3 $script -q -p -v $1 $2 >/dev/null 2>&1 30 | 31 | # Uncomment to keep the audio file 32 | #python3 $script -q -s ./audio.mp3 -v $1 $2 >/dev/null 2>&1 33 | #ffplay -autoexit -nodisp -loglevel quiet -hide_banner -i ./audio.mp3 >/dev/null 2>&1 34 | 35 | else 36 | echo 'Install espeak ("brew install espeak" or "apt-get install espeak"),' 37 | echo 'piper ("pip install piper-tts" or https://github.com/rhasspy/piper) with aplay,' 38 | echo 'or elevenlabs ("pip install elevenlabs") with ffplay.' 
39 | echo '(export ELEVEN_API_KEY if you have an api key from https://beta.elevenlabs.io)' 40 | fi 41 | -------------------------------------------------------------------------------- /examples/talk-llama/speak: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Usage: 4 | # speak 5 | 6 | function installed() { command -v $1 >/dev/null 2>&1; } 7 | 8 | if installed espeak; then 9 | espeak -v en-us+m$1 -s 225 -p 50 -a 200 -g 5 -k 5 -f $2 10 | 11 | elif installed piper && installed aplay; then 12 | cat $2 | piper --model ~/en_US-lessac-medium.onnx --output-raw | aplay -q -r 22050 -f S16_LE -t raw - 13 | 14 | # for Mac 15 | elif installed say; then 16 | say -f $2 17 | 18 | # Eleven Labs 19 | elif installed python3 && \ 20 | python3 -c 'import importlib.util; exit(not importlib.util.find_spec("elevenlabs"))' && \ 21 | installed ffplay; then 22 | # It's possible to use the API for free with limited number of characters. 23 | # To increase this limit register to https://beta.elevenlabs.io to get an api key 24 | # and paste it after 'ELEVEN_API_KEY=' 25 | # Keep the line commented to use the free version without api key 26 | #export ELEVEN_API_KEY=your_api_key 27 | wd=$(dirname $0) 28 | script=$wd/eleven-labs.py 29 | python3 $script -q -p -v $1 $2 >/dev/null 2>&1 30 | 31 | # Uncomment to keep the audio file 32 | #python3 $script -q -s ./audio.mp3 -v $1 $2 >/dev/null 2>&1 33 | #ffplay -autoexit -nodisp -loglevel quiet -hide_banner -i ./audio.mp3 >/dev/null 2>&1 34 | 35 | else 36 | echo 'Install espeak ("brew install espeak" or "apt-get install espeak"),' 37 | echo 'piper ("pip install piper-tts" or https://github.com/rhasspy/piper) with aplay,' 38 | echo 'or elevenlabs ("pip install elevenlabs") with ffplay.' 
39 | echo '(export ELEVEN_API_KEY if you have an api key from https://beta.elevenlabs.io)' 40 | fi 41 | -------------------------------------------------------------------------------- /models/download-ggml-model.cmd: -------------------------------------------------------------------------------- 1 | @echo off 2 | 3 | pushd %~dp0 4 | set models_path=%CD% 5 | for %%d in (%~dp0..) do set root_path=%%~fd 6 | popd 7 | 8 | set argc=0 9 | for %%x in (%*) do set /A argc+=1 10 | 11 | set models=tiny.en tiny base.en base small.en small medium.en medium large-v1 large-v2 large-v3 12 | 13 | if %argc% neq 1 ( 14 | echo. 15 | echo Usage: download-ggml-model.cmd model 16 | CALL :list_models 17 | goto :eof 18 | ) 19 | 20 | set model=%1 21 | 22 | for %%b in (%models%) do ( 23 | if "%%b"=="%model%" ( 24 | CALL :download_model 25 | goto :eof 26 | ) 27 | ) 28 | 29 | echo Invalid model: %model% 30 | CALL :list_models 31 | goto :eof 32 | 33 | :download_model 34 | echo Downloading ggml model %model%... 35 | 36 | cd "%models_path%" 37 | 38 | if exist "ggml-%model%.bin" ( 39 | echo Model %model% already exists. Skipping download. 40 | goto :eof 41 | ) 42 | 43 | PowerShell -NoProfile -ExecutionPolicy Bypass -Command "Start-BitsTransfer -Source https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-%model%.bin -Destination ggml-%model%.bin" 44 | 45 | if %ERRORLEVEL% neq 0 ( 46 | echo Failed to download ggml model %model% 47 | echo Please try again later or download the original Whisper model files and convert them yourself. 48 | goto :eof 49 | ) 50 | 51 | echo Done! Model %model% saved in %root_path%\models\ggml-%model%.bin 52 | echo You can now use it like this: 53 | echo main.exe -m %root_path%\models\ggml-%model%.bin -f %root_path%\samples\jfk.wav 54 | 55 | goto :eof 56 | 57 | :list_models 58 | echo. 59 | echo Available models: 60 | (for %%a in (%models%) do ( 61 | echo %%a 62 | )) 63 | echo. 
64 | exit /b 65 | -------------------------------------------------------------------------------- /examples/sycl/README.md: -------------------------------------------------------------------------------- 1 | # llama.cpp/example/sycl 2 | 3 | This example program provide the tools for llama.cpp for SYCL on Intel GPU. 4 | 5 | ## Tool 6 | 7 | |Tool Name| Function|Status| 8 | |-|-|-| 9 | |ls-sycl-device| List all SYCL devices with ID, compute capability, max work group size, ect.|Support| 10 | 11 | ### ls-sycl-device 12 | 13 | List all SYCL devices with ID, compute capability, max work group size, ect. 14 | 15 | 1. Build the llama.cpp for SYCL for all targets. 16 | 17 | 2. Enable oneAPI running environment 18 | 19 | ``` 20 | source /opt/intel/oneapi/setvars.sh 21 | ``` 22 | 23 | 3. Execute 24 | 25 | ``` 26 | ./build/bin/ls-sycl-device 27 | ``` 28 | 29 | Check the ID in startup log, like: 30 | 31 | ``` 32 | found 4 SYCL devices: 33 | Device 0: Intel(R) Arc(TM) A770 Graphics, compute capability 1.3, 34 | max compute_units 512, max work group size 1024, max sub group size 32, global mem size 16225243136 35 | Device 1: Intel(R) FPGA Emulation Device, compute capability 1.2, 36 | max compute_units 24, max work group size 67108864, max sub group size 64, global mem size 67065057280 37 | Device 2: 13th Gen Intel(R) Core(TM) i7-13700K, compute capability 3.0, 38 | max compute_units 24, max work group size 8192, max sub group size 64, global mem size 67065057280 39 | Device 3: Intel(R) Arc(TM) A770 Graphics, compute capability 3.0, 40 | max compute_units 512, max work group size 1024, max sub group size 32, global mem size 16225243136 41 | 42 | ``` 43 | 44 | |Attribute|Note| 45 | |-|-| 46 | |compute capability 1.3|Level-zero running time, recommended | 47 | |compute capability 3.0|OpenCL running time, slower than level-zero in most cases| -------------------------------------------------------------------------------- 
/examples/whisper.android.java/app/src/main/res/drawable-v24/ic_launcher_foreground.xml: -------------------------------------------------------------------------------- 1 | 7 | 8 | 9 | 15 | 18 | 21 | 22 | 23 | 24 | 30 | -------------------------------------------------------------------------------- /examples/wchess/wchess.wasm/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | set(TARGET wchess.wasm) 2 | 3 | add_executable(${TARGET} 4 | wchess.wasm.cpp 5 | ) 6 | 7 | include(DefaultTargetOptions) 8 | 9 | target_link_libraries(${TARGET} PRIVATE 10 | common 11 | wchess-core 12 | ) 13 | 14 | unset(EXTRA_FLAGS) 15 | 16 | if (WHISPER_WASM_SINGLE_FILE) 17 | set(EXTRA_FLAGS "-s SINGLE_FILE=1") 18 | message(STATUS "Embedding WASM inside chess.js") 19 | 20 | add_custom_command( 21 | TARGET ${TARGET} POST_BUILD 22 | COMMAND ${CMAKE_COMMAND} -E copy 23 | ${CMAKE_BINARY_DIR}/bin/${TARGET}.js 24 | ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/${TARGET}/js/chess.js 25 | ) 26 | endif() 27 | 28 | set_target_properties(${TARGET} PROPERTIES LINK_FLAGS " \ 29 | --bind \ 30 | -s USE_PTHREADS=1 \ 31 | -s PTHREAD_POOL_SIZE=8 \ 32 | -s INITIAL_MEMORY=1024MB \ 33 | -s TOTAL_MEMORY=1024MB \ 34 | -s FORCE_FILESYSTEM=1 \ 35 | -s EXPORTED_RUNTIME_METHODS=\"['print', 'printErr', 'ccall', 'cwrap']\" \ 36 | ${EXTRA_FLAGS} \ 37 | ") 38 | 39 | 40 | add_custom_command( 41 | TARGET ${TARGET} POST_BUILD 42 | COMMAND ${CMAKE_COMMAND} -E copy_directory 43 | ${CMAKE_CURRENT_SOURCE_DIR}/chessboardjs-1.0.0 44 | ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/${TARGET}/ 45 | COMMAND ${CMAKE_COMMAND} -E copy 46 | ${CMAKE_CURRENT_SOURCE_DIR}/jquery-3.7.1.min.js 47 | ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/${TARGET}/js/ 48 | ) 49 | 50 | configure_file(${CMAKE_CURRENT_SOURCE_DIR}/index-tmpl.html ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/${TARGET}/index.html @ONLY) 51 | configure_file(${CMAKE_SOURCE_DIR}/examples/helpers.js ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/${TARGET}/js/helpers.js @ONLY) 52 | 
-------------------------------------------------------------------------------- /examples/whisper.android.java/app/build.gradle: -------------------------------------------------------------------------------- 1 | plugins { 2 | id 'com.android.application' 3 | } 4 | 5 | android { 6 | compileSdkVersion 30 7 | buildToolsVersion '30.0.3' 8 | 9 | defaultConfig { 10 | applicationId "com.litongjava.whisper.android.java" 11 | minSdkVersion 21 12 | targetSdkVersion 30 13 | versionCode 1 14 | versionName "1.0" 15 | 16 | testInstrumentationRunner "androidx.test.runner.AndroidJUnitRunner" 17 | externalNativeBuild { 18 | cmake { 19 | cppFlags "" 20 | } 21 | } 22 | ndk { 23 | abiFilters 'arm64-v8a', 'armeabi-v7a', 'x86', 'x86_64' 24 | } 25 | } 26 | 27 | buildTypes { 28 | release { 29 | signingConfig signingConfigs.debug 30 | minifyEnabled true 31 | proguardFiles getDefaultProguardFile('proguard-android-optimize.txt'), 'proguard-rules.pro' 32 | } 33 | } 34 | externalNativeBuild { 35 | cmake { 36 | path "src/main/jni/whisper/CMakeLists.txt" 37 | } 38 | } 39 | ndkVersion "25.2.9519653" 40 | compileOptions { 41 | sourceCompatibility JavaVersion.VERSION_1_8 42 | targetCompatibility JavaVersion.VERSION_1_8 43 | } 44 | } 45 | 46 | dependencies { 47 | implementation 'androidx.appcompat:appcompat:1.1.0' 48 | implementation 'com.google.android.material:material:1.1.0' 49 | implementation 'androidx.constraintlayout:constraintlayout:1.1.3' 50 | testImplementation 'junit:junit:4.+' 51 | androidTestImplementation 'androidx.test.ext:junit:1.1.5' 52 | androidTestImplementation 'androidx.test.espresso:espresso-core:3.5.1' 53 | 54 | //litongjava 55 | implementation 'com.litongjava:android-view-inject:1.0' 56 | implementation 'com.litongjava:jfinal-aop:1.0.1' 57 | implementation 'com.litongjava:litongjava-android-utils:1.0.0' 58 | } -------------------------------------------------------------------------------- /examples/whisper.objc/whisper.objc/Base.lproj/LaunchScreen.storyboard: 
-------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | -------------------------------------------------------------------------------- /examples/whisper.android/app/src/main/res/drawable/ic_launcher_foreground.xml: -------------------------------------------------------------------------------- 1 | 7 | 8 | 9 | 15 | 18 | 21 | 22 | 23 | 24 | 30 | -------------------------------------------------------------------------------- /tests/es-0-ref.txt: -------------------------------------------------------------------------------- 1 | Hola, como están todos? Mi nombre es Julián Virrueta Mendoza y en este podcast les vengo a hablar sobre la contaminación del agua. Bueno, empezaré por decir que el ser humano no está midiendo las consecuencias de sus actos. No hay duda que uno de los mayores problemas a los que se enfrentan muchas poblaciones actualmente es la contaminación del agua. Principalmente porque como bien sabemos el agua prácticamente es fundamental para la vida, por lo que la contaminación puede ser algo muy negativo para el desarrollo tanto económico como social de los pueblos o de las poblaciones próximas en ese lugar contaminado. Los comienzos de la contaminación, como lo definen muchos expertos en la materia, la contaminación del agua es causada por las actividades humanas. Es un fenómeno ambiental de importancia, el cual se comienza a producir desde los primeros intentos de industrialización para transformarse luego en un problema tan habitual como generalizado. Generalmente la contaminación del agua se produce a través de la introducción directa o indirecta en los acuíferos o caos de agua, ríos, mares, lagos, océanos, etc. o de diversas sustancias que pueden ser consideradas como contaminantes. Pero existen dos formas principales de contaminación del agua. 
Una de ellas tiene que ver con la contaminación natural del agua que se corresponde con el ciclo natural de esta durante el que puede entrar en contacto con ciertos constituyentes contaminantes como sustancias minerales y orgánicas disueltas o en suspensión que se vierten en la corteza terrestre, la atmósfera y en las aguas. Pero todo esto se puede contradecir si el ser humano comía sus consecuencias, si no tirara basura a los lagos, a los ríos, no tirara botes de aceite, no contaminara. Bueno amigos, yo los invito a que no contaminen el agua y que sepan cuidar la naturaleza. Los saluda su buen amigo y compañero Julián Virreta. Nos vemos. ¡Claro! -------------------------------------------------------------------------------- /tests/test-whisper.js: -------------------------------------------------------------------------------- 1 | var factory = require('../bindings/javascript/whisper.js') 2 | 3 | factory().then(function(whisper) { 4 | var fs = require('fs'); 5 | 6 | // to avoid reading WAV files and depending on some 3rd-party package, we read 7 | // 32-bit float PCM directly. 
to genereate it: 8 | // 9 | // $ ffmpeg -i samples/jfk.wav -f f32le -acodec pcm_f32le samples/jfk.pcmf32 10 | // 11 | let fname_wav = "../samples/jfk.pcmf32"; 12 | let fname_model = "../models/ggml-base.en.bin"; 13 | 14 | // init whisper 15 | { 16 | // read binary data from file 17 | var model_data = fs.readFileSync(fname_model); 18 | if (model_data == null) { 19 | console.log("whisper: failed to read model file"); 20 | process.exit(1); 21 | } 22 | 23 | // write binary data to WASM memory 24 | whisper.FS_createDataFile("/", "whisper.bin", model_data, true, true); 25 | 26 | // init the model 27 | var ret = whisper.init("whisper.bin"); 28 | if (ret == false) { 29 | console.log('whisper: failed to init'); 30 | process.exit(1); 31 | } 32 | } 33 | 34 | // transcribe wav file 35 | { 36 | // read raw binary data 37 | var pcm_data = fs.readFileSync(fname_wav); 38 | if (pcm_data == null) { 39 | console.log("whisper: failed to read wav file"); 40 | process.exit(1); 41 | } 42 | 43 | // convert to 32-bit float array 44 | var pcm = new Float32Array(pcm_data.buffer); 45 | 46 | // transcribe 47 | var ret = whisper.full_default(pcm, "en", false); 48 | if (ret != 0) { 49 | console.log("whisper: failed to transcribe"); 50 | process.exit(1); 51 | } 52 | } 53 | 54 | // free memory 55 | { 56 | whisper.free(); 57 | } 58 | }); 59 | -------------------------------------------------------------------------------- /examples/wchess/libwchess/WChess.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include "whisper.h" 3 | #include 4 | #include 5 | #include 6 | 7 | class Chessboard; 8 | 9 | class WChess { 10 | public: 11 | using CheckRunningCb = bool (*)(); 12 | using GetAudioCb = bool (*)(std::vector &); 13 | using SetMovesCb = void (*)(const std::string &, float); 14 | using SetGrammarCb = void (*)(const std::string &); 15 | using ClearAudioCb = void (*)(); 16 | 17 | struct callbacks { 18 | GetAudioCb get_audio = nullptr; 19 | 
SetMovesCb set_move = nullptr; 20 | SetGrammarCb set_grammar = nullptr; 21 | }; 22 | 23 | struct settings { 24 | int32_t vad_ms = 2000; 25 | int32_t prompt_ms = 5000; 26 | int32_t command_ms = 4000; 27 | float vad_thold = 0.2f; 28 | float freq_thold = 100.0f; 29 | bool print_energy = false; 30 | }; 31 | 32 | WChess( 33 | whisper_context * ctx, 34 | const whisper_full_params & wparams, 35 | callbacks cb, 36 | settings s 37 | ); 38 | ~WChess(); 39 | 40 | void run(); 41 | 42 | std::string stringify_board() const; 43 | 44 | std::string get_grammar() const; 45 | 46 | private: 47 | bool get_audio(std::vector& pcmf32) const; 48 | void set_move(const std::string& moves, float prob) const; 49 | void set_grammar(const std::string& grammar) const; 50 | 51 | std::string transcribe( 52 | const std::vector & pcmf32, 53 | float & logprob_min, 54 | float & logprob_sum, 55 | int & n_tokens, 56 | int64_t & t_ms); 57 | 58 | whisper_context * m_ctx; 59 | whisper_full_params m_wparams; 60 | const callbacks m_cb; 61 | const settings m_settings; 62 | std::unique_ptr m_board; 63 | }; 64 | -------------------------------------------------------------------------------- /ggml-cuda/diagmask.cu: -------------------------------------------------------------------------------- 1 | #include "diagmask.cuh" 2 | 3 | static __global__ void diag_mask_inf_f32(const float * x, float * dst, const int ncols, const int rows_per_channel, const int n_past) { 4 | const int col = blockDim.y*blockIdx.y + threadIdx.y; 5 | const int row = blockDim.x*blockIdx.x + threadIdx.x; 6 | 7 | if (col >= ncols) { 8 | return; 9 | } 10 | 11 | const int i = row*ncols + col; 12 | //dst[i] = col > (n_past + row % rows_per_channel) ? 
-INFINITY : x[i]; 13 | //dst[i] = x[i] - (col > n_past + row % rows_per_channel) * INT_MAX; // equivalent within rounding error but slightly faster on GPU 14 | dst[i] = x[i] - (col > n_past + row % rows_per_channel) * FLT_MAX; 15 | } 16 | 17 | static void diag_mask_inf_f32_cuda(const float * x, float * dst, const int ncols_x, const int nrows_x, const int rows_per_channel, const int n_past, cudaStream_t stream) { 18 | const dim3 block_dims(1, CUDA_DIAG_MASK_INF_BLOCK_SIZE, 1); 19 | const int block_num_x = (ncols_x + CUDA_DIAG_MASK_INF_BLOCK_SIZE - 1) / CUDA_DIAG_MASK_INF_BLOCK_SIZE; 20 | const dim3 block_nums(nrows_x, block_num_x, 1); 21 | diag_mask_inf_f32<<>>(x, dst, ncols_x, rows_per_channel, n_past); 22 | } 23 | 24 | void ggml_cuda_op_diag_mask_inf(ggml_backend_cuda_context & ctx, ggml_tensor * dst) { 25 | const ggml_tensor * src0 = dst->src[0]; 26 | const float * src0_d = (const float *)src0->data; 27 | float * dst_d = (float *)dst->data; 28 | cudaStream_t stream = ctx.stream(); 29 | 30 | GGML_ASSERT(src0->type == GGML_TYPE_F32); 31 | GGML_ASSERT( dst->type == GGML_TYPE_F32); 32 | 33 | const int64_t ne00 = src0->ne[0]; 34 | const int64_t ne01 = src0->ne[1]; 35 | const int nrows0 = ggml_nrows(src0); 36 | 37 | const int n_past = ((int32_t *) dst->op_params)[0]; 38 | 39 | diag_mask_inf_f32_cuda(src0_d, dst_d, ne00, nrows0, ne01, n_past, stream); 40 | } 41 | -------------------------------------------------------------------------------- /bindings/java/src/main/java/io/github/ggerganov/whispercpp/model/WhisperModelLoader.java: -------------------------------------------------------------------------------- 1 | package io.github.ggerganov.whispercpp.model; 2 | 3 | import com.sun.jna.Callback; 4 | import com.sun.jna.Pointer; 5 | import com.sun.jna.Structure; 6 | 7 | 8 | public class WhisperModelLoader extends Structure { 9 | public Pointer context; 10 | public ReadFunction read; 11 | public EOFFunction eof; 12 | public CloseFunction close; 13 | 14 | public static 
class ReadFunction implements Callback { 15 | public Pointer invoke(Pointer ctx, Pointer output, int readSize) { 16 | // TODO 17 | return ctx; 18 | } 19 | } 20 | 21 | public static class EOFFunction implements Callback { 22 | public boolean invoke(Pointer ctx) { 23 | // TODO 24 | return false; 25 | } 26 | } 27 | 28 | public static class CloseFunction implements Callback { 29 | public void invoke(Pointer ctx) { 30 | // TODO 31 | } 32 | } 33 | 34 | // public WhisperModelLoader(Pointer p) { 35 | // super(p); 36 | // read = new ReadFunction(); 37 | // eof = new EOFFunction(); 38 | // close = new CloseFunction(); 39 | // read.setCallback(this); 40 | // eof.setCallback(this); 41 | // close.setCallback(this); 42 | // read.write(); 43 | // eof.write(); 44 | // close.write(); 45 | // } 46 | 47 | public WhisperModelLoader() { 48 | super(); 49 | } 50 | 51 | public interface ReadCallback extends Callback { 52 | Pointer invoke(Pointer ctx, Pointer output, int readSize); 53 | } 54 | 55 | public interface EOFCallback extends Callback { 56 | boolean invoke(Pointer ctx); 57 | } 58 | 59 | public interface CloseCallback extends Callback { 60 | void invoke(Pointer ctx); 61 | } 62 | } 63 | -------------------------------------------------------------------------------- /.github/workflows/docker.yml: -------------------------------------------------------------------------------- 1 | name: Publish Docker image 2 | 3 | on: 4 | pull_request: 5 | push: 6 | branches: 7 | - master 8 | 9 | jobs: 10 | push_to_registry: 11 | name: Push Docker image to Docker Hub 12 | if: github.event.pull_request.draft == false 13 | 14 | runs-on: ubuntu-latest 15 | env: 16 | COMMIT_SHA: ${{ github.sha }} 17 | strategy: 18 | matrix: 19 | config: 20 | - { tag: "main", dockerfile: ".devops/main.Dockerfile", platform: "linux/amd64,linux/arm64" } 21 | - { tag: "main-cuda", dockerfile: ".devops/main-cuda.Dockerfile", platform: "linux/amd64" } 22 | 23 | steps: 24 | - name: Check out the repo 25 | uses: 
actions/checkout@v3 26 | 27 | - name: Set up QEMU 28 | uses: docker/setup-qemu-action@v3 29 | 30 | - name: Set up Docker Buildx 31 | uses: docker/setup-buildx-action@v3 32 | 33 | - name: Log in to Docker Hub 34 | uses: docker/login-action@v3 35 | with: 36 | registry: ghcr.io 37 | username: ${{ github.repository_owner }} 38 | password: ${{ secrets.GITHUB_TOKEN }} 39 | 40 | - name: Build and push Docker image (versioned) 41 | if: github.event_name == 'push' 42 | uses: docker/build-push-action@v5 43 | with: 44 | context: . 45 | push: true 46 | platforms: ${{ matrix.config.platforms }} 47 | tags: "ghcr.io/${{ github.repository }}:${{ matrix.config.tag }}-${{ env.COMMIT_SHA }}" 48 | file: ${{ matrix.config.dockerfile }} 49 | 50 | - name: Build and push Docker image (tagged) 51 | uses: docker/build-push-action@v4 52 | with: 53 | context: . 54 | push: ${{ github.event_name == 'push' }} 55 | platforms: ${{ matrix.config.platforms }} 56 | tags: "ghcr.io/${{ github.repository }}:${{ matrix.config.tag }}" 57 | file: ${{ matrix.config.dockerfile }} 58 | -------------------------------------------------------------------------------- /examples/whisper.android.java/app/src/main/jni/whisper/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | cmake_minimum_required(VERSION 3.10) 2 | 3 | project(whisper.cpp) 4 | 5 | set(CMAKE_CXX_STANDARD 11) 6 | set(WHISPER_LIB_DIR ${CMAKE_SOURCE_DIR}/../../../../../../../) 7 | 8 | set( 9 | SOURCE_FILES 10 | ${WHISPER_LIB_DIR}/ggml.c 11 | ${WHISPER_LIB_DIR}/ggml-alloc.c 12 | ${WHISPER_LIB_DIR}/ggml-backend.c 13 | ${WHISPER_LIB_DIR}/ggml-quants.c 14 | ${WHISPER_LIB_DIR}/whisper.cpp 15 | ${CMAKE_SOURCE_DIR}/jni.c 16 | ) 17 | 18 | find_library(LOG_LIB log) 19 | 20 | function(build_library target_name) 21 | add_library( 22 | ${target_name} 23 | SHARED 24 | ${SOURCE_FILES} 25 | ) 26 | 27 | target_link_libraries(${target_name} ${LOG_LIB} android) 28 | 29 | if (${target_name} STREQUAL "whisper_v8fp16_va") 
30 | target_compile_options(${target_name} PRIVATE -march=armv8.2-a+fp16) 31 | elseif (${target_name} STREQUAL "whisper_vfpv4") 32 | target_compile_options(${target_name} PRIVATE -mfpu=neon-vfpv4) 33 | endif () 34 | 35 | if (NOT ${CMAKE_BUILD_TYPE} STREQUAL "Debug") 36 | 37 | target_compile_options(${target_name} PRIVATE -O3) 38 | target_compile_options(${target_name} PRIVATE -fvisibility=hidden -fvisibility-inlines-hidden) 39 | target_compile_options(${target_name} PRIVATE -ffunction-sections -fdata-sections) 40 | 41 | #target_link_options(${target_name} PRIVATE -Wl,--gc-sections) 42 | #target_link_options(${target_name} PRIVATE -Wl,--exclude-libs,ALL) 43 | #target_link_options(${target_name} PRIVATE -flto) 44 | 45 | endif () 46 | endfunction() 47 | 48 | build_library("whisper") # Default target 49 | 50 | if (${ANDROID_ABI} STREQUAL "arm64-v8a") 51 | build_library("whisper_v8fp16_va") 52 | elseif (${ANDROID_ABI} STREQUAL "armeabi-v7a") 53 | build_library("whisper_vfpv4") 54 | endif () 55 | 56 | include_directories(${WHISPER_LIB_DIR}) 57 | -------------------------------------------------------------------------------- /examples/whisper.wasm/README.md: -------------------------------------------------------------------------------- 1 | # whisper.wasm 2 | 3 | Inference of [OpenAI's Whisper ASR model](https://github.com/openai/whisper) inside the browser 4 | 5 | This example uses a WebAssembly (WASM) port of the [whisper.cpp](https://github.com/ggerganov/whisper.cpp) 6 | implementation of the transformer to run the inference inside a web page. The audio data does not leave your computer - 7 | it is processed locally on your machine. The performance is not great but you should be able to achieve x2 or x3 8 | real-time for the `tiny` and `base` models on a modern CPU and browser (i.e. transcribe a 60 seconds audio in about 9 | ~20-30 seconds). 
10 | 11 | This WASM port utilizes [WASM SIMD 128-bit intrinsics](https://emcc.zcopy.site/docs/porting/simd/) so you have to make 12 | sure that [your browser supports them](https://webassembly.org/roadmap/). 13 | 14 | The example is capable of running all models up to size `small` inclusive. Beyond that, the memory requirements and 15 | performance are unsatisfactory. The implementation currently support only the `Greedy` sampling strategy. Both 16 | transcription and translation are supported. 17 | 18 | Since the model data is quite big (74MB for the `tiny` model) you need to manually load the model into the web-page. 19 | 20 | The example supports both loading audio from a file and recording audio from the microphone. The maximum length of the 21 | audio is limited to 120 seconds. 22 | 23 | ## Live demo 24 | 25 | Link: https://whisper.ggerganov.com 26 | 27 | ![image](https://user-images.githubusercontent.com/1991296/197348344-1a7fead8-3dae-4922-8b06-df223a206603.png) 28 | 29 | ## Build instructions 30 | 31 | ```bash (v3.1.2) 32 | # build using Emscripten 33 | git clone https://github.com/ggerganov/whisper.cpp 34 | cd whisper.cpp 35 | mkdir build-em && cd build-em 36 | emcmake cmake .. 37 | make -j 38 | 39 | # copy the produced page to your HTTP path 40 | cp bin/whisper.wasm/* /path/to/html/ 41 | cp bin/libmain.worker.js /path/to/html/ 42 | ``` 43 | --------------------------------------------------------------------------------