├── .gitmodules ├── models ├── .gitignore ├── requirements-openvino.txt ├── requirements-coreml.txt ├── for-tests-ggml-base.bin ├── for-tests-ggml-tiny.bin ├── for-tests-ggml-base.en.bin ├── for-tests-ggml-large.bin ├── for-tests-ggml-medium.bin ├── for-tests-ggml-small.bin ├── for-tests-ggml-tiny.en.bin ├── for-tests-ggml-medium.en.bin └── for-tests-ggml-small.en.bin ├── bindings ├── go │ ├── .gitignore │ ├── samples │ │ └── jfk.wav │ ├── pkg │ │ └── whisper │ │ │ ├── doc.go │ │ │ ├── util_test.go │ │ │ └── consts.go │ ├── doc.go │ ├── go.mod │ └── examples │ │ ├── go-whisper │ │ └── color.go │ │ └── go-model-download │ │ └── context.go ├── javascript │ ├── .gitignore │ ├── package.json │ └── package-tmpl.json ├── ruby │ ├── .gitignore │ ├── tests │ │ ├── jfk_reader │ │ │ ├── extconf.rb │ │ │ └── .gitignore │ │ ├── test_error.rb │ │ └── helper.rb │ ├── ext │ │ ├── .gitignore │ │ ├── metal.mk │ │ ├── cpu.mk │ │ └── ruby_whisper.h │ └── extsources.rb ├── java │ ├── settings.gradle │ ├── gradle │ │ └── wrapper │ │ │ ├── gradle-wrapper.jar │ │ │ └── gradle-wrapper.properties │ ├── src │ │ ├── main │ │ │ └── java │ │ │ │ └── io │ │ │ │ └── github │ │ │ │ └── ggerganov │ │ │ │ └── whispercpp │ │ │ │ ├── ggml │ │ │ │ ├── GgmlTensor.java │ │ │ │ └── GgmlType.java │ │ │ │ ├── model │ │ │ │ ├── WhisperState.java │ │ │ │ └── EModel.java │ │ │ │ ├── params │ │ │ │ ├── WhisperFilters.java │ │ │ │ ├── WhisperSamplingStrategy.java │ │ │ │ ├── WhisperHParams.java │ │ │ │ ├── GreedyParams.java │ │ │ │ ├── BeamSearchParams.java │ │ │ │ └── CBool.java │ │ │ │ └── callbacks │ │ │ │ └── WhisperProgressCallback.java │ │ └── test │ │ │ └── java │ │ │ └── io │ │ │ └── github │ │ │ └── ggerganov │ │ │ └── whispercpp │ │ │ └── WhisperJnaLibraryTest.java │ └── gradle.properties └── CMakeLists.txt ├── examples ├── whisper.android │ ├── app │ │ ├── .gitignore │ │ ├── src │ │ │ ├── main │ │ │ │ ├── res │ │ │ │ │ ├── values │ │ │ │ │ │ ├── strings.xml │ │ │ │ │ │ └── themes.xml │ │ │ │ │ ├── 
mipmap-anydpi │ │ │ │ │ │ └── ic_launcher.xml │ │ │ │ │ └── xml │ │ │ │ │ │ ├── backup_rules.xml │ │ │ │ │ │ └── data_extraction_rules.xml │ │ │ │ └── java │ │ │ │ │ └── com │ │ │ │ │ └── whispercppdemo │ │ │ │ │ ├── ui │ │ │ │ │ └── theme │ │ │ │ │ │ └── Color.kt │ │ │ │ │ └── MainActivity.kt │ │ │ ├── test │ │ │ │ └── java │ │ │ │ │ └── com │ │ │ │ │ └── whispercppdemo │ │ │ │ │ └── ExampleUnitTest.kt │ │ │ └── androidTest │ │ │ │ └── java │ │ │ │ └── com │ │ │ │ └── whispercppdemo │ │ │ │ └── ExampleInstrumentedTest.kt │ │ └── proguard-rules.pro │ ├── lib │ │ ├── .gitignore │ │ └── src │ │ │ └── main │ │ │ └── AndroidManifest.xml │ ├── .idea │ │ ├── .name │ │ ├── .gitignore │ │ ├── compiler.xml │ │ ├── vcs.xml │ │ ├── misc.xml │ │ └── gradle.xml │ ├── gradle │ │ └── wrapper │ │ │ ├── gradle-wrapper.jar │ │ │ └── gradle-wrapper.properties │ ├── .gitignore │ ├── build.gradle │ └── settings.gradle ├── talk-llama │ ├── llama-quant.h │ ├── .gitignore │ ├── llama-cparams.cpp │ ├── llama-memory.cpp │ ├── speak.bat │ ├── llama-io.cpp │ ├── speak.ps1 │ ├── unicode-data.h │ ├── llama-sampling.h │ ├── llama-memory.h │ └── llama-io.h ├── whisper.android.java │ ├── app │ │ ├── .gitignore │ │ ├── src │ │ │ ├── main │ │ │ │ ├── res │ │ │ │ │ ├── values │ │ │ │ │ │ ├── strings.xml │ │ │ │ │ │ ├── colors.xml │ │ │ │ │ │ └── themes.xml │ │ │ │ │ ├── mipmap-hdpi │ │ │ │ │ │ ├── ic_launcher.png │ │ │ │ │ │ └── ic_launcher_round.png │ │ │ │ │ ├── mipmap-mdpi │ │ │ │ │ │ ├── ic_launcher.png │ │ │ │ │ │ └── ic_launcher_round.png │ │ │ │ │ ├── mipmap-xhdpi │ │ │ │ │ │ ├── ic_launcher.png │ │ │ │ │ │ └── ic_launcher_round.png │ │ │ │ │ ├── mipmap-xxhdpi │ │ │ │ │ │ ├── ic_launcher.png │ │ │ │ │ │ └── ic_launcher_round.png │ │ │ │ │ ├── mipmap-xxxhdpi │ │ │ │ │ │ ├── ic_launcher.png │ │ │ │ │ │ └── ic_launcher_round.png │ │ │ │ │ ├── mipmap-anydpi-v26 │ │ │ │ │ │ ├── ic_launcher.xml │ │ │ │ │ │ └── ic_launcher_round.xml │ │ │ │ │ └── values-night │ │ │ │ │ │ └── themes.xml │ │ │ │ ├── 
java │ │ │ │ │ └── com │ │ │ │ │ │ ├── litongjava │ │ │ │ │ │ └── whisper │ │ │ │ │ │ │ └── android │ │ │ │ │ │ │ └── java │ │ │ │ │ │ │ └── app │ │ │ │ │ │ │ └── App.java │ │ │ │ │ │ └── whispercpp │ │ │ │ │ │ └── java │ │ │ │ │ │ └── whisper │ │ │ │ │ │ └── WhisperCpuConfig.java │ │ │ │ └── AndroidManifest.xml │ │ │ └── test │ │ │ │ └── java │ │ │ │ └── com │ │ │ │ └── litongjava │ │ │ │ └── whisper │ │ │ │ └── android │ │ │ │ └── java │ │ │ │ └── ExampleUnitTest.java │ │ └── proguard-rules.pro │ ├── settings.gradle │ ├── README_files │ │ └── 1.jpg │ ├── gradle │ │ └── wrapper │ │ │ ├── gradle-wrapper.jar │ │ │ └── gradle-wrapper.properties │ ├── .gitignore │ └── build.gradle ├── addon.node │ ├── .gitignore │ ├── package.json │ └── __test__ │ │ └── whisper.spec.js ├── whisper.swiftui │ ├── .gitignore │ ├── whisper.swiftui.demo │ │ ├── Resources │ │ │ ├── models │ │ │ │ └── .gitignore │ │ │ └── samples │ │ │ │ └── .gitignore │ │ ├── Supporting files │ │ │ ├── Assets.xcassets │ │ │ │ ├── Contents.json │ │ │ │ └── AccentColor.colorset │ │ │ │ │ └── Contents.json │ │ │ ├── Preview Content │ │ │ │ └── Preview Assets.xcassets │ │ │ │ │ └── Contents.json │ │ │ └── WhisperCppDemo.entitlements │ │ ├── WhisperCppDemoApp.swift │ │ ├── Utils │ │ │ └── RiffWaveUtils.swift │ │ └── Models │ │ │ └── Model.swift │ └── whisper.swiftui.xcodeproj │ │ ├── .gitignore │ │ └── project.xcworkspace │ │ ├── .gitignore │ │ └── xcshareddata │ │ └── IDEWorkspaceChecks.plist ├── quantize │ ├── README.md │ └── CMakeLists.txt ├── command │ ├── commands.txt │ └── CMakeLists.txt ├── whisper.objc │ ├── whisper.objc │ │ ├── Assets.xcassets │ │ │ ├── Contents.json │ │ │ ├── AccentColor.colorset │ │ │ │ └── Contents.json │ │ │ └── AppIcon.appiconset │ │ │ │ └── Contents.json │ │ ├── AppDelegate.h │ │ ├── SceneDelegate.h │ │ ├── main.m │ │ └── Info.plist │ └── whisper.objc.xcodeproj │ │ └── project.xcworkspace │ │ ├── contents.xcworkspacedata │ │ └── xcshareddata │ │ └── IDEWorkspaceChecks.plist ├── 
wchess │ ├── wchess.wasm │ │ └── chessboardjs-1.0.0 │ │ │ ├── img │ │ │ └── chesspieces │ │ │ │ └── wikipedia │ │ │ │ ├── bB.png │ │ │ │ ├── bK.png │ │ │ │ ├── bN.png │ │ │ │ ├── bP.png │ │ │ │ ├── bQ.png │ │ │ │ ├── bR.png │ │ │ │ ├── wB.png │ │ │ │ ├── wK.png │ │ │ │ ├── wN.png │ │ │ │ ├── wP.png │ │ │ │ ├── wQ.png │ │ │ │ └── wR.png │ │ │ ├── css │ │ │ └── chessboard-1.0.0.min.css │ │ │ └── js │ │ │ └── chessboard-1.0.0 │ │ │ └── package.json │ ├── wchess.cmd │ │ └── CMakeLists.txt │ ├── CMakeLists.txt │ └── libwchess │ │ └── CMakeLists.txt ├── deprecation-warning │ ├── CMakeLists.txt │ └── README.md ├── python │ └── test_whisper_processor.py ├── bench │ └── CMakeLists.txt ├── cli │ └── CMakeLists.txt ├── sycl │ ├── ls-sycl-device.cpp │ ├── CMakeLists.txt │ ├── run-whisper.sh │ └── build.sh ├── lsp │ └── CMakeLists.txt ├── stream │ └── CMakeLists.txt ├── server │ └── CMakeLists.txt ├── common-ggml.h ├── stream.wasm │ └── README.md ├── console.h ├── bench.wasm │ └── README.md └── command.wasm │ └── README.md ├── tests ├── .gitignore ├── test-c.c └── en-2-ref.txt ├── samples ├── .gitignore ├── jfk.mp3 ├── jfk.wav └── README.md ├── scripts ├── sync-ggml.last ├── sha-all.sh ├── apple │ └── validate-apps.sh ├── convert-all.sh ├── gen-authors.sh ├── sync-llama.sh └── build-info.sh ├── ggml ├── .gitignore ├── src │ ├── ggml-cuda │ │ ├── argmax.cuh │ │ ├── out-prod.cuh │ │ ├── argsort.cuh │ │ ├── fattn.cuh │ │ ├── gla.cuh │ │ ├── fattn-tile-f16.cuh │ │ ├── fattn-tile-f32.cuh │ │ ├── fattn-wmma-f16.cuh │ │ ├── acc.cuh │ │ ├── pad.cuh │ │ ├── clamp.cuh │ │ ├── scale.cuh │ │ ├── arange.cuh │ │ ├── concat.cuh │ │ ├── im2col.cuh │ │ ├── pool2d.cuh │ │ ├── wkv6.cuhold │ │ ├── upscale.cuh │ │ ├── count-equal.cuh │ │ ├── diagmask.cuh │ │ ├── opt-step-adamw.cuh │ │ ├── tsembd.cuh │ │ ├── conv-transpose-1d.cuh │ │ ├── template-instances │ │ │ ├── mmq-instance-iq1_s.cu │ │ │ ├── mmq-instance-iq2_s.cu │ │ │ ├── mmq-instance-iq3_s.cu │ │ │ ├── mmq-instance-q2_k.cu │ │ │ ├── 
mmq-instance-q3_k.cu │ │ │ ├── mmq-instance-q4_0.cu │ │ │ ├── mmq-instance-q4_1.cu │ │ │ ├── mmq-instance-q4_k.cu │ │ │ ├── mmq-instance-q5_0.cu │ │ │ ├── mmq-instance-q5_1.cu │ │ │ ├── mmq-instance-q5_k.cu │ │ │ ├── mmq-instance-q6_k.cu │ │ │ ├── mmq-instance-q8_0.cu │ │ │ ├── mmq-instance-iq2_xs.cu │ │ │ ├── mmq-instance-iq2_xxs.cu │ │ │ ├── mmq-instance-iq3_xxs.cu │ │ │ ├── mmq-instance-iq4_nl.cu │ │ │ ├── mmq-instance-iq4_xs.cu │ │ │ ├── fattn-vec-f16-instance-hs64-f16-f16.cu │ │ │ ├── fattn-vec-f32-instance-hs64-f16-f16.cu │ │ │ ├── fattn-vec-f16-instance-hs128-f16-f16.cu │ │ │ ├── fattn-vec-f16-instance-hs128-f16-q4_0.cu │ │ │ ├── fattn-vec-f16-instance-hs128-f16-q4_1.cu │ │ │ ├── fattn-vec-f16-instance-hs128-f16-q5_0.cu │ │ │ ├── fattn-vec-f16-instance-hs128-f16-q5_1.cu │ │ │ ├── fattn-vec-f16-instance-hs128-f16-q8_0.cu │ │ │ ├── fattn-vec-f16-instance-hs128-q4_0-f16.cu │ │ │ ├── fattn-vec-f16-instance-hs128-q4_0-q4_0.cu │ │ │ ├── fattn-vec-f16-instance-hs128-q4_0-q4_1.cu │ │ │ ├── fattn-vec-f16-instance-hs128-q4_0-q5_0.cu │ │ │ ├── fattn-vec-f16-instance-hs128-q4_0-q5_1.cu │ │ │ ├── fattn-vec-f16-instance-hs128-q4_0-q8_0.cu │ │ │ ├── fattn-vec-f16-instance-hs128-q4_1-f16.cu │ │ │ ├── fattn-vec-f16-instance-hs128-q4_1-q4_0.cu │ │ │ ├── fattn-vec-f16-instance-hs128-q4_1-q4_1.cu │ │ │ ├── fattn-vec-f16-instance-hs128-q4_1-q5_0.cu │ │ │ ├── fattn-vec-f16-instance-hs128-q4_1-q5_1.cu │ │ │ ├── fattn-vec-f16-instance-hs128-q4_1-q8_0.cu │ │ │ ├── fattn-vec-f16-instance-hs128-q5_0-f16.cu │ │ │ ├── fattn-vec-f16-instance-hs128-q5_0-q4_0.cu │ │ │ ├── fattn-vec-f16-instance-hs128-q5_0-q4_1.cu │ │ │ ├── fattn-vec-f16-instance-hs128-q5_0-q5_0.cu │ │ │ ├── fattn-vec-f16-instance-hs128-q5_0-q5_1.cu │ │ │ ├── fattn-vec-f16-instance-hs128-q5_0-q8_0.cu │ │ │ ├── fattn-vec-f16-instance-hs128-q5_1-f16.cu │ │ │ ├── fattn-vec-f16-instance-hs128-q5_1-q4_0.cu │ │ │ ├── fattn-vec-f16-instance-hs128-q5_1-q4_1.cu │ │ │ ├── fattn-vec-f16-instance-hs128-q5_1-q5_0.cu │ │ │ ├── 
fattn-vec-f16-instance-hs128-q5_1-q5_1.cu │ │ │ ├── fattn-vec-f16-instance-hs128-q5_1-q8_0.cu │ │ │ ├── fattn-vec-f16-instance-hs128-q8_0-f16.cu │ │ │ ├── fattn-vec-f16-instance-hs128-q8_0-q4_0.cu │ │ │ ├── fattn-vec-f16-instance-hs128-q8_0-q4_1.cu │ │ │ ├── fattn-vec-f16-instance-hs128-q8_0-q5_0.cu │ │ │ ├── fattn-vec-f16-instance-hs128-q8_0-q5_1.cu │ │ │ ├── fattn-vec-f16-instance-hs128-q8_0-q8_0.cu │ │ │ ├── fattn-vec-f16-instance-hs256-f16-f16.cu │ │ │ ├── fattn-vec-f16-instance-hs64-f16-q4_0.cu │ │ │ ├── fattn-vec-f16-instance-hs64-f16-q4_1.cu │ │ │ ├── fattn-vec-f16-instance-hs64-f16-q5_0.cu │ │ │ ├── fattn-vec-f16-instance-hs64-f16-q5_1.cu │ │ │ ├── fattn-vec-f16-instance-hs64-f16-q8_0.cu │ │ │ ├── fattn-vec-f32-instance-hs128-f16-f16.cu │ │ │ ├── fattn-vec-f32-instance-hs128-f16-q4_0.cu │ │ │ ├── fattn-vec-f32-instance-hs128-f16-q4_1.cu │ │ │ ├── fattn-vec-f32-instance-hs128-f16-q5_0.cu │ │ │ ├── fattn-vec-f32-instance-hs128-f16-q5_1.cu │ │ │ ├── fattn-vec-f32-instance-hs128-f16-q8_0.cu │ │ │ ├── fattn-vec-f32-instance-hs128-q4_0-f16.cu │ │ │ ├── fattn-vec-f32-instance-hs128-q4_0-q4_0.cu │ │ │ ├── fattn-vec-f32-instance-hs128-q4_0-q4_1.cu │ │ │ ├── fattn-vec-f32-instance-hs128-q4_0-q5_0.cu │ │ │ ├── fattn-vec-f32-instance-hs128-q4_0-q5_1.cu │ │ │ ├── fattn-vec-f32-instance-hs128-q4_0-q8_0.cu │ │ │ ├── fattn-vec-f32-instance-hs128-q4_1-f16.cu │ │ │ ├── fattn-vec-f32-instance-hs128-q4_1-q4_0.cu │ │ │ ├── fattn-vec-f32-instance-hs128-q4_1-q4_1.cu │ │ │ ├── fattn-vec-f32-instance-hs128-q4_1-q5_0.cu │ │ │ ├── fattn-vec-f32-instance-hs128-q4_1-q5_1.cu │ │ │ ├── fattn-vec-f32-instance-hs128-q4_1-q8_0.cu │ │ │ ├── fattn-vec-f32-instance-hs128-q5_0-f16.cu │ │ │ ├── fattn-vec-f32-instance-hs128-q5_0-q4_0.cu │ │ │ ├── fattn-vec-f32-instance-hs128-q5_0-q4_1.cu │ │ │ ├── fattn-vec-f32-instance-hs128-q5_0-q5_0.cu │ │ │ ├── fattn-vec-f32-instance-hs128-q5_0-q5_1.cu │ │ │ ├── fattn-vec-f32-instance-hs128-q5_0-q8_0.cu │ │ │ ├── fattn-vec-f32-instance-hs128-q5_1-f16.cu │ │ │ 
├── fattn-vec-f32-instance-hs128-q5_1-q4_0.cu │ │ │ ├── fattn-vec-f32-instance-hs128-q5_1-q4_1.cu │ │ │ ├── fattn-vec-f32-instance-hs128-q5_1-q5_0.cu │ │ │ ├── fattn-vec-f32-instance-hs128-q5_1-q5_1.cu │ │ │ ├── fattn-vec-f32-instance-hs128-q5_1-q8_0.cu │ │ │ ├── fattn-vec-f32-instance-hs128-q8_0-f16.cu │ │ │ ├── fattn-vec-f32-instance-hs128-q8_0-q4_0.cu │ │ │ ├── fattn-vec-f32-instance-hs128-q8_0-q4_1.cu │ │ │ ├── fattn-vec-f32-instance-hs128-q8_0-q5_0.cu │ │ │ ├── fattn-vec-f32-instance-hs128-q8_0-q5_1.cu │ │ │ ├── fattn-vec-f32-instance-hs128-q8_0-q8_0.cu │ │ │ ├── fattn-vec-f32-instance-hs256-f16-f16.cu │ │ │ ├── fattn-vec-f32-instance-hs64-f16-q4_0.cu │ │ │ ├── fattn-vec-f32-instance-hs64-f16-q4_1.cu │ │ │ ├── fattn-vec-f32-instance-hs64-f16-q5_0.cu │ │ │ ├── fattn-vec-f32-instance-hs64-f16-q5_1.cu │ │ │ ├── fattn-vec-f32-instance-hs64-f16-q8_0.cu │ │ │ ├── fattn-mma-f16-instance-ncols1_1-ncols2_8.cu │ │ │ ├── fattn-mma-f16-instance-ncols1_2-ncols2_4.cu │ │ │ ├── fattn-mma-f16-instance-ncols1_2-ncols2_8.cu │ │ │ ├── fattn-mma-f16-instance-ncols1_4-ncols2_2.cu │ │ │ ├── fattn-mma-f16-instance-ncols1_4-ncols2_4.cu │ │ │ ├── fattn-mma-f16-instance-ncols1_4-ncols2_8.cu │ │ │ ├── fattn-mma-f16-instance-ncols1_8-ncols2_1.cu │ │ │ ├── fattn-mma-f16-instance-ncols1_8-ncols2_2.cu │ │ │ ├── fattn-mma-f16-instance-ncols1_8-ncols2_4.cu │ │ │ ├── fattn-mma-f16-instance-ncols1_8-ncols2_8.cu │ │ │ ├── fattn-mma-f16-instance-ncols1_16-ncols2_1.cu │ │ │ ├── fattn-mma-f16-instance-ncols1_16-ncols2_2.cu │ │ │ ├── fattn-mma-f16-instance-ncols1_16-ncols2_4.cu │ │ │ ├── fattn-mma-f16-instance-ncols1_32-ncols2_1.cu │ │ │ ├── fattn-mma-f16-instance-ncols1_32-ncols2_2.cu │ │ │ └── fattn-mma-f16-instance-ncols1_64-ncols2_1.cu │ │ ├── sum.cuh │ │ ├── sumrows.cuh │ │ ├── rope.cuh │ │ ├── wkv.cuh │ │ ├── softmax.cuh │ │ ├── cross-entropy-loss.cuh │ │ ├── getrows.cuh │ │ ├── cpy.cuh │ │ ├── convert.cuh │ │ ├── norm.cuh │ │ ├── mmvq.cuh │ │ ├── binbcast.cuh │ │ ├── vendors │ │ │ └── cuda.h 
│ │ └── mmv.cuh │ ├── ggml-vulkan │ │ ├── vulkan-shaders │ │ │ ├── test_coopmat2_support.comp │ │ │ ├── test_coopmat_support.comp │ │ │ ├── generic_head.comp │ │ │ ├── CMakeLists.txt │ │ │ ├── dequant_head.comp │ │ │ ├── cos.comp │ │ │ ├── sin.comp │ │ │ ├── square.comp │ │ │ ├── clamp.comp │ │ │ ├── dequant_f32.comp │ │ │ ├── copy.comp │ │ │ ├── relu.comp │ │ │ ├── sigmoid.comp │ │ │ ├── tanh.comp │ │ │ ├── silu.comp │ │ │ ├── leaky_relu.comp │ │ │ ├── scale.comp │ │ │ ├── gelu_quick.comp │ │ │ ├── gelu.comp │ │ │ ├── silu_back.comp │ │ │ ├── div.comp │ │ │ ├── mul.comp │ │ │ ├── repeat.comp │ │ │ └── count_equal.comp │ │ └── cmake │ │ │ └── host-toolchain.cmake.in │ ├── ggml-cpu │ │ ├── ggml-cpu-hbm.h │ │ ├── ggml-cpu-aarch64.h │ │ ├── amx │ │ │ ├── amx.h │ │ │ └── mmq.h │ │ ├── llamafile │ │ │ └── sgemm.h │ │ └── kleidiai │ │ │ └── kleidiai.h │ ├── ggml-sycl │ │ ├── gla.hpp │ │ ├── wkv6.hpp │ │ ├── outprod.hpp │ │ ├── wkv.hpp │ │ ├── cpy.hpp │ │ ├── sycl_hw.cpp │ │ ├── sycl_hw.hpp │ │ ├── concat.hpp │ │ ├── conv.hpp │ │ ├── softmax.hpp │ │ ├── tsembd.hpp │ │ ├── rope.hpp │ │ ├── getrows.hpp │ │ ├── im2col.hpp │ │ ├── mmvq.hpp │ │ ├── dmmv.hpp │ │ ├── convert.hpp │ │ └── backend.hpp │ ├── ggml-rpc │ │ └── CMakeLists.txt │ ├── ggml-threading.h │ ├── ggml-threading.cpp │ ├── ggml-amx │ │ └── mmq.h │ ├── ggml-kompute │ │ └── kompute-shaders │ │ │ ├── op_scale.comp │ │ │ ├── op_relu.comp │ │ │ ├── op_scale_8.comp │ │ │ ├── op_silu.comp │ │ │ ├── op_getrows.comp │ │ │ ├── op_gelu.comp │ │ │ ├── op_mul_mv_q_n_pre.comp │ │ │ ├── op_addrow.comp │ │ │ ├── op_diagmask.comp │ │ │ ├── op_getrows_f32.comp │ │ │ └── op_getrows_f16.comp │ ├── ggml-opencl │ │ └── kernels │ │ │ └── embed_kernel.py │ └── ggml-cann │ │ └── kernels │ │ └── ascendc_kernels.h ├── include │ ├── ggml-blas.h │ ├── ggml-opencl.h │ └── ggml-rpc.h └── cmake │ └── GitVars.cmake ├── README_sycl.md ├── SDL2 ├── lib │ ├── x64 │ │ ├── SDL2.dll │ │ ├── SDL2.lib │ │ ├── SDL2main.lib │ │ └── SDL2test.lib │ └── x86 
│ │ ├── SDL2.dll │ │ ├── SDL2.lib │ │ ├── SDL2main.lib │ │ └── SDL2test.lib ├── docs │ ├── README-hg.md │ ├── README-platforms.md │ ├── README-wince.md │ ├── README-git.md │ └── README-pandora.md ├── include │ ├── SDL_revision.h │ └── SDL_opengles2_gl2platform.h ├── README-SDL.txt ├── BUGS.txt └── README.txt ├── cmake ├── whisper.pc.in ├── DefaultTargetOptions.cmake └── git-vars.cmake ├── grammars └── colors.gbnf ├── .github └── workflows │ └── bindings-go.yml ├── .devops ├── main.Dockerfile └── cublas.Dockerfile └── src └── coreml └── whisper-encoder.h /.gitmodules: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /models/.gitignore: -------------------------------------------------------------------------------- 1 | *.bin 2 | -------------------------------------------------------------------------------- /bindings/go/.gitignore: -------------------------------------------------------------------------------- 1 | build 2 | models 3 | -------------------------------------------------------------------------------- /bindings/javascript/.gitignore: -------------------------------------------------------------------------------- 1 | publish.log 2 | -------------------------------------------------------------------------------- /examples/whisper.android/app/.gitignore: -------------------------------------------------------------------------------- 1 | /build -------------------------------------------------------------------------------- /examples/whisper.android/lib/.gitignore: -------------------------------------------------------------------------------- 1 | /build -------------------------------------------------------------------------------- /examples/talk-llama/llama-quant.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 
-------------------------------------------------------------------------------- /examples/whisper.android.java/app/.gitignore: -------------------------------------------------------------------------------- 1 | /build -------------------------------------------------------------------------------- /examples/whisper.android/.idea/.name: -------------------------------------------------------------------------------- 1 | WhisperCppDemo -------------------------------------------------------------------------------- /tests/.gitignore: -------------------------------------------------------------------------------- 1 | *.wav 2 | *.ogg 3 | *.wav.txt 4 | -------------------------------------------------------------------------------- /samples/.gitignore: -------------------------------------------------------------------------------- 1 | * 2 | !jfk.wave 3 | !jfk.mp3 4 | 5 | -------------------------------------------------------------------------------- /bindings/ruby/.gitignore: -------------------------------------------------------------------------------- 1 | LICENSE 2 | pkg/ 3 | lib/whisper.* 4 | -------------------------------------------------------------------------------- /examples/talk-llama/.gitignore: -------------------------------------------------------------------------------- 1 | audio.mp3 2 | to_speak.txt 3 | -------------------------------------------------------------------------------- /bindings/java/settings.gradle: -------------------------------------------------------------------------------- 1 | rootProject.name = "whispercpp" 2 | -------------------------------------------------------------------------------- /examples/addon.node/.gitignore: -------------------------------------------------------------------------------- 1 | .idea 2 | node_modules 3 | build 4 | -------------------------------------------------------------------------------- /examples/talk-llama/llama-cparams.cpp: 
-------------------------------------------------------------------------------- 1 | #include "llama-cparams.h" 2 | -------------------------------------------------------------------------------- /examples/talk-llama/llama-memory.cpp: -------------------------------------------------------------------------------- 1 | #include "llama-memory.h" 2 | -------------------------------------------------------------------------------- /examples/whisper.swiftui/.gitignore: -------------------------------------------------------------------------------- 1 | xcuserdata 2 | xcshareddata 3 | -------------------------------------------------------------------------------- /scripts/sync-ggml.last: -------------------------------------------------------------------------------- 1 | 7b08f4cd9e32781e769a52cf0dcaeb2c556632c3 2 | -------------------------------------------------------------------------------- /tests/test-c.c: -------------------------------------------------------------------------------- 1 | #include "whisper.h" 2 | 3 | int main(void) {} 4 | -------------------------------------------------------------------------------- /examples/whisper.swiftui/whisper.swiftui.demo/Resources/models/.gitignore: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /examples/whisper.swiftui/whisper.swiftui.demo/Resources/samples/.gitignore: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /examples/whisper.swiftui/whisper.swiftui.xcodeproj/.gitignore: -------------------------------------------------------------------------------- 1 | xcuserdata/ 2 | -------------------------------------------------------------------------------- /models/requirements-openvino.txt: -------------------------------------------------------------------------------- 1 | 
openvino-dev[pytorch,onnx] 2 | openai-whisper -------------------------------------------------------------------------------- /ggml/.gitignore: -------------------------------------------------------------------------------- 1 | src/ggml-vulkan-shaders.hpp 2 | src/ggml-vulkan-shaders.cpp 3 | -------------------------------------------------------------------------------- /README_sycl.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Mozer/talk-llama-fast/HEAD/README_sycl.md -------------------------------------------------------------------------------- /samples/jfk.mp3: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Mozer/talk-llama-fast/HEAD/samples/jfk.mp3 -------------------------------------------------------------------------------- /samples/jfk.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Mozer/talk-llama-fast/HEAD/samples/jfk.wav -------------------------------------------------------------------------------- /models/requirements-coreml.txt: -------------------------------------------------------------------------------- 1 | torch 2 | coremltools 3 | openai-whisper 4 | ane_transformers 5 | -------------------------------------------------------------------------------- /SDL2/lib/x64/SDL2.dll: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Mozer/talk-llama-fast/HEAD/SDL2/lib/x64/SDL2.dll -------------------------------------------------------------------------------- /SDL2/lib/x64/SDL2.lib: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Mozer/talk-llama-fast/HEAD/SDL2/lib/x64/SDL2.lib -------------------------------------------------------------------------------- /SDL2/lib/x86/SDL2.dll: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/Mozer/talk-llama-fast/HEAD/SDL2/lib/x86/SDL2.dll -------------------------------------------------------------------------------- /SDL2/lib/x86/SDL2.lib: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Mozer/talk-llama-fast/HEAD/SDL2/lib/x86/SDL2.lib -------------------------------------------------------------------------------- /bindings/ruby/tests/jfk_reader/extconf.rb: -------------------------------------------------------------------------------- 1 | require "mkmf" 2 | 3 | create_makefile("jfk_reader") 4 | -------------------------------------------------------------------------------- /examples/whisper.android/.idea/.gitignore: -------------------------------------------------------------------------------- 1 | # Default ignored files 2 | /shelf/ 3 | /workspace.xml 4 | -------------------------------------------------------------------------------- /SDL2/lib/x64/SDL2main.lib: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Mozer/talk-llama-fast/HEAD/SDL2/lib/x64/SDL2main.lib -------------------------------------------------------------------------------- /SDL2/lib/x64/SDL2test.lib: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Mozer/talk-llama-fast/HEAD/SDL2/lib/x64/SDL2test.lib -------------------------------------------------------------------------------- /SDL2/lib/x86/SDL2main.lib: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Mozer/talk-llama-fast/HEAD/SDL2/lib/x86/SDL2main.lib -------------------------------------------------------------------------------- /SDL2/lib/x86/SDL2test.lib: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/Mozer/talk-llama-fast/HEAD/SDL2/lib/x86/SDL2test.lib -------------------------------------------------------------------------------- /examples/whisper.android.java/settings.gradle: -------------------------------------------------------------------------------- 1 | include ':app' 2 | rootProject.name = "whisper.android.java" -------------------------------------------------------------------------------- /bindings/go/samples/jfk.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Mozer/talk-llama-fast/HEAD/bindings/go/samples/jfk.wav -------------------------------------------------------------------------------- /examples/talk-llama/speak.bat: -------------------------------------------------------------------------------- 1 | @powershell -ExecutionPolicy Bypass -F examples\talk-llama\speak.ps1 %1 %2 2 | -------------------------------------------------------------------------------- /examples/whisper.swiftui/whisper.swiftui.xcodeproj/project.xcworkspace/.gitignore: -------------------------------------------------------------------------------- 1 | contents.xcworkspacedata 2 | -------------------------------------------------------------------------------- /examples/quantize/README.md: -------------------------------------------------------------------------------- 1 | # quantize 2 | 3 | Tool for integer quantization of Whisper `ggml` model files 4 | -------------------------------------------------------------------------------- /models/for-tests-ggml-base.bin: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Mozer/talk-llama-fast/HEAD/models/for-tests-ggml-base.bin -------------------------------------------------------------------------------- /models/for-tests-ggml-tiny.bin: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/Mozer/talk-llama-fast/HEAD/models/for-tests-ggml-tiny.bin -------------------------------------------------------------------------------- /models/for-tests-ggml-base.en.bin: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Mozer/talk-llama-fast/HEAD/models/for-tests-ggml-base.en.bin -------------------------------------------------------------------------------- /models/for-tests-ggml-large.bin: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Mozer/talk-llama-fast/HEAD/models/for-tests-ggml-large.bin -------------------------------------------------------------------------------- /models/for-tests-ggml-medium.bin: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Mozer/talk-llama-fast/HEAD/models/for-tests-ggml-medium.bin -------------------------------------------------------------------------------- /models/for-tests-ggml-small.bin: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Mozer/talk-llama-fast/HEAD/models/for-tests-ggml-small.bin -------------------------------------------------------------------------------- /models/for-tests-ggml-tiny.en.bin: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Mozer/talk-llama-fast/HEAD/models/for-tests-ggml-tiny.en.bin -------------------------------------------------------------------------------- /SDL2/docs/README-hg.md: -------------------------------------------------------------------------------- 1 | We are no longer hosted in Mercurial. Please see README-git.md for details. 2 | 3 | Thanks! 
4 | 5 | -------------------------------------------------------------------------------- /models/for-tests-ggml-medium.en.bin: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Mozer/talk-llama-fast/HEAD/models/for-tests-ggml-medium.en.bin -------------------------------------------------------------------------------- /models/for-tests-ggml-small.en.bin: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Mozer/talk-llama-fast/HEAD/models/for-tests-ggml-small.en.bin -------------------------------------------------------------------------------- /bindings/go/pkg/whisper/doc.go: -------------------------------------------------------------------------------- 1 | /* 2 | This is the higher-level speech-to-text whisper.cpp API for go 3 | */ 4 | package whisper 5 | -------------------------------------------------------------------------------- /bindings/ruby/tests/jfk_reader/.gitignore: -------------------------------------------------------------------------------- 1 | Makefile 2 | jfk_reader.o 3 | jfk_reader.so 4 | jfk_reader.bundle 5 | jfk_reader.dll 6 | -------------------------------------------------------------------------------- /examples/command/commands.txt: -------------------------------------------------------------------------------- 1 | enable 2 | disable 3 | cat 4 | dog 5 | apple 6 | red 7 | blue 8 | green 9 | lightblue 10 | -------------------------------------------------------------------------------- /ggml/src/ggml-cuda/argmax.cuh: -------------------------------------------------------------------------------- 1 | #include "common.cuh" 2 | 3 | void ggml_cuda_argmax(ggml_backend_cuda_context & ctx, ggml_tensor * dst); 4 | -------------------------------------------------------------------------------- /ggml/src/ggml-cuda/out-prod.cuh: -------------------------------------------------------------------------------- 1 | 
#include "common.cuh" 2 | 3 | void ggml_cuda_out_prod(ggml_backend_cuda_context & ctx, ggml_tensor * dst); 4 | -------------------------------------------------------------------------------- /examples/whisper.android/app/src/main/res/values/strings.xml: -------------------------------------------------------------------------------- 1 | 2 | WhisperCppDemo 3 | -------------------------------------------------------------------------------- /ggml/src/ggml-cuda/argsort.cuh: -------------------------------------------------------------------------------- 1 | #include "common.cuh" 2 | 3 | void ggml_cuda_op_argsort(ggml_backend_cuda_context & ctx, ggml_tensor * dst); 4 | -------------------------------------------------------------------------------- /ggml/src/ggml-cuda/fattn.cuh: -------------------------------------------------------------------------------- 1 | #include "common.cuh" 2 | 3 | void ggml_cuda_flash_attn_ext(ggml_backend_cuda_context & ctx, ggml_tensor * dst); 4 | -------------------------------------------------------------------------------- /bindings/java/gradle/wrapper/gradle-wrapper.jar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Mozer/talk-llama-fast/HEAD/bindings/java/gradle/wrapper/gradle-wrapper.jar -------------------------------------------------------------------------------- /examples/whisper.android.java/README_files/1.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Mozer/talk-llama-fast/HEAD/examples/whisper.android.java/README_files/1.jpg -------------------------------------------------------------------------------- /examples/whisper.android.java/app/src/main/res/values/strings.xml: -------------------------------------------------------------------------------- 1 | 2 | whisper.android.java 3 | -------------------------------------------------------------------------------- 
/examples/whisper.objc/whisper.objc/Assets.xcassets/Contents.json: -------------------------------------------------------------------------------- 1 | { 2 | "info" : { 3 | "author" : "xcode", 4 | "version" : 1 5 | } 6 | } 7 | -------------------------------------------------------------------------------- /ggml/src/ggml-cuda/gla.cuh: -------------------------------------------------------------------------------- 1 | #include "common.cuh" 2 | 3 | void ggml_cuda_op_gated_linear_attn(ggml_backend_cuda_context & ctx, ggml_tensor * dst); 4 | -------------------------------------------------------------------------------- /ggml/src/ggml-cuda/fattn-tile-f16.cuh: -------------------------------------------------------------------------------- 1 | #include "common.cuh" 2 | 3 | void ggml_cuda_flash_attn_ext_tile_f16(ggml_backend_cuda_context & ctx, ggml_tensor * dst); 4 | -------------------------------------------------------------------------------- /ggml/src/ggml-cuda/fattn-tile-f32.cuh: -------------------------------------------------------------------------------- 1 | #include "common.cuh" 2 | 3 | void ggml_cuda_flash_attn_ext_tile_f32(ggml_backend_cuda_context & ctx, ggml_tensor * dst); 4 | -------------------------------------------------------------------------------- /ggml/src/ggml-cuda/fattn-wmma-f16.cuh: -------------------------------------------------------------------------------- 1 | #include "common.cuh" 2 | 3 | void ggml_cuda_flash_attn_ext_wmma_f16(ggml_backend_cuda_context & ctx, ggml_tensor * dst); 4 | -------------------------------------------------------------------------------- /ggml/src/ggml-vulkan/vulkan-shaders/test_coopmat2_support.comp: -------------------------------------------------------------------------------- 1 | #version 460 2 | 3 | #extension GL_NV_cooperative_matrix2 : require 4 | 5 | void main() 6 | { 7 | } 8 | -------------------------------------------------------------------------------- 
/ggml/src/ggml-vulkan/vulkan-shaders/test_coopmat_support.comp: -------------------------------------------------------------------------------- 1 | #version 460 2 | 3 | #extension GL_KHR_cooperative_matrix : require 4 | 5 | void main() 6 | { 7 | } 8 | -------------------------------------------------------------------------------- /bindings/java/src/main/java/io/github/ggerganov/whispercpp/ggml/GgmlTensor.java: -------------------------------------------------------------------------------- 1 | package io.github.ggerganov.whispercpp.ggml; 2 | 3 | public class GgmlTensor { 4 | } 5 | -------------------------------------------------------------------------------- /examples/whisper.android/gradle/wrapper/gradle-wrapper.jar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Mozer/talk-llama-fast/HEAD/examples/whisper.android/gradle/wrapper/gradle-wrapper.jar -------------------------------------------------------------------------------- /bindings/go/pkg/whisper/util_test.go: -------------------------------------------------------------------------------- 1 | package whisper_test 2 | 3 | const ( 4 | ModelPath = "../../models/ggml-small.en.bin" 5 | SamplePath = "../../samples/jfk.wav" 6 | ) 7 | -------------------------------------------------------------------------------- /ggml/src/ggml-cuda/acc.cuh: -------------------------------------------------------------------------------- 1 | #include "common.cuh" 2 | 3 | #define CUDA_ACC_BLOCK_SIZE 256 4 | 5 | void ggml_cuda_op_acc(ggml_backend_cuda_context & ctx, ggml_tensor * dst); 6 | -------------------------------------------------------------------------------- /ggml/src/ggml-cuda/pad.cuh: -------------------------------------------------------------------------------- 1 | #include "common.cuh" 2 | 3 | #define CUDA_PAD_BLOCK_SIZE 256 4 | 5 | void ggml_cuda_op_pad(ggml_backend_cuda_context & ctx, ggml_tensor * dst); 6 | 
-------------------------------------------------------------------------------- /bindings/go/doc.go: -------------------------------------------------------------------------------- 1 | /* 2 | github.com/ggerganov/whisper.cpp/bindings/go 3 | provides speech-to-text service bindings for the Go programming language. 4 | */ 5 | package whisper 6 | -------------------------------------------------------------------------------- /bindings/java/src/main/java/io/github/ggerganov/whispercpp/model/WhisperState.java: -------------------------------------------------------------------------------- 1 | package io.github.ggerganov.whispercpp.model; 2 | 3 | public class WhisperState { 4 | } 5 | -------------------------------------------------------------------------------- /examples/whisper.android.java/gradle/wrapper/gradle-wrapper.jar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Mozer/talk-llama-fast/HEAD/examples/whisper.android.java/gradle/wrapper/gradle-wrapper.jar -------------------------------------------------------------------------------- /examples/whisper.swiftui/whisper.swiftui.demo/Supporting files/Assets.xcassets/Contents.json: -------------------------------------------------------------------------------- 1 | { 2 | "info" : { 3 | "author" : "xcode", 4 | "version" : 1 5 | } 6 | } 7 | -------------------------------------------------------------------------------- /ggml/src/ggml-cuda/clamp.cuh: -------------------------------------------------------------------------------- 1 | #include "common.cuh" 2 | 3 | #define CUDA_CLAMP_BLOCK_SIZE 256 4 | 5 | void ggml_cuda_op_clamp(ggml_backend_cuda_context & ctx, ggml_tensor * dst); 6 | -------------------------------------------------------------------------------- /ggml/src/ggml-cuda/scale.cuh: -------------------------------------------------------------------------------- 1 | #include "common.cuh" 2 | 3 | #define CUDA_SCALE_BLOCK_SIZE 256 4 | 5
| void ggml_cuda_op_scale(ggml_backend_cuda_context & ctx, ggml_tensor * dst); 6 | -------------------------------------------------------------------------------- /scripts/sha-all.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Compute the SHA1 of all model files in ./models/ggml-*.bin 4 | 5 | for f in ./models/ggml-*.bin; do 6 | shasum "$f" -a 1 7 | done 8 | -------------------------------------------------------------------------------- /ggml/src/ggml-cuda/arange.cuh: -------------------------------------------------------------------------------- 1 | #include "common.cuh" 2 | 3 | #define CUDA_ARANGE_BLOCK_SIZE 256 4 | 5 | void ggml_cuda_op_arange(ggml_backend_cuda_context & ctx, ggml_tensor * dst); 6 | -------------------------------------------------------------------------------- /ggml/src/ggml-cuda/concat.cuh: -------------------------------------------------------------------------------- 1 | #include "common.cuh" 2 | 3 | #define CUDA_CONCAT_BLOCK_SIZE 256 4 | 5 | void ggml_cuda_op_concat(ggml_backend_cuda_context & ctx, ggml_tensor * dst); 6 | -------------------------------------------------------------------------------- /ggml/src/ggml-cuda/im2col.cuh: -------------------------------------------------------------------------------- 1 | #include "common.cuh" 2 | 3 | #define CUDA_IM2COL_BLOCK_SIZE 256 4 | 5 | void ggml_cuda_op_im2col(ggml_backend_cuda_context & ctx, ggml_tensor * dst); 6 | -------------------------------------------------------------------------------- /ggml/src/ggml-cuda/pool2d.cuh: -------------------------------------------------------------------------------- 1 | #include "common.cuh" 2 | 3 | #define CUDA_POOL2D_BLOCK_SIZE 256 4 | 5 | void ggml_cuda_op_pool2d(ggml_backend_cuda_context & ctx, ggml_tensor * dst); 6 | -------------------------------------------------------------------------------- /ggml/src/ggml-cuda/wkv6.cuhold: 
-------------------------------------------------------------------------------- 1 | #include "common.cuh" 2 | 3 | #define CUDA_WKV_BLOCK_SIZE 64 4 | 5 | void ggml_cuda_op_rwkv_wkv6(ggml_backend_cuda_context & ctx, ggml_tensor * dst); 6 | -------------------------------------------------------------------------------- /examples/whisper.android/lib/src/main/AndroidManifest.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | -------------------------------------------------------------------------------- /ggml/src/ggml-cuda/upscale.cuh: -------------------------------------------------------------------------------- 1 | #include "common.cuh" 2 | 3 | #define CUDA_UPSCALE_BLOCK_SIZE 256 4 | 5 | void ggml_cuda_op_upscale(ggml_backend_cuda_context & ctx, ggml_tensor * dst); 6 | -------------------------------------------------------------------------------- /ggml/src/ggml-cuda/count-equal.cuh: -------------------------------------------------------------------------------- 1 | #include "common.cuh" 2 | 3 | #define CUDA_COUNT_EQUAL_CHUNK_SIZE 128 4 | 5 | void ggml_cuda_count_equal(ggml_backend_cuda_context & ctx, ggml_tensor * dst); 6 | -------------------------------------------------------------------------------- /scripts/apple/validate-apps.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | ./scripts/apple/validate-ios.sh 3 | ./scripts/apple/validate-macos.sh 4 | ./scripts/apple/validate-visionos.sh 5 | ./scripts/apple/validate-tvos.sh 6 | -------------------------------------------------------------------------------- /examples/whisper.swiftui/whisper.swiftui.demo/Supporting files/Preview Content/Preview Assets.xcassets/Contents.json: -------------------------------------------------------------------------------- 1 | { 2 | "info" : { 3 | "author" : "xcode", 4 | "version" : 1 5 | } 6 | } 7 | 
-------------------------------------------------------------------------------- /ggml/src/ggml-cuda/diagmask.cuh: -------------------------------------------------------------------------------- 1 | #include "common.cuh" 2 | 3 | #define CUDA_DIAG_MASK_INF_BLOCK_SIZE 32 4 | 5 | void ggml_cuda_op_diag_mask_inf(ggml_backend_cuda_context & ctx, ggml_tensor * dst); 6 | -------------------------------------------------------------------------------- /examples/whisper.android.java/app/src/main/res/mipmap-hdpi/ic_launcher.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Mozer/talk-llama-fast/HEAD/examples/whisper.android.java/app/src/main/res/mipmap-hdpi/ic_launcher.png -------------------------------------------------------------------------------- /examples/whisper.android.java/app/src/main/res/mipmap-mdpi/ic_launcher.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Mozer/talk-llama-fast/HEAD/examples/whisper.android.java/app/src/main/res/mipmap-mdpi/ic_launcher.png -------------------------------------------------------------------------------- /examples/whisper.android.java/app/src/main/res/mipmap-xhdpi/ic_launcher.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Mozer/talk-llama-fast/HEAD/examples/whisper.android.java/app/src/main/res/mipmap-xhdpi/ic_launcher.png -------------------------------------------------------------------------------- /ggml/src/ggml-cuda/opt-step-adamw.cuh: -------------------------------------------------------------------------------- 1 | #include "common.cuh" 2 | 3 | #define CUDA_OPT_STEP_ADAMW_BLOCK_SIZE 256 4 | 5 | void ggml_cuda_opt_step_adamw(ggml_backend_cuda_context & ctx, ggml_tensor * dst); 6 | -------------------------------------------------------------------------------- /bindings/ruby/ext/.gitignore: 
-------------------------------------------------------------------------------- 1 | Makefile 2 | whisper.so 3 | whisper.bundle 4 | whisper.dll 5 | scripts/get-flags.mk 6 | *.o 7 | /*/**/*.c 8 | /*/**/*.cpp 9 | /*/**/*.h 10 | /*/**/*.m 11 | /*/**/*.metal 12 | -------------------------------------------------------------------------------- /examples/whisper.android.java/app/src/main/res/mipmap-xxhdpi/ic_launcher.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Mozer/talk-llama-fast/HEAD/examples/whisper.android.java/app/src/main/res/mipmap-xxhdpi/ic_launcher.png -------------------------------------------------------------------------------- /examples/whisper.android.java/app/src/main/res/mipmap-xxxhdpi/ic_launcher.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Mozer/talk-llama-fast/HEAD/examples/whisper.android.java/app/src/main/res/mipmap-xxxhdpi/ic_launcher.png -------------------------------------------------------------------------------- /ggml/src/ggml-cuda/tsembd.cuh: -------------------------------------------------------------------------------- 1 | #include "common.cuh" 2 | 3 | #define CUDA_TIMESTEP_EMBEDDING_BLOCK_SIZE 256 4 | 5 | void ggml_cuda_op_timestep_embedding(ggml_backend_cuda_context & ctx, ggml_tensor * dst); 6 | -------------------------------------------------------------------------------- /examples/wchess/wchess.wasm/chessboardjs-1.0.0/img/chesspieces/wikipedia/bB.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Mozer/talk-llama-fast/HEAD/examples/wchess/wchess.wasm/chessboardjs-1.0.0/img/chesspieces/wikipedia/bB.png -------------------------------------------------------------------------------- /examples/wchess/wchess.wasm/chessboardjs-1.0.0/img/chesspieces/wikipedia/bK.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/Mozer/talk-llama-fast/HEAD/examples/wchess/wchess.wasm/chessboardjs-1.0.0/img/chesspieces/wikipedia/bK.png -------------------------------------------------------------------------------- /examples/wchess/wchess.wasm/chessboardjs-1.0.0/img/chesspieces/wikipedia/bN.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Mozer/talk-llama-fast/HEAD/examples/wchess/wchess.wasm/chessboardjs-1.0.0/img/chesspieces/wikipedia/bN.png -------------------------------------------------------------------------------- /examples/wchess/wchess.wasm/chessboardjs-1.0.0/img/chesspieces/wikipedia/bP.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Mozer/talk-llama-fast/HEAD/examples/wchess/wchess.wasm/chessboardjs-1.0.0/img/chesspieces/wikipedia/bP.png -------------------------------------------------------------------------------- /examples/wchess/wchess.wasm/chessboardjs-1.0.0/img/chesspieces/wikipedia/bQ.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Mozer/talk-llama-fast/HEAD/examples/wchess/wchess.wasm/chessboardjs-1.0.0/img/chesspieces/wikipedia/bQ.png -------------------------------------------------------------------------------- /examples/wchess/wchess.wasm/chessboardjs-1.0.0/img/chesspieces/wikipedia/bR.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Mozer/talk-llama-fast/HEAD/examples/wchess/wchess.wasm/chessboardjs-1.0.0/img/chesspieces/wikipedia/bR.png -------------------------------------------------------------------------------- /examples/wchess/wchess.wasm/chessboardjs-1.0.0/img/chesspieces/wikipedia/wB.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/Mozer/talk-llama-fast/HEAD/examples/wchess/wchess.wasm/chessboardjs-1.0.0/img/chesspieces/wikipedia/wB.png -------------------------------------------------------------------------------- /examples/wchess/wchess.wasm/chessboardjs-1.0.0/img/chesspieces/wikipedia/wK.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Mozer/talk-llama-fast/HEAD/examples/wchess/wchess.wasm/chessboardjs-1.0.0/img/chesspieces/wikipedia/wK.png -------------------------------------------------------------------------------- /examples/wchess/wchess.wasm/chessboardjs-1.0.0/img/chesspieces/wikipedia/wN.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Mozer/talk-llama-fast/HEAD/examples/wchess/wchess.wasm/chessboardjs-1.0.0/img/chesspieces/wikipedia/wN.png -------------------------------------------------------------------------------- /examples/wchess/wchess.wasm/chessboardjs-1.0.0/img/chesspieces/wikipedia/wP.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Mozer/talk-llama-fast/HEAD/examples/wchess/wchess.wasm/chessboardjs-1.0.0/img/chesspieces/wikipedia/wP.png -------------------------------------------------------------------------------- /examples/wchess/wchess.wasm/chessboardjs-1.0.0/img/chesspieces/wikipedia/wQ.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Mozer/talk-llama-fast/HEAD/examples/wchess/wchess.wasm/chessboardjs-1.0.0/img/chesspieces/wikipedia/wQ.png -------------------------------------------------------------------------------- /examples/wchess/wchess.wasm/chessboardjs-1.0.0/img/chesspieces/wikipedia/wR.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/Mozer/talk-llama-fast/HEAD/examples/wchess/wchess.wasm/chessboardjs-1.0.0/img/chesspieces/wikipedia/wR.png -------------------------------------------------------------------------------- /examples/whisper.android.java/app/src/main/res/mipmap-hdpi/ic_launcher_round.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Mozer/talk-llama-fast/HEAD/examples/whisper.android.java/app/src/main/res/mipmap-hdpi/ic_launcher_round.png -------------------------------------------------------------------------------- /examples/whisper.android.java/app/src/main/res/mipmap-mdpi/ic_launcher_round.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Mozer/talk-llama-fast/HEAD/examples/whisper.android.java/app/src/main/res/mipmap-mdpi/ic_launcher_round.png -------------------------------------------------------------------------------- /ggml/src/ggml-cuda/conv-transpose-1d.cuh: -------------------------------------------------------------------------------- 1 | #include "common.cuh" 2 | 3 | #define CUDA_CONV_TRANPOSE_1D_BLOCK_SIZE 256 4 | 5 | void ggml_cuda_op_conv_transpose_1d(ggml_backend_cuda_context & ctx, ggml_tensor * dst); 6 | -------------------------------------------------------------------------------- /ggml/src/ggml-cuda/template-instances/mmq-instance-iq1_s.cu: -------------------------------------------------------------------------------- 1 | // This file has been autogenerated by generate_cu_files.py, do not edit manually. 
2 | 3 | #include "../mmq.cuh" 4 | 5 | DECL_MMQ_CASE(GGML_TYPE_IQ1_S); 6 | -------------------------------------------------------------------------------- /ggml/src/ggml-cuda/template-instances/mmq-instance-iq2_s.cu: -------------------------------------------------------------------------------- 1 | // This file has been autogenerated by generate_cu_files.py, do not edit manually. 2 | 3 | #include "../mmq.cuh" 4 | 5 | DECL_MMQ_CASE(GGML_TYPE_IQ2_S); 6 | -------------------------------------------------------------------------------- /ggml/src/ggml-cuda/template-instances/mmq-instance-iq3_s.cu: -------------------------------------------------------------------------------- 1 | // This file has been autogenerated by generate_cu_files.py, do not edit manually. 2 | 3 | #include "../mmq.cuh" 4 | 5 | DECL_MMQ_CASE(GGML_TYPE_IQ3_S); 6 | -------------------------------------------------------------------------------- /ggml/src/ggml-cuda/template-instances/mmq-instance-q2_k.cu: -------------------------------------------------------------------------------- 1 | // This file has been autogenerated by generate_cu_files.py, do not edit manually. 2 | 3 | #include "../mmq.cuh" 4 | 5 | DECL_MMQ_CASE(GGML_TYPE_Q2_K); 6 | -------------------------------------------------------------------------------- /ggml/src/ggml-cuda/template-instances/mmq-instance-q3_k.cu: -------------------------------------------------------------------------------- 1 | // This file has been autogenerated by generate_cu_files.py, do not edit manually. 2 | 3 | #include "../mmq.cuh" 4 | 5 | DECL_MMQ_CASE(GGML_TYPE_Q3_K); 6 | -------------------------------------------------------------------------------- /ggml/src/ggml-cuda/template-instances/mmq-instance-q4_0.cu: -------------------------------------------------------------------------------- 1 | // This file has been autogenerated by generate_cu_files.py, do not edit manually. 
2 | 3 | #include "../mmq.cuh" 4 | 5 | DECL_MMQ_CASE(GGML_TYPE_Q4_0); 6 | -------------------------------------------------------------------------------- /ggml/src/ggml-cuda/template-instances/mmq-instance-q4_1.cu: -------------------------------------------------------------------------------- 1 | // This file has been autogenerated by generate_cu_files.py, do not edit manually. 2 | 3 | #include "../mmq.cuh" 4 | 5 | DECL_MMQ_CASE(GGML_TYPE_Q4_1); 6 | -------------------------------------------------------------------------------- /ggml/src/ggml-cuda/template-instances/mmq-instance-q4_k.cu: -------------------------------------------------------------------------------- 1 | // This file has been autogenerated by generate_cu_files.py, do not edit manually. 2 | 3 | #include "../mmq.cuh" 4 | 5 | DECL_MMQ_CASE(GGML_TYPE_Q4_K); 6 | -------------------------------------------------------------------------------- /ggml/src/ggml-cuda/template-instances/mmq-instance-q5_0.cu: -------------------------------------------------------------------------------- 1 | // This file has been autogenerated by generate_cu_files.py, do not edit manually. 2 | 3 | #include "../mmq.cuh" 4 | 5 | DECL_MMQ_CASE(GGML_TYPE_Q5_0); 6 | -------------------------------------------------------------------------------- /ggml/src/ggml-cuda/template-instances/mmq-instance-q5_1.cu: -------------------------------------------------------------------------------- 1 | // This file has been autogenerated by generate_cu_files.py, do not edit manually. 2 | 3 | #include "../mmq.cuh" 4 | 5 | DECL_MMQ_CASE(GGML_TYPE_Q5_1); 6 | -------------------------------------------------------------------------------- /ggml/src/ggml-cuda/template-instances/mmq-instance-q5_k.cu: -------------------------------------------------------------------------------- 1 | // This file has been autogenerated by generate_cu_files.py, do not edit manually. 
2 | 3 | #include "../mmq.cuh" 4 | 5 | DECL_MMQ_CASE(GGML_TYPE_Q5_K); 6 | -------------------------------------------------------------------------------- /ggml/src/ggml-cuda/template-instances/mmq-instance-q6_k.cu: -------------------------------------------------------------------------------- 1 | // This file has been autogenerated by generate_cu_files.py, do not edit manually. 2 | 3 | #include "../mmq.cuh" 4 | 5 | DECL_MMQ_CASE(GGML_TYPE_Q6_K); 6 | -------------------------------------------------------------------------------- /ggml/src/ggml-cuda/template-instances/mmq-instance-q8_0.cu: -------------------------------------------------------------------------------- 1 | // This file has been autogenerated by generate_cu_files.py, do not edit manually. 2 | 3 | #include "../mmq.cuh" 4 | 5 | DECL_MMQ_CASE(GGML_TYPE_Q8_0); 6 | -------------------------------------------------------------------------------- /examples/whisper.android.java/app/src/main/res/mipmap-xhdpi/ic_launcher_round.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Mozer/talk-llama-fast/HEAD/examples/whisper.android.java/app/src/main/res/mipmap-xhdpi/ic_launcher_round.png -------------------------------------------------------------------------------- /examples/whisper.android.java/app/src/main/res/mipmap-xxhdpi/ic_launcher_round.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Mozer/talk-llama-fast/HEAD/examples/whisper.android.java/app/src/main/res/mipmap-xxhdpi/ic_launcher_round.png -------------------------------------------------------------------------------- /examples/whisper.android.java/app/src/main/res/mipmap-xxxhdpi/ic_launcher_round.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/Mozer/talk-llama-fast/HEAD/examples/whisper.android.java/app/src/main/res/mipmap-xxxhdpi/ic_launcher_round.png -------------------------------------------------------------------------------- /ggml/src/ggml-cpu/ggml-cpu-hbm.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include "ggml-backend.h" 4 | #include "ggml.h" 5 | 6 | // GGML CPU internal header 7 | 8 | ggml_backend_buffer_type_t ggml_backend_cpu_hbm_buffer_type(void); 9 | -------------------------------------------------------------------------------- /ggml/src/ggml-cuda/template-instances/mmq-instance-iq2_xs.cu: -------------------------------------------------------------------------------- 1 | // This file has been autogenerated by generate_cu_files.py, do not edit manually. 2 | 3 | #include "../mmq.cuh" 4 | 5 | DECL_MMQ_CASE(GGML_TYPE_IQ2_XS); 6 | -------------------------------------------------------------------------------- /ggml/src/ggml-cuda/template-instances/mmq-instance-iq2_xxs.cu: -------------------------------------------------------------------------------- 1 | // This file has been autogenerated by generate_cu_files.py, do not edit manually. 2 | 3 | #include "../mmq.cuh" 4 | 5 | DECL_MMQ_CASE(GGML_TYPE_IQ2_XXS); 6 | -------------------------------------------------------------------------------- /ggml/src/ggml-cuda/template-instances/mmq-instance-iq3_xxs.cu: -------------------------------------------------------------------------------- 1 | // This file has been autogenerated by generate_cu_files.py, do not edit manually. 
2 | 3 | #include "../mmq.cuh" 4 | 5 | DECL_MMQ_CASE(GGML_TYPE_IQ3_XXS); 6 | -------------------------------------------------------------------------------- /ggml/src/ggml-cuda/template-instances/mmq-instance-iq4_nl.cu: -------------------------------------------------------------------------------- 1 | // This file has been autogenerated by generate_cu_files.py, do not edit manually. 2 | 3 | #include "../mmq.cuh" 4 | 5 | DECL_MMQ_CASE(GGML_TYPE_IQ4_NL); 6 | -------------------------------------------------------------------------------- /ggml/src/ggml-cuda/template-instances/mmq-instance-iq4_xs.cu: -------------------------------------------------------------------------------- 1 | // This file has been autogenerated by generate_cu_files.py, do not edit manually. 2 | 3 | #include "../mmq.cuh" 4 | 5 | DECL_MMQ_CASE(GGML_TYPE_IQ4_XS); 6 | -------------------------------------------------------------------------------- /ggml/src/ggml-cpu/ggml-cpu-aarch64.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include "ggml-cpu-traits.h" 4 | #include "ggml.h" 5 | 6 | // GGML internal header 7 | 8 | ggml_backend_buffer_type_t ggml_backend_cpu_aarch64_buffer_type(void); 9 | -------------------------------------------------------------------------------- /examples/quantize/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | set(TARGET quantize) 2 | add_executable(${TARGET} quantize.cpp) 3 | 4 | include(DefaultTargetOptions) 5 | 6 | target_link_libraries(${TARGET} PRIVATE common whisper ${CMAKE_THREAD_LIBS_INIT}) 7 | -------------------------------------------------------------------------------- /examples/whisper.android/.idea/compiler.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | -------------------------------------------------------------------------------- 
/examples/whisper.android/app/src/main/res/values/themes.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 16 | -------------------------------------------------------------------------------- /ggml/src/ggml-kompute/kompute-shaders/op_diagmask.comp: -------------------------------------------------------------------------------- 1 | #version 450 2 | 3 | #include "common.comp" 4 | 5 | layout(local_size_x = 1) in; 6 | 7 | layout(binding = 0) buffer restrict readonly tensorIn { float in_[]; }; 8 | layout(binding = 1) buffer restrict writeonly tensorOut { float out_[]; }; 9 | 10 | layout(push_constant) uniform PushConstants { 11 | uint inOff; 12 | uint outOff; 13 | uint n_past; 14 | int ne00; 15 | int ne01; 16 | } pcs; 17 | 18 | void main() { 19 | const uint i02 = gl_WorkGroupID.z; 20 | const uint i01 = gl_WorkGroupID.y; 21 | const uint i00 = gl_WorkGroupID.x; 22 | 23 | const uint index = i02*pcs.ne01*pcs.ne00 + i01*pcs.ne00 + i00; 24 | 25 | if (i00 > pcs.n_past + i01) { 26 | out_[index + pcs.outOff] = uintBitsToFloat(0xFF800000); 27 | } else { 28 | out_[index + pcs.outOff] = in_[index + pcs.inOff]; 29 | } 30 | } 31 | -------------------------------------------------------------------------------- /examples/whisper.android.java/app/src/main/res/values-night/themes.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 16 | -------------------------------------------------------------------------------- /ggml/src/ggml-vulkan/vulkan-shaders/gelu.comp: -------------------------------------------------------------------------------- 1 | #version 450 2 | 3 | #include "generic_head.comp" 4 | #include "types.comp" 5 | 6 | #extension GL_EXT_control_flow_attributes : enable 7 | 8 | layout(local_size_x = 512, local_size_y = 1, local_size_z = 1) in; 9 | 10 | layout (binding = 0) readonly buffer X {A_TYPE data_a[];}; 11 | layout (binding = 1) writeonly buffer D {D_TYPE data_d[];}; 12 | 
13 | void main() { 14 | const float GELU_COEF_A = 0.044715f; 15 | const float SQRT_2_OVER_PI = 0.79788456080286535587989211986876f; 16 | const uint i = gl_GlobalInvocationID.z * 262144 + gl_GlobalInvocationID.y * 512 + gl_GlobalInvocationID.x; 17 | 18 | if (i >= p.KX) { 19 | return; 20 | } 21 | 22 | const float xi = float(data_a[i]); 23 | const float val = SQRT_2_OVER_PI*xi*(1.0f + GELU_COEF_A*xi*xi); 24 | data_d[i] = D_TYPE(0.5f*xi*(2.0f - 2.0f / (exp(2 * val) + 1))); 25 | } 26 | -------------------------------------------------------------------------------- /ggml/src/ggml-vulkan/vulkan-shaders/silu_back.comp: -------------------------------------------------------------------------------- 1 | #version 450 2 | 3 | #include "generic_head.comp" 4 | #include "types.comp" 5 | 6 | #extension GL_EXT_control_flow_attributes : enable 7 | 8 | layout(local_size_x = 512, local_size_y = 1, local_size_z = 1) in; 9 | 10 | layout (binding = 0) readonly buffer G {A_TYPE data_g[];}; 11 | layout (binding = 1) readonly buffer X {B_TYPE data_x[];}; 12 | layout (binding = 2) writeonly buffer D {D_TYPE data_d[];}; 13 | 14 | void main() { 15 | const uint i = gl_GlobalInvocationID.z * 262144 + gl_GlobalInvocationID.y * 512 + gl_GlobalInvocationID.x; 16 | 17 | if (i >= p.KX) { 18 | return; 19 | } 20 | 21 | // Compute derivative of SiLU(x): 1/(1+exp(-x)) + x*exp(-x)/(1+exp(-x))^2 22 | 23 | const float xi = float(data_x[i]); 24 | const float s = 1.0f / (1.0f + exp(-xi)); 25 | data_d[i] = D_TYPE(data_g[i] * (s + xi * s * (1 - s))); 26 | } 27 | -------------------------------------------------------------------------------- /examples/addon.node/__test__/whisper.spec.js: -------------------------------------------------------------------------------- 1 | const path = require("path"); 2 | const { whisper } = require(path.join( 3 | __dirname, 4 | "../../../build/Release/addon.node" 5 | )); 6 | const { promisify } = require("util"); 7 | 8 | const whisperAsync = promisify(whisper); 9 | 10 | const 
whisperParamsMock = { 11 | language: "en", 12 | model: path.join(__dirname, "../../../models/ggml-base.en.bin"), 13 | fname_inp: path.join(__dirname, "../../../samples/jfk.wav"), 14 | use_gpu: true, 15 | flash_attn: false, 16 | no_prints: true, 17 | comma_in_time: false, 18 | translate: true, 19 | no_timestamps: false, 20 | audio_ctx: 0, 21 | }; 22 | 23 | describe("Run whisper.node", () => { 24 | test("it should receive a non-empty value", async () => { 25 | let result = await whisperAsync(whisperParamsMock); 26 | 27 | expect(result.length).toBeGreaterThan(0); 28 | }, 10000); 29 | }); 30 | 31 | -------------------------------------------------------------------------------- /examples/talk-llama/llama-sampling.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | // TODO: rename llama-sampling.h/.cpp to llama-sampler.h/.cpp ? 4 | 5 | #include "llama.h" 6 | 7 | #include 8 | 9 | struct llama_vocab; 10 | struct llama_grammar; 11 | 12 | // sampler chain 13 | 14 | struct llama_sampler_chain { 15 | llama_sampler_chain_params params; 16 | 17 | std::vector samplers; 18 | 19 | // timing 20 | 21 | mutable int64_t t_sample_us; 22 | 23 | mutable int32_t n_sample; 24 | }; 25 | 26 | struct llama_sampler * llama_sampler_init_dry_testing( 27 | int32_t context_size, 28 | float dry_multiplier, 29 | float dry_base, 30 | int32_t dry_allowed_length, 31 | int32_t dry_penalty_last_n, 32 | const std::vector>& seq_breakers); 33 | -------------------------------------------------------------------------------- /examples/whisper.android/.idea/gradle.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 20 | 21 | -------------------------------------------------------------------------------- /ggml/src/ggml-vulkan/vulkan-shaders/div.comp: -------------------------------------------------------------------------------- 1 | #version 450 2 | 3 | #include "types.comp" 4 | #include 
"generic_binary_head.comp" 5 | 6 | const uint num_threads = 256; 7 | 8 | layout(local_size_x = num_threads, local_size_y = 1, local_size_z = 1) in; 9 | 10 | void main() { 11 | uint idx = get_idx(); 12 | 13 | // num_threads * num_iter must equal 512, to match the wg_denoms and get_idx calculation 14 | const uint num_iter = 2; 15 | 16 | [[unroll]] for (uint i = 0; i < num_iter; ++i) { 17 | if (idx >= p.ne) { 18 | continue; 19 | } 20 | uint i00, i01, i02, i03; 21 | get_indices(idx, i00, i01, i02, i03); 22 | 23 | data_d[get_doffset() + dst_idx(i00, i01, i02, i03)] = D_TYPE(FLOAT_TYPE(data_a[get_aoffset() + src0_idx(i00, i01, i02, i03)]) / FLOAT_TYPE(data_b[get_boffset() + src1_idx(i00, i01, i02, i03)])); 24 | 25 | idx += num_threads; 26 | } 27 | } 28 | -------------------------------------------------------------------------------- /ggml/src/ggml-vulkan/vulkan-shaders/mul.comp: -------------------------------------------------------------------------------- 1 | #version 450 2 | 3 | #include "types.comp" 4 | #include "generic_binary_head.comp" 5 | 6 | const uint num_threads = 256; 7 | 8 | layout(local_size_x = num_threads, local_size_y = 1, local_size_z = 1) in; 9 | 10 | void main() { 11 | uint idx = get_idx(); 12 | 13 | // num_threads * num_iter must equal 512, to match the wg_denoms and get_idx calculation 14 | const uint num_iter = 2; 15 | 16 | [[unroll]] for (uint i = 0; i < num_iter; ++i) { 17 | if (idx >= p.ne) { 18 | continue; 19 | } 20 | uint i00, i01, i02, i03; 21 | get_indices(idx, i00, i01, i02, i03); 22 | 23 | data_d[get_doffset() + dst_idx(i00, i01, i02, i03)] = D_TYPE(FLOAT_TYPE(data_a[get_aoffset() + src0_idx(i00, i01, i02, i03)]) * FLOAT_TYPE(data_b[get_boffset() + src1_idx(i00, i01, i02, i03)])); 24 | 25 | idx += num_threads; 26 | } 27 | } 28 | -------------------------------------------------------------------------------- /ggml/src/ggml-sycl/mmvq.hpp: -------------------------------------------------------------------------------- 1 | // 2 | // MIT 
license 3 | // Copyright (C) 2024 Intel Corporation 4 | // SPDX-License-Identifier: MIT 5 | // 6 | 7 | // 8 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 9 | // See https://llvm.org/LICENSE.txt for license information. 10 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 11 | // 12 | 13 | #ifndef GGML_SYCL_MMVQ_HPP 14 | #define GGML_SYCL_MMVQ_HPP 15 | 16 | #include "common.hpp" 17 | 18 | 19 | void ggml_sycl_op_mul_mat_vec_q( 20 | ggml_backend_sycl_context & ctx, 21 | const ggml_tensor *src0, const ggml_tensor *src1, ggml_tensor *dst, 22 | const char *src0_dd_i, const float *src1_ddf_i, const char *src1_ddq_i, 23 | float *dst_dd_i, const int64_t row_low, const int64_t row_high, 24 | const int64_t src1_ncols, const int64_t src1_padded_row_size, 25 | const dpct::queue_ptr &stream); 26 | 27 | #endif // GGML_SYCL_MMVQ_HPP 28 | -------------------------------------------------------------------------------- /bindings/go/pkg/whisper/consts.go: -------------------------------------------------------------------------------- 1 | package whisper 2 | 3 | import ( 4 | "errors" 5 | 6 | // Bindings 7 | whisper "github.com/ggerganov/whisper.cpp/bindings/go" 8 | ) 9 | 10 | /////////////////////////////////////////////////////////////////////////////// 11 | // ERRORS 12 | 13 | var ( 14 | ErrUnableToLoadModel = errors.New("unable to load model") 15 | ErrInternalAppError = errors.New("internal application error") 16 | ErrProcessingFailed = errors.New("processing failed") 17 | ErrUnsupportedLanguage = errors.New("unsupported language") 18 | ErrModelNotMultilingual = errors.New("model is not multilingual") 19 | ) 20 | 21 | /////////////////////////////////////////////////////////////////////////////// 22 | // CONSTANTS 23 | 24 | // SampleRate is the sample rate of the audio data. 25 | const SampleRate = whisper.SampleRate 26 | 27 | // SampleBits is the number of bits per sample. 
28 | const SampleBits = whisper.SampleBits 29 | -------------------------------------------------------------------------------- /ggml/src/ggml-sycl/dmmv.hpp: -------------------------------------------------------------------------------- 1 | // 2 | // MIT license 3 | // Copyright (C) 2024 Intel Corporation 4 | // SPDX-License-Identifier: MIT 5 | // 6 | 7 | // 8 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 9 | // See https://llvm.org/LICENSE.txt for license information. 10 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 11 | // 12 | 13 | #ifndef GGML_SYCL_DMMV_HPP 14 | #define GGML_SYCL_DMMV_HPP 15 | 16 | #include "common.hpp" 17 | 18 | 19 | void ggml_sycl_op_dequantize_mul_mat_vec( 20 | ggml_backend_sycl_context & ctx, 21 | const ggml_tensor *src0, const ggml_tensor *src1, ggml_tensor *dst, 22 | const char *src0_dd_i, const float *src1_ddf_i, const char *src1_ddq_i, 23 | float *dst_dd_i, const int64_t row_low, const int64_t row_high, 24 | const int64_t src1_ncols, const int64_t src1_padded_row_size, 25 | const dpct::queue_ptr &stream); 26 | 27 | #endif // GGML_SYCL_DMMV_HPP 28 | -------------------------------------------------------------------------------- /ggml/src/ggml-kompute/kompute-shaders/op_getrows_f32.comp: -------------------------------------------------------------------------------- 1 | #version 450 2 | 3 | #include "common.comp" 4 | 5 | layout(local_size_x = 1) in; 6 | 7 | layout (binding = 0) readonly buffer tensorInA { float inA[]; }; 8 | layout (binding = 1) readonly buffer tensorInB { int inB[]; }; 9 | layout (binding = 2) writeonly buffer tensorOut { float out_[]; }; 10 | 11 | layout (push_constant) uniform parameter { 12 | uint inAOff; 13 | uint inBOff; 14 | uint outOff; 15 | int ne00; 16 | int nb01; 17 | int nb1; 18 | } pcs; 19 | 20 | void dequantize_row_f32(uint x /*Based from inA unaligned*/, uint y /*Based from out_*/, int k) { 21 | for (int j = 0; j < k; j++) { 22 | out_[y + j] = inA[x + 
j]; 23 | } 24 | } 25 | 26 | void main() { 27 | const uint i = gl_WorkGroupID.x; 28 | const int r = inB[i + pcs.inBOff]; 29 | 30 | dequantize_row_f32(r*pcs.nb01/4 + pcs.inAOff, i*pcs.nb1/4 + pcs.outOff, pcs.ne00); 31 | } 32 | -------------------------------------------------------------------------------- /tests/en-2-ref.txt: -------------------------------------------------------------------------------- 1 | This is the Micro Machine Man presenting the most midget miniature motorcade of Micro Machines. Each one has dramatic details, terrific trim, precision paint jobs, plus incredible Micro Machine Pocket Playsets. There's a police station, fire station, restaurant, service station, and more. Perfect pocket portables to take anyplace. And there are many miniature playsets to play with, and each one comes with its own special edition Micro Machine vehicle and fun, fantastic features that miraculously move. Raise the boat lift at the airport marina, man the gun turret at the army base, clean your car at the car wash, raise the toll bridge. And these playsets fit together to form a Micro Machine world. Micro Machine Pocket Playsets, so tremendously tiny, so perfectly precise, so dazzlingly detailed, you'll want to pocket them all. Micro Machines are Micro Machine Pocket Playsets sold separately from Galoob. The smaller they are, the better they are. 
-------------------------------------------------------------------------------- /examples/whisper.objc/whisper.objc/Info.plist: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | NSMicrophoneUsageDescription 6 | This app requires microphone access in order to transcribe speech 7 | UIApplicationSceneManifest 8 | 9 | UIApplicationSupportsMultipleScenes 10 | 11 | UISceneConfigurations 12 | 13 | UIWindowSceneSessionRoleApplication 14 | 15 | 16 | UISceneConfigurationName 17 | Default Configuration 18 | UISceneDelegateClassName 19 | SceneDelegate 20 | UISceneStoryboardFile 21 | Main 22 | 23 | 24 | 25 | 26 | 27 | 28 | -------------------------------------------------------------------------------- /ggml/src/ggml-sycl/convert.hpp: -------------------------------------------------------------------------------- 1 | // 2 | // MIT license 3 | // Copyright (C) 2024 Intel Corporation 4 | // SPDX-License-Identifier: MIT 5 | // 6 | 7 | // 8 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 9 | // See https://llvm.org/LICENSE.txt for license information. 
10 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 11 | // 12 | 13 | #ifndef GGML_SYCL_CONVERT_HPP 14 | #define GGML_SYCL_CONVERT_HPP 15 | 16 | #include "common.hpp" 17 | 18 | template 19 | using to_t_sycl_t = void (*)(const void *__restrict__ x, T *__restrict__ y, 20 | int64_t k, dpct::queue_ptr stream); 21 | typedef to_t_sycl_t to_fp32_sycl_t; 22 | typedef to_t_sycl_t to_fp16_sycl_t; 23 | 24 | to_fp16_sycl_t ggml_get_to_fp16_sycl(ggml_type type, ggml_tensor *dst); 25 | to_fp32_sycl_t ggml_get_to_fp32_sycl(ggml_type type, ggml_tensor *dst); 26 | 27 | #endif // GGML_SYCL_CONVERT_HPP 28 | -------------------------------------------------------------------------------- /ggml/src/ggml-vulkan/vulkan-shaders/repeat.comp: -------------------------------------------------------------------------------- 1 | #version 450 2 | 3 | #include "types.comp" 4 | #include "generic_unary_head.comp" 5 | 6 | layout(local_size_x = 512, local_size_y = 1, local_size_z = 1) in; 7 | 8 | uint src0_idx_mod(uint idx) { 9 | const uint i13 = idx / (p.ne12*p.ne11*p.ne10); 10 | const uint i13_offset = i13 * p.ne12*p.ne11*p.ne10; 11 | const uint i12 = (idx - i13_offset) / (p.ne11*p.ne10); 12 | const uint i12_offset = i12*p.ne11*p.ne10; 13 | const uint i11 = (idx - i13_offset - i12_offset) / p.ne10; 14 | const uint i10 = idx - i13_offset - i12_offset - i11*p.ne10; 15 | return (i13 % p.ne03)*p.nb03 + (i12 % p.ne02)*p.nb02 + (i11 % p.ne01)*p.nb01 + (i10 % p.ne00)*p.nb00; 16 | } 17 | 18 | void main() { 19 | const uint idx = get_idx(); 20 | 21 | if (idx >= p.ne) { 22 | return; 23 | } 24 | 25 | data_d[get_doffset() + dst_idx(idx)] = D_TYPE(data_a[get_aoffset() + src0_idx_mod(idx)]); 26 | } 27 | -------------------------------------------------------------------------------- /examples/talk-llama/llama-memory.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include "llama.h" 4 | 5 | // general concept of LLM memory 6 | // the KV 
cache is a type of LLM memory, but there can be other types 7 | class llama_memory_i { 8 | public: 9 | virtual void clear() = 0; 10 | virtual void defrag() = 0; 11 | 12 | virtual bool seq_rm (llama_seq_id seq_id, llama_pos p0, llama_pos p1) = 0; 13 | virtual void seq_cp (llama_seq_id seq_id_src, llama_seq_id seq_id_dst, llama_pos p0, llama_pos p1) = 0; 14 | virtual void seq_keep(llama_seq_id seq_id) = 0; 15 | virtual void seq_add (llama_seq_id seq_id, llama_pos p0, llama_pos p1, llama_pos delta) = 0; 16 | virtual void seq_div (llama_seq_id seq_id, llama_pos p0, llama_pos p1, int d) = 0; 17 | 18 | virtual llama_pos seq_pos_max(llama_seq_id seq_id) = 0; 19 | 20 | virtual bool get_can_edit() const = 0; 21 | }; 22 | -------------------------------------------------------------------------------- /ggml/src/ggml-sycl/backend.hpp: -------------------------------------------------------------------------------- 1 | // 2 | // MIT license 3 | // Copyright (C) 2024 Intel Corporation 4 | // SPDX-License-Identifier: MIT 5 | // 6 | 7 | // 8 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 9 | // See https://llvm.org/LICENSE.txt for license information. 
10 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 11 | // 12 | 13 | #ifndef GGML_SYCL_BACKEND_HPP 14 | #define GGML_SYCL_BACKEND_HPP 15 | 16 | #include "concat.hpp" 17 | #include "common.hpp" 18 | #include "conv.hpp" 19 | #include "convert.hpp" 20 | #include "dequantize.hpp" 21 | #include "dmmv.hpp" 22 | #include "mmq.hpp" 23 | #include "mmvq.hpp" 24 | #include "rope.hpp" 25 | #include "norm.hpp" 26 | #include "softmax.hpp" 27 | #include "tsembd.hpp" 28 | #include "im2col.hpp" 29 | #include "wkv.hpp" 30 | #include "outprod.hpp" 31 | #include "element_wise.hpp" 32 | #include "cpy.hpp" 33 | #include "gla.hpp" 34 | 35 | #endif // GGML_SYCL_BACKEND_HPP 36 | -------------------------------------------------------------------------------- /examples/talk-llama/llama-io.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include 5 | #include 6 | 7 | struct ggml_tensor; 8 | 9 | class llama_io_write_i { 10 | public: 11 | llama_io_write_i() = default; 12 | virtual ~llama_io_write_i() = default; 13 | 14 | virtual void write(const void * src, size_t size) = 0; 15 | virtual void write_tensor(const ggml_tensor * tensor, size_t offset, size_t size) = 0; 16 | 17 | // bytes written so far 18 | virtual size_t n_bytes() = 0; 19 | 20 | void write_string(const std::string & str); 21 | }; 22 | 23 | class llama_io_read_i { 24 | public: 25 | llama_io_read_i() = default; 26 | virtual ~llama_io_read_i() = default; 27 | 28 | virtual const uint8_t * read(size_t size) = 0; 29 | virtual void read_to(void * dst, size_t size) = 0; 30 | 31 | // bytes read so far 32 | virtual size_t n_bytes() = 0; 33 | 34 | void read_string(std::string & str); 35 | }; 36 | -------------------------------------------------------------------------------- /ggml/include/ggml-rpc.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include "ggml.h" 4 | #include "ggml-backend.h" 5 | 
6 | #ifdef __cplusplus 7 | extern "C" { 8 | #endif 9 | 10 | #define GGML_RPC_MAX_SERVERS 16 11 | 12 | // backend API 13 | GGML_BACKEND_API ggml_backend_t ggml_backend_rpc_init(const char * endpoint); 14 | GGML_BACKEND_API bool ggml_backend_is_rpc(ggml_backend_t backend); 15 | 16 | GGML_BACKEND_API ggml_backend_buffer_type_t ggml_backend_rpc_buffer_type(const char * endpoint); 17 | 18 | GGML_BACKEND_API void ggml_backend_rpc_get_device_memory(const char * endpoint, size_t * free, size_t * total); 19 | 20 | GGML_BACKEND_API void ggml_backend_rpc_start_server(ggml_backend_t backend, const char * endpoint, size_t free_mem, size_t total_mem); 21 | 22 | GGML_BACKEND_API ggml_backend_reg_t ggml_backend_rpc_reg(void); 23 | 24 | GGML_BACKEND_API ggml_backend_dev_t ggml_backend_rpc_add_device(const char * endpoint); 25 | 26 | #ifdef __cplusplus 27 | } 28 | #endif 29 | -------------------------------------------------------------------------------- /examples/wchess/wchess.wasm/chessboardjs-1.0.0/js/chessboard-1.0.0/package.json: -------------------------------------------------------------------------------- 1 | { 2 | "author": "Chris Oakman (http://chrisoakman.com/)", 3 | "name": "@chrisoakman/chessboardjs", 4 | "description": "JavaScript chessboard widget", 5 | "homepage": "https://chessboardjs.com", 6 | "license": "MIT", 7 | "version": "1.0.0", 8 | "repository": { 9 | "type": "git", 10 | "url": "git://github.com/oakmac/chessboardjs.git" 11 | }, 12 | "files": ["dist/"], 13 | "dependencies": { 14 | "jquery": ">=3.4.1" 15 | }, 16 | "devDependencies": { 17 | "csso": "3.5.1", 18 | "fs-plus": "3.1.1", 19 | "kidif": "1.1.0", 20 | "mustache": "2.3.0", 21 | "standard": "10.0.2", 22 | "uglify-js": "3.6.0" 23 | }, 24 | "scripts": { 25 | "build": "standard lib/chessboard.js && node scripts/build.js", 26 | "standard": "standard --fix lib/*.js website/js/*.js", 27 | "website": "node scripts/website.js" 28 | } 29 | } 30 | 
-------------------------------------------------------------------------------- /ggml/src/ggml-kompute/kompute-shaders/op_getrows_f16.comp: -------------------------------------------------------------------------------- 1 | #version 450 2 | 3 | #include "common.comp" 4 | 5 | layout(local_size_x = 1) in; 6 | 7 | layout (binding = 0) readonly buffer tensorInA { float16_t inA[]; }; 8 | layout (binding = 1) readonly buffer tensorInB { int inB[]; }; 9 | layout (binding = 2) writeonly buffer tensorOut { float out_[]; }; 10 | 11 | layout (push_constant) uniform parameter { 12 | uint inAOff; 13 | uint inBOff; 14 | uint outOff; 15 | int ne00; 16 | int nb01; 17 | int nb1; 18 | } pcs; 19 | 20 | void dequantize_row_f16(uint x /*Based from inA unaligned*/, uint y /*Based from out_*/, int k) { 21 | for (int j = 0; j < k; j++) { 22 | out_[y + j] = inA[x + j]; 23 | } 24 | } 25 | 26 | void main() { 27 | const uint i = gl_WorkGroupID.x; 28 | const int r = inB[i + pcs.inBOff]; 29 | 30 | dequantize_row_f16(r*pcs.nb01/2/*bytes for float16*/ + pcs.inAOff, i*pcs.nb1/4 + pcs.outOff, pcs.ne00); 31 | } 32 | -------------------------------------------------------------------------------- /ggml/src/ggml-vulkan/vulkan-shaders/count_equal.comp: -------------------------------------------------------------------------------- 1 | #version 450 2 | 3 | #extension GL_EXT_control_flow_attributes : enable 4 | 5 | #include "types.comp" 6 | #include "generic_head.comp" 7 | 8 | layout(local_size_x_id = 0, local_size_y = 1, local_size_z = 1) in; 9 | 10 | layout (binding = 0) readonly buffer X {A_TYPE data_a[];}; 11 | layout (binding = 1) readonly buffer Y {B_TYPE data_b[];}; 12 | layout (binding = 2) buffer D {D_TYPE data_d[];}; 13 | 14 | const uint CHUNK_SIZE = 512; 15 | 16 | void main() { 17 | const uint base = gl_WorkGroupID.x * CHUNK_SIZE; 18 | const uint col = gl_LocalInvocationID.x; 19 | 20 | uint count = 0; 21 | [[unroll]] 22 | for (uint i = 0; i < CHUNK_SIZE; i += gl_WorkGroupSize.x) { 23 | 
const uint idx = base + i + col; 24 | if (idx >= p.KX) { 25 | break; 26 | } 27 | count += uint(data_a[idx] == data_b[idx]); 28 | } 29 | 30 | atomicAdd(data_d[0], D_TYPE(count)); 31 | } 32 | --------------------------------------------------------------------------------