├── .github └── workflows │ └── build_and_publish.yml ├── .gitignore ├── .gitmodules ├── CMakeLists.txt ├── LICENSE ├── MANIFEST.in ├── README.md ├── pyproject.toml ├── setup.py ├── tests ├── __init__.py └── test.py └── whisper_cpp_python ├── __init__.py ├── server ├── __init__.py ├── __main__.py └── app.py ├── whisper.py └── whisper_types.py /.github/workflows/build_and_publish.yml: -------------------------------------------------------------------------------- 1 | name: Publish Package to Pypi 2 | on: 3 | push: 4 | branches: [ "main" ] 5 | jobs: 6 | build: 7 | runs-on: ubuntu-latest 8 | environment: 9 | name: pypi 10 | url: https://pypi.org/p/whisper-cpp-python 11 | permissions: 12 | id-token: write 13 | steps: 14 | - name: Checkout repository 15 | uses: actions/checkout@v3 16 | with: 17 | submodules: 'recursive' 18 | - name: Set up Python 19 | uses: actions/setup-python@v3 20 | with: 21 | python-version: "3.9" 22 | - name: Build package 23 | run: | 24 | python -m pip install --upgrade pip 25 | python -m pip install --upgrade build 26 | python -m build -s 27 | - name: Publish to Pypi 28 | uses: pypa/gh-action-pypi-publish@release/v1 29 | 30 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | build 2 | _skbuild 3 | dist 4 | whisper_cpp_python/whisper_cpp.py 5 | *.so 6 | *.dll 7 | *.dylib 8 | __pycache__/ 9 | -------------------------------------------------------------------------------- /.gitmodules: -------------------------------------------------------------------------------- 1 | [submodule "vendor/whisper.cpp"] 2 | path = vendor/whisper.cpp 3 | url = https://github.com/ggerganov/whisper.cpp.git 4 | [submodule "vendor/pycparser"] 5 | path = vendor/pycparser 6 | url = https://github.com/eliben/pycparser.git 7 | -------------------------------------------------------------------------------- /CMakeLists.txt: 
-------------------------------------------------------------------------------- 1 | cmake_minimum_required(VERSION 3.4...3.22) 2 | 3 | project(whisper_cpp_python) 4 | 5 | option(FORCE_CMAKE "Force CMake build of Python bindings" OFF) 6 | 7 | add_subdirectory(vendor/whisper.cpp) 8 | 9 | option(${BUILD_SHARED_LIBS} "ON") 10 | 11 | install(DIRECTORY ${CMAKE_INSTALL_PREFIX}/lib/ DESTINATION ${CMAKE_INSTALL_PREFIX}/whisper_cpp_python/ 12 | FILES_MATCHING PATTERN "*") 13 | 14 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2023 Georgi Gerganov 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | include CMakeLists.txt 2 | include MANIFEST.in 3 | include README.md 4 | include pyproject.toml 5 | include setup.py 6 | include vendor/pycparser/utils/fake_libc_include/X11/Intrinsic.h 7 | include vendor/pycparser/utils/fake_libc_include/X11/Xlib.h 8 | include vendor/pycparser/utils/fake_libc_include/X11/_X11_fake_defines.h 9 | include vendor/pycparser/utils/fake_libc_include/X11/_X11_fake_typedefs.h 10 | include vendor/pycparser/utils/fake_libc_include/_ansi.h 11 | include vendor/pycparser/utils/fake_libc_include/_fake_defines.h 12 | include vendor/pycparser/utils/fake_libc_include/_fake_typedefs.h 13 | include vendor/pycparser/utils/fake_libc_include/_syslist.h 14 | include vendor/pycparser/utils/fake_libc_include/aio.h 15 | include vendor/pycparser/utils/fake_libc_include/alloca.h 16 | include vendor/pycparser/utils/fake_libc_include/ar.h 17 | include vendor/pycparser/utils/fake_libc_include/argz.h 18 | include vendor/pycparser/utils/fake_libc_include/arpa/inet.h 19 | include vendor/pycparser/utils/fake_libc_include/asm-generic/int-ll64.h 20 | include vendor/pycparser/utils/fake_libc_include/assert.h 21 | include vendor/pycparser/utils/fake_libc_include/complex.h 22 | include vendor/pycparser/utils/fake_libc_include/cpio.h 23 | include vendor/pycparser/utils/fake_libc_include/ctype.h 24 | include vendor/pycparser/utils/fake_libc_include/dirent.h 25 | include vendor/pycparser/utils/fake_libc_include/dlfcn.h 26 | include vendor/pycparser/utils/fake_libc_include/emmintrin.h 27 | include vendor/pycparser/utils/fake_libc_include/endian.h 28 | include vendor/pycparser/utils/fake_libc_include/envz.h 29 | include vendor/pycparser/utils/fake_libc_include/errno.h 30 | include vendor/pycparser/utils/fake_libc_include/fastmath.h 31 | include vendor/pycparser/utils/fake_libc_include/fcntl.h 
32 | include vendor/pycparser/utils/fake_libc_include/features.h 33 | include vendor/pycparser/utils/fake_libc_include/fenv.h 34 | include vendor/pycparser/utils/fake_libc_include/float.h 35 | include vendor/pycparser/utils/fake_libc_include/fmtmsg.h 36 | include vendor/pycparser/utils/fake_libc_include/fnmatch.h 37 | include vendor/pycparser/utils/fake_libc_include/ftw.h 38 | include vendor/pycparser/utils/fake_libc_include/getopt.h 39 | include vendor/pycparser/utils/fake_libc_include/glob.h 40 | include vendor/pycparser/utils/fake_libc_include/grp.h 41 | include vendor/pycparser/utils/fake_libc_include/iconv.h 42 | include vendor/pycparser/utils/fake_libc_include/ieeefp.h 43 | include vendor/pycparser/utils/fake_libc_include/immintrin.h 44 | include vendor/pycparser/utils/fake_libc_include/inttypes.h 45 | include vendor/pycparser/utils/fake_libc_include/iso646.h 46 | include vendor/pycparser/utils/fake_libc_include/langinfo.h 47 | include vendor/pycparser/utils/fake_libc_include/libgen.h 48 | include vendor/pycparser/utils/fake_libc_include/libintl.h 49 | include vendor/pycparser/utils/fake_libc_include/limits.h 50 | include vendor/pycparser/utils/fake_libc_include/linux/socket.h 51 | include vendor/pycparser/utils/fake_libc_include/linux/version.h 52 | include vendor/pycparser/utils/fake_libc_include/locale.h 53 | include vendor/pycparser/utils/fake_libc_include/malloc.h 54 | include vendor/pycparser/utils/fake_libc_include/math.h 55 | include vendor/pycparser/utils/fake_libc_include/mir_toolkit/client_types.h 56 | include vendor/pycparser/utils/fake_libc_include/monetary.h 57 | include vendor/pycparser/utils/fake_libc_include/mqueue.h 58 | include vendor/pycparser/utils/fake_libc_include/ndbm.h 59 | include vendor/pycparser/utils/fake_libc_include/net/if.h 60 | include vendor/pycparser/utils/fake_libc_include/netdb.h 61 | include vendor/pycparser/utils/fake_libc_include/netinet/in.h 62 | include vendor/pycparser/utils/fake_libc_include/netinet/tcp.h 63 | 
include vendor/pycparser/utils/fake_libc_include/newlib.h 64 | include vendor/pycparser/utils/fake_libc_include/nl_types.h 65 | include vendor/pycparser/utils/fake_libc_include/openssl/err.h 66 | include vendor/pycparser/utils/fake_libc_include/openssl/evp.h 67 | include vendor/pycparser/utils/fake_libc_include/openssl/hmac.h 68 | include vendor/pycparser/utils/fake_libc_include/openssl/ssl.h 69 | include vendor/pycparser/utils/fake_libc_include/openssl/x509v3.h 70 | include vendor/pycparser/utils/fake_libc_include/paths.h 71 | include vendor/pycparser/utils/fake_libc_include/poll.h 72 | include vendor/pycparser/utils/fake_libc_include/process.h 73 | include vendor/pycparser/utils/fake_libc_include/pthread.h 74 | include vendor/pycparser/utils/fake_libc_include/pwd.h 75 | include vendor/pycparser/utils/fake_libc_include/reent.h 76 | include vendor/pycparser/utils/fake_libc_include/regdef.h 77 | include vendor/pycparser/utils/fake_libc_include/regex.h 78 | include vendor/pycparser/utils/fake_libc_include/sched.h 79 | include vendor/pycparser/utils/fake_libc_include/search.h 80 | include vendor/pycparser/utils/fake_libc_include/semaphore.h 81 | include vendor/pycparser/utils/fake_libc_include/setjmp.h 82 | include vendor/pycparser/utils/fake_libc_include/signal.h 83 | include vendor/pycparser/utils/fake_libc_include/smmintrin.h 84 | include vendor/pycparser/utils/fake_libc_include/spawn.h 85 | include vendor/pycparser/utils/fake_libc_include/stdalign.h 86 | include vendor/pycparser/utils/fake_libc_include/stdarg.h 87 | include vendor/pycparser/utils/fake_libc_include/stdatomic.h 88 | include vendor/pycparser/utils/fake_libc_include/stdbool.h 89 | include vendor/pycparser/utils/fake_libc_include/stddef.h 90 | include vendor/pycparser/utils/fake_libc_include/stdint.h 91 | include vendor/pycparser/utils/fake_libc_include/stdio.h 92 | include vendor/pycparser/utils/fake_libc_include/stdlib.h 93 | include vendor/pycparser/utils/fake_libc_include/stdnoreturn.h 94 | include 
vendor/pycparser/utils/fake_libc_include/string.h 95 | include vendor/pycparser/utils/fake_libc_include/strings.h 96 | include vendor/pycparser/utils/fake_libc_include/stropts.h 97 | include vendor/pycparser/utils/fake_libc_include/sys/ioctl.h 98 | include vendor/pycparser/utils/fake_libc_include/sys/ipc.h 99 | include vendor/pycparser/utils/fake_libc_include/sys/mman.h 100 | include vendor/pycparser/utils/fake_libc_include/sys/msg.h 101 | include vendor/pycparser/utils/fake_libc_include/sys/poll.h 102 | include vendor/pycparser/utils/fake_libc_include/sys/resource.h 103 | include vendor/pycparser/utils/fake_libc_include/sys/select.h 104 | include vendor/pycparser/utils/fake_libc_include/sys/sem.h 105 | include vendor/pycparser/utils/fake_libc_include/sys/shm.h 106 | include vendor/pycparser/utils/fake_libc_include/sys/socket.h 107 | include vendor/pycparser/utils/fake_libc_include/sys/stat.h 108 | include vendor/pycparser/utils/fake_libc_include/sys/statvfs.h 109 | include vendor/pycparser/utils/fake_libc_include/sys/sysctl.h 110 | include vendor/pycparser/utils/fake_libc_include/sys/time.h 111 | include vendor/pycparser/utils/fake_libc_include/sys/times.h 112 | include vendor/pycparser/utils/fake_libc_include/sys/types.h 113 | include vendor/pycparser/utils/fake_libc_include/sys/uio.h 114 | include vendor/pycparser/utils/fake_libc_include/sys/un.h 115 | include vendor/pycparser/utils/fake_libc_include/sys/utsname.h 116 | include vendor/pycparser/utils/fake_libc_include/sys/wait.h 117 | include vendor/pycparser/utils/fake_libc_include/syslog.h 118 | include vendor/pycparser/utils/fake_libc_include/tar.h 119 | include vendor/pycparser/utils/fake_libc_include/termios.h 120 | include vendor/pycparser/utils/fake_libc_include/tgmath.h 121 | include vendor/pycparser/utils/fake_libc_include/threads.h 122 | include vendor/pycparser/utils/fake_libc_include/time.h 123 | include vendor/pycparser/utils/fake_libc_include/trace.h 124 | include 
vendor/pycparser/utils/fake_libc_include/ulimit.h 125 | include vendor/pycparser/utils/fake_libc_include/unctrl.h 126 | include vendor/pycparser/utils/fake_libc_include/unistd.h 127 | include vendor/pycparser/utils/fake_libc_include/utime.h 128 | include vendor/pycparser/utils/fake_libc_include/utmp.h 129 | include vendor/pycparser/utils/fake_libc_include/utmpx.h 130 | include vendor/pycparser/utils/fake_libc_include/wchar.h 131 | include vendor/pycparser/utils/fake_libc_include/wctype.h 132 | include vendor/pycparser/utils/fake_libc_include/wordexp.h 133 | include vendor/pycparser/utils/fake_libc_include/xcb/xcb.h 134 | include vendor/pycparser/utils/fake_libc_include/zlib.h 135 | include vendor/whisper.cpp/.github/workflows/bindings-go.yml 136 | include vendor/whisper.cpp/.github/workflows/bindings-ruby.yml 137 | include vendor/whisper.cpp/.github/workflows/build.yml 138 | include vendor/whisper.cpp/.github/workflows/examples.yml 139 | include vendor/whisper.cpp/.gitignore 140 | include vendor/whisper.cpp/.gitmodules 141 | include vendor/whisper.cpp/CMakeLists.txt 142 | include vendor/whisper.cpp/LICENSE 143 | include vendor/whisper.cpp/Makefile 144 | include vendor/whisper.cpp/README.md 145 | include vendor/whisper.cpp/bindings/CMakeLists.txt 146 | include vendor/whisper.cpp/bindings/go/.gitignore 147 | include vendor/whisper.cpp/bindings/go/LICENSE 148 | include vendor/whisper.cpp/bindings/go/Makefile 149 | include vendor/whisper.cpp/bindings/go/README.md 150 | include vendor/whisper.cpp/bindings/go/doc.go 151 | include vendor/whisper.cpp/bindings/go/examples/go-model-download/context.go 152 | include vendor/whisper.cpp/bindings/go/examples/go-model-download/main.go 153 | include vendor/whisper.cpp/bindings/go/examples/go-whisper/color.go 154 | include vendor/whisper.cpp/bindings/go/examples/go-whisper/flags.go 155 | include vendor/whisper.cpp/bindings/go/examples/go-whisper/main.go 156 | include vendor/whisper.cpp/bindings/go/examples/go-whisper/process.go 157 
| include vendor/whisper.cpp/bindings/go/go.mod 158 | include vendor/whisper.cpp/bindings/go/go.sum 159 | include vendor/whisper.cpp/bindings/go/params.go 160 | include vendor/whisper.cpp/bindings/go/pkg/whisper/consts.go 161 | include vendor/whisper.cpp/bindings/go/pkg/whisper/context.go 162 | include vendor/whisper.cpp/bindings/go/pkg/whisper/context_test.go 163 | include vendor/whisper.cpp/bindings/go/pkg/whisper/doc.go 164 | include vendor/whisper.cpp/bindings/go/pkg/whisper/interface.go 165 | include vendor/whisper.cpp/bindings/go/pkg/whisper/model.go 166 | include vendor/whisper.cpp/bindings/go/samples/jfk.wav 167 | include vendor/whisper.cpp/bindings/go/whisper.go 168 | include vendor/whisper.cpp/bindings/go/whisper_test.go 169 | include vendor/whisper.cpp/bindings/ios/.github/workflows/swift.yml 170 | include vendor/whisper.cpp/bindings/ios/.gitignore 171 | include vendor/whisper.cpp/bindings/ios/LICENSE 172 | include vendor/whisper.cpp/bindings/ios/Makefile 173 | include vendor/whisper.cpp/bindings/ios/Makefile-tmpl 174 | include vendor/whisper.cpp/bindings/ios/Package.swift 175 | include vendor/whisper.cpp/bindings/ios/README.md 176 | include vendor/whisper.cpp/bindings/ios/Sources/test-objc/main.m 177 | include vendor/whisper.cpp/bindings/ios/Sources/test-swift/main.swift 178 | include vendor/whisper.cpp/bindings/ios/Sources/whisper/ggml.c 179 | include vendor/whisper.cpp/bindings/ios/Sources/whisper/ggml.h 180 | include vendor/whisper.cpp/bindings/ios/Sources/whisper/include/whisper.h 181 | include vendor/whisper.cpp/bindings/ios/Sources/whisper/whisper.cpp 182 | include vendor/whisper.cpp/bindings/ios/models/for-tests-ggml-base.en.bin 183 | include vendor/whisper.cpp/bindings/ios/publish-trigger 184 | include vendor/whisper.cpp/bindings/javascript/.gitignore 185 | include vendor/whisper.cpp/bindings/javascript/CMakeLists.txt 186 | include vendor/whisper.cpp/bindings/javascript/README.md 187 | include 
vendor/whisper.cpp/bindings/javascript/emscripten.cpp 188 | include vendor/whisper.cpp/bindings/javascript/libwhisper.worker.js 189 | include vendor/whisper.cpp/bindings/javascript/package-tmpl.json 190 | include vendor/whisper.cpp/bindings/javascript/package.json 191 | include vendor/whisper.cpp/bindings/javascript/whisper.js 192 | include vendor/whisper.cpp/bindings/ruby/ext/.gitignore 193 | include vendor/whisper.cpp/bindings/ruby/ext/extconf.rb 194 | include vendor/whisper.cpp/bindings/ruby/ext/ruby_whisper.cpp 195 | include vendor/whisper.cpp/bindings/ruby/ext/ruby_whisper.h 196 | include vendor/whisper.cpp/bindings/ruby/tests/test_whisper.rb 197 | include vendor/whisper.cpp/cmake/BuildTypes.cmake 198 | include vendor/whisper.cpp/cmake/DefaultTargetOptions.cmake 199 | include vendor/whisper.cpp/cmake/GitVars.cmake 200 | include vendor/whisper.cpp/coreml/whisper-decoder-impl.h 201 | include vendor/whisper.cpp/coreml/whisper-decoder-impl.m 202 | include vendor/whisper.cpp/coreml/whisper-encoder-impl.h 203 | include vendor/whisper.cpp/coreml/whisper-encoder-impl.m 204 | include vendor/whisper.cpp/coreml/whisper-encoder.h 205 | include vendor/whisper.cpp/coreml/whisper-encoder.mm 206 | include vendor/whisper.cpp/examples/CMakeLists.txt 207 | include vendor/whisper.cpp/examples/addon.node/.gitignore 208 | include vendor/whisper.cpp/examples/addon.node/CMakeLists.txt 209 | include vendor/whisper.cpp/examples/addon.node/README.md 210 | include vendor/whisper.cpp/examples/addon.node/__test__/whisper.spec.js 211 | include vendor/whisper.cpp/examples/addon.node/addon.cpp 212 | include vendor/whisper.cpp/examples/addon.node/index.js 213 | include vendor/whisper.cpp/examples/addon.node/package.json 214 | include vendor/whisper.cpp/examples/bench.wasm/CMakeLists.txt 215 | include vendor/whisper.cpp/examples/bench.wasm/README.md 216 | include vendor/whisper.cpp/examples/bench.wasm/emscripten.cpp 217 | include vendor/whisper.cpp/examples/bench.wasm/index-tmpl.html 218 | 
include vendor/whisper.cpp/examples/bench/CMakeLists.txt 219 | include vendor/whisper.cpp/examples/bench/README.md 220 | include vendor/whisper.cpp/examples/bench/bench.cpp 221 | include vendor/whisper.cpp/examples/command.wasm/CMakeLists.txt 222 | include vendor/whisper.cpp/examples/command.wasm/README.md 223 | include vendor/whisper.cpp/examples/command.wasm/emscripten.cpp 224 | include vendor/whisper.cpp/examples/command.wasm/index-tmpl.html 225 | include vendor/whisper.cpp/examples/command/CMakeLists.txt 226 | include vendor/whisper.cpp/examples/command/README.md 227 | include vendor/whisper.cpp/examples/command/command.cpp 228 | include vendor/whisper.cpp/examples/command/commands.txt 229 | include vendor/whisper.cpp/examples/common-ggml.cpp 230 | include vendor/whisper.cpp/examples/common-ggml.h 231 | include vendor/whisper.cpp/examples/common-sdl.cpp 232 | include vendor/whisper.cpp/examples/common-sdl.h 233 | include vendor/whisper.cpp/examples/common.cpp 234 | include vendor/whisper.cpp/examples/common.h 235 | include vendor/whisper.cpp/examples/dr_wav.h 236 | include vendor/whisper.cpp/examples/generate-karaoke.sh 237 | include vendor/whisper.cpp/examples/helpers.js 238 | include vendor/whisper.cpp/examples/livestream.sh 239 | include vendor/whisper.cpp/examples/main/CMakeLists.txt 240 | include vendor/whisper.cpp/examples/main/README.md 241 | include vendor/whisper.cpp/examples/main/main.cpp 242 | include vendor/whisper.cpp/examples/quantize/CMakeLists.txt 243 | include vendor/whisper.cpp/examples/quantize/README.md 244 | include vendor/whisper.cpp/examples/quantize/quantize.cpp 245 | include vendor/whisper.cpp/examples/stream.wasm/CMakeLists.txt 246 | include vendor/whisper.cpp/examples/stream.wasm/README.md 247 | include vendor/whisper.cpp/examples/stream.wasm/emscripten.cpp 248 | include vendor/whisper.cpp/examples/stream.wasm/index-tmpl.html 249 | include vendor/whisper.cpp/examples/stream/CMakeLists.txt 250 | include 
vendor/whisper.cpp/examples/stream/README.md 251 | include vendor/whisper.cpp/examples/stream/stream.cpp 252 | include vendor/whisper.cpp/examples/talk-llama/.gitignore 253 | include vendor/whisper.cpp/examples/talk-llama/CMakeLists.txt 254 | include vendor/whisper.cpp/examples/talk-llama/README.md 255 | include vendor/whisper.cpp/examples/talk-llama/eleven-labs.py 256 | include vendor/whisper.cpp/examples/talk-llama/llama-util.h 257 | include vendor/whisper.cpp/examples/talk-llama/llama.cpp 258 | include vendor/whisper.cpp/examples/talk-llama/llama.h 259 | include vendor/whisper.cpp/examples/talk-llama/prompts/talk-alpaca.txt 260 | include vendor/whisper.cpp/examples/talk-llama/speak.sh 261 | include vendor/whisper.cpp/examples/talk-llama/talk-llama.cpp 262 | include vendor/whisper.cpp/examples/talk.wasm/CMakeLists.txt 263 | include vendor/whisper.cpp/examples/talk.wasm/README.md 264 | include vendor/whisper.cpp/examples/talk.wasm/emscripten.cpp 265 | include vendor/whisper.cpp/examples/talk.wasm/gpt-2.cpp 266 | include vendor/whisper.cpp/examples/talk.wasm/gpt-2.h 267 | include vendor/whisper.cpp/examples/talk.wasm/index-tmpl.html 268 | include vendor/whisper.cpp/examples/talk/.gitignore 269 | include vendor/whisper.cpp/examples/talk/CMakeLists.txt 270 | include vendor/whisper.cpp/examples/talk/README.md 271 | include vendor/whisper.cpp/examples/talk/eleven-labs.py 272 | include vendor/whisper.cpp/examples/talk/gpt-2.cpp 273 | include vendor/whisper.cpp/examples/talk/gpt-2.h 274 | include vendor/whisper.cpp/examples/talk/speak.sh 275 | include vendor/whisper.cpp/examples/talk/talk.cpp 276 | include vendor/whisper.cpp/examples/twitch.sh 277 | include vendor/whisper.cpp/examples/whisper.android/.gitignore 278 | include vendor/whisper.cpp/examples/whisper.android/.idea/.gitignore 279 | include vendor/whisper.cpp/examples/whisper.android/.idea/.name 280 | include vendor/whisper.cpp/examples/whisper.android/.idea/compiler.xml 281 | include 
vendor/whisper.cpp/examples/whisper.android/.idea/gradle.xml 282 | include vendor/whisper.cpp/examples/whisper.android/.idea/misc.xml 283 | include vendor/whisper.cpp/examples/whisper.android/.idea/vcs.xml 284 | include vendor/whisper.cpp/examples/whisper.android/README.md 285 | include vendor/whisper.cpp/examples/whisper.android/app/.gitignore 286 | include vendor/whisper.cpp/examples/whisper.android/app/build.gradle 287 | include vendor/whisper.cpp/examples/whisper.android/app/proguard-rules.pro 288 | include vendor/whisper.cpp/examples/whisper.android/app/src/androidTest/java/com/whispercppdemo/ExampleInstrumentedTest.kt 289 | include vendor/whisper.cpp/examples/whisper.android/app/src/main/AndroidManifest.xml 290 | include vendor/whisper.cpp/examples/whisper.android/app/src/main/java/com/whispercppdemo/MainActivity.kt 291 | include vendor/whisper.cpp/examples/whisper.android/app/src/main/java/com/whispercppdemo/media/RiffWaveHelper.kt 292 | include vendor/whisper.cpp/examples/whisper.android/app/src/main/java/com/whispercppdemo/recorder/Recorder.kt 293 | include vendor/whisper.cpp/examples/whisper.android/app/src/main/java/com/whispercppdemo/ui/main/MainScreen.kt 294 | include vendor/whisper.cpp/examples/whisper.android/app/src/main/java/com/whispercppdemo/ui/main/MainScreenViewModel.kt 295 | include vendor/whisper.cpp/examples/whisper.android/app/src/main/java/com/whispercppdemo/ui/theme/Color.kt 296 | include vendor/whisper.cpp/examples/whisper.android/app/src/main/java/com/whispercppdemo/ui/theme/Theme.kt 297 | include vendor/whisper.cpp/examples/whisper.android/app/src/main/java/com/whispercppdemo/ui/theme/Type.kt 298 | include vendor/whisper.cpp/examples/whisper.android/app/src/main/java/com/whispercppdemo/whisper/LibWhisper.kt 299 | include vendor/whisper.cpp/examples/whisper.android/app/src/main/jni/whisper/Android.mk 300 | include vendor/whisper.cpp/examples/whisper.android/app/src/main/jni/whisper/Application.mk 301 | include 
vendor/whisper.cpp/examples/whisper.android/app/src/main/jni/whisper/Whisper.mk 302 | include vendor/whisper.cpp/examples/whisper.android/app/src/main/jni/whisper/jni.c 303 | include vendor/whisper.cpp/examples/whisper.android/app/src/main/res/drawable/ic_launcher_background.xml 304 | include vendor/whisper.cpp/examples/whisper.android/app/src/main/res/drawable/ic_launcher_foreground.xml 305 | include vendor/whisper.cpp/examples/whisper.android/app/src/main/res/mipmap-anydpi/ic_launcher.xml 306 | include vendor/whisper.cpp/examples/whisper.android/app/src/main/res/values/colors.xml 307 | include vendor/whisper.cpp/examples/whisper.android/app/src/main/res/values/strings.xml 308 | include vendor/whisper.cpp/examples/whisper.android/app/src/main/res/values/themes.xml 309 | include vendor/whisper.cpp/examples/whisper.android/app/src/main/res/xml/backup_rules.xml 310 | include vendor/whisper.cpp/examples/whisper.android/app/src/main/res/xml/data_extraction_rules.xml 311 | include vendor/whisper.cpp/examples/whisper.android/app/src/test/java/com/whispercppdemo/ExampleUnitTest.kt 312 | include vendor/whisper.cpp/examples/whisper.android/build.gradle 313 | include vendor/whisper.cpp/examples/whisper.android/gradle.properties 314 | include vendor/whisper.cpp/examples/whisper.android/gradle/wrapper/gradle-wrapper.jar 315 | include vendor/whisper.cpp/examples/whisper.android/gradle/wrapper/gradle-wrapper.properties 316 | include vendor/whisper.cpp/examples/whisper.android/gradlew 317 | include vendor/whisper.cpp/examples/whisper.android/gradlew.bat 318 | include vendor/whisper.cpp/examples/whisper.android/settings.gradle 319 | include vendor/whisper.cpp/examples/whisper.nvim/README.md 320 | include vendor/whisper.cpp/examples/whisper.nvim/whisper.nvim 321 | include vendor/whisper.cpp/examples/whisper.objc/README.md 322 | include vendor/whisper.cpp/examples/whisper.objc/whisper.objc.xcodeproj/project.pbxproj 323 | include 
vendor/whisper.cpp/examples/whisper.objc/whisper.objc.xcodeproj/project.xcworkspace/contents.xcworkspacedata 324 | include vendor/whisper.cpp/examples/whisper.objc/whisper.objc.xcodeproj/project.xcworkspace/xcshareddata/IDEWorkspaceChecks.plist 325 | include vendor/whisper.cpp/examples/whisper.objc/whisper.objc/AppDelegate.h 326 | include vendor/whisper.cpp/examples/whisper.objc/whisper.objc/AppDelegate.m 327 | include vendor/whisper.cpp/examples/whisper.objc/whisper.objc/Assets.xcassets/AccentColor.colorset/Contents.json 328 | include vendor/whisper.cpp/examples/whisper.objc/whisper.objc/Assets.xcassets/AppIcon.appiconset/Contents.json 329 | include vendor/whisper.cpp/examples/whisper.objc/whisper.objc/Assets.xcassets/Contents.json 330 | include vendor/whisper.cpp/examples/whisper.objc/whisper.objc/Base.lproj/LaunchScreen.storyboard 331 | include vendor/whisper.cpp/examples/whisper.objc/whisper.objc/Base.lproj/Main.storyboard 332 | include vendor/whisper.cpp/examples/whisper.objc/whisper.objc/Info.plist 333 | include vendor/whisper.cpp/examples/whisper.objc/whisper.objc/SceneDelegate.h 334 | include vendor/whisper.cpp/examples/whisper.objc/whisper.objc/SceneDelegate.m 335 | include vendor/whisper.cpp/examples/whisper.objc/whisper.objc/ViewController.h 336 | include vendor/whisper.cpp/examples/whisper.objc/whisper.objc/ViewController.m 337 | include vendor/whisper.cpp/examples/whisper.objc/whisper.objc/main.m 338 | include vendor/whisper.cpp/examples/whisper.swiftui/README.md 339 | include vendor/whisper.cpp/examples/whisper.swiftui/whisper.cpp.swift/LibWhisper.swift 340 | include vendor/whisper.cpp/examples/whisper.swiftui/whisper.cpp.swift/WhisperCppDemo-Bridging-Header.h 341 | include vendor/whisper.cpp/examples/whisper.swiftui/whisper.swiftui.demo/Models/WhisperState.swift 342 | include vendor/whisper.cpp/examples/whisper.swiftui/whisper.swiftui.demo/Resources/models/.gitignore 343 | include 
vendor/whisper.cpp/examples/whisper.swiftui/whisper.swiftui.demo/Resources/samples/.gitignore 344 | include vendor/whisper.cpp/examples/whisper.swiftui/whisper.swiftui.demo/Supporting files/Assets.xcassets/AccentColor.colorset/Contents.json 345 | include vendor/whisper.cpp/examples/whisper.swiftui/whisper.swiftui.demo/Supporting files/Assets.xcassets/AppIcon.appiconset/Contents.json 346 | include vendor/whisper.cpp/examples/whisper.swiftui/whisper.swiftui.demo/Supporting files/Assets.xcassets/Contents.json 347 | include vendor/whisper.cpp/examples/whisper.swiftui/whisper.swiftui.demo/Supporting files/Preview Content/Preview Assets.xcassets/Contents.json 348 | include vendor/whisper.cpp/examples/whisper.swiftui/whisper.swiftui.demo/Supporting files/WhisperCppDemo.entitlements 349 | include vendor/whisper.cpp/examples/whisper.swiftui/whisper.swiftui.demo/UI/ContentView.swift 350 | include vendor/whisper.cpp/examples/whisper.swiftui/whisper.swiftui.demo/Utils/Recorder.swift 351 | include vendor/whisper.cpp/examples/whisper.swiftui/whisper.swiftui.demo/Utils/RiffWaveUtils.swift 352 | include vendor/whisper.cpp/examples/whisper.swiftui/whisper.swiftui.demo/WhisperCppDemoApp.swift 353 | include vendor/whisper.cpp/examples/whisper.swiftui/whisper.swiftui.xcodeproj/.gitignore 354 | include vendor/whisper.cpp/examples/whisper.swiftui/whisper.swiftui.xcodeproj/project.pbxproj 355 | include vendor/whisper.cpp/examples/whisper.swiftui/whisper.swiftui.xcodeproj/project.xcworkspace/.gitignore 356 | include vendor/whisper.cpp/examples/whisper.swiftui/whisper.swiftui.xcodeproj/project.xcworkspace/xcshareddata/IDEWorkspaceChecks.plist 357 | include vendor/whisper.cpp/examples/whisper.swiftui/whisper.swiftui.xcodeproj/xcshareddata/xcschemes/WhisperCppDemo.xcscheme 358 | include vendor/whisper.cpp/examples/whisper.wasm/CMakeLists.txt 359 | include vendor/whisper.cpp/examples/whisper.wasm/README.md 360 | include vendor/whisper.cpp/examples/whisper.wasm/emscripten.cpp 361 | include 
vendor/whisper.cpp/examples/whisper.wasm/index-tmpl.html 362 | include vendor/whisper.cpp/examples/yt-wsp.sh 363 | include vendor/whisper.cpp/extra/bench-all.sh 364 | include vendor/whisper.cpp/extra/bench-wts.sh 365 | include vendor/whisper.cpp/extra/convert-all.sh 366 | include vendor/whisper.cpp/extra/deploy-wasm.sh 367 | include vendor/whisper.cpp/extra/quantize-all.sh 368 | include vendor/whisper.cpp/extra/sha-all.sh 369 | include vendor/whisper.cpp/extra/sync-ggml.sh 370 | include vendor/whisper.cpp/ggml-cuda.cu 371 | include vendor/whisper.cpp/ggml-cuda.h 372 | include vendor/whisper.cpp/ggml-opencl.c 373 | include vendor/whisper.cpp/ggml-opencl.h 374 | include vendor/whisper.cpp/ggml.c 375 | include vendor/whisper.cpp/ggml.h 376 | include vendor/whisper.cpp/models/.gitignore 377 | include vendor/whisper.cpp/models/README.md 378 | include vendor/whisper.cpp/models/convert-h5-to-ggml.py 379 | include vendor/whisper.cpp/models/convert-pt-to-ggml.py 380 | include vendor/whisper.cpp/models/convert-whisper-to-coreml.py 381 | include vendor/whisper.cpp/models/download-coreml-model.sh 382 | include vendor/whisper.cpp/models/download-ggml-model.cmd 383 | include vendor/whisper.cpp/models/download-ggml-model.sh 384 | include vendor/whisper.cpp/models/for-tests-ggml-base.bin 385 | include vendor/whisper.cpp/models/for-tests-ggml-base.en.bin 386 | include vendor/whisper.cpp/models/for-tests-ggml-large.bin 387 | include vendor/whisper.cpp/models/for-tests-ggml-medium.bin 388 | include vendor/whisper.cpp/models/for-tests-ggml-medium.en.bin 389 | include vendor/whisper.cpp/models/for-tests-ggml-small.bin 390 | include vendor/whisper.cpp/models/for-tests-ggml-small.en.bin 391 | include vendor/whisper.cpp/models/for-tests-ggml-tiny.bin 392 | include vendor/whisper.cpp/models/for-tests-ggml-tiny.en.bin 393 | include vendor/whisper.cpp/models/generate-coreml-interface.sh 394 | include vendor/whisper.cpp/models/generate-coreml-model.sh 395 | include 
vendor/whisper.cpp/samples/.gitignore 396 | include vendor/whisper.cpp/samples/README.md 397 | include vendor/whisper.cpp/samples/jfk.wav 398 | include vendor/whisper.cpp/tests/.gitignore 399 | include vendor/whisper.cpp/tests/CMakeLists.txt 400 | include vendor/whisper.cpp/tests/en-0-ref.txt 401 | include vendor/whisper.cpp/tests/en-1-ref.txt 402 | include vendor/whisper.cpp/tests/en-2-ref.txt 403 | include vendor/whisper.cpp/tests/es-0-ref.txt 404 | include vendor/whisper.cpp/tests/run-tests.sh 405 | include vendor/whisper.cpp/tests/test-whisper.js 406 | include vendor/whisper.cpp/whisper.cpp 407 | include vendor/whisper.cpp/whisper.h 408 | include whisper_cpp_python/__init__.py 409 | include whisper_cpp_python/whisper.py 410 | exclude MANIFEST.in 411 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # whisper-cpp-python 2 | 3 | ![GitHub Workflow Status (with branch)](https://img.shields.io/github/actions/workflow/status/carloscdias/whisper-cpp-python/build_and_publish.yml) 4 | ![GitHub](https://img.shields.io/github/license/carloscdias/whisper-cpp-python) 5 | ![PyPI](https://img.shields.io/pypi/v/whisper-cpp-python) 6 | 7 | whisper-cpp-python is a Python module inspired by [llama-cpp-python](https://github.com/abetlen/llama-cpp-python) that provides a Python interface to the [whisper.cpp](https://github.com/ggerganov/whisper.cpp) model. 8 | This module automatically parses the C++ header file of the project at build time, generating the corresponding Python bindings. 9 | 10 | ## Installation 11 | 12 | To install the module, you can use pip: 13 | 14 | ```bash 15 | pip install whisper-cpp-python 16 | ``` 17 | 18 | ## Usage 19 | 20 | To use the module, you need to create an instance of the `Whisper` class, passing the path to the model file as a parameter. 
Then, you can call the `transcribe` or `translate` method to transcribe or translate a given audio file. 21 | 22 | ### High-level API 23 | 24 | The high-level API provides a simple managed interface through the `Whisper` class. 25 | 26 | Below is a short example demonstrating how to use the high-level API to transcribe an mp3: 27 | 28 | ```python 29 | >>> from whisper_cpp_python import Whisper 30 | >>> whisper = Whisper(model_path="./models/ggml-tiny.bin") 31 | >>> output = whisper.transcribe(open('samples/jfk.mp3')) 32 | >>> print(output) 33 | {'text': 'And so my fellow Americans ask not what your country can do for you, ask what you can do for your country.'} 34 | >>> output = whisper.transcribe(open('samples/jfk.mp3'), response_format='verbose_json') 35 | >>> print(output) 36 | { 37 | 'task': 'transcribe', 38 | 'language': 'en', 39 | 'duration': 11.0, 40 | 'text': 'And so, my fellow Americans ask not what your country can do for you, ask what you can do for your country.', 41 | 'segments': [{ 42 | 'id': 0, 43 | 'seek': 0.0, 44 | 'start': 0.0, 45 | 'end': 10.98, 46 | 'text': ' And so, my fellow Americans ask not what your country can do for you, ask what you can do for your country.', 47 | 'tokens': [50364, 48 | 400, 49 | 370, 50 | 11, 51 | 452, 52 | 7177, 53 | 6280, 54 | 1029, 55 | 406, 56 | 437, 57 | 428, 58 | 1941, 59 | 393, 60 | 360, 61 | 337, 62 | 291, 63 | 11, 64 | 1029, 65 | 437, 66 | 291, 67 | 393, 68 | 360, 69 | 337, 70 | 428, 71 | 1941, 72 | 13, 73 | 50913], 74 | 'temperature': 0.800000011920929, 75 | 'avg_logprob': -0.3063158459133572, 76 | 'compression_ratio': 2.4000000953674316, 77 | 'no_speech_prob': 0.0, 78 | 'transient': False 79 | }] 80 | } 81 | ``` 82 | 83 | ### Low-level API 84 | 85 | All interfaces provided by `whisper.h` are available in Python. The following example 86 | shows how to pass a custom `progress_callback` function to the model.
87 | 88 | ```python 89 | from whisper_cpp_python import Whisper 90 | from whisper_cpp_python.whisper_cpp import whisper_progress_callback 91 | 92 | def callback(ctx, state, i, p): 93 | print(i) 94 | 95 | model = Whisper('../quantized_models/whisper/models/ggml-tiny.bin') 96 | model.params.progress_callback = whisper_progress_callback(callback) 97 | 98 | print(model.transcribe('vendor/whisper.cpp/samples/jfk.wav')) 99 | ``` 100 | 101 | ## Web Server 102 | 103 | `whisper-cpp-python` offers a web server which aims to act as a drop-in replacement for the OpenAI API. 104 | This allows you to use whisper.cpp compatible models with any OpenAI compatible client (language libraries, services, etc). 105 | 106 | To install the server package and get started: 107 | 108 | ```bash 109 | pip install whisper-cpp-python[server] 110 | python3 -m whisper_cpp_python.server --model models/ggml-tiny.bin 111 | ``` 112 | 113 | Navigate to [http://localhost:8001/docs](http://localhost:8001/docs) to see the OpenAPI documentation. 114 | 115 | 116 | ## License 117 | 118 | whisper-cpp-python is released under the MIT License. See [LICENSE](LICENSE) for details. 
119 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [tool.poetry] 2 | name = "whisper_cpp_python" 3 | version = "0.2.0" 4 | description = "Python bindings for the whisper.cpp library" 5 | authors = ["Carlos Cardoso Dias "] 6 | license = "MIT" 7 | readme = "README.md" 8 | homepage = "https://github.com/carloscdias/whisper-cpp-python" 9 | repository = "https://github.com/carloscdias/whisper-cpp-python" 10 | packages = [{include = "whisper_cpp_python"}] 11 | 12 | [tool.poetry.dependencies] 13 | python = "^3.9" 14 | librosa = "^0.10.0.post2" 15 | 16 | [build-system] 17 | requires = ["setuptools>=42", "scikit-build", "cmake", "ninja", "pycparser>=2.21"] 18 | build-backend = "setuptools.build_meta" 19 | 20 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | from skbuild import setup 2 | import skbuild.constants 3 | import shutil 4 | 5 | from pathlib import Path 6 | 7 | from pycparser import c_ast, parse_file 8 | 9 | FILE = '''# auto-generated file 10 | import sys 11 | import os 12 | import ctypes 13 | import pathlib 14 | 15 | 16 | # Load the library 17 | def _load_shared_library(lib_base_name: str): 18 | # Determine the file extension based on the platform 19 | if sys.platform.startswith("linux"): 20 | lib_ext = ".so" 21 | elif sys.platform == "darwin": 22 | lib_ext = ".so" 23 | elif sys.platform == "win32": 24 | lib_ext = ".dll" 25 | else: 26 | raise RuntimeError("Unsupported platform") 27 | 28 | # Construct the paths to the possible shared library names 29 | _base_path = pathlib.Path(__file__).parent.resolve() 30 | _lib_paths = [ 31 | _base_path / f"lib{lib_base_name}{lib_ext}", 32 | _base_path / f"{lib_base_name}{lib_ext}", 33 | ] 34 | 35 | if "WHISPER_CPP_LIB" in os.environ: 36 | lib_base_name = 
class WhisperCppFileGen():
    """Generate the source of a ctypes binding module from a C header.

    The header is preprocessed and parsed with pycparser. Every top-level
    function declaration, struct and ``whisper*`` typedef is turned into a
    block of Python source, and the blocks are appended to the ``FILE``
    preamble (which loads the shared library as ``_lib``).
    """

    # One indentation level in the generated source.
    T = '    '

    # Default C type name -> ctypes expression. Each instance works on its own
    # copy, which also records the struct/typedef names it has emitted.
    types = {
        'bool': 'ctypes.c_bool',
        'int': 'ctypes.c_int',
        'int64_t': 'ctypes.c_int64',
        'size_t': 'ctypes.c_size_t',
        'float': 'ctypes.c_float',
        'char': 'ctypes.c_char',
        'void': '',
        # Without these entries the types below fell back to DEFAULT_TYPE
        # (a pointer), which gives the wrong size inside a Structure.
        'double': 'ctypes.c_double',
        'short': 'ctypes.c_short',
        'long': 'ctypes.c_long',
        'long long': 'ctypes.c_longlong',
        'unsigned': 'ctypes.c_uint',
        'unsigned int': 'ctypes.c_uint',
        'unsigned char': 'ctypes.c_ubyte',
        'unsigned short': 'ctypes.c_ushort',
        'unsigned long': 'ctypes.c_ulong',
        'unsigned long long': 'ctypes.c_ulonglong',
        'int8_t': 'ctypes.c_int8',
        'uint8_t': 'ctypes.c_uint8',
        'int16_t': 'ctypes.c_int16',
        'uint16_t': 'ctypes.c_uint16',
        'int32_t': 'ctypes.c_int32',
        'uint32_t': 'ctypes.c_uint32',
        'uint64_t': 'ctypes.c_uint64',
    }

    # Default rewrites applied to a fully formatted type expression.
    replace = {
        'ctypes.POINTER(ctypes.c_char)': 'ctypes.c_char_p',
        'ctypes.POINTER()': 'ctypes.c_void_p',
    }

    @staticmethod
    def get_nested_type(node, ignore = (c_ast.Decl, c_ast.Typename)):
        """Return the class of the first node below ``node`` not in ``ignore``.

        Wrapper nodes whose class is listed in ``ignore`` are skipped by
        following their ``.type`` attribute.
        """
        typ = type(node)
        if typ in ignore:
            # Forward ``ignore`` so a caller-supplied value is honoured at
            # every nesting level, not only the outermost one.
            return WhisperCppFileGen.get_nested_type(node.type, ignore)
        return typ

    def __init__(self, filename, fake_libc = 'vendor/pycparser/utils/fake_libc_include'):
        """Parse ``filename`` and build the generated module source.

        ``fake_libc`` is passed to the preprocessor (``gcc -E``) as an include
        directory.
        """
        # Copy the class-level tables: they are mutated while processing, and
        # sharing them would leak already-seen names into later instances.
        self.types = dict(self.types)
        self.replace = dict(self.replace)
        self.ast = parse_file(filename, use_cpp=True, cpp_args=['-E', f'-I{fake_libc}'], cpp_path='gcc')
        self.blocks = []
        self._output = None
        self._process()

    def _process(self):
        """Walk the top-level AST nodes and assemble ``self._output``."""
        for node in self.ast:
            typ = WhisperCppFileGen.get_nested_type(node)
            if typ is c_ast.FuncDecl:
                self.format_function(node)
            elif typ is c_ast.Struct:
                self.format_ctypes_structure(node)
            elif typ in (c_ast.TypeDecl, c_ast.Typedef):
                self.format_ctypes_defs(node)
        # Every block is preceded by a blank line; the blocks are consumed.
        self._output = FILE + ''.join('\n\n' + block for block in self.blocks)
        self.blocks.clear()

    def print(self):
        """Print the generated module source to stdout."""
        print(self._output)

    def output(self, filename):
        """Write the generated module source to ``filename`` as UTF-8."""
        with open(filename, 'w', encoding='utf-8') as f:
            f.write(self._output)

    def format_ctypes_defs(self, node):
        """Emit ``name = <ctypes type>`` for a not-yet-seen ``whisper*`` typedef."""
        if not isinstance(node, c_ast.Typedef):
            return
        if node.name in self.types or not node.name.startswith('whisper'):
            return
        t_type = self.get_ctypes_type(node.type)
        if node.name == t_type:
            # ``typedef struct X {...} X``: the class emitted while resolving
            # the type already carries this name.
            return
        self.types[node.name] = node.name
        self.blocks.append(f'{node.name} = {t_type}')

    def format_ctypes_structure(self, node, cls_name = ''):
        """Emit a ``ctypes.Structure`` subclass (or opaque alias) for a struct.

        ``cls_name`` overrides the struct's own name. A struct without member
        declarations is emitted as ``<name>_p = ctypes.c_void_p`` and pointers
        to it are rewritten to that alias.
        """
        # Unwrap declaration/typedef wrappers down to the struct itself.
        while type(node) in {c_ast.Decl, c_ast.TypeDecl, c_ast.Typedef}:
            node = node.type
        if not isinstance(node, c_ast.Struct):
            return
        cls_name = cls_name if cls_name else node.name
        if cls_name in self.types:
            return
        # Register before formatting the fields so nested references to this
        # struct do not emit it a second time.
        self.types[cls_name] = cls_name
        if not node.decls:
            cls = f'{cls_name}_p = ctypes.c_void_p'
            self.replace[f'ctypes.POINTER({cls_name})'] = f'{cls_name}_p'
        else:
            cls = f'class {cls_name}(ctypes.Structure):\n{self.T}_fields_ = [\n{self.T*2}'
            cls += f'\n{self.T*2}'.join(self.format_ctypes_structure_fields(node.decls))
            cls += f'\n{self.T}]'
        self.blocks.append(cls)

    def format_ctypes_structure_fields(self, fields):
        """Return one ``("name", type),`` source line per struct member."""
        fields_txt = []
        for f in fields:
            typ = self.get_ctypes_type(f)
            typ = self.replace.get(typ, typ)
            fields_txt.append(f'("{f.name}", {typ}),')
        return fields_txt

    def get_ctypes_type(self, node, name=''):
        """Return the ctypes expression (as source text) for an AST type node.

        ``name`` is the declared identifier, used to name anonymous structs.
        Returns ``''`` for ``None`` and for ``void``; unknown identifier types
        map to ``DEFAULT_TYPE``.
        """
        if node is None:
            return ''
        if isinstance(node, (c_ast.Typename, c_ast.Decl)):
            return self.get_ctypes_type(node.type, node.name)
        if isinstance(node, c_ast.TypeDecl):
            return self.get_ctypes_type(node.type, node.declname)
        if isinstance(node, c_ast.FuncDecl):
            # ``args`` is None when the declaration has no parameter list.
            declared = node.args.params if node.args is not None and node.args.params else []
            params = [self.get_ctypes_type(t) for t in declared]
            params = [self.replace.get(t, t) for t in params]
            ret = self.get_ctypes_type(node.type)
            ret = ret if ret else 'None'
            return 'ctypes.CFUNCTYPE(' + ret + (', ' if len(params) > 0 else '') + ', '.join(params) + ')'
        if isinstance(node, c_ast.PtrDecl):
            if isinstance(node.type, c_ast.FuncDecl):
                # A pointer to a function is expressed by the CFUNCTYPE itself.
                return self.get_ctypes_type(node.type)
            return 'ctypes.POINTER(' + self.get_ctypes_type(node.type) + ')'
        if isinstance(node, c_ast.Struct):
            name = node.name if node.name else name
            self.format_ctypes_structure(node, name)
            return name
        if isinstance(node, c_ast.Enum):
            return 'ctypes.c_int'
        # Identifier type: join multi-word names ("unsigned int") with a space
        # so they can be looked up in the table.
        return self.types.get(' '.join(node.names), DEFAULT_TYPE)

    def get_function_args(self, args):
        """Return ``(names, types)`` for a function's parameter list.

        Unnamed parameters (such as a lone ``void``) are dropped from
        ``names`` but keep their entry in ``types``.
        """
        names = []
        types = []
        if args is None or args.params is None:
            return names, types
        for p in args.params:
            names.append(p.name)
            typ = self.get_ctypes_type(p)
            types.append(self.replace.get(typ, typ))
        return [n for n in names if n], types

    def format_function(self, node):
        """Emit a Python wrapper plus ``argtypes``/``restype`` for a C function."""
        name = node.name
        typ = self.get_ctypes_type(node.type.type)
        ret = self.replace.get(typ, typ)
        args, types = self.get_function_args(node.type.args)
        args_typed = f',\n{self.T}'.join([f'{n}: {t}' for n, t in zip(args, types)])
        all_args = ', '.join(args)
        all_types = ', '.join(types)
        # A void function gets neither a return annotation nor a ``return``.
        ret_f = f' -> {ret}:\n{self.T}return ' if ret else f':\n{self.T}'
        pyfunction = f'def {name}({args_typed}){ret_f}_lib.{name}({all_args})'
        all_block = f'{pyfunction}\n\n_lib.{name}.argtypes = [{all_types}]\n_lib.{name}.restype = {ret if ret else "None"}\n'
        self.blocks.append(all_block)
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/carloscdias/whisper-cpp-python/0744238e10f6b1da3440d43aa3dff43d46b09b30/tests/__init__.py -------------------------------------------------------------------------------- /tests/test.py: -------------------------------------------------------------------------------- 1 | from whisper_cpp_python import Whisper 2 | from whisper_cpp_python.whisper_cpp import whisper_progress_callback 3 | 4 | #@ctypes.CFUNCTYPE(None, ctypes.c_void_p, ctypes.c_void_p, ctypes.c_int, ctypes.c_void_p) 5 | def callback(ctx, state, i, p): 6 | print('hehehe') 7 | print(i) 8 | 9 | model = Whisper('vendor/whisper.cpp/models/ggml-tiny.bin') 10 | model.params.progress_callback = whisper_progress_callback(callback) 11 | model.params.print_progress = False 12 | model.params.print_special = False 13 | 14 | print(model.transcribe('vendor/whisper.cpp/samples/jfk.wav')) 15 | -------------------------------------------------------------------------------- /whisper_cpp_python/__init__.py: -------------------------------------------------------------------------------- 1 | from .whisper import * 2 | -------------------------------------------------------------------------------- /whisper_cpp_python/server/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/carloscdias/whisper-cpp-python/0744238e10f6b1da3440d43aa3dff43d46b09b30/whisper_cpp_python/server/__init__.py -------------------------------------------------------------------------------- /whisper_cpp_python/server/__main__.py: -------------------------------------------------------------------------------- 1 | """Example FastAPI server for whisper.cpp 2 | 3 | To run this example: 4 | 5 | Then run: 6 | ``` 7 | uvicorn whisper_cpp_python.server.app:app --reload 8 | ``` 9 | 10 | or 11 | 12 | ``` 13 | python3 -m whisper_cpp_python.server 14 | ``` 15 | 16 | Then 
visit http://localhost:8000/docs to see the interactive API docs. 17 | 18 | """ 19 | import os 20 | import uvicorn 21 | import argparse 22 | 23 | from whisper_cpp_python.server.app import create_app, Settings 24 | 25 | if __name__ == "__main__": 26 | parser = argparse.ArgumentParser() 27 | for name, field in Settings.__fields__.items(): 28 | description = field.field_info.description 29 | if field.default is not None and description is not None: 30 | description += f" (default: {field.default})" 31 | parser.add_argument( 32 | f"--{name}", 33 | dest=name, 34 | type=field.type_, 35 | help=description, 36 | ) 37 | 38 | args = parser.parse_args() 39 | settings = Settings(**{k: v for k, v in vars(args).items() if v is not None}) 40 | app = create_app(settings=settings) 41 | 42 | uvicorn.run( 43 | app, host=os.getenv("HOST", "localhost"), port=int(os.getenv("PORT", 8001)) 44 | ) 45 | -------------------------------------------------------------------------------- /whisper_cpp_python/server/app.py: -------------------------------------------------------------------------------- 1 | import os 2 | import io 3 | import json 4 | from threading import Lock 5 | from typing import List, Optional, Union, Any 6 | from typing_extensions import TypedDict, Literal, Annotated 7 | 8 | import whisper_cpp_python 9 | 10 | from fastapi import Depends, FastAPI, APIRouter, File, Body 11 | from fastapi.middleware.cors import CORSMiddleware 12 | from pydantic import BaseModel, BaseSettings, Field 13 | from sse_starlette.sse import EventSourceResponse 14 | 15 | 16 | class Settings(BaseSettings): 17 | model: str 18 | strategy: int = 0 19 | n_threads: int = max((os.cpu_count() or 2) // 2, 1) 20 | 21 | 22 | router = APIRouter() 23 | 24 | whisper: Optional[whisper_cpp_python.Whisper] = None 25 | 26 | 27 | def create_app(settings: Optional[Settings] = None): 28 | if settings is None: 29 | settings = Settings() 30 | app = FastAPI( 31 | title="whisper.cpp Python API", 32 | version="0.0.1", 33 | ) 34 | 
app.add_middleware( 35 | CORSMiddleware, 36 | allow_origins=["*"], 37 | allow_credentials=True, 38 | allow_methods=["*"], 39 | allow_headers=["*"], 40 | ) 41 | app.include_router(router) 42 | global whisper 43 | whisper = whisper_cpp_python.Whisper( 44 | model_path=settings.model, 45 | strategy=settings.strategy, 46 | n_threads=settings.n_threads, 47 | ) 48 | return app 49 | 50 | 51 | whisper_lock = Lock() 52 | 53 | 54 | def get_whisper(): 55 | with whisper_lock: 56 | yield whisper 57 | 58 | 59 | @router.post("/v1/audio/transcriptions") 60 | def transcription( 61 | file: Annotated[bytes, File()], 62 | model: Annotated[str, Body()], 63 | prompt: Annotated[str, Body()] = None, 64 | response_format: Annotated[Literal["json", "text", "srt", "verbose_json", "vtt"], Body()] = "json", 65 | temperature: Annotated[float, Body()] = 0.8, 66 | language: Annotated[str, Body()] = 'en', 67 | whisper: whisper_cpp_python.Whisper = Depends(get_whisper)) -> Any: 68 | return whisper.transcribe(io.BytesIO(file), prompt, response_format, temperature, language) 69 | 70 | 71 | @router.post("/v1/audio/translations") 72 | def translation( 73 | file: Annotated[bytes, File()], 74 | model: Annotated[str, Body()], 75 | prompt: Annotated[str, Body()] = None, 76 | response_format: Annotated[Literal["json", "text", "srt", "verbose_json", "vtt"], Body()] = "json", 77 | temperature: Annotated[float, Body()] = 0.8, 78 | whisper: whisper_cpp_python.Whisper = Depends(get_whisper)) -> Any: 79 | return whisper.translate(io.BytesIO(file), prompt, response_format, temperature) 80 | 81 | -------------------------------------------------------------------------------- /whisper_cpp_python/whisper.py: -------------------------------------------------------------------------------- 1 | from . 
class Whisper():
    """High-level wrapper around a whisper.cpp context.

    An instance owns one native context (``self.context``) and one parameter
    struct (``self.params``). ``transcribe`` and ``translate`` load audio with
    librosa at ``WHISPER_SR`` Hz, run inference and format the result.
    """

    # Sample rate every input is resampled to before inference.
    WHISPER_SR = 16000

    def __init__(self, model_path, strategy = 0, n_threads = 1):
        """Load the model at ``model_path`` and create default parameters.

        Raises:
            RuntimeError: if the native library returns no context.
        """
        # Assigned first so the finaliser is safe if loading fails.
        self.context = None
        self.context = whisper_cpp.whisper_init_from_file(model_path.encode('utf-8'))
        if not self.context:
            raise RuntimeError(f"Failed to load whisper model from '{model_path}'")
        self.params = whisper_cpp.whisper_full_default_params(strategy)
        self.params.n_threads = n_threads
        self.params.print_special = False
        self.params.print_progress = False
        self.params.print_realtime = False
        self.params.print_timestamps = False

    def transcribe(self, file, prompt = None, response_format = 'json', temperature = 0.8, language = 'en') -> Any:
        """Transcribe ``file`` and return the result in ``response_format``.

        ``file`` is whatever ``librosa.load`` accepts. ``prompt`` is used as
        the initial prompt for this call only.
        """
        data, _ = librosa.load(file, sr=Whisper.WHISPER_SR)
        # Reset the flag a previous ``translate`` call may have left set.
        self.params.translate = False
        self.params.language = language.encode('utf-8')
        self._set_prompt(prompt)
        self.params.temperature = temperature
        return self._parse_format(self._full(data), response_format)

    def translate(self, file, prompt = None, response_format = 'json', temperature = 0.8) -> Any:
        """Translate ``file`` and return the result in ``response_format``.

        ``prompt`` is optional and is used as the initial prompt for this
        call only.
        """
        data, _ = librosa.load(file, sr=Whisper.WHISPER_SR)
        self.params.translate = True
        self._set_prompt(prompt)
        self.params.temperature = temperature
        return self._parse_format(self._full(data), response_format)

    def _set_prompt(self, prompt):
        """Set the initial prompt, clearing any prompt left by an earlier call."""
        self.params.initial_prompt = prompt.encode('utf-8') if prompt else None

    def _full(self, data) -> WhisperResult:
        """Run inference on ``data`` and collect segments and tokens.

        Raises:
            RuntimeError: if ``whisper_full`` returns a non-zero status.
        """
        ctx = ctypes.c_void_p(self.context)
        status = whisper_cpp.whisper_full(ctx, self.params, data.ctypes.data_as(ctypes.POINTER(ctypes.c_float)), len(data))
        if status != 0:
            raise RuntimeError(f"whisper_full failed with status {status}")

        # ``transcribe`` stores the language as bytes; report it as text.
        language = self.params.language
        if isinstance(language, bytes):
            language = language.decode('utf-8')

        segments: List[WhisperSegment] = []
        all_text = ''
        n_segments = whisper_cpp.whisper_full_n_segments(ctx)
        for i in range(n_segments):
            # Divided by 100 and reported as seconds by the formatters below.
            t0 = whisper_cpp.whisper_full_get_segment_t0(ctx, i)/100.0
            t1 = whisper_cpp.whisper_full_get_segment_t1(ctx, i)/100.0
            txt = whisper_cpp.whisper_full_get_segment_text(ctx, i).decode('utf-8')
            all_text += txt
            n_tokens = whisper_cpp.whisper_full_n_tokens(ctx, i)
            tokens: List[WhisperToken] = []
            for j in range(n_tokens):
                token_data = whisper_cpp.whisper_full_get_token_data(ctx, i, j)
                tokens.append({
                    "id": token_data.id,
                    "prob": token_data.p,
                    "logprob": token_data.plog,
                    "pt": token_data.pt,
                    "pt_sum": token_data.ptsum,
                })
            segments.append({
                "start": t0,
                "end": t1,
                "text": txt,
                "tokens": tokens,
            })

        result: WhisperResult = {
            "task": "translate" if self.params.translate else "transcribe",
            "language": language,
            "duration": librosa.get_duration(y=data, sr=Whisper.WHISPER_SR),
            "segments": segments,
            "text": all_text.strip(),
        }
        return result

    def _parse_format(self, result: WhisperResult, response_format: Literal["json", "text", "srt", "verbose_json", "vtt"]):
        """Dispatch ``result`` to the formatter named by ``response_format``.

        Raises:
            KeyError: if ``response_format`` is not a supported format.
        """
        return {
            "json": self._parse_format_json,
            "text": self._parse_format_text,
            "srt": self._parse_format_srt,
            "verbose_json": self._parse_format_verbose_json,
            "vtt": self._parse_format_vtt,
        }[response_format](result)

    def _parse_format_verbose_json(self, result: WhisperResult):
        """Return the result with per-segment details."""
        def avg_logprob(tokens):
            # A segment without tokens must not divide by zero.
            return sum(t["logprob"] for t in tokens)/len(tokens) if tokens else 0.0

        return {
            "task": result["task"],
            "language": result["language"],
            "duration": result["duration"],
            "text": result["text"],
            "segments": [{
                "id": i,
                "seek": s['start'],
                "start": s['start'],
                "end": s['end'],
                "text": s['text'],
                "tokens": [t["id"] for t in s["tokens"]],
                "temperature": self.params.temperature + self.params.temperature_inc * i,
                "avg_logprob": avg_logprob(s["tokens"]),
                "compression_ratio": self.params.entropy_thold,
                "no_speech_prob": 0.0,
                "transient": False,
            } for i, s in enumerate(result["segments"])],
        }

    def _parse_format_json(self, result: WhisperResult):
        """Return ``{"text": ...}`` with the full text only."""
        return {
            "text": result["text"],
        }

    def _parse_format_text(self, result: WhisperResult):
        """Return the full text as a plain string."""
        return result["text"]

    def _parse_format_srt(self, result: WhisperResult):
        """Return the segments as SubRip cues (comma before milliseconds)."""
        # Segment times are in seconds; format_time expects 10 ms ticks.
        return '\n'.join([f'{i + 1}\n{Whisper.format_time(s["start"] * 100, comma=True)} --> {Whisper.format_time(s["end"] * 100, comma=True)}\n{s["text"]}\n' for i, s in enumerate(result["segments"])])

    def _parse_format_vtt(self, result: WhisperResult):
        """Return the segments as a WebVTT document."""
        cues = '\n'.join([f'{i + 1}\n{Whisper.format_time(s["start"] * 100)} --> {Whisper.format_time(s["end"] * 100)} align:middle\n{s["text"]}\n' for i, s in enumerate(result["segments"])])
        # A WebVTT file must start with this signature line.
        return 'WEBVTT\n\n' + cues

    def __dealloc__(self):
        """Free the native context; calling it again is a no-op."""
        context = getattr(self, 'context', None)
        if context:
            self.context = None
            whisper_cpp.whisper_free(ctypes.c_void_p(context))

    def __del__(self):
        # Python never calls ``__dealloc__`` on its own; hook it into
        # finalisation so the native context is released.
        self.__dealloc__()

    @staticmethod
    def format_time(t: float, comma: bool = False):
        """Format ``t`` (in units of 10 ms) as ``HH:MM:SS.mmm``.

        With ``comma=True`` the millisecond separator is ``,`` as SubRip
        requires. Negative values are clamped to zero.
        """
        msec = max(0, int(round(t * 10)))
        hr, msec = divmod(msec, 1000 * 60 * 60)
        minu, msec = divmod(msec, 1000 * 60)
        sec, msec = divmod(msec, 1000)
        sep = ',' if comma else '.'
        return f'{hr:02}:{minu:02}:{sec:02}{sep}{msec:03}'
"translate"] 19 | language: str 20 | duration: float 21 | segments: List[WhisperSegment] 22 | text: str 23 | 24 | --------------------------------------------------------------------------------