├── android ├── whisperkit │ ├── .gitignore │ ├── consumer-rules.pro │ ├── src │ │ ├── main │ │ │ ├── java │ │ │ │ └── com │ │ │ │ │ └── argmaxinc │ │ │ │ │ └── whisperkit │ │ │ │ │ ├── util │ │ │ │ │ ├── MessageProcessor.kt │ │ │ │ │ └── SegmentTextOnlyMessageProcessor.kt │ │ │ │ │ ├── NativeTest.kt │ │ │ │ │ ├── WhisperKitException.kt │ │ │ │ │ ├── ExperimentalWhisperKit.kt │ │ │ │ │ ├── network │ │ │ │ │ └── ArgmaxModelDownloader.kt │ │ │ │ │ ├── huggingface │ │ │ │ │ ├── HuggingFaceApiConfig.kt │ │ │ │ │ ├── Repo.kt │ │ │ │ │ ├── HuggingFaceLogger.kt │ │ │ │ │ ├── KtorHuggingFaceClient.kt │ │ │ │ │ ├── ModelInfo.kt │ │ │ │ │ └── HuggingFaceApi.kt │ │ │ │ │ └── WhisperKitImpl.kt │ │ │ └── AndroidManifest.xml │ │ └── test │ │ │ └── java │ │ │ └── com │ │ │ └── argmaxinc │ │ │ └── whisperkit │ │ │ └── util │ │ │ └── SegmentTextOnlyMessageProcessorTest.kt │ ├── proguard-rules.pro │ ├── detekt-baseline.xml │ └── build.gradle.kts ├── examples │ └── WhisperAX │ │ ├── .gitignore │ │ ├── src │ │ └── main │ │ │ ├── res │ │ │ ├── values │ │ │ │ ├── strings.xml │ │ │ │ ├── dimens.xml │ │ │ │ ├── colors.xml │ │ │ │ ├── themes.xml │ │ │ │ └── styles.xml │ │ │ ├── drawable │ │ │ │ ├── logo.png │ │ │ │ ├── ic_launcher_foreground.xml │ │ │ │ └── ic_launcher_background.xml │ │ │ ├── values-night │ │ │ │ ├── colors.xml │ │ │ │ └── themes.xml │ │ │ └── layout │ │ │ │ └── activity_main.xml │ │ │ ├── java │ │ │ └── com │ │ │ │ └── argmaxinc │ │ │ │ └── whisperax │ │ │ │ ├── AudioDecoderCallbacks.kt │ │ │ │ ├── Typography.kt │ │ │ │ ├── WhisperAppTheme.kt │ │ │ │ ├── MainActivity.kt │ │ │ │ ├── MainScreenContentImpl.kt │ │ │ │ └── PCMDecoder.kt │ │ │ └── AndroidManifest.xml │ │ ├── proguard-rules.pro │ │ └── build.gradle.kts ├── .gitignore └── config │ └── detekt.yml ├── .clang-format ├── test ├── ted_60.m4a ├── jfk_441khz.m4a ├── requirements.txt ├── ENVIRONMENT.json └── test_build_all.sh ├── gradle ├── wrapper │ ├── gradle-wrapper.jar │ └── gradle-wrapper.properties └── 
libs.versions.toml ├── scripts ├── run_on_android.sh ├── copy_libraries.sh ├── build_tokenizers.sh ├── build_test.sh ├── build_ffmpeg.sh ├── Dockerfile ├── build_tensorflow.sh ├── adb_push.sh ├── dev_env.sh ├── build.sh └── download_models.sh ├── cpp └── src │ ├── WhisperKitTranscriptionResult.hpp │ ├── Core │ ├── backend_class.hpp │ ├── tflite_msg.hpp │ └── DelegateInterface.hpp │ ├── Models │ ├── tflite_gpu_model.hpp │ ├── tflite_qnn_model.hpp │ ├── tflite_gpu_model.cpp │ ├── tflite_model.hpp │ ├── tflite_qnn_model.cpp │ └── TextDecoder.hpp │ ├── WhisperKitTranscriptionResult.cpp │ ├── Text │ ├── post_proc.hpp │ ├── Tokenizer.h │ ├── Tokenizer.cpp │ └── post_proc.cpp │ ├── WhisperKitPipeline.hpp │ ├── TranscribeTask.hpp │ ├── WhisperKitPipeline.cpp │ ├── WhisperKitConfiguration.hpp │ ├── Audio │ └── audio_input.hpp │ └── WhisperKitConfiguration.cpp ├── .gitignore ├── settings.gradle.kts ├── .github ├── pull_request_template.md └── workflows │ └── pr-checks.yml ├── LICENSE ├── gradle.properties ├── .githooks └── pre-commit ├── cli ├── whisperkit_cli.h └── whisperkit_cli.cpp ├── jni └── whisperkit_jni.h ├── gradlew.bat ├── Makefile ├── CMakeLists.txt └── gradlew /android/whisperkit/.gitignore: -------------------------------------------------------------------------------- 1 | /build -------------------------------------------------------------------------------- /android/whisperkit/consumer-rules.pro: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /android/examples/WhisperAX/.gitignore: -------------------------------------------------------------------------------- 1 | /build -------------------------------------------------------------------------------- /.clang-format: -------------------------------------------------------------------------------- 1 | BasedOnStyle: Google 2 | IndentWidth: 4 3 | ColumnLimit: 120 4 | 
-------------------------------------------------------------------------------- /test/ted_60.m4a: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/argmaxinc/WhisperKitAndroid/HEAD/test/ted_60.m4a -------------------------------------------------------------------------------- /test/jfk_441khz.m4a: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/argmaxinc/WhisperKitAndroid/HEAD/test/jfk_441khz.m4a -------------------------------------------------------------------------------- /gradle/wrapper/gradle-wrapper.jar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/argmaxinc/WhisperKitAndroid/HEAD/gradle/wrapper/gradle-wrapper.jar -------------------------------------------------------------------------------- /test/requirements.txt: -------------------------------------------------------------------------------- 1 | evaluate==0.4.3 2 | huggingface_hub==0.26.2 3 | openai_whisper==20240930 4 | jiwer==3.0.5 5 | docker==7.1.0 6 | -------------------------------------------------------------------------------- /android/examples/WhisperAX/src/main/res/values/strings.xml: -------------------------------------------------------------------------------- 1 | 2 | WhisperAX 3 | -------------------------------------------------------------------------------- /android/examples/WhisperAX/src/main/res/drawable/logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/argmaxinc/WhisperKitAndroid/HEAD/android/examples/WhisperAX/src/main/res/drawable/logo.png -------------------------------------------------------------------------------- /android/examples/WhisperAX/src/main/res/values/dimens.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 48dp 4 | 
-------------------------------------------------------------------------------- /gradle/wrapper/gradle-wrapper.properties: -------------------------------------------------------------------------------- 1 | #Tue May 06 17:09:48 PDT 2025 2 | distributionBase=GRADLE_USER_HOME 3 | distributionPath=wrapper/dists 4 | distributionUrl=https\://services.gradle.org/distributions/gradle-8.11.1-bin.zip 5 | zipStoreBase=GRADLE_USER_HOME 6 | zipStorePath=wrapper/dists 7 | -------------------------------------------------------------------------------- /scripts/run_on_android.sh: -------------------------------------------------------------------------------- 1 | #!/system/bin/sh 2 | 3 | # Set up environment variables for the Android session 4 | export PATH=/data/local/tmp/bin:$PATH 5 | export LD_LIBRARY_PATH=/data/local/tmp/lib 6 | 7 | whisperkit-cli transcribe --model-path /sdcard/argmax/tflite/models/openai_whisper-tiny --audio-path /sdcard/argmax/tflite/inputs/jfk_441khz.m4a 8 | -------------------------------------------------------------------------------- /android/whisperkit/src/main/java/com/argmaxinc/whisperkit/util/MessageProcessor.kt: -------------------------------------------------------------------------------- 1 | package com.argmaxinc.whisperkit.util 2 | 3 | import com.argmaxinc.whisperkit.TranscriptionResult 4 | 5 | /** 6 | * Processor to convert raw model output Strings into [TranscriptionResult] 7 | */ 8 | internal interface MessageProcessor { 9 | fun process(rawMsg: String): TranscriptionResult 10 | } 11 | -------------------------------------------------------------------------------- /android/examples/WhisperAX/src/main/res/values/colors.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | #4542F4 4 | #4542F4 5 | #4542F4 6 | #4542F4 7 | #FFFFFF 8 | #000000 9 | -------------------------------------------------------------------------------- /android/examples/WhisperAX/src/main/res/values-night/colors.xml: 
-------------------------------------------------------------------------------- 1 | 2 | 3 | #4542F4 4 | #4542F4 5 | #4542F4 6 | #4542F4 7 | #FFFFFF 8 | #FFFFFF 9 | -------------------------------------------------------------------------------- /android/examples/WhisperAX/src/main/java/com/argmaxinc/whisperax/AudioDecoderCallbacks.kt: -------------------------------------------------------------------------------- 1 | // For licensing see accompanying LICENSE.md file. 2 | // Copyright © 2025 Argmax, Inc. All rights reserved. 3 | package com.argmaxinc.whisperax 4 | 5 | interface AudioDecoderCallbacks { 6 | fun onAudioFormat(freq: Int, ch: Int, dur: Long) 7 | fun onOutputBuffer(pcmbuffer: ByteArray, timestamp: Long) 8 | fun onDecodeClose() 9 | fun onEndOfStream() 10 | } 11 | -------------------------------------------------------------------------------- /android/whisperkit/src/main/AndroidManifest.xml: -------------------------------------------------------------------------------- 1 | 2 | 4 | 5 | 6 | 7 | 10 | 11 | -------------------------------------------------------------------------------- /cpp/src/WhisperKitTranscriptionResult.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include <string> 4 | 5 | struct whisperkit_transcription_result_t { 6 | public: 7 | whisperkit_transcription_result_t(); 8 | ~whisperkit_transcription_result_t(); 9 | 10 | void set_transcription(const std::string& transcription); 11 | std::string get_transcription() const; 12 | std::string get_chunk_transcription() const; 13 | 14 | private: 15 | std::string curr_transcription; 16 | std::string all_transcription; 17 | }; -------------------------------------------------------------------------------- /android/whisperkit/src/main/java/com/argmaxinc/whisperkit/NativeTest.kt: -------------------------------------------------------------------------------- 1 | package com.argmaxinc.whisperkit 2 | 3 | /** 4 | * A simple class to test native 
code editing 5 | */ 6 | class NativeTest { 7 | 8 | /** 9 | * A native method that returns a string from C++ 10 | */ 11 | external fun stringFromJNI(): String 12 | 13 | companion object { 14 | // Used to load the native library on application startup 15 | init { 16 | System.loadLibrary("whisperkit_jni") 17 | } 18 | } 19 | } 20 | -------------------------------------------------------------------------------- /android/examples/WhisperAX/src/main/res/values/themes.xml: -------------------------------------------------------------------------------- 1 | 2 | 9 | 10 | -------------------------------------------------------------------------------- /cpp/src/Core/backend_class.hpp: -------------------------------------------------------------------------------- 1 | // For licensing see accompanying LICENSE file. 2 | // Copyright © 2024 Argmax, Inc. All rights reserved. 3 | #pragma once 4 | 5 | enum ComputeBackend { 6 | None = 0, 7 | CPU = 1, 8 | GPU = 2, 9 | NPU = 3, 10 | }; 11 | 12 | #if QNN_DELEGATE 13 | #include "tflite_qnn_model.hpp" 14 | #define MODEL_SUPER_CLASS TFLiteQNN 15 | #else 16 | 17 | #if GPU_DELEGATE 18 | 19 | #include "tflite_gpu_model.hpp" 20 | #define MODEL_SUPER_CLASS TFLiteGPU 21 | 22 | #else 23 | 24 | #include "tflite_model.hpp" 25 | #define MODEL_SUPER_CLASS TFLiteModel 26 | 27 | #endif 28 | #endif 29 | -------------------------------------------------------------------------------- /android/whisperkit/src/main/java/com/argmaxinc/whisperkit/WhisperKitException.kt: -------------------------------------------------------------------------------- 1 | package com.argmaxinc.whisperkit 2 | 3 | /** 4 | * Exception thrown by WhisperKit when an error occurs during model operations. 
5 | * This exception can be thrown during: 6 | * - Model loading and initialization 7 | * - Transcription processing 8 | * - Resource cleanup 9 | * - Invalid configuration 10 | * 11 | * @param message A detailed description of the error 12 | * @param cause The underlying exception that caused this error, if any 13 | */ 14 | class WhisperKitException(message: String, cause: Throwable? = null) : Exception(message, cause) 15 | -------------------------------------------------------------------------------- /android/examples/WhisperAX/src/main/res/values-night/themes.xml: -------------------------------------------------------------------------------- 1 | 2 | 10 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Ignore build folders 2 | .source/ 3 | build/ 4 | inputs/ 5 | models/ 6 | cache/ 7 | external/ 8 | test/dataset/ 9 | test/logs/ 10 | android/examples/WhisperAX/release/ 11 | .idea 12 | 13 | # Never exclude src files 14 | !cpp/** 15 | !cli/** 16 | 17 | # MacOS specific 18 | .DS_Store 19 | **/.DS_Store 20 | 21 | # Binary files 22 | *.so 23 | *.bin 24 | *.apk 25 | *.aar 26 | *.jar 27 | *.tflite 28 | 29 | # Gradle and Android 30 | .gradle/ 31 | local.properties 32 | **/build/ 33 | captures/ 34 | .externalNativeBuild/ 35 | .cxx/ 36 | 37 | # Python 38 | __pycache__/ 39 | *.py[cod] 40 | *$py.class 41 | *.so 42 | .Python 43 | env/ 44 | venv/ 45 | ENV/ -------------------------------------------------------------------------------- /android/whisperkit/src/main/java/com/argmaxinc/whisperkit/ExperimentalWhisperKit.kt: -------------------------------------------------------------------------------- 1 | package com.argmaxinc.whisperkit 2 | 3 | import kotlin.RequiresOptIn 4 | import kotlin.annotation.AnnotationTarget 5 | 6 | /** 7 | * Marks the WhisperKit API as experimental. 
8 | * This annotation indicates that the API is still in development and may change in future releases. 9 | */ 10 | @RequiresOptIn( 11 | level = RequiresOptIn.Level.WARNING, 12 | message = "This API is experimental and may change in future releases. Use with caution in production code.", 13 | ) 14 | @Target(AnnotationTarget.CLASS, AnnotationTarget.FUNCTION, AnnotationTarget.PROPERTY) 15 | annotation class ExperimentalWhisperKit 16 | -------------------------------------------------------------------------------- /settings.gradle.kts: -------------------------------------------------------------------------------- 1 | pluginManagement { 2 | repositories { 3 | google { 4 | content { 5 | includeGroupByRegex("com\\.android.*") 6 | includeGroupByRegex("com\\.google.*") 7 | includeGroupByRegex("androidx.*") 8 | } 9 | } 10 | mavenCentral() 11 | gradlePluginPortal() 12 | } 13 | } 14 | dependencyResolutionManagement { 15 | repositoriesMode.set(RepositoriesMode.FAIL_ON_PROJECT_REPOS) 16 | repositories { 17 | google() 18 | mavenCentral() 19 | } 20 | } 21 | 22 | rootProject.name = "WhisperKit" 23 | include(":android:examples:WhisperAX") 24 | include(":android:whisperkit") 25 | -------------------------------------------------------------------------------- /cpp/src/Models/tflite_gpu_model.hpp: -------------------------------------------------------------------------------- 1 | // For licensing see accompanying LICENSE file. 2 | // Copyright © 2024 Argmax, Inc. All rights reserved. 
3 | #pragma once 4 | 5 | #if GPU_DELEGATE 6 | #include "tensorflow/lite/delegates/gpu/delegate.h" 7 | #include "tflite_model.hpp" 8 | 9 | class TFLiteGPU : public TFLiteModel { 10 | public: 11 | TFLiteGPU(const std::string& name); 12 | virtual ~TFLiteGPU(); 13 | 14 | virtual bool initialize(std::string model_path, std::string lib_dir, std::string cache_path, int backend, 15 | bool debug = false); 16 | virtual void uninitialize(); 17 | 18 | protected: 19 | virtual bool create_interpreter_delegate(std::string model_path); 20 | }; 21 | #endif 22 | -------------------------------------------------------------------------------- /test/ENVIRONMENT.json: -------------------------------------------------------------------------------- 1 | { 2 | "description": "Environment variables for WhisperKitAndroid project", 3 | "test": { 4 | "hf_repo": "argmaxinc/whisperkit-test-data", 5 | "datasets": ["librispeech-10mins", "earnings22-10mins"], 6 | "output_file": "output.json", 7 | "delegate_file": "delegate.log", 8 | "android_dir": "/sdcard/argmax/tflite", 9 | "executable": "whisperkit-cli", 10 | "metadata": "metadata.json" 11 | }, 12 | "docker": { 13 | "image": "android-ndk-qnn-tensorflow-image", 14 | "rootdir": "/src/AXIE", 15 | "container": "axie_tflite" 16 | }, 17 | "audio": { 18 | "extensions": [".mp3", ".m4a", ".ogg", ".flac", ".aac", ".wav"], 19 | "local_dir": "inputs" 20 | } 21 | } 22 | -------------------------------------------------------------------------------- /.github/pull_request_template.md: -------------------------------------------------------------------------------- 1 | ## Description 2 | Describe what this PR does and why it's needed. Explain any context or dependencies if applicable. 3 | If a related issue exists, please provide a link. 
4 | 5 | ## Type of Change 6 | - [ ] Bug fix 🐛 7 | - [ ] New feature 🚀 8 | - [ ] Refactor 🔄 9 | - [ ] Documentation update 📖 10 | - [ ] Other (please describe) 11 | 12 | ## Test Plan 13 | - [ ] I have run `bash test/test_build_all.sh` and it ran successfully 14 | - [ ] I have tested this change on all relevant platforms. 15 | 16 | ## Checklist 17 | - [ ] My code follows the project's style guidelines. 18 | - [ ] I have updated relevant documentation (if applicable). 19 | - [ ] I have added appropriate tests (if applicable). 20 | - [ ] I have self-reviewed my code before requesting review. 21 | -------------------------------------------------------------------------------- /cpp/src/Models/tflite_qnn_model.hpp: -------------------------------------------------------------------------------- 1 | // For licensing see accompanying LICENSE file. 2 | // Copyright © 2024 Argmax, Inc. All rights reserved. 3 | #pragma once 4 | 5 | #if QNN_DELEGATE 6 | #include "QnnTFLiteDelegate.h" 7 | #include "tensorflow/lite/delegates/gpu/delegate.h" 8 | #include "tflite_model.hpp" 9 | 10 | class TFLiteQNN : public TFLiteModel { 11 | public: 12 | TFLiteQNN(const std::string& name); 13 | virtual ~TFLiteQNN(); 14 | 15 | virtual bool initialize(std::string model_path, std::string lib_dir, std::string cache_path, 16 | int backend = kHtpBackend, bool debug = false); 17 | virtual void uninitialize(); 18 | 19 | protected: 20 | TfLiteQnnDelegateOptions _options; 21 | 22 | virtual bool create_interpreter_delegate(std::string model_path); 23 | }; 24 | #endif 25 | -------------------------------------------------------------------------------- /android/whisperkit/proguard-rules.pro: -------------------------------------------------------------------------------- 1 | # Add project specific ProGuard rules here. 2 | # You can control the set of applied configuration files using the 3 | # proguardFiles setting in build.gradle. 
4 | # 5 | # For more details, see 6 | # http://developer.android.com/guide/developing/tools/proguard.html 7 | 8 | # If your project uses WebView with JS, uncomment the following 9 | # and specify the fully qualified class name to the JavaScript interface 10 | # class: 11 | #-keepclassmembers class fqcn.of.javascript.interface.for.webview { 12 | # public *; 13 | #} 14 | 15 | # Uncomment this to preserve the line number information for 16 | # debugging stack traces. 17 | #-keepattributes SourceFile,LineNumberTable 18 | 19 | # If you keep the line number information, uncomment this to 20 | # hide the original source file name. 21 | #-renamesourcefileattribute SourceFile -------------------------------------------------------------------------------- /cpp/src/WhisperKitTranscriptionResult.cpp: -------------------------------------------------------------------------------- 1 | 2 | #include "WhisperKitTranscriptionResult.hpp" 3 | 4 | whisperkit_transcription_result_t::whisperkit_transcription_result_t() { 5 | curr_transcription.clear(); 6 | all_transcription.clear(); 7 | } 8 | 9 | whisperkit_transcription_result_t::~whisperkit_transcription_result_t() { 10 | curr_transcription.clear(); 11 | all_transcription.clear(); 12 | } 13 | 14 | void whisperkit_transcription_result_t::set_transcription(const std::string& input_transcription) { 15 | curr_transcription = input_transcription; // copy: a const reference cannot be moved from 16 | all_transcription.append(curr_transcription); 17 | } 18 | 19 | std::string whisperkit_transcription_result_t::get_chunk_transcription() const { return curr_transcription; } 20 | 21 | std::string whisperkit_transcription_result_t::get_transcription() const { return all_transcription; } -------------------------------------------------------------------------------- /android/examples/WhisperAX/proguard-rules.pro: -------------------------------------------------------------------------------- 1 | # Add project specific ProGuard rules here. 
2 | # You can control the set of applied configuration files using the 3 | # proguardFiles setting in build.gradle. 4 | # 5 | # For more details, see 6 | # http://developer.android.com/guide/developing/tools/proguard.html 7 | 8 | # If your project uses WebView with JS, uncomment the following 9 | # and specify the fully qualified class name to the JavaScript interface 10 | # class: 11 | #-keepclassmembers class fqcn.of.javascript.interface.for.webview { 12 | # public *; 13 | #} 14 | 15 | # Uncomment this to preserve the line number information for 16 | # debugging stack traces. 17 | #-keepattributes SourceFile,LineNumberTable 18 | 19 | # If you keep the line number information, uncomment this to 20 | # hide the original source file name. 21 | #-renamesourcefileattribute SourceFile -------------------------------------------------------------------------------- /android/whisperkit/detekt-baseline.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | LargeClass:ArgmaxModelDownloaderImplTest.kt$ArgmaxModelDownloaderImplTest 6 | LongMethod:KtorHuggingFaceApiImpl.kt$KtorHuggingFaceApiImpl$private suspend fun FlowCollector<Progress>.downloadFilesWithRetry( from: Repo, revision: String, files: List<String>, baseDir: File, ) 7 | ThrowsCount:WhisperKit.kt$WhisperKit.Builder$@Throws(WhisperKitException::class) fun build(): WhisperKit 8 | TooGenericExceptionCaught:KtorHuggingFaceApiImpl.kt$KtorHuggingFaceApiImpl$e: Exception 9 | TooGenericExceptionCaught:WhisperKitImpl.kt$WhisperKitImpl$e: Exception 10 | UnusedParameter:WhisperKitImpl.kt$WhisperKitImpl$timestamp: Float 11 | 12 | 13 | -------------------------------------------------------------------------------- /android/examples/WhisperAX/src/main/res/values/styles.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 8 | 9 | 10 | 13 | 14 | 15 | 19 | 
-------------------------------------------------------------------------------- /android/whisperkit/src/main/java/com/argmaxinc/whisperkit/network/ArgmaxModelDownloader.kt: -------------------------------------------------------------------------------- 1 | // For licensing see accompanying LICENSE.md file. 2 | // Copyright © 2025 Argmax, Inc. All rights reserved. 3 | 4 | package com.argmaxinc.whisperkit.network 5 | 6 | import com.argmaxinc.whisperkit.huggingface.HuggingFaceApi 7 | import com.argmaxinc.whisperkit.huggingface.HuggingFaceApi.Progress 8 | import kotlinx.coroutines.flow.Flow 9 | import java.io.File 10 | 11 | interface ArgmaxModelDownloader { 12 | /** 13 | * Downloads all model files for a specific model variant. 14 | * 15 | * @param model The model to download 16 | * @param variant The specific variant to download 17 | * @param root The root directory where model files will be downloaded 18 | * @return A Flow of [HuggingFaceApi.Progress] that reports download progress 19 | */ 20 | fun download( 21 | model: ArgmaxModel, 22 | variant: String, 23 | root: File, 24 | ): Flow<Progress> 25 | } 26 | -------------------------------------------------------------------------------- /android/.gitignore: -------------------------------------------------------------------------------- 1 | # Gradle files 2 | .gradle/ 3 | build/ 4 | examples/WhisperAX/build/ 5 | # Allow raw and XML resources 6 | !examples/WhisperAX/src/main/res/raw/ 7 | !examples/WhisperAX/src/main/res/xml/ 8 | examples/WhisperAX/.cxx/ 9 | local.properties 10 | *.hprof 11 | 12 | # Android Studio files 13 | .idea/ 14 | *.iml 15 | *.ipr 16 | *.iws 17 | *.idea/workspace.xml 18 | *.idea/libraries 19 | *.idea/dictionaries 20 | *.idea/misc.xml 21 | *.idea/modules.xml 22 | *.idea/vcs.xml 23 | 24 | # Android specific files 25 | *.apk 26 | *.ap_ 27 | *.dex 28 | *.class 29 | *.keystore 30 | *.jks 31 | *.log 32 | *.csv 33 | *.dat 34 | 35 | # Generated files 36 | captures/ 37 | 
outputs/ 38 | gradle-app.setting 39 | **/build/ 40 | **/captures/ 41 | **/outputs/ 42 | examples/WhisperAX/release/ 43 | 44 | # Coverage reports 45 | */coverage/ 46 | *.html 47 | 48 | # Miscellaneous 49 | .DS_Store 50 | *.orig 51 | *.tmp 52 | *.swp 53 | *.swo 54 | *.stacktrace 55 | *.trace 56 | *.stack 57 | *.user 58 | *.war 59 | *.ear 60 | *.lock 61 | *.sqlite 62 | *.db 63 | *.out 64 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2024 argmax, inc. 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /android/whisperkit/src/main/java/com/argmaxinc/whisperkit/huggingface/HuggingFaceApiConfig.kt: -------------------------------------------------------------------------------- 1 | // For licensing see accompanying LICENSE.md file. 2 | // Copyright © 2025 Argmax, Inc. All rights reserved. 3 | 4 | package com.argmaxinc.whisperkit.huggingface 5 | 6 | /** 7 | * Configuration class for the HuggingFace API implementation. 8 | * This class holds all configurable parameters for the API client. 9 | * 10 | * @property retryCount The maximum number of retry attempts for failed downloads. Defaults to [DEFAULT_MAX_RETRY]. 11 | * @property bearerToken Optional authentication token for accessing private repositories. Defaults to null. 12 | * @property logger The logger implementation to use for API operations. Defaults to [NoOpHuggingFaceLogger]. 13 | */ 14 | data class HuggingFaceApiConfig( 15 | val retryCount: Int = DEFAULT_MAX_RETRY, 16 | val bearerToken: String? = null, 17 | val logger: HuggingFaceLogger = NoOpHuggingFaceLogger, 18 | ) { 19 | companion object { 20 | /** 21 | * Default maximum number of retry attempts for failed downloads. 22 | */ 23 | const val DEFAULT_MAX_RETRY = 3 24 | } 25 | } 26 | -------------------------------------------------------------------------------- /cpp/src/Text/post_proc.hpp: -------------------------------------------------------------------------------- 1 | // For licensing see accompanying LICENSE file. 2 | // Copyright © 2024 Argmax, Inc. All rights reserved. 
3 | #pragma once 4 | 5 | #include <memory> 6 | #include <string> 7 | #include <vector> 8 | 9 | #include "Tokenizer.h" 10 | #include "backend_class.hpp" 11 | 12 | constexpr const uint32_t SAMPLE_BEGIN = 1; 13 | 14 | class PostProcModel : public MODEL_SUPER_CLASS { 15 | public: 16 | PostProcModel(Tokenizer* tokenizer, bool timestamp_text = false); 17 | virtual ~PostProcModel(){}; 18 | 19 | bool initialize(bool debug = false); 20 | virtual void invoke(bool measure_time = false); 21 | 22 | int process(int idx, float* logits, int logits_size, std::vector<int>& decoded_tokens, float base_timestamp); 23 | 24 | std::unique_ptr<std::string> get_sentence(bool clear = true); 25 | void decode_segment(const std::vector<int>& tokens); 26 | 27 | private: 28 | Tokenizer* _tokenizer; 29 | bool _timestamp_text; 30 | std::string _sentence; 31 | 32 | void apply_timestamp_rules(float* logits, int logits_size, std::vector<int>& tokens); 33 | void proc_token(int token, float base_timestamp); 34 | }; 35 | -------------------------------------------------------------------------------- /android/whisperkit/src/main/java/com/argmaxinc/whisperkit/huggingface/Repo.kt: -------------------------------------------------------------------------------- 1 | // For licensing see accompanying LICENSE.md file. 2 | // Copyright © 2025 Argmax, Inc. All rights reserved. 3 | 4 | package com.argmaxinc.whisperkit.huggingface 5 | 6 | /** 7 | * Represents a HuggingFace repository. 8 | * 9 | * @property id The unique identifier of the repository, typically in the format "username/repo-name" 10 | * @property type The type of repository, which determines its purpose and available operations 11 | */ 12 | data class Repo( 13 | val id: String, 14 | val type: RepoType, 15 | ) 16 | 17 | /** 18 | * Enumeration of possible HuggingFace repository types. 19 | * Each type corresponds to a different category of content on the HuggingFace platform. 
20 | * 21 | * @property typeName The string representation of the repository type as used in the HuggingFace API 22 | */ 23 | enum class RepoType(val typeName: String) { 24 | /** Repository containing machine learning models */ 25 | MODELS("models"), 26 | 27 | /** Repository containing datasets for training or evaluation */ 28 | DATASETS("datasets"), 29 | 30 | /** Repository containing interactive demos and applications */ 31 | SPACES("spaces"), 32 | } 33 | -------------------------------------------------------------------------------- /scripts/copy_libraries.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # For licensing see accompanying LICENSE file. 3 | # Copyright © 2024 Argmax, Inc. All rights reserved. 4 | 5 | 6 | ARG=$1 7 | CURRENT_DIR="$(dirname "$(realpath "$0")")" 8 | SOURCE_DIR="$CURRENT_DIR/.." 9 | 10 | if [ "$ARG" = "jni" ]; then 11 | sleep 1 12 | cd "${SOURCE_DIR}" || exit 1 13 | files=( 14 | "external/libs/android/libavcodec.so" 15 | "external/libs/android/libavformat.so" 16 | "external/libs/android/libavutil.so" 17 | "external/libs/android/libswresample.so" 18 | "external/libs/android/libtensorflowlite_gpu_delegate.so" 19 | "external/libs/android/libtensorflowlite.so" 20 | "external/libs/android/libtokenizers_sys.so" 21 | "build/android/libwhisperkit_jni.so" 22 | "build/android/libwhisperkit.so" 23 | "$ANDROID_NDK_ROOT/toolchains/llvm/prebuilt/linux-x86_64/sysroot/usr/lib/aarch64-linux-android/libc++_shared.so" 24 | ) 25 | 26 | # Destination directory 27 | DEST="$SOURCE_DIR/android/whisperkit/src/main/jniLibs/arm64-v8a" 28 | if [ ! -d "$DEST" ]; then 29 | mkdir -p "$DEST" 30 | fi 31 | for file in "${files[@]}"; do 32 | cp "$file" "$DEST" 33 | done 34 | chmod 755 "$DEST"/*.so 35 | fi 36 | -------------------------------------------------------------------------------- /gradle.properties: -------------------------------------------------------------------------------- 1 | # Project-wide Gradle settings. 
2 | # IDE (e.g. Android Studio) users: 3 | # Gradle settings configured through the IDE *will override* 4 | # any settings specified in this file. 5 | 6 | # Specifies the JVM arguments used for the daemon process. 7 | # The setting is particularly useful for tweaking memory settings. 8 | org.gradle.jvmargs=-Xmx2048m -XX:MaxMetaspaceSize=1024m -Dfile.encoding=UTF-8 9 | 10 | # When configured, Gradle will run in incubating parallel mode. 11 | # This option should only be used with decoupled projects. 12 | org.gradle.parallel=true 13 | 14 | # AndroidX package structure to make it clearer which packages are bundled with the 15 | # Android operating system, and which are packaged with your app's APK 16 | android.useAndroidX=true 17 | 18 | # Kotlin code style for this project: "official" or "obsolete": 19 | kotlin.code.style=official 20 | 21 | # Enables namespacing of each library's R class so that its R class includes only the 22 | # resources declared in the library itself and none from the library's dependencies, 23 | # thereby reducing the size of the R class for that library 24 | android.nonTransitiveRClass=true 25 | 26 | # Disable performance logging to reduce memory usage 27 | org.gradle.daemon.performance.disable-logging=true -------------------------------------------------------------------------------- /cpp/src/WhisperKitPipeline.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include <memory> 4 | 5 | #include "WhisperKit.h" 6 | #include "WhisperKitConfiguration.hpp" 7 | 8 | struct TranscribeTask; 9 | struct whisperkit_transcription_result_t; 10 | struct whisperkit_pipeline_t { 11 | public: 12 | whisperkit_pipeline_t(); 13 | whisperkit_pipeline_t(const whisperkit_pipeline_t&) = delete; 14 | whisperkit_pipeline_t& operator=(const whisperkit_pipeline_t&) = delete; 15 | ~whisperkit_pipeline_t(); 16 | whisperkit_pipeline_status_t get_state() const; 17 | void set_state(whisperkit_pipeline_status_t status); 18 | 
void set_configuration(const whisperkit_configuration_t* configuration); 19 | void build(); 20 | // transcribe an audio file 21 | void transcribe(const char* audio_file, whisperkit_transcription_result_t* transcription_result); 22 | // in streaming mode: append any length of audio data 23 | void init_streaming(whisperkit_transcription_result_t* transcription_result, int sample_rate, int num_channels); 24 | bool append_audio(int size, char* buffer); 25 | void close_streaming(); 26 | 27 | private: 28 | whisperkit_configuration_t configuration; 29 | whisperkit_pipeline_status_t status; 30 | std::unique_ptr transcribe_task; 31 | }; -------------------------------------------------------------------------------- /android/whisperkit/src/main/java/com/argmaxinc/whisperkit/huggingface/HuggingFaceLogger.kt: -------------------------------------------------------------------------------- 1 | // For licensing see accompanying LICENSE.md file. 2 | // Copyright © 2025 Argmax, Inc. All rights reserved. 3 | 4 | package com.argmaxinc.whisperkit.huggingface 5 | 6 | /** 7 | * Interface for logging operations in the HuggingFace API implementation. 8 | * This interface provides methods for logging informational and error messages, 9 | * with support for including throwable exceptions in error logs. 10 | */ 11 | interface HuggingFaceLogger { 12 | fun info(message: String) 13 | 14 | fun error(message: String) 15 | 16 | fun error( 17 | throwable: Throwable, 18 | message: String, 19 | ) 20 | } 21 | 22 | /** 23 | * A no-operation implementation of [HuggingFaceLogger] that silently discards all log messages. 24 | * This implementation is useful when logging is not needed or should be disabled. 
#!/bin/bash
#
# Builds every target in TARGETS one after another, running `make build clean` before each.
# The output of each target build is captured in test/logs/<target>_build.log, and the
# script stops with exit status 1 at the first target that fails.

# Repository root: two directory levels above this script
PROJECT_DIR="$(dirname "$(dirname "$(realpath "$0")")")"

# Build targets, in the order they are attempted
TARGETS=("linux" "qnn" "gpu")

set -eo pipefail

LOG_DIR="$PROJECT_DIR/test/logs"
mkdir -p "$LOG_DIR"

# Fail early when a required build tool is missing
for TOOL in make cmake; do
    command -v "$TOOL" >/dev/null 2>&1 || { echo >&2 "$TOOL is required but not installed. Aborting."; exit 1; }
done

trap 'echo "Script interrupted. Cleaning up..."; exit 1' INT TERM ERR

for TARGET in "${TARGETS[@]}"; do
    LOG_FILE="$LOG_DIR/${TARGET}_build.log"

    echo "==============================="
    echo "Building target: $TARGET"
    echo "==============================="

    (cd "$PROJECT_DIR" && make build clean)

    # Build output (stdout and stderr) goes to the per-target log, not the console
    if ! (cd "$PROJECT_DIR" && make build "$TARGET") > "$LOG_FILE" 2>&1; then
        echo "Build failed for target: $TARGET. Check logs at $LOG_FILE"
        exit 1
    fi
    echo "Build successful for target: $TARGET"

    echo "-----------------------------------"
    echo

done

echo "All targets built successfully."
// CLI-side driver declared around a WhisperKitConfig: exposes buildPipeline() and transcribe()
// and keeps handles to the pipeline, its configuration and the transcription result.
class WhisperKitRunner {
   public:
    // The runner stores a reference to `config` (see the `config` member below), so the
    // caller's WhisperKitConfig must outlive this object.
    explicit WhisperKitRunner(WhisperKitConfig& config);
    ~WhisperKitRunner();
    void buildPipeline();
    void transcribe();
    // Public raw pointer to the transcription result.
    // NOTE(review): presumably created and released by this class — confirm in whisperkit_cli.cpp.
    whisperkit_transcription_result_t* transcriptionResult;

   private:
    WhisperKitConfig& config;
    // NOTE(review): raw pointers with a user-declared destructor but no deleted copy operations;
    // if the destructor frees these, copying a runner would double-free — confirm.
    whisperkit_pipeline_t* pipeline;
    whisperkit_configuration_t* configuration;
};
3 | #pragma once 4 | 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | 11 | #ifdef ANDROID_JNI 12 | #include 13 | 14 | constexpr const char* ARGMAX_WHISPERKIT_BUNDLE_INFO = "com.argmaxinc.whisperax"; 15 | #define LOGI(...) __android_log_print(ANDROID_LOG_INFO, ARGMAX_WHISPERKIT_BUNDLE_INFO, __VA_ARGS__) 16 | #define LOGE(...) __android_log_print(ANDROID_LOG_ERROR, ARGMAX_WHISPERKIT_BUNDLE_INFO, __VA_ARGS__) 17 | 18 | #else 19 | 20 | #define LOGI(...) fprintf(stdout, __VA_ARGS__) 21 | #define LOGE(...) fprintf(stderr, __VA_ARGS__) 22 | 23 | #endif 24 | 25 | class TFLiteMessenger { 26 | public: 27 | TFLiteMessenger() { _msg = std::make_unique(); } 28 | std::mutex _mutex; 29 | std::condition_variable _cond_var; 30 | 31 | bool _running = false; 32 | float _timestamp = 0; 33 | std::unique_ptr _msg; 34 | 35 | ~TFLiteMessenger() { _msg.reset(); } 36 | 37 | std::string get_message() { return std::string(*(_msg.get())); } 38 | 39 | void print() { 40 | if (_msg->empty()) { 41 | return; 42 | } 43 | if (!_running) { 44 | LOGI("\nFinal Text: %s\n", _msg.get()->c_str()); 45 | } else { 46 | LOGI("\nText: %s\n", _msg.get()->c_str()); 47 | } 48 | }; 49 | }; 50 | -------------------------------------------------------------------------------- /android/config/detekt.yml: -------------------------------------------------------------------------------- 1 | build: 2 | maxIssues: 0 3 | weights: 4 | complexity: 2 5 | style: 1 6 | LongParameterList: 1 7 | comments: 1 8 | 9 | complexity: 10 | TooManyFunctions: 11 | thresholdInFiles: 20 12 | thresholdInClasses: 15 13 | thresholdInInterfaces: 10 14 | thresholdInObjects: 10 15 | LongParameterList: 16 | functionThreshold: 8 17 | constructorThreshold: 8 18 | CyclomaticComplexMethod: 19 | threshold: 20 20 | NestedBlockDepth: 21 | threshold: 5 22 | ComplexCondition: 23 | threshold: 5 24 | LongMethod: 25 | active: true 26 | threshold: 80 27 | excludes: 28 | - '**/test/**' 29 | - '**/androidTest/**' 30 | - '**/*Test.kt' 31 | 32 | 
style: 33 | MagicNumber: 34 | active: true 35 | ignoreNumbers: ['-1', '0', '1', '2', '100', '1000'] 36 | ignoreHashCodeFunction: true 37 | ignorePropertyDeclaration: true 38 | ignoreAnnotation: true 39 | MaxLineLength: 40 | maxLineLength: 120 41 | NewLineAtEndOfFile: 42 | active: true 43 | TrailingWhitespace: 44 | active: true 45 | 46 | empty-blocks: 47 | EmptyFunctionBlock: 48 | active: true 49 | EmptyClassBlock: 50 | active: true 51 | EmptyIfBlock: 52 | active: true 53 | EmptyWhileBlock: 54 | active: true 55 | EmptyForBlock: 56 | active: true 57 | EmptyTryBlock: 58 | active: true 59 | EmptyCatchBlock: 60 | active: true 61 | EmptyFinallyBlock: 62 | active: true 63 | EmptyDoWhileBlock: 64 | active: true -------------------------------------------------------------------------------- /cpp/src/TranscribeTask.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | 5 | #include 6 | #include 7 | #include 8 | 9 | #include "WhisperKitConfiguration.hpp" 10 | #include "WhisperKitTranscriptionResult.hpp" 11 | #include "nlohmann/json.hpp" 12 | 13 | namespace WhisperKit::TranscribeTask { 14 | class AudioCodec; 15 | class Runtime; 16 | } // namespace WhisperKit::TranscribeTask 17 | 18 | struct TranscribeTask { 19 | std::string audio_file; 20 | std::string model_size; 21 | whisperkit_transcription_result_t* _transcription; 22 | float duration; 23 | 24 | // audio file transcription 25 | void transcribe(const char* audio_file, whisperkit_transcription_result_t* transcription_result); 26 | // audio stream mode: init, append, close 27 | void initStreaming(whisperkit_transcription_result_t* transcription_result, int sample_rate = 0, 28 | int num_channels = 0); 29 | bool appendAudio(int size, char* buffer0, char* buffer1 = nullptr); 30 | void closeStreaming(); 31 | 32 | TranscribeTask(const whisperkit_configuration_t& config); 33 | ~TranscribeTask(); 34 | 35 | private: 36 | void textOutputProc(); 37 | int chunk_idx; 
// Tokenizer-related constants and structures

// Table of special-token values, one int per named token.
// NOTE(review): presumably vocabulary IDs filled in by tokenizer_init_from_file — confirm in Tokenizer.cpp.
typedef struct {
    int startOfTranscriptToken;
    int endOfTranscriptToken;
    int blankToken;
    int noTimestampsToken;
    int timestampBeginToken;
    int noSpeechToken;
    int transcribeToken;
    int translateToken;
    int englishToken;
    int specialTokenBegin;
} SpecialTokens;

// Tokenizer state handed to callers: the special-token table, a non-speech token array with its
// element count, the vocabulary size and the backend handle.
typedef struct {
    SpecialTokens specialTokens;
    // NOTE(review): presumably an array of numNonSpeechTokens entries released by tokenizer_free — confirm.
    int* nonSpeechTokens;
    int numNonSpeechTokens;
    unsigned int vocabSize;
    // Opaque: TokenizerHandle is only forward-declared in this header.
    TokenizerHandle* handle;
} Tokenizer;

// NOTE(review): the declarations below use `bool`, but this header includes no <stdbool.h>,
// so as written it only compiles as C++ (or C23) despite the extern "C" guard above.

// Initialize the tokenizer
Tokenizer* tokenizer_init_from_file(const char* path, const char* config_path);

// Decode token IDs into a string
// The returned string is released with tokenizer_free_rstring (declared below).
char* tokenizer_decode(const Tokenizer* tokenizer, const int* tokens, int tokenCount, bool skipSpecialTokens);

bool tokenizer_is_multilingual(const Tokenizer* tokenizer);

// Convert token string to ID
int tokenizer_convert_token_to_id(const Tokenizer* tokenizer, const char* tokenString);

// Get the size of the vocabulary
// NOTE(review): takes no Tokenizer argument, unlike the other accessors — confirm which tokenizer it reports on.
int tokenizer_get_vocab_size();

// Deallocates tokenizer
void tokenizer_free(Tokenizer* tokenizer);

// Deallocates decoded string
void tokenizer_free_rstring(char* s);
#!/bin/bash
# For licensing see accompanying LICENSE file.
# Copyright © 2024 Argmax, Inc. All rights reserved.

# This build script runs when docker image is created.
# The resulting `libtokenizers_sys.so` is copied into /libs folder in the build.sh
#
# Usage: build_tokenizers.sh [platform]
#   platform  "android" (default) cross-compiles for aarch64-linux-android;
#             any other value builds for the host and copies into external/libs/<platform>/.

# Abort on the first failing step (including a failed rustup download in the pipe below), so a
# broken toolchain install or cargo build cannot go unnoticed or leave a stale library in place.
set -eo pipefail

CURRENT_DIR="$(dirname "$(realpath "$0")")"
SOURCE_DIR="$CURRENT_DIR/../.source/tokenizers-sys"

PLATFORM="${1:-android}"
LIB_DIR="$CURRENT_DIR/../external/libs/$PLATFORM"

# Install Rust using rustup (stable toolchain by default)
curl https://sh.rustup.rs -sSf | sh -s -- -y

# Add cargo binaries to PATH
PATH="/root/.cargo/bin:${PATH}"

export ANDROID_NDK_HOME=/opt/android-ndk/android-ndk-r25c
export TOOLCHAIN="$ANDROID_NDK_HOME/toolchains/llvm/prebuilt/linux-x86_64"
export TARGET=aarch64-linux-android
export API=33

# NOTE(review): CC/AR point at the Android NDK toolchain for every platform, including the
# host (non-android) build below — confirm that is intended.
export CC="$TOOLCHAIN/bin/${TARGET}${API}-clang"
export AR="$TOOLCHAIN/bin/llvm-ar"
export CARGO_TARGET_AARCH64_LINUX_ANDROID_LINKER="$CC"
export PATH="$PATH:$TOOLCHAIN/bin/"

cd "$SOURCE_DIR"
mkdir -p "$LIB_DIR"

if [ "$PLATFORM" = "android" ]; then
    # -f: a missing lock file is not an error
    rm -f "$SOURCE_DIR/Cargo.lock"

    source /root/.cargo/env

    TARGET=aarch64-linux-android

    rustup target add "$TARGET"

    cargo build --release --target "$TARGET"
    # The glob stays outside the quotes so it still expands
    cp "$SOURCE_DIR/target/$TARGET/release/"*.so "$LIB_DIR/"
    cp "$TOOLCHAIN/sysroot/usr/lib/$TARGET/libc++_shared.so" "$LIB_DIR/"
else
    cargo build --release
    cp -rf "$SOURCE_DIR/target/release/libtokenizers_sys.so" "$LIB_DIR/"
fi
4 | 5 | #include "WhisperKit.h" // Use our wrapper instead of direct inclusion 6 | 7 | #define TAG "WhisperKitJNI" 8 | #define LOGI(...) __android_log_print(ANDROID_LOG_INFO, TAG, __VA_ARGS__) 9 | #define LOGE(...) __android_log_print(ANDROID_LOG_ERROR, TAG, __VA_ARGS__) 10 | 11 | #ifdef __cplusplus 12 | extern "C" { 13 | #endif 14 | 15 | // JNI method declarations for MainActivity 16 | JNIEXPORT jint JNICALL Java_com_argmaxinc_whisperkit_WhisperKitImpl_loadModels(JNIEnv *env, jobject thiz, 17 | jstring jsonstr); 18 | JNIEXPORT jint JNICALL Java_com_argmaxinc_whisperkit_WhisperKitImpl_init(JNIEnv *env, jobject thiz, jstring jsonstr); 19 | JNIEXPORT jint JNICALL Java_com_argmaxinc_whisperkit_WhisperKitImpl_close(JNIEnv *env, jobject thiz); 20 | JNIEXPORT jint JNICALL Java_com_argmaxinc_whisperkit_WhisperKitImpl_writeData(JNIEnv *env, jobject thiz, 21 | jbyteArray pcmbuffer); 22 | JNIEXPORT jint JNICALL Java_com_argmaxinc_whisperkit_WhisperKitImpl_setBackend(JNIEnv *env, jobject thiz, 23 | jint encoder_backend, 24 | jint decoder_backend); 25 | JNIEXPORT jstring JNICALL Java_com_argmaxinc_whisperkit_WhisperKitImpl_getPerfString(JNIEnv *env, jobject thiz); 26 | 27 | enum class CallbackMsgType : int { INIT = 0, TEXT_OUT = 1, CLOSE = 2 }; 28 | 29 | // Callback helper function 30 | void sendTextToJava(JNIEnv *env, jobject thiz, jint what, jfloat timestamp, const char *text); 31 | 32 | #ifdef __cplusplus 33 | } 34 | #endif 35 | -------------------------------------------------------------------------------- /cpp/src/WhisperKitPipeline.cpp: -------------------------------------------------------------------------------- 1 | #include "WhisperKitPipeline.hpp" 2 | 3 | #include "TranscribeTask.hpp" 4 | whisperkit_pipeline_t::whisperkit_pipeline_t() { status = WHISPERKIT_PIPELINE_STATUS_INITIALIZED; } 5 | 6 | whisperkit_pipeline_status_t whisperkit_pipeline_t::get_state() const { return status; } 7 | 8 | void whisperkit_pipeline_t::set_state(whisperkit_pipeline_status_t status) 
{ this->status = status; } 9 | 10 | void whisperkit_pipeline_t::set_configuration(const whisperkit_configuration_t* configuration) { 11 | if (configuration == nullptr) { 12 | return; 13 | } 14 | this->configuration = whisperkit_configuration_t(*configuration); 15 | status = WHISPERKIT_PIPELINE_STATUS_CONFIGURED; 16 | } 17 | 18 | whisperkit_pipeline_t::~whisperkit_pipeline_t() { transcribe_task.reset(); } 19 | 20 | void whisperkit_pipeline_t::build() { transcribe_task = std::make_unique(this->configuration); } 21 | void whisperkit_pipeline_t::transcribe(const char* audio_file, 22 | whisperkit_transcription_result_t* transcription_result) { 23 | transcribe_task->transcribe(audio_file, transcription_result); 24 | } 25 | 26 | void whisperkit_pipeline_t::init_streaming(whisperkit_transcription_result_t* transcription_result, int sample_rate, 27 | int num_channels) { 28 | transcribe_task->initStreaming(transcription_result, sample_rate, num_channels); 29 | status = WHISPERKIT_PIPELINE_STATUS_AUDIOINIT; 30 | } 31 | 32 | bool whisperkit_pipeline_t::append_audio(int size, char* buffer) { 33 | bool transcribed = transcribe_task->appendAudio(size, buffer); 34 | return transcribed; 35 | } 36 | 37 | void whisperkit_pipeline_t::close_streaming() { transcribe_task->closeStreaming(); } 38 | -------------------------------------------------------------------------------- /android/examples/WhisperAX/src/main/res/drawable/ic_launcher_foreground.xml: -------------------------------------------------------------------------------- 1 | 7 | 8 | 9 | 15 | 18 | 21 | 22 | 23 | 24 | 30 | -------------------------------------------------------------------------------- /android/whisperkit/src/main/java/com/argmaxinc/whisperkit/util/SegmentTextOnlyMessageProcessor.kt: -------------------------------------------------------------------------------- 1 | package com.argmaxinc.whisperkit.util 2 | 3 | import com.argmaxinc.whisperkit.TranscriptionResult 4 | import 
/**
 * A processor that extracts only the segment text from a raw message, ignoring all
 * timestamps and windows.
 *
 * Every `<|...|>` marker that is not a timestamp is removed first. The text between each pair
 * of consecutive `<|12.34|>`-style timestamp markers then becomes one [TranscriptionSegment].
 * Text before the first or after the last timestamp marker is not part of any segment.
 */
internal class SegmentTextOnlyMessageProcessor : MessageProcessor {
    private companion object {
        private val TIMESTAMP_PATTERN = "<\\|(\\d+\\.\\d+)\\|>".toRegex()

        // Pattern to match any <|str|> that's not a timestamp
        private val NON_TIMESTAMP_PATTERN = "<\\|(?!\\d+\\.\\d+)[^>]*\\|>".toRegex()
    }

    /**
     * @param rawMsg raw message text containing `<|...|>` markers
     * @return a result whose `text` is [rawMsg] unchanged and whose `segments` hold the
     * non-empty texts found between consecutive timestamp markers
     */
    override fun process(rawMsg: String): TranscriptionResult {
        // Remove any markers that aren't timestamps
        val cleanMsg = rawMsg.replace(NON_TIMESTAMP_PATTERN, "")

        // TODO: add start and end to each segment (group 1 of each match holds the timestamp)
        val segments =
            TIMESTAMP_PATTERN
                .findAll(cleanMsg)
                // Pair each timestamp marker with the next one and take the text between them
                .zipWithNext { startMatch, endMatch ->
                    cleanMsg.substring(startMatch.range.last + 1, endMatch.range.first)
                }
                // Skip gaps that are empty or consist of a single newline
                .filter { it != "\n" && it.isNotEmpty() }
                .map { TranscriptionSegment(it) }
                .toMutableList()

        return TranscriptionResult(text = rawMsg, segments = segments)
    }
}
# Runs the whisperkit-cli test on the sample audio file test/jfk_441khz.m4a.
#   linux        : run the Linux build from the repository root
#   gpu|qnn|""   : push the sample audio to the first device listed by `adb devices` and run the
#                  CLI there (the gpu/qnn argument itself is not used beyond selecting this branch)
#
# NOTE(review): every run branch ends with `exit 0`, even when the CLI fails or no device is
# attached — confirm callers rely on that before changing it.

REMOTE_SDROOT_DIR="/sdcard/argmax/tflite"
REMOTE_INPUTS_DIR="${REMOTE_SDROOT_DIR}/inputs"
REMOTE_BIN_DIR="/data/local/tmp/bin"
REMOTE_LIB_DIR="/data/local/tmp/lib"

CURRENT_DIR="$(dirname "$(realpath "$0")")"
SOURCE_DIR="$CURRENT_DIR/.."
LINUX_BUILD_DIR=./build/linux
ARG=$1

case $ARG in
    "linux")
        echo " ${0} linux : run in Docker"
        # Relative paths below are resolved from the repository root, so stop if we cannot get there
        cd "$SOURCE_DIR" || exit 1
        "$LINUX_BUILD_DIR/whisperkit-cli" \
            --audio-path ./test/jfk_441khz.m4a \
            --model-path models/openai_whisper-base/ \
            --report --report-path .
        exit 0 ;;

    "gpu" | "qnn" | "" )
        echo " ${0} [gpu|qnn] : run on Host PC"

        # First serial listed by adb; assigned unconditionally so a DEVICE value inherited
        # from the environment is never used by accident
        DEVICE="$(adb devices | grep -v "List" | awk 'NF {print $1; exit}')"
        if [ "$DEVICE" = "" ]; then
            echo "No Android device is connected via adb"
            exit 0
        fi
        echo "Test on: $DEVICE"

        CMD="cd ${REMOTE_SDROOT_DIR} && \
            export LD_LIBRARY_PATH=${REMOTE_LIB_DIR} && \
            ${REMOTE_BIN_DIR}/whisperkit-cli \
            --audio-path ${REMOTE_SDROOT_DIR}/inputs/jfk_441khz.m4a \
            --model-path ${REMOTE_SDROOT_DIR}/models/openai_whisper-base/ \
            --report --report-path ${REMOTE_SDROOT_DIR}"

        cd "$SOURCE_DIR/test" || exit 1
        adb -s "$DEVICE" push jfk_441khz.m4a "$REMOTE_INPUTS_DIR/."
        adb -s "$DEVICE" shell "$CMD"
        exit 0 ;;
    *)
        echo "Usage: "
        echo " ${0} linux : test for linux (in build/linux)"
        echo " ${0} qnn|gpu : test for arm64 Android (QNN | GPU delegate in build/android)"
        echo " ${0} : test for arm64 Android (QNN delegate in build/android)"
        exit 1 ;;
esac
get_decoder_backend() const noexcept; 35 | bool get_verbose() const noexcept; 36 | int get_log_level() const noexcept; 37 | bool get_prewarm() const noexcept; 38 | bool get_load() const noexcept; 39 | 40 | whisperkit_pipeline_t* get_pipeline() const noexcept; 41 | 42 | private: 43 | std::string audio_encoder; 44 | std::string text_decoder; 45 | std::string tokenizer; 46 | std::string melspectrogram_model; 47 | std::string model_path; 48 | std::string report_path; 49 | std::string lib_dir; 50 | std::string cache_dir; 51 | whisperkit_pipeline_t* pipeline; 52 | int encoder_backend; 53 | int decoder_backend; 54 | 55 | bool verbose; 56 | int log_level; 57 | bool prewarm; 58 | bool load; 59 | }; 60 | -------------------------------------------------------------------------------- /cpp/src/Models/tflite_gpu_model.cpp: -------------------------------------------------------------------------------- 1 | // For licensing see accompanying LICENSE file. 2 | // Copyright © 2024 Argmax, Inc. All rights reserved. 
// Prepares the GPU model for use: records the directories, creates the interpreter and GPU
// delegate, allocates tensors and then calls modify_graph_delegate().
//
// Returns false (after logging via LOGE) when interpreter/delegate creation or tensor allocation
// fails, true otherwise. With `debug` set, the interpreter state is printed after setup.
// `backend` is not used in this function body.
bool TFLiteGPU::initialize(string model_path, string lib_dir, string cache_dir, int backend, bool debug) {
    set_dirs(model_path, lib_dir, cache_dir);

    if (!create_interpreter_delegate(model_path)) {
        LOGE("Failed with create_interpreter_delegate..\n");
        return false;
    }
    if (!allocate_tensors()) {
        LOGE("Failed with allocate_tensors..\n");
        return false;
    }

    // NOTE(review): any result of modify_graph_delegate() is ignored here, and it runs after
    // allocate_tensors() — confirm both against the TFLiteModel base class.
    modify_graph_delegate();

    if (debug) {
        LOGI("\n========== %s delegation info ==========\n", _model_name.c_str());
        tflite::PrintInterpreterState(_interpreter.get());
    }
    return true;
}
# PR Checks: formatting (spotless), static analysis (detekt) and Kotlin unit tests.
# Runs for pull requests (opened, synchronize, reopened) and for pushes to main.
name: PR Checks

on:
  pull_request:
    types: [opened, synchronize, reopened]
  push:
    branches:
      - main

jobs:
  check-format:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4

      - name: Set up JDK 17
        uses: actions/setup-java@v3
        with:
          java-version: '17'
          distribution: 'temurin'
          cache: gradle

      - name: Install clang-format 14 (native)
        run: |
          sudo apt update
          sudo apt install -y clang-format-14
          sudo ln -sf /usr/bin/clang-format-14 /usr/local/bin/clang-format
          clang-format --version

      - name: Verify clang-format installation
        run: |
          if ! command -v clang-format >/dev/null; then
            echo "❌ clang-format not found"
            exit 1
          fi
          clang-format --version

      - name: Grant execute permission for gradlew
        run: chmod +x gradlew

      # `run` scripts use `bash -e` by default, so a failing command ends the step at once.
      # The Gradle call therefore sits inside the `if` condition; a separate `$?` check
      # afterwards would never run and the hint below would never be printed.
      - name: Run spotlessCheck
        run: |
          echo "Running spotlessCheck..."
          if ! ./gradlew spotlessCheck; then
            echo "❌ spotlessCheck failed. Please run './gradlew spotlessApply' locally to fix formatting issues."
            exit 1
          fi

  build-test-kotlin:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4

      - name: Set up JDK 17
        uses: actions/setup-java@v3
        with:
          java-version: '17'
          distribution: 'temurin'
          cache: gradle

      - name: Grant execute permission for gradlew
        run: chmod +x gradlew

      # The Gradle call is inside the `if` condition for the same `bash -e` reason as above.
      - name: Run detekt
        run: |
          echo "Running detekt..."
          if ! ./gradlew detekt; then
            echo "❌ detekt found code style issues. Please fix them locally."
            exit 1
          fi

      - name: Run unit tests
        run: |
          echo "Running unit tests..."
          if ! ./gradlew testDebugUnitTest; then
            echo "❌ Unit tests failed. Please fix the failing tests locally."
            exit 1
          fi
    /**
     * Configured HTTP client for making requests to the HuggingFace API.
     * Features:
     * - 1-second socket timeout between packets
     * - JSON content negotiation
     * - Default request configuration with base URL and content type
     * - Optional bearer token authentication
     */
    val httpClient =
        HttpClient(CIO) {
            install(HttpTimeout) {
                socketTimeoutMillis = 1000 // 1 second between packets
            }

            // Uses the Json instance configured in this class
            install(ContentNegotiation) {
                json(json)
            }

            // Applied to every request made through this client
            defaultRequest {
                url(BASE_URL)
                header(HttpHeaders.ContentType, ContentType.Application.Json)
                // The Authorization header is only added when a token was supplied
                authToken?.let {
                    header(HttpHeaders.Authorization, "Bearer $it")
                }
            }
        }
3 | package com.argmaxinc.whisperax 4 | 5 | import androidx.compose.material3.Typography 6 | import androidx.compose.ui.text.TextStyle 7 | import androidx.compose.ui.text.font.FontWeight 8 | import androidx.compose.ui.unit.sp 9 | 10 | val Typography = Typography( 11 | headlineLarge = TextStyle( 12 | fontWeight = FontWeight.Bold, 13 | fontSize = 32.sp, 14 | lineHeight = 40.sp, 15 | letterSpacing = 0.sp, 16 | ), 17 | headlineMedium = TextStyle( 18 | fontWeight = FontWeight.SemiBold, 19 | fontSize = 28.sp, 20 | lineHeight = 36.sp, 21 | letterSpacing = 0.sp, 22 | ), 23 | headlineSmall = TextStyle( 24 | fontWeight = FontWeight.SemiBold, 25 | fontSize = 24.sp, 26 | lineHeight = 32.sp, 27 | letterSpacing = 0.sp, 28 | ), 29 | titleLarge = TextStyle( 30 | fontWeight = FontWeight.SemiBold, 31 | fontSize = 22.sp, 32 | lineHeight = 28.sp, 33 | letterSpacing = 0.sp, 34 | ), 35 | titleMedium = TextStyle( 36 | fontWeight = FontWeight.SemiBold, 37 | fontSize = 18.sp, 38 | lineHeight = 24.sp, 39 | letterSpacing = 0.1.sp, 40 | ), 41 | titleSmall = TextStyle( 42 | fontWeight = FontWeight.Bold, 43 | fontSize = 14.sp, 44 | lineHeight = 20.sp, 45 | letterSpacing = 0.1.sp, 46 | ), 47 | bodyLarge = TextStyle( 48 | fontWeight = FontWeight.Normal, 49 | fontSize = 16.sp, 50 | lineHeight = 24.sp, 51 | letterSpacing = 0.15.sp, 52 | ), 53 | bodyMedium = TextStyle( 54 | fontWeight = FontWeight.Medium, 55 | fontSize = 14.sp, 56 | lineHeight = 20.sp, 57 | letterSpacing = 0.25.sp, 58 | ), 59 | bodySmall = TextStyle( 60 | fontWeight = FontWeight.Normal, 61 | fontSize = 12.sp, 62 | lineHeight = 16.sp, 63 | letterSpacing = 0.4.sp, 64 | ), 65 | labelLarge = TextStyle( 66 | fontWeight = FontWeight.SemiBold, 67 | fontSize = 14.sp, 68 | lineHeight = 20.sp, 69 | letterSpacing = 0.1.sp, 70 | ), 71 | labelMedium = TextStyle( 72 | fontWeight = FontWeight.SemiBold, 73 | fontSize = 12.sp, 74 | lineHeight = 16.sp, 75 | letterSpacing = 0.5.sp, 76 | ), 77 | labelSmall = TextStyle( 78 | fontWeight = 
FontWeight.SemiBold, 79 | fontSize = 10.sp, 80 | lineHeight = 16.sp, 81 | letterSpacing = 0.5.sp, 82 | ), 83 | ) 84 | -------------------------------------------------------------------------------- /android/examples/WhisperAX/src/main/java/com/argmaxinc/whisperax/WhisperAppTheme.kt: -------------------------------------------------------------------------------- 1 | // For licensing see accompanying LICENSE.md file. 2 | // Copyright © 2025 Argmax, Inc. All rights reserved. 3 | package com.argmaxinc.whisperax 4 | 5 | import androidx.compose.foundation.isSystemInDarkTheme 6 | import androidx.compose.material3.MaterialTheme 7 | import androidx.compose.material3.darkColorScheme 8 | import androidx.compose.material3.lightColorScheme 9 | import androidx.compose.runtime.Composable 10 | import androidx.compose.ui.graphics.Color 11 | 12 | private val DarkColorScheme = darkColorScheme( 13 | primary = Color(0xFF86AAFC), 14 | onPrimary = Color(0xFF002D6F), 15 | primaryContainer = Color(0xFF0C428F), 16 | onPrimaryContainer = Color(0xFFD9E2FF), 17 | secondary = Color(0xFFC0C6DD), 18 | onSecondary = Color(0xFF2B3042), 19 | secondaryContainer = Color(0xFF414659), 20 | onSecondaryContainer = Color(0xFFDCE2F9), 21 | tertiary = Color(0xFFECB6B8), 22 | onTertiary = Color(0xFF492628), 23 | tertiaryContainer = Color(0xFF633B3D), 24 | onTertiaryContainer = Color(0xFFFFD9DC), 25 | error = Color(0xFFFFB4AB), 26 | onError = Color(0xFF690005), 27 | errorContainer = Color(0xFF93000A), 28 | onErrorContainer = Color(0xFFFFDAD6), 29 | background = Color(0xFF1B1B1F), 30 | onBackground = Color(0xFFE3E2E6), 31 | surface = Color(0xFF1B1B1F), 32 | onSurface = Color(0xFFE3E2E6), 33 | surfaceVariant = Color(0xFF44474F), 34 | onSurfaceVariant = Color(0xFFC5C6D0), 35 | ) 36 | 37 | private val LightColorScheme = lightColorScheme( 38 | primary = Color(0xFF2F5FBD), 39 | onPrimary = Color(0xFFFFFFFF), 40 | primaryContainer = Color(0xFFDAE2FF), 41 | onPrimaryContainer = Color(0xFF001A42), 42 | secondary = 
Color(0xFF585E71), 43 | onSecondary = Color(0xFFFFFFFF), 44 | secondaryContainer = Color(0xFFDDE2F9), 45 | onSecondaryContainer = Color(0xFF151B2C), 46 | tertiary = Color(0xFF745052), 47 | onTertiary = Color(0xFFFFFFFF), 48 | tertiaryContainer = Color(0xFFFFD9DC), 49 | onTertiaryContainer = Color(0xFF2C1516), 50 | error = Color(0xFFBA1A1A), 51 | onError = Color(0xFFFFFFFF), 52 | errorContainer = Color(0xFFFFDAD6), 53 | onErrorContainer = Color(0xFF410002), 54 | background = Color(0xFFFEFBFF), 55 | onBackground = Color(0xFF1B1B1F), 56 | surface = Color(0xFFFEFBFF), 57 | onSurface = Color(0xFF1B1B1F), 58 | surfaceVariant = Color(0xFFE1E2EC), 59 | onSurfaceVariant = Color(0xFF44474F), 60 | ) 61 | 62 | @Composable 63 | fun WhisperAppTheme( 64 | darkTheme: Boolean = isSystemInDarkTheme(), 65 | content: @Composable () -> Unit, 66 | ) { 67 | val colorScheme = if (darkTheme) DarkColorScheme else LightColorScheme 68 | 69 | MaterialTheme( 70 | colorScheme = colorScheme, 71 | typography = Typography, 72 | content = content, 73 | ) 74 | } 75 | -------------------------------------------------------------------------------- /android/examples/WhisperAX/src/main/java/com/argmaxinc/whisperax/MainActivity.kt: -------------------------------------------------------------------------------- 1 | // For licensing see accompanying LICENSE.md file. 2 | // Copyright © 2025 Argmax, Inc. All rights reserved. 
3 | package com.argmaxinc.whisperax 4 | 5 | import android.os.Bundle 6 | import androidx.activity.ComponentActivity 7 | import androidx.activity.compose.setContent 8 | import androidx.compose.foundation.layout.fillMaxSize 9 | import androidx.compose.material3.MaterialTheme 10 | import androidx.compose.material3.Surface 11 | import androidx.compose.runtime.Composable 12 | import androidx.compose.runtime.getValue 13 | import androidx.compose.runtime.mutableStateOf 14 | import androidx.compose.runtime.remember 15 | import androidx.compose.runtime.setValue 16 | import androidx.compose.ui.Modifier 17 | import androidx.core.view.WindowCompat 18 | import androidx.lifecycle.viewmodel.compose.viewModel 19 | 20 | class MainActivity : ComponentActivity() { 21 | companion object { 22 | const val TAG = "com.argmaxinc.whisperax" 23 | } 24 | 25 | private lateinit var viewModel: WhisperViewModel 26 | 27 | override fun onCreate(savedInstanceState: Bundle?) { 28 | super.onCreate(savedInstanceState) 29 | 30 | WindowCompat.setDecorFitsSystemWindows(window, false) 31 | 32 | window.statusBarColor = android.graphics.Color.TRANSPARENT 33 | window.navigationBarColor = android.graphics.Color.TRANSPARENT 34 | 35 | viewModel = WhisperViewModel() 36 | viewModel.initContext(this) 37 | 38 | setContent { 39 | WhisperAppTheme { 40 | Surface( 41 | modifier = Modifier.fillMaxSize(), 42 | color = MaterialTheme.colorScheme.background, 43 | ) { 44 | MainScreen(viewModel = viewModel) 45 | } 46 | } 47 | } 48 | } 49 | } 50 | 51 | @Composable 52 | fun MainScreen(viewModel: WhisperViewModel = viewModel()) { 53 | var currentScreen by remember { mutableStateOf("main") } 54 | 55 | when (currentScreen) { 56 | "main" -> { 57 | MainScreenContent( 58 | viewModel = viewModel, 59 | onNavigate = { screen -> 60 | currentScreen = screen.lowercase() 61 | }, 62 | ) 63 | } 64 | 65 | "transcribe" -> { 66 | TranscribeScreen( 67 | viewModel = viewModel, 68 | onBackPressed = { 69 | if (viewModel.isTranscribing.value) { 70 | 
viewModel.stopTranscription() 71 | } 72 | currentScreen = "main" 73 | }, 74 | ) 75 | } 76 | } 77 | } 78 | 79 | @Composable 80 | fun MainScreenContent( 81 | viewModel: WhisperViewModel, 82 | onNavigate: (String) -> Unit, 83 | ) { 84 | MainScreenContentImpl(viewModel, onNavigate) 85 | } 86 | -------------------------------------------------------------------------------- /scripts/build_ffmpeg.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # For licensing see accompanying LICENSE file. 3 | # Copyright © 2024 Argmax, Inc. All rights reserved. 4 | 5 | # This build script runs when docker image is created. 6 | # The resulting library & header files are copied into external/libs & external/inc folder 7 | CURRENT_DIR="$(dirname "$(realpath "$0")")" 8 | SOURCE_DIR="$CURRENT_DIR/../.source/ffmpeg" 9 | PLATFORM=$1 10 | if [ "$PLATFORM" = "" ]; then 11 | PLATFORM="android" 12 | fi 13 | BUILD_DIR=$CURRENT_DIR/../external/build/$PLATFORM/ffmpeg 14 | 15 | cd $SOURCE_DIR 16 | CXXFLAGS="-std=c++17 ${CXXFLAGS}" 17 | 18 | if [ "$PLATFORM" = "linux" ]; then 19 | echo " ${0} linux : build for linux (in build_linux)" 20 | PLATFORM="linux" 21 | ARCH_CONFIG="--cc=gcc --cxx=g++ --enable-x86asm " 22 | else 23 | echo " ${0} android : build for arm64 Android (in build/android)" 24 | PLATFORM="android" 25 | ARCH_CONFIG="--cross-prefix=$ANDROID_NDK_ROOT/toolchains/llvm/prebuilt/linux-x86_64/bin/aarch64-linux-android33- \ 26 | --sysroot=$ANDROID_NDK_ROOT/toolchains/llvm/prebuilt/linux-x86_64/sysroot \ 27 | --enable-cross-compile \ 28 | --target-os=android \ 29 | --arch=arm64 \ 30 | --cpu=armv8-a \ 31 | --nm=$ANDROID_NDK_ROOT/toolchains/llvm/prebuilt/linux-x86_64/bin/llvm-nm \ 32 | --ar=$ANDROID_NDK_ROOT/toolchains/llvm/prebuilt/linux-x86_64/bin/llvm-ar \ 33 | --strip=$ANDROID_NDK_ROOT/toolchains/llvm/prebuilt/linux-x86_64/bin/llvm-strip \ 34 | --cc=$ANDROID_NDK_ROOT/toolchains/llvm/prebuilt/linux-x86_64/bin/aarch64-linux-android33-clang \ 
35 | --cxx=$ANDROID_NDK_ROOT/toolchains/llvm/prebuilt/linux-x86_64/bin/aarch64-linux-android33-clang++ \ 36 | --disable-x86asm " 37 | fi 38 | # C sources receive CFLAGS; C++ sources receive CXXFLAGS (which has -std=c++17 prepended earlier in this script). 39 | ./configure \ 40 | --prefix=${BUILD_DIR} \ 41 | ${ARCH_CONFIG} \ 42 | --extra-cflags="$CFLAGS" \ 43 | --extra-cxxflags="$CXXFLAGS" \ 44 | --extra-ldflags=-ldl \ 45 | --disable-programs \ 46 | --disable-logging \ 47 | --disable-everything \ 48 | --disable-ffplay \ 49 | --disable-doc \ 50 | --disable-devices \ 51 | --disable-swscale \ 52 | --disable-hwaccels \ 53 | --disable-parsers \ 54 | --disable-bsfs \ 55 | --disable-debug \ 56 | --disable-indevs \ 57 | --disable-outdevs \ 58 | --disable-static \ 59 | --enable-ffmpeg \ 60 | --enable-ffprobe \ 61 | --enable-avformat \ 62 | --enable-avcodec \ 63 | --enable-swresample \ 64 | --enable-decoder="mov,mp4,aac,mp3,m4a,flac,vorbis,wavpack" \ 65 | --enable-parser="mov,mp4,aac,mp3,m4a,flac,ogg,wav" \ 66 | --enable-demuxer="mov,mp4,aac,mp3,m4a,flac,ogg,wav" \ 67 | --enable-optimizations \ 68 | --enable-stripping \ 69 | --enable-small \ 70 | --enable-shared \ 71 | --enable-protocol=file,http,tcp,rtmp,rtsp 72 | 73 | sleep 1 74 | make clean; make -j 12 75 | sleep 1 76 | make install 77 | 78 | cp -rf ${BUILD_DIR}/lib/lib*.so* $CURRENT_DIR/../external/libs/$PLATFORM/ 79 | cp -rf ${BUILD_DIR}/include/* $CURRENT_DIR/../external/inc/ 80 | -------------------------------------------------------------------------------- /gradlew.bat: -------------------------------------------------------------------------------- 1 | @rem 2 | @rem Copyright 2015 the original author or authors. 3 | @rem 4 | @rem Licensed under the Apache License, Version 2.0 (the "License"); 5 | @rem you may not use this file except in compliance with the License.
6 | @rem You may obtain a copy of the License at 7 | @rem 8 | @rem https://www.apache.org/licenses/LICENSE-2.0 9 | @rem 10 | @rem Unless required by applicable law or agreed to in writing, software 11 | @rem distributed under the License is distributed on an "AS IS" BASIS, 12 | @rem WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | @rem See the License for the specific language governing permissions and 14 | @rem limitations under the License. 15 | @rem 16 | 17 | @if "%DEBUG%" == "" @echo off 18 | @rem ########################################################################## 19 | @rem 20 | @rem Gradle startup script for Windows 21 | @rem 22 | @rem ########################################################################## 23 | 24 | @rem Set local scope for the variables with windows NT shell 25 | if "%OS%"=="Windows_NT" setlocal 26 | 27 | set DIRNAME=%~dp0 28 | if "%DIRNAME%" == "" set DIRNAME=. 29 | set APP_BASE_NAME=%~n0 30 | set APP_HOME=%DIRNAME% 31 | 32 | @rem Resolve any "." and ".." in APP_HOME to make it shorter. 33 | for %%i in ("%APP_HOME%") do set APP_HOME=%%~fi 34 | 35 | @rem Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script. 36 | set DEFAULT_JVM_OPTS="-Xmx64m" "-Xms64m" 37 | 38 | @rem Find java.exe 39 | if defined JAVA_HOME goto findJavaFromJavaHome 40 | 41 | set JAVA_EXE=java.exe 42 | %JAVA_EXE% -version >NUL 2>&1 43 | if "%ERRORLEVEL%" == "0" goto execute 44 | 45 | echo. 46 | echo ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH. 47 | echo. 48 | echo Please set the JAVA_HOME variable in your environment to match the 49 | echo location of your Java installation. 50 | 51 | goto fail 52 | 53 | :findJavaFromJavaHome 54 | set JAVA_HOME=%JAVA_HOME:"=% 55 | set JAVA_EXE=%JAVA_HOME%/bin/java.exe 56 | 57 | if exist "%JAVA_EXE%" goto execute 58 | 59 | echo. 60 | echo ERROR: JAVA_HOME is set to an invalid directory: %JAVA_HOME% 61 | echo. 
62 | echo Please set the JAVA_HOME variable in your environment to match the 63 | echo location of your Java installation. 64 | 65 | goto fail 66 | 67 | :execute 68 | @rem Setup the command line 69 | 70 | set CLASSPATH=%APP_HOME%\gradle\wrapper\gradle-wrapper.jar 71 | 72 | 73 | @rem Execute Gradle 74 | "%JAVA_EXE%" %DEFAULT_JVM_OPTS% %JAVA_OPTS% %GRADLE_OPTS% "-Dorg.gradle.appname=%APP_BASE_NAME%" -classpath "%CLASSPATH%" org.gradle.wrapper.GradleWrapperMain %* 75 | 76 | :end 77 | @rem End local scope for the variables with windows NT shell 78 | if "%ERRORLEVEL%"=="0" goto mainEnd 79 | 80 | :fail 81 | rem Set variable GRADLE_EXIT_CONSOLE if you need the _script_ return code instead of 82 | rem the _cmd.exe /c_ return code! 83 | if not "" == "%GRADLE_EXIT_CONSOLE%" exit 1 84 | exit /b 1 85 | 86 | :mainEnd 87 | if "%OS%"=="Windows_NT" endlocal 88 | 89 | :omega 90 | -------------------------------------------------------------------------------- /cpp/src/Audio/audio_input.hpp: -------------------------------------------------------------------------------- 1 | // For licensing see accompanying LICENSE file. 2 | // Copyright © 2024 Argmax, Inc. All rights reserved. 
3 | #pragma once 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | 10 | extern "C" { 11 | #include 12 | #include 13 | #include 14 | #include 15 | } 16 | 17 | #include "backend_class.hpp" 18 | 19 | constexpr const int SAMPLE_FREQ = 16000; 20 | 21 | inline std::unique_ptr av_err2string(int errnum) { 22 | return std::make_unique( 23 | av_make_error_string((char*)__builtin_alloca(AV_ERROR_MAX_STRING_SIZE), AV_ERROR_MAX_STRING_SIZE, errnum)); 24 | } 25 | 26 | class AudioBuffer { 27 | private: 28 | SwrContext* _swr; 29 | AVFrame* _source_frame; 30 | AVFrame* _target_frame; 31 | std::mutex _mutex; 32 | bool _verbose; 33 | 34 | // target buf associated, with 16khz, mono PCM data 35 | std::vector _buffer; 36 | int _tgt_bytes_per_sample; 37 | int _src_bytes_per_sample; 38 | 39 | public: 40 | AudioBuffer(); 41 | ~AudioBuffer(); 42 | 43 | bool initialize(AVFrame* src_frame, AVFrame* tgt_frame, bool verbose = false); 44 | void uninitialize(); 45 | bool empty_source(); 46 | 47 | int append(int bytes, char* buffer0, char* buffer1 = nullptr); 48 | int samples(int desired_samples = 0); 49 | void consumed(int samples); 50 | float* get_buffer() { return _buffer.data(); } 51 | int get_srcbytes_per_sample() { return _src_bytes_per_sample; } 52 | void print_frame_info(); 53 | }; 54 | 55 | class AudioInputModel { 56 | public: 57 | AudioInputModel(int freq, int channels, int format = AV_SAMPLE_FMT_FLT); 58 | ~AudioInputModel(); 59 | 60 | bool initialize(bool debug = false); 61 | void uninitialize(); 62 | virtual void invoke(bool measure_time = false); 63 | 64 | // this is temporary 65 | std::vector> get_input_ptrs(); 66 | std::vector> get_output_ptrs(); 67 | 68 | void fill_pcmdata(int size, char* pcm_buffer0, char* pcm_buffer1 = nullptr); 69 | float get_next_chunk(char* output); 70 | int get_curr_buf_time() { return _curr_buf_time; } 71 | float get_total_input_time(); 72 | bool empty_source() { return _pcm_buffer->empty_source(); } 73 | 74 | private: 75 | 
std::unique_ptr _model; 76 | 77 | int32_t _total_src_bytes = 0; 78 | int32_t _buffer_index = 0; 79 | 80 | std::unique_ptr _pcm_buffer; 81 | 82 | AVFrame* _source_frame; 83 | AVFrame* _target_frame; 84 | 85 | const float _energy_threshold = 0.02; 86 | const int _frame_length_samples = (0.1 * 16000); 87 | 88 | std::vector _float_buffer; 89 | int32_t _silence_index = 0; 90 | int32_t _remain_samples = 0; 91 | int _curr_buf_time = 0; 92 | 93 | void read_audio_file(std::string input_file); 94 | void chunk_all(); 95 | float get_silence_index(char* output, int audio_samples); 96 | int get_next_samples(); 97 | uint32_t split_on_middle_silence(uint32_t end_index); 98 | }; 99 | -------------------------------------------------------------------------------- /cpp/src/Models/tflite_model.hpp: -------------------------------------------------------------------------------- 1 | // For licensing see accompanying LICENSE file. 2 | // Copyright © 2024 Argmax, Inc. All rights reserved. 3 | #pragma once 4 | 5 | #include 6 | #include 7 | 8 | #include 9 | #include 10 | #include 11 | #include 12 | #include 13 | #include 14 | #include 15 | #include 16 | #include 17 | 18 | #include "tensorflow/lite/builtin_ops.h" 19 | #include "tensorflow/lite/context_util.h" 20 | #include "tensorflow/lite/interpreter.h" 21 | #include "tensorflow/lite/interpreter_builder.h" 22 | #include "tensorflow/lite/kernels/register.h" 23 | #include "tflite_msg.hpp" 24 | 25 | #define TFLITE_FUNCTION_CHECK(x) \ 26 | if ((x) != kTfLiteOk) { \ 27 | fprintf(stderr, "Error at %s:%d\n", __FUNCTION__, __LINE__); \ 28 | return false; \ 29 | } 30 | 31 | using json = nlohmann::json; 32 | 33 | namespace WhisperKit { 34 | namespace InMemoryModel { 35 | enum class ModelType { kSimpleVADModel = 1, kSimplePostProcessingModel = 2 }; 36 | } 37 | } // namespace WhisperKit 38 | 39 | class TFLiteModel { 40 | public: 41 | TFLiteModel(const std::string& name); 42 | virtual ~TFLiteModel(); 43 | 44 | bool initialize(std::string model_path, 
std::string lib_dir, std::string cache_path, int backend, 45 | bool debug = false); 46 | 47 | bool initializeModelInMemory(WhisperKit::InMemoryModel::ModelType model_type, bool debug = false); 48 | 49 | void uninitialize(); 50 | virtual void invoke(bool measure_time = false); 51 | 52 | std::mutex* get_mutex() { return &_mutex; } 53 | void read_input_file(std::string input_file, int idx); 54 | void read_input_data(char* input_data, int idx); 55 | std::vector> get_input_ptrs(); 56 | std::vector> get_output_ptrs(); 57 | std::pair get_output_with_name(const std::string& name); 58 | 59 | void print_tensor_dims(); 60 | std::unique_ptr get_latency_json(); 61 | float get_latency_median(); 62 | float get_latency_sum(); 63 | float get_latency_avg(); 64 | int get_inference_num() { return _latencies.size(); } 65 | 66 | static void save_tensor(std::string filename, char* tensor, int size); 67 | 68 | std::vector _latencies; 69 | 70 | std::unique_ptr _interpreter; 71 | 72 | protected: 73 | std::mutex _mutex; 74 | std::unique_ptr _model; 75 | 76 | flatbuffers::FlatBufferBuilder _builder; 77 | TfLiteDelegate* _delegate = nullptr; 78 | std::string _model_name; 79 | std::string _lib_dir; 80 | std::string _cache_dir; 81 | std::string _model_token; 82 | 83 | std::vector> _input_ptrs; 84 | std::vector> _output_ptrs; 85 | 86 | bool create_interpreter_delegate(std::string model_path); 87 | bool allocate_tensors(); 88 | void modify_graph_delegate(); 89 | void set_dirs(std::string filename, std::string lib_dir, std::string cache_dir); 90 | 91 | private: 92 | bool buildSimpleVADModel(); 93 | bool buildPostProcModel(); 94 | }; 95 | -------------------------------------------------------------------------------- /android/examples/WhisperAX/build.gradle.kts: -------------------------------------------------------------------------------- 1 | plugins { 2 | alias(libs.plugins.android.application) 3 | alias(libs.plugins.kotlin.android) 4 | alias(libs.plugins.kotlin.compose) 5 | } 6 | 7 | android { 
8 | namespace = "com.argmaxinc.whisperax" 9 | compileSdk = 35 10 | 11 | defaultConfig { 12 | applicationId = "com.argmaxinc.whisperax" 13 | minSdk = 26 14 | targetSdk = 35 15 | versionCode = 7 16 | versionName = "0.3.3" 17 | 18 | testInstrumentationRunner = "androidx.test.runner.AndroidJUnitRunner" 19 | } 20 | 21 | buildTypes { 22 | release { 23 | isMinifyEnabled = false 24 | proguardFiles( 25 | getDefaultProguardFile("proguard-android-optimize.txt"), 26 | "proguard-rules.pro", 27 | ) 28 | } 29 | } 30 | 31 | compileOptions { 32 | sourceCompatibility = JavaVersion.VERSION_11 33 | targetCompatibility = JavaVersion.VERSION_11 34 | } 35 | 36 | kotlinOptions { 37 | jvmTarget = "11" 38 | } 39 | 40 | buildFeatures { 41 | compose = true 42 | } 43 | 44 | packaging { 45 | resources { 46 | excludes += "/META-INF/{AL2.0,LGPL2.1}" 47 | } 48 | jniLibs { 49 | useLegacyPackaging = true 50 | } 51 | } 52 | } 53 | 54 | dependencies { 55 | implementation(libs.core.ktx) 56 | implementation(libs.lifecycle.runtime.ktx) 57 | implementation(libs.activity.compose) 58 | 59 | // Material Components for XML layouts (for backward compatibility) 60 | implementation(libs.material) 61 | implementation(libs.appcompat) 62 | 63 | // Compose dependencies with specific versions 64 | val composeBom = platform(libs.compose.bom) 65 | implementation(composeBom) 66 | androidTestImplementation(composeBom) 67 | 68 | // Individual Compose dependencies 69 | implementation(libs.compose.ui) 70 | implementation(libs.compose.ui.graphics) 71 | implementation(libs.compose.ui.tooling.preview) 72 | implementation(libs.compose.material3) 73 | 74 | // Material Icons Extended 75 | implementation(libs.compose.material.icons.extended) 76 | 77 | // Navigation and ViewModel 78 | implementation(libs.navigation.compose) 79 | implementation(libs.lifecycle.viewmodel.compose) 80 | 81 | // For permissions handling 82 | implementation(libs.accompanist.permissions) 83 | 84 | // JSON serialization (for your existing app) 85 | 
implementation(libs.kotlinx.serialization.json) 86 | 87 | testImplementation(libs.junit) 88 | androidTestImplementation(libs.androidx.test.ext.junit) 89 | androidTestImplementation(libs.espresso.core) 90 | androidTestImplementation(libs.compose.ui.test.junit4) 91 | debugImplementation(libs.compose.ui.tooling) 92 | debugImplementation(libs.compose.ui.test.manifest) 93 | 94 | // To run whisperkit, include the following two sets of dependencies 95 | 96 | // 1 - WhisperKit API 97 | implementation(project(":android:whisperkit")) 98 | // 2 - dependencies to accelerate inference where QNN hardware is available 99 | implementation(libs.qnn.runtime) 100 | implementation(libs.qnn.litert.delegate) 101 | } 102 | -------------------------------------------------------------------------------- /scripts/Dockerfile: -------------------------------------------------------------------------------- 1 | # For licensing see accompanying LICENSE file. 2 | # Copyright © 2024 Argmax, Inc. All rights reserved. 3 | FROM ubuntu:22.04 AS build-env 4 | 5 | RUN apt-get update && apt-get install -y \ 6 | openjdk-17-jre-headless build-essential cmake git \ 7 | python3 python3-pip apt-transport-https \ 8 | curl wget vim g++ unzip libdrm-dev \ 9 | pkg-config gnome-desktop-testing libasound2-dev libpulse-dev \ 10 | libxrandr-dev libxcursor-dev libxfixes-dev libxi-dev libxss-dev \ 11 | libxkbcommon-dev libgbm-dev libgl-dev libgles2-mesa-dev \ 12 | libegl1-mesa-dev libdbus-1-dev libibus-1.0-dev libudev-dev fcitx-libs-dev \ 13 | libpipewire-0.3-dev libwayland-dev libdecor-0-dev nasm yasm libssl-dev \ 14 | lsb-release software-properties-common gnupg \ 15 | && rm -rf /var/lib/apt/lists/* \ 16 | && apt-get clean 17 | 18 | FROM build-env AS tflite-env 19 | 20 | ENV TF_PYTHON_VERSION=3.10 21 | 22 | ARG CMAKE_VERION=3.22.1 23 | ARG CMD_TOOLS=cmdline-tools 24 | ENV ANDROID_SDK_HOME=/opt/android-sdk 25 | 26 | # ARG ANDROID_NDK_VERSION=android-ndk-r27b => this is not supported by tensorflow 27 | ARG
ANDROID_NDK_VERSION=android-ndk-r25c 28 | ARG ANDROID_NDK_PATH=/opt/android-ndk 29 | ENV ANDROID_NDK_HOME=$ANDROID_NDK_PATH/$ANDROID_NDK_VERSION 30 | ENV TENSORFLOW_SOURCE_DIR=/opt/tensorflow 31 | 32 | ENV ANDROID_NDK_ROOT=$ANDROID_NDK_PATH/$ANDROID_NDK_VERSION 33 | ENV ANDROID_HOME=$ANDROID_SDK_HOME 34 | 35 | ENV QNN_SDK_ROOT=/opt/qnn-sdk 36 | ENV QNN_RUNTIME_ROOT=/opt/qnn-runtime 37 | ENV AXIE_ROOT=/src/AXIE 38 | 39 | ARG ANDROID_NDK_ZIP=$ANDROID_NDK_VERSION-linux.zip 40 | ARG BAZEL_INSTALLER=bazel-7.4.1-installer-linux-x86_64.sh 41 | ARG BAZEL_DIR=/opt/bazel 42 | ARG QNN_RUNTIME=qnn-runtime-2.33.0.aar 43 | ARG QNN_TFLITE_DELEGATE=qnn-litert-delegate-2.33.0.aar 44 | ARG ANDROID_COMMAND_LINE_TOOLS=commandlinetools-linux-11076708_latest.zip 45 | 46 | # Copy build dependencies 47 | ADD .source/$ANDROID_NDK_ZIP $ANDROID_NDK_PATH/ 48 | ADD .source/$BAZEL_INSTALLER $BAZEL_DIR/$BAZEL_INSTALLER 49 | ADD .source/$QNN_RUNTIME $QNN_RUNTIME_ROOT/ 50 | ADD .source/$QNN_TFLITE_DELEGATE $QNN_SDK_ROOT/ 51 | ADD .source/$ANDROID_COMMAND_LINE_TOOLS $ANDROID_SDK_HOME/ 52 | 53 | # install clang-18 54 | RUN wget -qO- https://apt.llvm.org/llvm.sh | bash -s -- 18 && \ 55 | ln -s /usr/bin/clang-18 /usr/bin/clang 56 | 57 | # Unzip and install dependencies 58 | RUN cd $QNN_RUNTIME_ROOT && unzip $QNN_RUNTIME && rm $QNN_RUNTIME && \ 59 | cd $QNN_SDK_ROOT && unzip $QNN_TFLITE_DELEGATE && rm $QNN_TFLITE_DELEGATE && \ 60 | cd $ANDROID_NDK_PATH && unzip $ANDROID_NDK_ZIP && rm $ANDROID_NDK_ZIP && \ 61 | cd $ANDROID_SDK_HOME && unzip $ANDROID_COMMAND_LINE_TOOLS && rm $ANDROID_COMMAND_LINE_TOOLS && \ 62 | yes | $CMD_TOOLS/bin/sdkmanager --install "platform-tools" --sdk_root=$ANDROID_SDK_HOME && \ 63 | $CMD_TOOLS/bin/sdkmanager --install "platforms;android-34" --sdk_root=$ANDROID_SDK_HOME && \ 64 | $CMD_TOOLS/bin/sdkmanager --install "build-tools;34.0.0" --sdk_root=$ANDROID_SDK_HOME && \ 65 | $CMD_TOOLS/bin/sdkmanager --install "cmake;$CMAKE_VERION" --sdk_root=$ANDROID_SDK_HOME && \ 66 | ln -s 
$AXIE_ROOT/.source/tensorflow $TENSORFLOW_SOURCE_DIR && \ 67 | chmod +x $BAZEL_DIR/$BAZEL_INSTALLER && $BAZEL_DIR/$BAZEL_INSTALLER && rm -rf $BAZEL_DIR 68 | 69 | # Set up the PATH 70 | ENV PATH=$PATH:$ANDROID_NDK_HOME:$ANDROID_SDK_HOME/$CMD_TOOLS/bin:$ANDROID_SDK_HOME/platform-tools:$ANDROID_SDK_HOME/cmake/$CMAKE_VERION/bin 71 | 72 | WORKDIR $AXIE_ROOT 73 | -------------------------------------------------------------------------------- /scripts/build_tensorflow.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # For licensing see accompanying LICENSE file. 3 | # Copyright © 2024 Argmax, Inc. All rights reserved. 4 | 5 | # This build script runs when docker image is created. 6 | # The resulting `libtensorflowlite_gpu_delegate.so` is copied into /libs folder in the build.sh 7 | 8 | CURRENT_DIR="$(dirname "$(realpath "$0")")" 9 | SOURCE_DIR="$CURRENT_DIR/.." 10 | PLATFORM=$1 11 | if [ "$PLATFORM" = "" ]; then 12 | PLATFORM="android" 13 | fi 14 | 15 | export PYTHON_BIN_PATH=/usr/bin/python3 16 | export PYTHON_LIB_PATH=/usr/lib/python3/dist-packages 17 | export TF_NEED_ROCM=0 18 | export TF_NEED_CUDA=0 19 | export TF_NEED_CLANG=1 20 | export CLANG_COMPILER_PATH=/usr/bin/clang-18 21 | export CC_OPT_FLAGS=-Wno-sign-compare 22 | 23 | # nightly tf commit needs bazel 7.4.1 24 | USING_NIGHTLY_TF_COMMIT=1 25 | REQUIRED_BAZEL_VERSION="7.4.1" 26 | BAZEL_BIN_DIR="/usr/local/lib/bazel/bin" 27 | BAZEL_FILENAME="bazel-${REQUIRED_BAZEL_VERSION}-linux-x86_64" 28 | BAZEL_PATH="${BAZEL_BIN_DIR}/${BAZEL_FILENAME}" 29 | 30 | if [ "$USING_NIGHTLY_TF_COMMIT" = "1" ]; then 31 | if [ -f "$BAZEL_PATH" ]; then 32 | echo "Bazel $REQUIRED_BAZEL_VERSION already exists at $BAZEL_PATH. Skipping download." 33 | else 34 | echo "Downloading Bazel $REQUIRED_BAZEL_VERSION..." 
35 | mkdir -p "$BAZEL_BIN_DIR" 36 | cd "$BAZEL_BIN_DIR" || exit 1 37 | curl -fLO "https://releases.bazel.build/${REQUIRED_BAZEL_VERSION}/release/${BAZEL_FILENAME}" 38 | chmod +x "$BAZEL_FILENAME" 39 | echo "Bazel $REQUIRED_BAZEL_VERSION downloaded to $BAZEL_PATH." 40 | fi 41 | fi 42 | 43 | if [ "$PLATFORM" = "android" ]; then 44 | export TF_SET_ANDROID_WORKSPACE=1 45 | export ANDROID_NDK_API_LEVEL=24 46 | export ANDROID_API_LEVEL=34 47 | export ANDROID_BUILD_TOOLS_VERSION=34.0.0 48 | 49 | cd $TENSORFLOW_SOURCE_DIR && ./configure 50 | 51 | if [ ! -f $SOURCE_DIR/external/libs/$PLATFORM/libtensorflowlite_gpu_delegate.so ]; then 52 | echo "$SOURCE_DIR/external/libs/$PLATFORM ..." 53 | echo "Building libtensorflowlite_gpu_delegate.so ..." 54 | printenv 55 | mkdir -p tensorflow/lite/delegates/gpu 56 | bazel build -c opt --config android_arm64 --cxxopt=--std=c++17 tensorflow/lite/delegates/gpu:libtensorflowlite_gpu_delegate.so 57 | find "$TENSORFLOW_SOURCE_DIR/" $TENSORFLOW_SOURCE_DIR/bazel-bin/ \ 58 | -name libtensorflowlite_gpu_delegate.so -exec cp {} $SOURCE_DIR/external/libs/android/ \; 59 | fi 60 | 61 | if [ ! -f $SOURCE_DIR/external/libs/$PLATFORM/libtensorflowlite.so ]; then 62 | bazel build -c opt --config android_arm64 --cxxopt=--std=c++17 //tensorflow/lite:libtensorflowlite.so 63 | find "$TENSORFLOW_SOURCE_DIR/" $TENSORFLOW_SOURCE_DIR/bazel-bin/ \ 64 | -name libtensorflowlite.so -exec cp {} $SOURCE_DIR/external/libs/$PLATFORM/ \; 65 | fi 66 | else 67 | export TF_SET_ANDROID_WORKSPACE=0 68 | if [ ! -f $SOURCE_DIR/external/libs/$PLATFORM/libtensorflowlite.so ]; then 69 | cd $TENSORFLOW_SOURCE_DIR && ./configure 70 | 71 | bazel build //tensorflow/lite:libtensorflowlite.so 72 | find "$TENSORFLOW_SOURCE_DIR/" $TENSORFLOW_SOURCE_DIR/bazel-bin/ \ 73 | -name libtensorflowlite.so -exec cp {} $SOURCE_DIR/external/libs/$PLATFORM/ \; 74 | fi 75 | fi 76 | 77 | if [ ! 
-d $SOURCE_DIR/external/inc/flatbuffers ]; then 78 | cp -rf $TENSORFLOW_SOURCE_DIR/bazel-tensorflow/external/flatbuffers/include/flatbuffers \ 79 | $SOURCE_DIR/external/inc/. 80 | fi 81 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | # For licensing see accompanying LICENSE file. 2 | # Copyright © 2024 Argmax, Inc. All rights reserved. 3 | 4 | SCRIPTS_DIR = ./scripts 5 | 6 | # Define targets for each script; the sub-target words (all, linux, qnn, gpu, jni, forced) are phony too, so a same-named path such as ./jni cannot shadow them 7 | .PHONY: setup env ci-env clean rebuild-env download-models build test adb-push format help all linux qnn gpu jni forced 8 | 9 | args = `arg="$(filter-out $@,$(MAKECMDGOALS))" && echo $${arg:-${1}}` 10 | 11 | help: 12 | @echo "Available targets:" 13 | @echo " setup Checking dependencies and any setup for the host." 14 | @echo " download-models Download all models and files." 15 | @echo " env Builds and runs docker environment to build axie_tflite CLI." 16 | @echo " ci-env Builds and runs docker environment for GitHub CI" 17 | @echo " rebuild-env Clean and rebuilds and runs docker environment." 18 | @echo " clean Clean WhisperKitAndroid build." 19 | @echo " [all] Deep clean WhisperKitAndroid build, including external components" 20 | @echo " build Build the axie_tflite CLI. **Run this inside development environment** " 21 | @echo " [qnn|gpu|linux|jni] Build for each target: QNN or GPU for Android, or Linux" 22 | @echo " adb-push Push axie_tflite CLI and other dependencies to the Android device. Run this on host." 23 | @echo " test Builds and install test dependencies." 24 | @echo " format Run spotlessApply to format code." 25 | 26 | 27 | setup: 28 | @echo "Setting up environment..." 29 | @echo "Checking for Aria2 ..." 30 | @which aria2c > /dev/null || (echo "Error: Aria2 is not installed. Install using 'brew install aria2' or other methods from https://aria2.github.io/ and try again" && exit 1) 31 | @echo "Checking for docker ..."
32 | @which docker > /dev/null || (echo "Error: Docker is not installed. Install docker from https://www.docker.com/ and try again." && exit 1) 33 | @echo "Checking for adb ..." 34 | @which adb > /dev/null || (echo "Error: Android Debug Bridge (adb) is not installed. Install it using 'brew install --cask android-platform-tools' or by installing Android Studio and try again." && exit 1) 35 | @echo "Checking for expect ..." 36 | @which expect > /dev/null || (echo "Error: expect is not installed. Install using 'brew install expect' or other method from https://core.tcl-lang.org/expect/index and try again" && exit 1) 37 | @echo "Checking for clang-format ..." 38 | @which clang-format > /dev/null || (echo "Error: clang-format is not installed. Install using 'brew install clang-format' or other methods from https://clang.llvm.org/docs/ClangFormat.html and try again" && exit 1) 39 | @echo "Done 🚀" 40 | 41 | download-models: 42 | @bash $(SCRIPTS_DIR)/download_models.sh 43 | 44 | env: 45 | @bash $(SCRIPTS_DIR)/dev_env.sh 46 | 47 | ci-env: 48 | @bash $(SCRIPTS_DIR)/dev_env.sh -c 49 | 50 | rebuild-env: 51 | @bash $(SCRIPTS_DIR)/dev_env.sh -rf 52 | 53 | clean: 54 | @bash $(SCRIPTS_DIR)/build.sh clean $(call args,) 55 | 56 | build: 57 | @bash $(SCRIPTS_DIR)/build.sh $(call args,) 58 | 59 | test: 60 | @bash $(SCRIPTS_DIR)/build_test.sh $(call args,) 61 | 62 | adb-push: 63 | @bash $(SCRIPTS_DIR)/adb_push.sh $(call args,) 64 | 65 | all: # do nothing - sub target of clean 66 | @echo "" 67 | 68 | linux: # do nothing - sub target of build/test 69 | @echo "" 70 | 71 | qnn: # do nothing - sub target of build/test 72 | @echo "" 73 | 74 | gpu: # do nothing - sub target of build/test 75 | @echo "" 76 | 77 | jni: # do nothing - sub target of build/test 78 | @echo "" 79 | 80 | forced: # do nothing - sub target of adb-push 81 | @echo "" 82 | 83 | format: 84 | @echo "Running code formatting..." 
85 | ./gradlew :spotlessApply 86 | -------------------------------------------------------------------------------- /cpp/src/Core/DelegateInterface.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | 9 | namespace WhisperKit { 10 | namespace Delegates { 11 | 12 | enum BackendType { 13 | WHISPERKIT_BACKEND_NPU_QCOM = 9, 14 | WHISPERKIT_BACKEND_GPU = 10, 15 | WHISPERKIT_BACKEND_CPU = 11, 16 | WHISPERKIT_BACKEND_EXPERIMENTAL = 12, 17 | }; 18 | 19 | } // namespace Delegates 20 | } // namespace WhisperKit 21 | 22 | using namespace WhisperKit::Delegates; 23 | 24 | // TODO: move in to separate file (ditto with enum above) 25 | class BaseDelegateOptions { 26 | public: 27 | virtual ~BaseDelegateOptions(); 28 | BaseDelegateOptions() = default; 29 | virtual std::any get_options() = 0; 30 | virtual void set_value_for_option(const std::string& key, const std::string& value) = 0; 31 | virtual std::string get_value_for_option(const std::string& key) const = 0; 32 | 33 | protected: 34 | std::any options_; 35 | }; 36 | 37 | class NpuOptionsImpl : public BaseDelegateOptions { 38 | public: 39 | NpuOptionsImpl(); 40 | ~NpuOptionsImpl(); 41 | std::any get_options() override; 42 | void set_value_for_option(const std::string& key, const std::string& value) override; 43 | std::string get_value_for_option(const std::string& key) const override; 44 | }; 45 | 46 | class GpuOptionsImpl : public BaseDelegateOptions { 47 | public: 48 | GpuOptionsImpl(); 49 | ~GpuOptionsImpl(); 50 | std::any get_options() override; 51 | void set_value_for_option(const std::string& key, const std::string& value) override; 52 | std::string get_value_for_option(const std::string& key) const override; 53 | }; 54 | 55 | class CpuOptionsImpl : public BaseDelegateOptions { 56 | public: 57 | CpuOptionsImpl(); 58 | ~CpuOptionsImpl(); 59 | std::any get_options() override; 60 | void 
set_value_for_option(const std::string& key, const std::string& value) override; 61 | std::string get_value_for_option(const std::string& key) const override; 62 | }; 63 | 64 | class DelegateManagerConfiguration { 65 | public: 66 | DelegateManagerConfiguration() = default; 67 | std::shared_ptr getDelegateOptionsForBackend(BackendType backend); 68 | 69 | private: 70 | std::unordered_map> delegate_options_; 71 | }; 72 | 73 | // in common.h from tflite 74 | struct TfLiteDelegate; 75 | 76 | class DelegateManager { 77 | public: 78 | DelegateManager(); 79 | 80 | TfLiteDelegate* getDelegateForBackend(BackendType backend); 81 | BaseDelegateOptions* getDelegateOptionsForBackend(BackendType backend); 82 | 83 | void set_lib_dir(const std::string& lib_dir); 84 | void set_cache_dir(const std::string& cache_dir); 85 | void set_model_token(const std::string& model_token); 86 | 87 | void initialize(DelegateManagerConfiguration& config); 88 | 89 | ~DelegateManager(); 90 | 91 | DelegateManager(const DelegateManager&) = delete; 92 | DelegateManager& operator=(const DelegateManager&) = delete; 93 | 94 | private: 95 | void checkInitialization() const; 96 | 97 | DelegateManagerConfiguration configuration; 98 | 99 | // TODO: these should be moved elsewhere and made pass through to the delegate options 100 | // prior to delegate creation. 
101 | std::string _lib_dir; 102 | std::string _cache_dir; 103 | std::string _model_token; 104 | bool initialized_ = false; 105 | 106 | TfLiteDelegate* npu_delegate_ = nullptr; 107 | TfLiteDelegate* gpu_delegate_ = nullptr; 108 | TfLiteDelegate* cpu_delegate_ = nullptr; 109 | TfLiteDelegate* experimental_delegate_ = nullptr; 110 | }; 111 | -------------------------------------------------------------------------------- /android/whisperkit/build.gradle.kts: -------------------------------------------------------------------------------- 1 | import com.vanniktech.maven.publish.SonatypeHost 2 | import io.gitlab.arturbosch.detekt.Detekt 3 | import io.gitlab.arturbosch.detekt.DetektCreateBaselineTask 4 | 5 | plugins { 6 | alias(libs.plugins.android.library) 7 | alias(libs.plugins.kotlin.android) 8 | alias(libs.plugins.kotlin.serialization) 9 | alias(libs.plugins.vanniktech.maven.publish) 10 | alias(libs.plugins.detekt) 11 | } 12 | 13 | android { 14 | namespace = "com.argmaxinc.whisperkit" 15 | compileSdk = 35 16 | 17 | defaultConfig { 18 | minSdk = 26 19 | 20 | testInstrumentationRunner = "androidx.test.runner.AndroidJUnitRunner" 21 | consumerProguardFiles("consumer-rules.pro") 22 | } 23 | 24 | buildTypes { 25 | release { 26 | isMinifyEnabled = false 27 | proguardFiles( 28 | getDefaultProguardFile("proguard-android-optimize.txt"), 29 | "proguard-rules.pro", 30 | ) 31 | } 32 | } 33 | compileOptions { 34 | sourceCompatibility = JavaVersion.VERSION_11 35 | targetCompatibility = JavaVersion.VERSION_11 36 | } 37 | kotlinOptions { 38 | jvmTarget = "11" 39 | } 40 | packaging { 41 | jniLibs { 42 | useLegacyPackaging = true 43 | } 44 | } 45 | 46 | sourceSets { 47 | getByName("main") { 48 | jniLibs.srcDirs("src/main/jniLibs") 49 | } 50 | } 51 | } 52 | 53 | dependencies { 54 | implementation(libs.kotlinx.coroutines.core) 55 | implementation(libs.kotlinx.coroutines.android) 56 | implementation(libs.ktor.client.core) 57 | implementation(libs.ktor.client.cio) 58 | 
implementation(libs.ktor.client.content.negotiation)
    implementation(libs.ktor.serialization.kotlinx.json)
    testImplementation(libs.junit)
    testImplementation(libs.kotlinx.coroutines.test)
    testImplementation(libs.mockk)
    testImplementation(libs.turbine)
    testImplementation(libs.ktor.client.mock)
}

// Maven Central publication metadata for the library artifact.
mavenPublishing {
    coordinates("com.argmaxinc", "whisperkit", "0.3.3")
    pom {
        name.set("WhisperKit")
        description.set("On-device Speech Recognition for Android")
        inceptionYear.set("2025")
        url.set("https://github.com/argmaxinc/WhisperKitAndroid")

        licenses {
            license {
                name.set("MIT")
                url.set("https://opensource.org/licenses/MIT")
                distribution.set("repo")
            }
        }

        developers {
            developer {
                id.set("argmaxinc")
                name.set("Argmax")
                url.set("https://github.com/argmaxinc/")
            }
        }

        scm {
            url.set("https://github.com/argmaxinc/WhisperKitAndroid")
            connection.set("scm:git:git://github.com/argmaxinc/WhisperKitAndroid.git")
            // With an explicit ssh:// scheme the path is separated by '/', not ':'
            // (a ':' after the host would be parsed as a port).
            developerConnection.set("scm:git:ssh://git@github.com/argmaxinc/WhisperKitAndroid.git")
        }
    }
    signAllPublications()
    publishToMavenCentral(SonatypeHost.CENTRAL_PORTAL)
}

// Static analysis: default rule set plus the repository-wide overrides, with
// the findings recorded in the module baseline suppressed.
detekt {
    buildUponDefaultConfig = true
    config.setFrom(files("${rootProject.projectDir}/android/config/detekt.yml"))
    baseline = file("${rootProject.projectDir}/android/whisperkit/detekt-baseline.xml")
}

tasks.withType<Detekt>().configureEach {
    reports {
        html.required.set(true)
    }
}

// Keep detekt's jvmTarget equal to kotlinOptions.jvmTarget of this module ("11").
tasks.withType<Detekt>().configureEach {
    jvmTarget = "11"
}
tasks.withType<DetektCreateBaselineTask>().configureEach {
    jvmTarget = "11"
}
-------------------------------------------------------------------------------- /android/whisperkit/src/main/java/com/argmaxinc/whisperkit/huggingface/ModelInfo.kt:
--------------------------------------------------------------------------------
// For licensing see accompanying LICENSE.md file.
// Copyright © 2025 Argmax, Inc. All rights reserved.

package com.argmaxinc.whisperkit.huggingface

import kotlinx.serialization.SerialName
import kotlinx.serialization.Serializable
import java.nio.file.FileSystems
import java.nio.file.Paths

/**
 * Represents information about a model from the HuggingFace repository.
 * This class is used to deserialize the JSON response from the HuggingFace API.
 * Only the fields listed below are modeled; every field is optional.
 *
 * @property id The unique identifier of the model (JSON key `_id`)
 * @property modelId The model's identifier
 * @property siblings List of files associated with the model
 */
@Serializable
data class ModelInfo(
    @SerialName("_id") val id: String? = null,
    val modelId: String? = null,
    val siblings: List<Sibling>? = null,
) {
    /**
     * Filters the model's files based on glob patterns.
     *
     * A file is kept when it matches at least one pattern. Files without a name are skipped.
     *
     * @param globFilters List of glob patterns to filter files. If empty, no files are returned
     * @return List of filenames that match the glob patterns, in repository order
     */
    fun fileNames(globFilters: List<String> = emptyList()): List<String> {
        val fileSystem = FileSystems.getDefault()
        // Compile each pattern at most once, and only when it is actually consulted,
        // instead of rebuilding the matcher for every (file, pattern) pair.
        val matchers = globFilters.map { pattern ->
            lazy { fileSystem.getPathMatcher("glob:$pattern") }
        }
        return siblings.orEmpty()
            .mapNotNull { it.rfilename }
            .filter { filename ->
                val path = Paths.get(filename)
                matchers.any { it.value.matches(path) }
            }
    }

    /**
     * Returns a list of unique first level directory names from the model's files.
     * This is useful for identifying different model variants or configurations.
     *
     * Reads [siblings] directly: going through [fileNames] with its default (empty)
     * filter list would always produce an empty result.
     *
     * @return List of unique directory names, sorted alphabetically
     */
    fun dirNames(): List<String> {
        return siblings.orEmpty()
            .mapNotNull { it.rfilename }
            .filter { it.contains("/") }
            .map { it.substringBefore("/") }
            .distinct()
            .sorted()
    }
}

/**
 * Additional metadata about a model from its model card.
 *
 * @property prettyName A human-readable name for the model
 * @property viewer Whether the model has a viewer
 * @property libraryName The name of the library this model is built with
 * @property tags List of tags associated with the model
 */
@Serializable
data class CardData(
    @SerialName("pretty_name") val prettyName: String? = null,
    val viewer: Boolean? = null,
    @SerialName("library_name") val libraryName: String? = null,
    val tags: List<String>? = null,
)

/**
 * Represents a file associated with a model.
 *
 * @property rfilename The relative filename of the file in the repository
 */
@Serializable
data class Sibling(
    val rfilename: String?
= null, 93 | ) 94 | -------------------------------------------------------------------------------- /cpp/src/Models/tflite_qnn_model.cpp: --------------------------------------------------------------------------------
// For licensing see accompanying LICENSE file.
// Copyright © 2024 Argmax, Inc. All rights reserved.
#if QNN_DELEGATE
#include "tflite_qnn_model.hpp"

#include <filesystem>  // C++ 17 or later
#include <thread>      // std::thread::hardware_concurrency

#include "backend_class.hpp"
#include "tensorflow/lite/optional_debug_tools.h"

using namespace std;

TFLiteQNN::TFLiteQNN(const string& name) : TFLiteModel(name) {}

TFLiteQNN::~TFLiteQNN() { uninitialize(); }

/// Selects the delegate backend, builds the interpreter and delegate for
/// `model_path`, and allocates tensors.
///
/// @param model_path  path of the .tflite model to load
/// @param lib_dir     forwarded to set_dirs(); used as the QNN skel library directory
/// @param cache_dir   forwarded to set_dirs(); used as the delegate cache directory
/// @param backend     ComputeBackend::NPU selects the QNN HTP backend; every other
///                    value (including ComputeBackend::GPU) selects the GPU path
/// @param debug       when true, prints the interpreter state after delegation
/// @return true on success, false when interpreter/delegate creation or tensor
///         allocation fails
bool TFLiteQNN::initialize(string model_path, string lib_dir, string cache_dir, int backend, bool debug) {
    set_dirs(model_path, lib_dir, cache_dir);

    _options = TfLiteQnnDelegateOptionsDefault();
    _options.backend_type = kUndefinedBackend;

    switch (backend) {
        case ComputeBackend::NPU:
            _options.backend_type = kHtpBackend;
            _options.htp_options.precision = kHtpFp16;
            _options.htp_options.performance_mode = kHtpHighPerformance;
            // kHtpSustainedHighPerformance;
            _options.htp_options.useConvHmx = true;
            LOGI("%s: delegate to NPU backend..\n", _model_name.c_str());
            break;
        case ComputeBackend::GPU:
        default:
            _options.backend_type = kGpuBackend;
            _options.gpu_options.precision = kGpuFp16;
            _options.gpu_options.performance_mode = kGpuHigh;
            LOGI("%s: delegate to GPUv2 backend..\n", _model_name.c_str());
            break;
    }

    if (!create_interpreter_delegate(model_path)) {
        LOGE("Failed with create_interpreter_delegate..\n");
        return false;
    }
    if (!allocate_tensors()) {
        LOGE("Failed with allocate_tensors..\n");
        return false;
    }

    modify_graph_delegate();

    if (debug) {
        LOGI("\n========== %s delegation info ==========\n", _model_name.c_str());
        tflite::PrintInterpreterState(_interpreter.get());
    }
    return true;
}

/// Releases the delegate with the deleter that matches how it was created,
/// then lets the base class release the rest.
void TFLiteQNN::uninitialize() {
    if (_delegate != nullptr) {
        // Must mirror create_interpreter_delegate(): the GPU v2 delegate is created
        // for both kUndefinedBackend and kGpuBackend, so both must be released with
        // the GPU v2 deleter. Previously kGpuBackend fell through to the QNN deleter.
        if (_options.backend_type == kUndefinedBackend || _options.backend_type == kGpuBackend)
            TfLiteGpuDelegateV2Delete(_delegate);
        else
            TfLiteQnnDelegateDelete(_delegate);
        _delegate = nullptr;
    }

    // NOTE(review): the delegate is deleted before the base class tears down the
    // rest of the model; confirm the interpreter no longer uses it at that point.
    TFLiteModel::uninitialize();
}

/// Loads the flatbuffer model, builds the interpreter and creates the delegate
/// selected by `_options.backend_type`.
///
/// @return false when the model cannot be loaded or the delegate cannot be created
bool TFLiteQNN::create_interpreter_delegate(string model_path) {
    _model = tflite::FlatBufferModel::BuildFromFile(model_path.c_str());
    if (_model.get() == nullptr) return false;

    if (_options.backend_type == kUndefinedBackend || _options.backend_type == kGpuBackend) {
        tflite::ops::builtin::BuiltinOpResolver tflite_resolver;
        tflite::InterpreterBuilder builder(*_model, tflite_resolver);
        TFLITE_FUNCTION_CHECK(builder(&_interpreter))

        TfLiteGpuDelegateOptionsV2 gpu_options = TfLiteGpuDelegateOptionsV2Default();
        gpu_options.experimental_flags |= TFLITE_GPU_EXPERIMENTAL_FLAGS_ENABLE_SERIALIZATION;
        gpu_options.serialization_dir = _cache_dir.c_str();
        // Use the per-model token (as the QNN branch below does) so that several
        // models sharing one cache directory do not share a serialization token.
        // Fall back to the previous fixed token when none has been set.
        gpu_options.model_token = _model_token.empty() ? "model_token" : _model_token.c_str();
        _delegate = TfLiteGpuDelegateV2Create(&gpu_options);
    } else {
        _options.skel_library_dir = _lib_dir.c_str();
        _options.cache_dir = _cache_dir.c_str();
        _options.model_token = _model_token.c_str();

        tflite::ops::builtin::BuiltinOpResolverWithoutDefaultDelegates qnn_resolver;
        tflite::InterpreterBuilder builder(*_model, qnn_resolver);
        TFLITE_FUNCTION_CHECK(builder(&_interpreter))
        _delegate = TfLiteQnnDelegateCreate(&_options);
    }

    if (_delegate == nullptr) return false;

    // hardware_concurrency() is allowed to return 0 when the value is not
    // computable; never request zero threads from the interpreter.
    const auto processor_count = thread::hardware_concurrency();
    _interpreter->SetNumThreads(processor_count > 0 ? static_cast<int>(processor_count) : 1);

    return true;
}
#endif
-------------------------------------------------------------------------------- /android/examples/WhisperAX/src/main/res/layout/activity_main.xml:
-------------------------------------------------------------------------------- 1 | 2 | 8 | 9 | 12 | 13 | 23 | 24 | 36 | 37 |