├── .gitignore
├── .gitmodules
├── CMakeLists.txt
├── LICENSE
├── README.md
├── examples
│   ├── CMakeLists.txt
│   ├── complex
│   │   ├── CMakeLists.txt
│   │   ├── complex.cpp
│   │   ├── complex.qml
│   │   └── resources.qrc
│   └── simple
│       ├── CMakeLists.txt
│       ├── resources.qrc
│       ├── simple.cpp
│       └── simple.qml
├── models
│   ├── ggml-tiny-en-q4-0.bin
│   └── models.qrc
├── src
│   ├── QmlMacros.h
│   ├── SpeechToText.cpp
│   ├── SpeechToText.h
│   ├── VoiceActivityDetector.cpp
│   ├── VoiceActivityDetector.h
│   ├── WhisperBackend.cpp
│   ├── WhisperBackend.h
│   └── private
│       └── quantization.h
└── tests
    ├── CMakeLists.txt
    ├── bin
    │   ├── quantize.exe
    │   └── whisper.dll
    └── tst_quant.cpp

/.gitignore:
--------------------------------------------------------------------------------
1 | *.user
2 | /build
3 | /third-party
4 | 
5 | 
--------------------------------------------------------------------------------
/.gitmodules:
--------------------------------------------------------------------------------
1 | [submodule "whisper.cpp"]
2 | path = whisper.cpp
3 | url = https://github.com/ggerganov/whisper.cpp.git
4 | 
--------------------------------------------------------------------------------
/CMakeLists.txt:
--------------------------------------------------------------------------------
1 | cmake_minimum_required(VERSION 3.1)
2 | project(qt-whisper LANGUAGES CXX)
3 | set(CMAKE_AUTOMOC OFF)
4 | set(CMAKE_AUTORCC OFF)
5 | set(CMAKE_CXX_STANDARD 20)
6 | 
7 | set(QT_WHISPER_TARGET qt-whisper)
8 | set(QT_WHISPER_LIB ${QT_WHISPER_TARGET})
9 | option(QT_WHISPER_EMBED_MODEL "Embed the compressed model weights into the library" OFF)
10 | 
11 | add_subdirectory(whisper.cpp)
12 | 
13 | 
14 | find_package(QT NAMES Qt5 Qt6 COMPONENTS Core Multimedia Concurrent Quick REQUIRED)
15 | find_package(Qt${QT_VERSION_MAJOR} COMPONENTS Core Multimedia Concurrent Quick REQUIRED)
16 | 
17 | 
18 | file(GLOB SOURCE_CPP
19 |     "src/*.cpp" "src/*.h" "src/private/*.h"
20 | )
21 | if(QT_WHISPER_EMBED_MODEL)
22 |     set(SOURCE_RCC "models/models.qrc")
23 | endif()
24 | 
25 | qt_add_library(${QT_WHISPER_TARGET} STATIC MANUAL_FINALIZATION ${SOURCE_CPP} ${SOURCE_RCC}
26 | )
27 | 
28 | 
29 | target_link_libraries(${QT_WHISPER_TARGET} PRIVATE whisper)
30 | target_link_libraries(${QT_WHISPER_TARGET} PUBLIC Qt6::Core Qt6::Multimedia Qt6::Concurrent Qt6::Quick)
31 | target_include_directories(${QT_WHISPER_TARGET} INTERFACE "src" "whisper.cpp")
32 | target_include_directories(${QT_WHISPER_TARGET} PRIVATE "src/private")
33 | 
34 | set_property(TARGET ${QT_WHISPER_TARGET} PROPERTY AUTOMOC ON)
35 | set_property(TARGET ${QT_WHISPER_TARGET} PROPERTY AUTORCC ${QT_WHISPER_EMBED_MODEL})
36 | if(${QT_WHISPER_EMBED_MODEL})
37 |     target_compile_definitions(${QT_WHISPER_TARGET} PRIVATE EMBED_MODEL)
38 | endif()
39 | qt_finalize_target(${QT_WHISPER_TARGET})
40 | 
41 | 
42 | # Add examples and tests if built as a standalone project
43 | if(CMAKE_SOURCE_DIR STREQUAL CMAKE_CURRENT_SOURCE_DIR)
44 |     macro(download_whisper_tiny)
45 |         file(DOWNLOAD "https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-tiny.bin" ${CMAKE_CURRENT_BINARY_DIR}/ggml-tiny.bin SHOW_PROGRESS EXPECTED_HASH SHA256=be07e048e1e599ad46341c8d2a135645097a538221678b7acdd1b1919c6e1b21)
46 |     endmacro()
47 |     add_subdirectory(examples)
48 |     add_subdirectory(tests)
49 | endif()
50 | 
51 | 
52 | 
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | Apache License
2 | Version 2.0, January 2004
3 | http://www.apache.org/licenses/
4 | 
5 | TERMS AND CONDITIONS FOR USE,
REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. 
Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 
122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 
179 | 
180 | To apply the Apache License to your work, attach the following
181 | boilerplate notice, with the fields enclosed by brackets "[]"
182 | replaced with your own identifying information. (Don't include
183 | the brackets!) The text should be enclosed in the appropriate
184 | comment syntax for the file format. We also recommend that a
185 | file or class name and description of purpose be included on the
186 | same "printed page" as the copyright notice for easier
187 | identification within third-party archives.
188 | 
189 | Copyright [yyyy] [name of copyright owner]
190 | 
191 | Licensed under the Apache License, Version 2.0 (the "License");
192 | you may not use this file except in compliance with the License.
193 | You may obtain a copy of the License at
194 | 
195 | http://www.apache.org/licenses/LICENSE-2.0
196 | 
197 | Unless required by applicable law or agreed to in writing, software
198 | distributed under the License is distributed on an "AS IS" BASIS,
199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
200 | See the License for the specific language governing permissions and
201 | limitations under the License.
202 | 
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Qt Whisper
2 | This project is a Qt & QML wrapper for [whisper.cpp](https://github.com/ggerganov/whisper.cpp) - a high-performance library for [OpenAI's Whisper](https://github.com/openai/whisper) inference.
3 | 
4 | ## Value added
5 | While whisper.cpp provides the framework for Whisper model inference, its framework-agnostic nature leaves it to the programmer to write the wrapper code needed to use Whisper in an actual application.
6 | 
7 | This project provides a ready-to-use QML object that performs inference away from the GUI thread. Note that while the project is functional, some features are still a work in progress:
8 | :heavy_check_mark: Threaded inference - the GUI thread is never blocked while the model runs.
9 | :heavy_check_mark: Voice Activity Detection - audio capture waits for speech to start and stops automatically after speech ends.
10 | :heavy_check_mark: Embedded small model - the library can be built with a small model embedded into the binary for quick prototyping.
11 | :warning: VAD ML models - no readily available ML models that are easy to embed into an application; a simple energy-based detector is implemented instead.
12 | :heavy_check_mark: Model quantization - models can be quantized and reloaded at runtime.
13 | :x: Building QML plugin
14 | 
15 | ## Usage
16 | Pull the repo in as a submodule:
17 | ```
18 | git submodule add https://github.com/Ugibugi/qt-whisper.git
19 | git submodule update --init --recursive
20 | ```
21 | Then in your CMakeLists.txt:
22 | 
23 | ```cmake
24 | add_subdirectory(qt-whisper)
25 | ...
26 | add_executable(mytarget ...)
27 | target_link_libraries(mytarget PRIVATE ${QT_WHISPER_LIB} ...)
28 | 
29 | ```
30 | 
31 | Then register the type in your main.cpp (to be removed once QML plugin support is added):
32 | 
33 | ```cpp
34 | #include <SpeechToText.h>
35 | ...
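// The elided part is the usual QGuiApplication / QQmlApplicationEngine boilerplate;
// examples/simple/simple.cpp in this repository shows the complete file.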
36 | // in int main()
37 | qmlRegisterType<SpeechToText>("qtwhisper", 1, 0, "SpeechToText");
38 | 
39 | ```
40 | 
41 | And it's ready to use:
42 | 
43 | ```qml
44 | import QtQuick
45 | import QtQuick.Controls
46 | import qtwhisper
47 | 
48 | ApplicationWindow {
49 |     visible: true
50 |     width: 800
51 |     height: 600
52 |     Button {
53 |         text: "Start"
54 |         // Check the current state of inference
55 |         enabled: stt.state === SpeechToText.Ready
56 |         onClicked: {
57 |             // Start listening for speech - inference runs once speech is detected and
58 |             // capture stops automatically after you stop speaking
59 |             stt.start()
60 |         }
61 |     }
62 |     SpeechToText {
63 |         id: stt
64 |         onResultReady: function (recognisedSpeech) {
65 |             console.log(recognisedSpeech) // print out the result
66 |         }
67 |     }
68 | }
69 | ```
70 | 
71 | See the examples folder for more in-depth usage.
72 | 
73 | ## Notes on usage
74 | ### Read this if the app crashes when trying to run inference
75 | whisper.cpp uses vector instruction sets that may not be supported by your device. Pass one or more of the whisper.cpp CMake flags `WHISPER_NO_AVX2`, `WHISPER_NO_AVX`, `WHISPER_NO_F16C`, `WHISPER_NO_FMA` to disable the corresponding instructions.
--------------------------------------------------------------------------------
/examples/CMakeLists.txt:
--------------------------------------------------------------------------------
1 | add_subdirectory(simple)
2 | add_subdirectory(complex)
3 | 
--------------------------------------------------------------------------------
/examples/complex/CMakeLists.txt:
--------------------------------------------------------------------------------
1 | add_executable(complex complex.cpp resources.qrc)
2 | set_target_properties(complex PROPERTIES AUTORCC ON AUTOMOC ON)
3 | target_link_libraries(complex PRIVATE Qt6::Core Qt6::Quick ${QT_WHISPER_TARGET})
4 | download_whisper_tiny()
5 | 
--------------------------------------------------------------------------------
/examples/complex/complex.cpp:
--------------------------------------------------------------------------------
1 | #include <QGuiApplication>
2 | #include <QQmlApplicationEngine>
3 | 
4 | #include <SpeechToText.h>
5 | 
6 | int main(int argc, char *argv[])
7 | {
8 |     QGuiApplication app(argc, argv);
9 | 
10 |     QQmlApplicationEngine engine;
11 | 
12 | 
13 |     const QUrl url(QStringLiteral("qrc:complex.qml"));
14 | 
15 |     qmlRegisterType<SpeechToText>("qtwhisper", 1, 0, "SpeechToText");
16 |     qmlRegisterUncreatableType<WhisperInfo>("qtwhisper", 1, 0, "WhisperInfo", "");
17 | 
18 |     // QML startup
19 |     QObject::connect(&engine, &QQmlApplicationEngine::objectCreated,
20 |         &app, [url](QObject *obj, const QUrl &objUrl){
21 |         if (!obj && url == objUrl)
22 |             QCoreApplication::exit(-1);
23 |     }, Qt::QueuedConnection);
24 | 
25 |     engine.load(url);
26 |     return app.exec();
27 | }
28 | 
--------------------------------------------------------------------------------
/examples/complex/complex.qml:
--------------------------------------------------------------------------------
1 | import QtQuick
2 | import QtQuick.Controls.Material
3 | import QtQuick.Layouts
4 | import qtwhisper
5 | 
6 | ApplicationWindow {
7 |     id: root
8 |     visible: true
9 |     width: 800
10 |     height: 600
11 | 
12 |     Material.theme: Material.Dark
13 | 
14 |     RowLayout {
15 |         id: headerRow
16 |         anchors {
17 |             top: parent.top
18 |         }
19 |         width: parent.width
20 |         height: 200
21 |         ColumnLayout {
22 |             Repeater {
23 |                 id: rep
24 |                 property var names: ["Normal", "Q4_0", "Q4_1", "Q5_0", "Q5_1", "Q8_0"]
25 |                 model: [0, 2, 3, 8, 9, 7]
26 | 
27 |                 RadioDelegate {
28 |                     text: rep.names[index]
29 |                     enabled: stt.backendInfo.requantizable
30 |                              && (stt.state == SpeechToText.Ready)
31 |                     checked:
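// checked when the float type reported by the backend matches this entry;
// rep.model holds the ggml_ftype codes for rep.names: 0 = F32 ("Normal"), 2 = Q4_0, 3 = Q4_1, 8 = Q5_0, 9 = Q5_1, 7 = Q8_0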
stt.backendInfo.floatType === rep.model[index] 32 | onClicked: stt.quantize(rep.model[index]) 33 | } 34 | } 35 | } 36 | Item { 37 | Layout.fillWidth: true 38 | } 39 | 40 | ColumnLayout { 41 | Label { 42 | text: "Collected Info:" 43 | font.pixelSize: 20 44 | } 45 | Label { 46 | text: stt.backendInfo.floatTypeString 47 | Layout.leftMargin: 10 48 | Layout.maximumWidth: 200 49 | font.pixelSize: 15 50 | wrapMode: Text.Wrap 51 | } 52 | Label { 53 | text: stt.backendInfo.modelTypeString 54 | Layout.leftMargin: 10 55 | font.pixelSize: 15 56 | } 57 | } 58 | } 59 | 60 | ColumnLayout { 61 | anchors.centerIn: parent 62 | width: parent.width / 2 63 | height: parent.height / 3 64 | 65 | Item { 66 | Layout.alignment: Qt.AlignHCenter | Qt.AlignVCenter 67 | Layout.fillWidth: true 68 | Layout.fillHeight: true 69 | Label { 70 | id: result 71 | 72 | horizontalAlignment: Text.AlignHCenter 73 | anchors.fill: parent 74 | 75 | wrapMode: Text.Wrap 76 | font.capitalization: Font.AllUppercase 77 | font.pixelSize: 20 78 | text: "(((Recognised text)))" 79 | opacity: { 80 | switch (stt.state) { 81 | case SpeechToText.Tuning: 82 | case SpeechToText.WaitingForSpeech: 83 | case SpeechToText.SpeechDetected: 84 | return 0.5 85 | case SpeechToText.Busy: 86 | return 0.0 87 | default: 88 | return 1.0 89 | } 90 | } 91 | } 92 | BusyIndicator { 93 | anchors.fill: parent 94 | running: stt.state === SpeechToText.Busy 95 | } 96 | } 97 | 98 | Button { 99 | text: "Start" 100 | Layout.alignment: Qt.AlignHCenter | Qt.AlignVCenter 101 | enabled: stt.state === SpeechToText.Ready 102 | onClicked: { 103 | stt.start() 104 | } 105 | } 106 | 107 | Label { 108 | id: prompt 109 | Layout.alignment: Qt.AlignHCenter | Qt.AlignVCenter 110 | horizontalAlignment: Text.AlignHCenter 111 | 112 | font.pixelSize: 20 113 | text: { 114 | switch (stt.state) { 115 | case SpeechToText.NoModel: 116 | return "No model loaded" 117 | case SpeechToText.WaitingForModel: 118 | return "Loading model, please wait." 
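// Typical state flow: NoModel -> WaitingForModel -> Ready, then after start():
// Tuning -> WaitingForSpeech -> SpeechDetected -> Busy -> back to Ready (see getState() in src/SpeechToText.cpp)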
119 |                     case SpeechToText.Busy:
120 |                         return "Inference in progress"
121 |                     case SpeechToText.Tuning:
122 |                         return "Tuning out background noise"
123 |                     case SpeechToText.SpeechDetected:
124 |                         return "Speech detected"
125 |                     case SpeechToText.WaitingForSpeech:
126 |                         return "Speak to start detection"
127 |                     case SpeechToText.Ready:
128 |                         return "Press start to start listening"
129 |                     default:
130 |                         return "Unknown state: " + stt.state
131 |                 }
132 |             }
133 |         }
134 |     }
135 | 
136 |     SpeechToText {
137 |         id: stt
138 |         modelPath: "ggml-tiny.bin"
139 |         onResultReady: function (r) {
140 |             result.text = r
141 |         }
142 |     }
143 | }
--------------------------------------------------------------------------------
/examples/complex/resources.qrc:
--------------------------------------------------------------------------------
1 | <RCC>
2 |     <qresource prefix="/">
3 |         <file>complex.qml</file>
4 |     </qresource>
5 | </RCC>
6 | 
--------------------------------------------------------------------------------
/examples/simple/CMakeLists.txt:
--------------------------------------------------------------------------------
1 | add_executable(simple simple.cpp resources.qrc)
2 | set_target_properties(simple PROPERTIES AUTORCC ON AUTOMOC ON)
3 | target_link_libraries(simple PRIVATE Qt6::Core Qt6::Quick ${QT_WHISPER_TARGET})
4 | 
--------------------------------------------------------------------------------
/examples/simple/resources.qrc:
--------------------------------------------------------------------------------
1 | <RCC>
2 |     <qresource prefix="/">
3 |         <file>simple.qml</file>
4 |     </qresource>
5 | </RCC>
6 | 
--------------------------------------------------------------------------------
/examples/simple/simple.cpp:
--------------------------------------------------------------------------------
1 | #include <QGuiApplication>
2 | #include <QQmlApplicationEngine>
3 | 
4 | #include <SpeechToText.h>
5 | 
6 | int main(int argc, char *argv[])
7 | {
8 |     QGuiApplication app(argc, argv);
9 | 
10 |     QQmlApplicationEngine engine;
11 | 
12 | 
13 |     const QUrl url(QStringLiteral("qrc:simple.qml"));
14 | 
15 |     qmlRegisterType<SpeechToText>("qtwhisper", 1, 0, "SpeechToText");
16 |     qmlRegisterUncreatableType<WhisperInfo>("qtwhisper", 1, 0, "WhisperInfo", "");
17 | 
18 |     // QML startup
19 |     QObject::connect(&engine, &QQmlApplicationEngine::objectCreated,
20 |         &app, [url](QObject *obj, const QUrl &objUrl){
21 |         if (!obj && url == objUrl)
22 |             QCoreApplication::exit(-1);
23 |     }, Qt::QueuedConnection);
24 | 
25 |     engine.load(url);
26 |     return app.exec();
27 | }
28 | 
--------------------------------------------------------------------------------
/examples/simple/simple.qml:
--------------------------------------------------------------------------------
1 | import QtQuick
2 | import QtQuick.Controls.Material
3 | import QtQuick.Layouts
4 | import qtwhisper
5 | 
6 | ApplicationWindow {
7 |     id: root
8 |     visible: true
9 |     width: 800
10 |     height: 600
11 |     Material.theme: Material.Dark
12 | 
13 |     ColumnLayout {
14 |         anchors.centerIn: parent
15 |         width: parent.width / 2
16 |         height: parent.height / 3
17 | 
18 |         Item {
19 |             Layout.alignment: Qt.AlignHCenter | Qt.AlignVCenter
20 |             Layout.fillWidth: true
21 |             Layout.fillHeight: true
22 |             Label {
23 |                 id: result
24 | 
25 |                 horizontalAlignment: Text.AlignHCenter
26 |                 anchors.fill: parent
27 | 
28 |                 wrapMode: Text.Wrap
29 |                 font.capitalization: Font.AllUppercase
30 |                 font.pixelSize: 20
31 |                 text: "(((Recognised text)))"
32 |                 opacity: {
33 |                     switch (stt.state) {
34 |                     case SpeechToText.Tuning:
35 |                     case SpeechToText.WaitingForSpeech:
36 |                     case SpeechToText.SpeechDetected:
37 |                         return 0.5
38 |                     case SpeechToText.Busy:
39 |                         return 0.0
40 |                     default:
41 |                         return 1.0
42 |                     }
43 |                 }
44 |             }
45 |             BusyIndicator {
46 | 
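// Spinner shown on top of the result label while whisper inference runs on the worker thread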
anchors.fill: parent 47 | running: stt.state === SpeechToText.Busy 48 | } 49 | } 50 | 51 | Button { 52 | text: "Start" 53 | Layout.alignment: Qt.AlignHCenter | Qt.AlignVCenter 54 | enabled: stt.state === SpeechToText.Ready 55 | onClicked: { 56 | stt.start() 57 | } 58 | } 59 | 60 | Label { 61 | id: prompt 62 | Layout.alignment: Qt.AlignHCenter | Qt.AlignVCenter 63 | horizontalAlignment: Text.AlignHCenter 64 | 65 | font.pixelSize: 20 66 | text: { 67 | switch (stt.state) { 68 | case SpeechToText.NoModel: 69 | return "No model loaded" 70 | case SpeechToText.WaitingForModel: 71 | return "Loading model, please wait." 72 | case SpeechToText.Busy: 73 | return "Inference in progress" 74 | case SpeechToText.Tuning: 75 | return "Tuning out background noise" 76 | case SpeechToText.SpeechDetected: 77 | return "Speech detected" 78 | case SpeechToText.WaitingForSpeech: 79 | return "Speak to start detection" 80 | case SpeechToText.Ready: 81 | return "Press start to start listening" 82 | default: 83 | return "Unknown state: " + stt.state 84 | } 85 | } 86 | } 87 | } 88 | 89 | SpeechToText { 90 | id: stt 91 | onResultReady: function (r) { 92 | result.text = r 93 | } 94 | } 95 | } 96 | -------------------------------------------------------------------------------- /models/ggml-tiny-en-q4-0.bin: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/radkoder/qt-whisper/6f0ba6f360c358c6940a61724a429b0b8c136908/models/ggml-tiny-en-q4-0.bin -------------------------------------------------------------------------------- /models/models.qrc: -------------------------------------------------------------------------------- 1 | 2 | 3 | ggml-tiny-en-q4-0.bin 4 | 5 | 6 | -------------------------------------------------------------------------------- /src/QmlMacros.h: -------------------------------------------------------------------------------- 1 | #ifndef QMLMACROS_H 2 | #define QMLMACROS_H 3 | 4 | /*! 5 | * \def QML_WRITABLE_PROPERTY(type, name) 6 | * \ingroup QT_QML_HELPERS 7 | * \hideinitializer 8 | * \details Creates a \c Q_PROPERTY that will be readable / writable from QML. 9 | * 10 | * \param type The C++ type of the property 11 | * \param name The name for the property 12 | * \param capitalName - capitalized name of the property to insert into function names 13 | * 14 | * It generates for this goal : 15 | * \code 16 | * {type} m_{name}; // private member variable 17 | * {type} get_{name} () const; // public getter method 18 | * void set_{name} ({type}); // public setter slot 19 | * void {name}Changed ({type}); // notifier signal 20 | * \endcode 21 | * 22 | * \b Note : Any change from either C++ or QML side will trigger the 23 | * notification. 24 | */ 25 | #define QML_WRITABLE_PROPERTY(type, name, capitalName) \ 26 | protected: \ 27 | Q_PROPERTY(type name READ get ## capitalName WRITE set ## capitalName NOTIFY name ## Changed) \ 28 | private: \ 29 | type _ ## name{ }; \ 30 | \ 31 | public: \ 32 | type get ## capitalName() const { return _ ## name; } \ 33 | Q_SIGNALS: \ 34 | void name ## Changed(type name); \ 35 | public Q_SLOTS: \ 36 | void set ## capitalName(type name){ \ 37 | if (_ ## name != name) { \ 38 | _ ## name = name; \ 39 | emit name ## Changed(_ ## name); \ 40 | } \ 41 | } \ 42 | \ 43 | private: 44 | 45 | 46 | /*! 47 | * \def QML_READONLY_PROPERTY(type, name) 48 | * \ingroup QT_QML_HELPERS 49 | * \hideinitializer 50 | * \details Creates a \c Q_PROPERTY that will be readable from QML and writable 51 | * from C++. 
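 * For example, \c QML_READONLY_PROPERTY(bool, busy, Busy) (as used in WhisperBackend.h) declares a
 * read-only QML property \c busy backed by \c _busy, together with \c getBusy(), a C++-only
 * \c setBusy() and the \c busyChanged() notifier signal.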
52 | * 53 | * \param type The C++ type of the property 54 | * \param name The name for the property 55 | * 56 | * It generates for this goal : 57 | * \code 58 | * {type} m_{name}; // private member variable 59 | * {type} get_{name} () const; // public getter method 60 | * void update_{name} ({type}); // public setter method 61 | * void {name}Changed ({type}); // notifier signal 62 | * \endcode 63 | * 64 | * \b Note : Any change from C++ side will trigger the notification to QML. 65 | */ 66 | #define QML_READONLY_PROPERTY(type, name, capitalName) \ 67 | protected: \ 68 | Q_PROPERTY(type name READ get ## capitalName NOTIFY name ## Changed) \ 69 | private: \ 70 | type _ ## name{ }; \ 71 | \ 72 | public: \ 73 | type get ## capitalName() const { return _ ## name; } \ 74 | void set ## capitalName(const type& name){ \ 75 | if (_ ## name != name) { \ 76 | _ ## name = name; \ 77 | emit name ## Changed(_ ## name); \ 78 | } \ 79 | } \ 80 | Q_SIGNALS: \ 81 | void name ## Changed(type name); \ 82 | \ 83 | private: 84 | 85 | 86 | /*! 87 | * \def QML_CONSTANT_PROPERTY(type, name) 88 | * \ingroup QT_QML_HELPERS 89 | * \hideinitializer 90 | * \details Creates a \c Q_PROPERTY for a constant value exposed from C++ to 91 | * QML. 92 | * 93 | * \param type The C++ type of the property 94 | * \param name The name for the property 95 | * 96 | * It generates for this goal : 97 | * \code 98 | * {type} m_{name}; // private member variable 99 | * {type} get_{name} () const; // public getter method 100 | * \endcode 101 | * 102 | * \b Note : There is no change notifier because value is constant. 103 | */ 104 | 105 | #define QML_CONSTANT_PROPERTY(type, name, capitalizedName) \ 106 | protected: \ 107 | Q_PROPERTY(type name READ get ## capitalizedName CONSTANT) \ 108 | private: \ 109 | type m_ ## name; \ 110 | \ 111 | public: \ 112 | type get ## capitalizedName() const { return m_ ## name; } \ 113 | \ 114 | private: 115 | 116 | 117 | #endif // QMLMACROS_H 118 | -------------------------------------------------------------------------------- /src/SpeechToText.cpp: -------------------------------------------------------------------------------- 1 | #include "SpeechToText.h" 2 | #include 3 | #include 4 | #include 5 | 6 | 7 | constexpr int SAMPLE_RATE = 16000; 8 | constexpr const char *MODEL_RESOURCE = ":/ggml-tiny-en-q4-0.bin"; 9 | 10 | #define ASSERT_STATE(x) Q_ASSERT((updateState(),getState()) == x); 11 | 12 | SpeechToText::SpeechToText() 13 | { 14 | 15 | qRegisterMetaType(); 16 | qRegisterMetaType(); 17 | qRegisterMetaType >(); 18 | 19 | 20 | connect(this, &SpeechToText::modelPathChanged, this, &SpeechToText::loadModel); 21 | ASSERT_STATE(State::NoModel); 22 | 23 | #ifdef EMBED_MODEL 24 | Q_INIT_RESOURCE(models); 25 | setHasEmbeddedModel(true); 26 | setModelPath(MODEL_RESOURCE); 27 | #else 28 | setHasEmbeddedModel(false); 29 | #endif 30 | 31 | //State UpdateTimers 32 | _stateUpdateTimer.setInterval(30); 33 | _stateUpdateTimer.callOnTimeout(this,&SpeechToText::updateState); 34 | _stateUpdateTimer.start(); 35 | 36 | } 37 | 38 | void SpeechToText::start() 39 | { 40 | auto device = QMediaDevices::defaultAudioInput(); 41 | QAudioFormat fmt; 42 | 43 | fmt.setSampleFormat(QAudioFormat::Float); 44 | fmt.setSampleRate(SAMPLE_RATE); 45 | fmt.setChannelConfig(QAudioFormat::ChannelConfigMono); 46 | fmt.setChannelCount(1); 47 | 48 | if (!device.isFormatSupported(fmt)) { 49 | qDebug() << "Format " << fmt << " not supported"; 50 | } 51 | 52 | 53 | 54 | _source.reset(new QAudioSource{ device, fmt }); 55 | _audioDevice = 
_source->start(); 56 | ASSERT_STATE(State::WaitingForSpeech); 57 | connect(_source.get(),&QAudioSource::stateChanged,this,[=](QAudio::State s){ 58 | qDebug() << "Audio source" << _source.get() << " state:" << s; 59 | }); 60 | connect(_audioDevice, &QIODevice::readyRead, this, [ = ](){ 61 | 62 | auto bytes = _audioDevice->readAll(); 63 | float samples_count = bytes.size() / _source->format().bytesPerSample(); 64 | auto time_count = samples_count / _source->format().sampleRate(); 65 | qDebug() << "Read " << bytes.size() << "bytes" << samples_count << "Samples" << time_count << "Seconds"; 66 | std::vector frame{ reinterpret_cast(bytes.cbegin()), 67 | reinterpret_cast(bytes.cend()) }; 68 | 69 | _vad.feedSamples(std::move(frame)); 70 | }); 71 | connect(&_vad, &VoiceActivityDetector::speechDetected, this, [ = ](std::vector samples){ 72 | qDebug() << "Speech detected " << samples.size() << "samples"; 73 | QTimer::singleShot(1,this,&SpeechToText::stop); 74 | auto r = QMetaObject::invokeMethod(_whisper, "threadedInference", Qt::QueuedConnection, 75 | Q_ARG(std::vector, samples)); 76 | if (!r) { 77 | qFatal("Failed to invoke threaded inference"); 78 | } 79 | }); 80 | } // SpeechToText::start 81 | 82 | void SpeechToText::stop() 83 | { 84 | // immidieatly stop the audio recording 85 | if(_source){ 86 | _source->stop(); 87 | _source.reset(); 88 | } 89 | 90 | 91 | // if waiting for speech - simply disconnect the slots 92 | disconnect(&_vad,nullptr,this,nullptr); 93 | _vad.reset(); 94 | } 95 | 96 | SpeechToText::~SpeechToText() 97 | { 98 | unloadModel(); 99 | _whisperThread.quit(); 100 | _whisperThread.wait(); 101 | } 102 | 103 | void SpeechToText::loadModel(const QString &path) 104 | { 105 | if (_whisper || getState() == State::Busy) { 106 | // Unload model before loading 107 | connect(this,&SpeechToText::modelUnloaded,this,[=](){ 108 | ASSERT_STATE(State::NoModel); 109 | loadModel(path); 110 | },static_cast(Qt::AutoConnection | Qt::SingleShotConnection)); 111 | unloadModel(); 112 | 113 | return; 114 | } 115 | _whisper = new WhisperBackend(path); 116 | _whisper->moveToThread(&_whisperThread); 117 | 118 | 119 | connect(_whisper, &WhisperBackend::resultReady, this, [ = ](auto s){ 120 | ASSERT_STATE(State::Ready); 121 | emit resultReady(s); 122 | }); 123 | connect(_whisper, &WhisperBackend::error, this, [ = ](auto s){ 124 | ASSERT_STATE(State::Ready); 125 | emit SpeechToText::errorOccured(s); 126 | }); 127 | connect(_whisper, &WhisperBackend::modelLoaded, this, &SpeechToText::backendInfoChanged); 128 | 129 | 130 | QMetaObject::invokeMethod(_whisper, "loadModel", Qt::QueuedConnection); 131 | 132 | 133 | if (!_whisperThread.isRunning()) 134 | _whisperThread.start(); 135 | ASSERT_STATE(State::WaitingForModel); 136 | } 137 | 138 | void SpeechToText::unloadModel() 139 | { 140 | stop(); 141 | if (_whisper) 142 | { 143 | disconnect(_whisper,nullptr,this,nullptr); 144 | connect(_whisper, &QObject::destroyed, this, &SpeechToText::modelUnloaded); 145 | _whisper->deleteLater(); 146 | } 147 | } 148 | 149 | const WhisperInfo *SpeechToText::getBackendInfo() const 150 | { 151 | Q_ASSERT(_whisper); 152 | return _whisper->info(); 153 | } 154 | 155 | SpeechToText::State SpeechToText::getState() const 156 | { 157 | #define O(state, cond) \ 158 | if(cond) return state 159 | 160 | // whisper related states 161 | O(State::NoModel, _whisper.isNull()); // No model is loaded, need to call loadModel first 162 | O(State::WaitingForModel, _whisper->info()->getModelType()==WhisperInfo::MODEL_UNKNOWN); // Model is being loaded in the 
background thread 163 | O(State::Busy,_whisper->getBusy()); // Model is performing inference in the background thread 164 | 165 | // VAD related states 166 | O(State::Tuning, _vad.getAdjustInProgress()); // VAD is listening for sound in order to adjust itself for background noise 167 | O(State::SpeechDetected,_vad.getVoiceInProgress()); // VAD is detecting voice in current samples 168 | O(State::WaitingForSpeech, _source); // if source is not deleted - the sound is being recorded and relayed to VAD 169 | 170 | // default state 171 | O(State::Ready,true); // Nothing is happening - the object is idle 172 | #undef O 173 | } 174 | 175 | void SpeechToText::quantize(int mode) 176 | { 177 | Q_ASSERT(_whisper); 178 | QMetaObject::invokeMethod(_whisper, "unloadModel", Qt::QueuedConnection); 179 | QMetaObject::invokeMethod(_whisper, "loadModel", Qt::QueuedConnection, static_cast(mode)); 180 | } 181 | 182 | void SpeechToText::updateState() 183 | { 184 | static State s = State::NoModel; 185 | if(getState() != s){ 186 | emit stateChanged(getState()); 187 | } 188 | } 189 | -------------------------------------------------------------------------------- /src/SpeechToText.h: -------------------------------------------------------------------------------- 1 | #ifndef SPEECHTOTEXT_H 2 | #define SPEECHTOTEXT_H 3 | 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | 10 | #include "WhisperBackend.h" 11 | #include "VoiceActivityDetector.h" 12 | #include "QmlMacros.h" 13 | 14 | class SpeechToText : public QObject 15 | { 16 | Q_OBJECT 17 | QML_ELEMENT; 18 | public: 19 | enum State { 20 | NoModel, 21 | Ready, 22 | Tuning, 23 | WaitingForSpeech, 24 | SpeechDetected, 25 | WaitingForModel, 26 | Busy 27 | }; 28 | Q_ENUM(State); 29 | private: 30 | QML_WRITABLE_PROPERTY(QString, modelPath, ModelPath) 31 | QML_READONLY_PROPERTY(bool, hasEmbeddedModel, HasEmbeddedModel) 32 | Q_PROPERTY(const WhisperInfo * backendInfo READ getBackendInfo NOTIFY backendInfoChanged) 33 | Q_PROPERTY(State state READ getState NOTIFY stateChanged) 34 | public: 35 | SpeechToText(); 36 | Q_INVOKABLE void start(); 37 | Q_INVOKABLE void stop(); 38 | 39 | 40 | ~SpeechToText(); 41 | void loadModel(const QString& path); 42 | void unloadModel(); 43 | 44 | const WhisperInfo *getBackendInfo() const; 45 | State getState() const; 46 | Q_INVOKABLE void quantize(int mode); 47 | 48 | 49 | 50 | public slots: 51 | void updateState(); 52 | 53 | signals: 54 | void resultReady(const QString& str); 55 | void modelUnloaded(); 56 | void modelLoaded(); 57 | void errorOccured(const QString& str); 58 | void stateChanged(State s); 59 | 60 | void backendInfoChanged(); 61 | 62 | private: 63 | QPointer _whisper = nullptr; 64 | VoiceActivityDetector _vad; 65 | std::unique_ptr _source = nullptr; 66 | std::vector _audioBuffer; 67 | QIODevice *_audioDevice = nullptr; 68 | bool _stopFlag = false; 69 | QThread _whisperThread; 70 | QTimer _stateUpdateTimer; 71 | }; 72 | 73 | 74 | #endif // SPEECHTOTEXT_H 75 | -------------------------------------------------------------------------------- /src/VoiceActivityDetector.cpp: -------------------------------------------------------------------------------- 1 | #include "VoiceActivityDetector.h" 2 | #include 3 | VoiceActivityDetector::VoiceActivityDetector(const Params& params, QObject *parent) 4 | : QObject{parent}, _params{params}, _patience_counter{params.patience}, 5 | _detected_samples_counter{params.minimum_samples}, _adjustment_counter{params.adjust_samples} 6 | { 7 | qRegisterMetaType >(); 8 | } 9 | 10 | void 
VoiceActivityDetector::feedSamples(const std::vector &data) 11 | { 12 | _adjustment_counter = std::max(_adjustment_counter - 1, 0); 13 | if (_adjustment_counter > 0) { 14 | setAdjustInProgress(true); 15 | adjust(data); 16 | return; 17 | } 18 | setAdjustInProgress(false); 19 | 20 | auto energy = std::inner_product(data.begin(), data.end(), data.begin(), 0.0f) / data.size(); 21 | const bool current_score = energy > threshold(); 22 | 23 | if (current_score) { 24 | // reset patience 25 | _patience_counter = _params.patience; 26 | 27 | // start the new potential segment if not already started 28 | setVoiceInProgress(true); 29 | 30 | // count consecutive accepted samples 31 | if (--_detected_samples_counter < 0) { 32 | _segment_approved = true; 33 | } 34 | } else { 35 | // decrement patience counter 36 | _patience_counter = std::max(_patience_counter - 1, 0); 37 | 38 | // reset accepted samples counter 39 | _detected_samples_counter = _params.minimum_samples; 40 | } 41 | 42 | // Capture voice if speech is detected 43 | if (getVoiceInProgress()) { 44 | _voice_buffer.insert(_voice_buffer.end(), data.begin(), data.end()); 45 | } 46 | 47 | 48 | // if patience runs out, signal speech detection and reset buffers 49 | if (_patience_counter <= 0 && getVoiceInProgress()) { 50 | if (_segment_approved) { 51 | emit speechDetected(_voice_buffer); 52 | } 53 | reset(); 54 | } 55 | qDebug() << "Energy: " << energy << "Threshold: " < &data) 69 | { 70 | auto energy = std::inner_product(data.begin(), data.end(), data.begin(), 0.0f) / data.size(); 71 | auto diff = std::abs(energy - _mean_energy); 72 | 73 | _mean_energy = _mean_energy * _params.beta + (1 - _params.beta) * energy; 74 | _std_energy = _std_energy * _params.beta + (1 - _params.beta) * diff; 75 | } 76 | 77 | float VoiceActivityDetector::threshold() const 78 | { 79 | 80 | // Expecting exponential distribution 81 | // Tukey anomaly criterion 82 | auto lambda = 1/_mean_energy; 83 | return 2*std::log(10)/lambda; 84 | } 85 | 86 | VoiceActivityDetector::Params VoiceActivityDetector::defaultParams() 87 | { 88 | return Params{ 89 | 50, //patience 90 | 50, // minimum samples 91 | 0.5f, // tuning coefficient 92 | 4.0f, // treshold coefficient 93 | 200 94 | }; 95 | } 96 | -------------------------------------------------------------------------------- /src/VoiceActivityDetector.h: -------------------------------------------------------------------------------- 1 | #ifndef VOICEACTIVITYDETECTOR_H 2 | #define VOICEACTIVITYDETECTOR_H 3 | 4 | #include 5 | #include "QmlMacros.h" 6 | 7 | class VoiceActivityDetector : public QObject 8 | { 9 | Q_OBJECT 10 | QML_READONLY_PROPERTY(bool, voiceInProgress, VoiceInProgress) 11 | QML_READONLY_PROPERTY(bool, adjustInProgress, AdjustInProgress) 12 | public: 13 | struct Params { 14 | /// Longest streak of samples with no voice before speech is considered to have ended 15 | int patience; 16 | /// Minimum streak of samples with speach for a segment to be considered as containing speech 17 | int minimum_samples; 18 | /// Tuning coefficient - higher coefficient requires longer tuning 19 | float beta; 20 | /// Treshold coeffitient - real threashold is calculated by mean(E) + k*std(E) 21 | float threshold; 22 | /// How many samples from the beginning of audio should be used for tuning 23 | int adjust_samples; 24 | }; 25 | explicit VoiceActivityDetector(const Params& params = defaultParams(), QObject *parent = nullptr); 26 | /// Feed series of samples to the detection 27 | void feedSamples(const std::vector& data); 28 | /// Reset the 
speech detection state 29 | void reset(); 30 | /// Adjust the treshold of speech detection assuming that the given data is background noise 31 | void adjust(const std::vector& data); 32 | /// Current speech threshold calculated from the background noise 33 | float threshold() const; 34 | /// Default parameters for the Voice Activity Detector 35 | static Params defaultParams(); 36 | public slots: 37 | 38 | 39 | signals: 40 | /// Fired when the given samples are considered to contain speech 41 | void speechDetected(std::vector samples); 42 | private: 43 | /// Parameters passed in during construction 44 | Params _params; 45 | /// Internal counter for patience 46 | int _patience_counter = 0; 47 | /// Internal counter for positive samples 48 | int _detected_samples_counter = 0; 49 | /// Wether a given speech segment (a series of samples) was approved as speech. 50 | bool _segment_approved = false; 51 | /// Buffer for storing speech samples 52 | std::vector _voice_buffer; 53 | /// Current mean sample energy for background noise 54 | float _mean_energy = 0; 55 | /// Current standard deviation of energy for background noise 56 | float _std_energy = 0; 57 | /// Counter for samples used in adjustment 58 | int _adjustment_counter; 59 | }; 60 | 61 | #endif // VOICEACTIVITYDETECTOR_H 62 | -------------------------------------------------------------------------------- /src/WhisperBackend.cpp: -------------------------------------------------------------------------------- 1 | #include "WhisperBackend.h" 2 | 3 | #include 4 | #include 5 | 6 | #include 7 | #include 8 | #include 9 | #include 10 | 11 | #include "quantization.h" 12 | 13 | WhisperBackend::WhisperBackend(const QString& filePath, QObject *parent) 14 | : _numThreads{2} 15 | { 16 | setBusy(true); 17 | _og_filepath = filePath; 18 | setBusy(false); 19 | } 20 | 21 | WhisperBackend::~WhisperBackend() 22 | { 23 | unloadModel(); 24 | } 25 | 26 | void WhisperBackend::loadModel(WhisperInfo::FloatType ftype) 27 | { 28 | setBusy(true); 29 | qDebug() << "load model called with quantization type: " << ftype; 30 | _params = whisper_full_default_params(WHISPER_SAMPLING_GREEDY); 31 | _params.progress_callback = [] (whisper_context *ctx, whisper_state *state, int progress, void *user_data){ 32 | qDebug() << "Inference progress: " << progress; 33 | }; 34 | 35 | QFile file{ _og_filepath }; 36 | file.open(QIODeviceBase::ReadOnly); 37 | QByteArray bytes; 38 | 39 | if (ftype == GGML_FTYPE_ALL_F32) { 40 | bytes = file.readAll(); 41 | } else { 42 | QBuffer buffer; 43 | buffer.open(QBuffer::WriteOnly); 44 | auto err = qtw::buffer_quantize(file, buffer, ftype); 45 | buffer.close(); 46 | if (err != 0) { 47 | emit error(QString{ "Model quantization failed with code: %1" }.arg(err)); 48 | return; 49 | } 50 | bytes = buffer.buffer(); 51 | } 52 | file.close(); 53 | 54 | _ctx = whisper_init_from_buffer(bytes.data(), bytes.size()); 55 | 56 | if (_ctx == nullptr) { 57 | emit error("Failed to initialize whisper context"); 58 | return; 59 | } 60 | collectInfo(); 61 | 62 | setBusy(false); 63 | emit modelLoaded(); 64 | } // WhisperBackend::loadModel 65 | 66 | void WhisperBackend::unloadModel() 67 | { 68 | whisper_free(_ctx); 69 | _ctx = nullptr; 70 | } 71 | 72 | void WhisperBackend::threadedInference(std::vector samples) 73 | { 74 | setBusy(true); 75 | if (whisper_full_parallel(_ctx, _params, samples.data(), static_cast(samples.size()), getNumThreads()) != 0) { 76 | fprintf(stderr, "failed to process audio\n"); 77 | } 78 | 79 | QString s; 80 | const int n_seg = 
whisper_full_n_segments(_ctx); 81 | for (int i = 0; i < n_seg; i++) { 82 | const char *text = whisper_full_get_segment_text(_ctx, i); 83 | s.append(text); 84 | } 85 | 86 | setBusy(false); 87 | setLastResult(s); 88 | 89 | emit resultReady(s); 90 | } 91 | 92 | const WhisperInfo *WhisperBackend::info() const 93 | { 94 | return &_info; 95 | } 96 | 97 | void WhisperBackend::collectInfo() 98 | { 99 | Q_ASSERT(_ctx); 100 | 101 | _info.setFloatType(static_cast( whisper_model_ftype(_ctx))); 102 | _info.setModelType(static_cast( whisper_model_type(_ctx))); 103 | } 104 | 105 | QString WhisperInfo::floatTypeString() const 106 | { 107 | switch (_floatType) { 108 | default: 109 | case GGML_FTYPE_UNKNOWN: return "Unknown float type"; 110 | 111 | case GGML_FTYPE_ALL_F32: return "32-bit float"; 112 | 113 | case GGML_FTYPE_MOSTLY_F16: return "mostly 16-bit float (except 1d tensors)"; 114 | 115 | case GGML_FTYPE_MOSTLY_Q4_0: return "(Q4_0) 16-bit blocks of 4-bit quantized weights (16-bit float multiplier)"; // except 1d tensors 116 | 117 | case GGML_FTYPE_MOSTLY_Q4_1_SOME_F16: 118 | case GGML_FTYPE_MOSTLY_Q4_1: return 119 | "(Q4_1) 16-bit blocks of 4-bit quantized weights (16-bit float multiplier and offset)"; // except 1d tensors 120 | 121 | case GGML_FTYPE_MOSTLY_Q8_0: return "(Q8_0) 32-bit blocks of 8-bit quantized weights (32-bit float multiplier)"; // except 1d tensors 122 | 123 | case GGML_FTYPE_MOSTLY_Q5_0: return "(Q5_0) blocks of 32 5-bit quantized weights (16-bit float multiplier)"; // except 1d tensors 124 | 125 | case GGML_FTYPE_MOSTLY_Q5_1: return 126 | "(Q5_1) blocks of 32 5-bit quantized weights (16-bit float multiplier and offset)"; // except 1d tensors 127 | } 128 | } 129 | 130 | QString WhisperInfo::modelTypeString() const 131 | { 132 | switch (_modelType) { 133 | default: 134 | case MODEL_UNKNOWN: 135 | return "Unknown model"; 136 | case MODEL_TINY: 137 | return "Tiny model"; 138 | case MODEL_BASE: 139 | return "Base model"; 140 | case MODEL_SMALL: 141 | return "Small model"; 142 | case MODEL_MEDIUM: 143 | return "Medium model"; 144 | case MODEL_LARGE: 145 | return "Large model"; 146 | } 147 | } 148 | 149 | bool WhisperInfo::requantizable() const 150 | { 151 | return (_floatType == GGML_FTYPE_ALL_F32) || (_floatType == GGML_FTYPE_MOSTLY_F16); 152 | } 153 | 154 | 155 | 156 | -------------------------------------------------------------------------------- /src/WhisperBackend.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include 3 | #include "whisper.h" 4 | #include "ggml.h" 5 | #include "QmlMacros.h" 6 | 7 | class WhisperInfo : public QObject { 8 | Q_OBJECT 9 | public: 10 | /// copied from whisper.cpp 11 | enum ModelType { 12 | MODEL_UNKNOWN, 13 | MODEL_TINY, 14 | MODEL_BASE, 15 | MODEL_SMALL, 16 | MODEL_MEDIUM, 17 | MODEL_LARGE, 18 | }; 19 | using FloatType = ggml_ftype; 20 | Q_ENUM(ModelType) 21 | Q_ENUM(FloatType) 22 | 23 | QString floatTypeString() const; 24 | QString modelTypeString() const; 25 | bool requantizable() const; 26 | 27 | private: 28 | QML_READONLY_PROPERTY(ModelType, modelType, ModelType) 29 | QML_READONLY_PROPERTY(FloatType, floatType, FloatType) 30 | Q_PROPERTY(bool requantizable READ requantizable NOTIFY floatTypeChanged) 31 | Q_PROPERTY(QString modelTypeString READ modelTypeString NOTIFY modelTypeChanged) 32 | Q_PROPERTY(QString floatTypeString READ floatTypeString NOTIFY floatTypeChanged) 33 | 34 | }; 35 | 36 | class WhisperBackend : public QObject { 37 | Q_OBJECT 38 | QML_READONLY_PROPERTY(bool, busy, Busy) 
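// busy is true while loadModel() or threadedInference() is executing on the worker thread;
// numThreads (below) is passed as the last argument to whisper_full_parallel() in threadedInference().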
39 | QML_WRITABLE_PROPERTY(int, numThreads, NumThreads) 40 | QML_READONLY_PROPERTY(QString, lastResult, LastResult) 41 | public: 42 | WhisperBackend(const QString &filePath, QObject *parent = nullptr); 43 | ~WhisperBackend(); 44 | Q_INVOKABLE void loadModel(WhisperInfo::FloatType = GGML_FTYPE_ALL_F32); 45 | Q_INVOKABLE void unloadModel(); 46 | Q_INVOKABLE void threadedInference(std::vector samples); 47 | const WhisperInfo *info() const; 48 | static int bufferQuantize(QIODevice & in, QIODevice & out, ggml_ftype type); 49 | signals: 50 | void resultReady(QString result); 51 | void error(QString s); 52 | void modelLoaded(); 53 | private: 54 | void collectInfo(); 55 | 56 | 57 | QString _og_filepath; 58 | 59 | whisper_context *_ctx = nullptr; 60 | whisper_full_params _params; 61 | WhisperInfo _info; 62 | }; 63 | -------------------------------------------------------------------------------- /src/private/quantization.h: -------------------------------------------------------------------------------- 1 | #ifndef QUANTIZATION_H 2 | #define QUANTIZATION_H 3 | #include 4 | #include 5 | #include 6 | 7 | namespace qtw { 8 | 9 | typedef size_t (*quantizer_func)(const float * src, void * dst, int n, int k, int64_t * hist); 10 | 11 | struct TensorHeader { 12 | int32_t n_dims; 13 | int32_t name_len; 14 | int32_t ttype; 15 | std::vector dims; 16 | QByteArray name; 17 | void read(QIODevice& in){ 18 | in.read(reinterpret_cast(&n_dims), sizeof(n_dims)); 19 | in.read(reinterpret_cast(&name_len), sizeof(name_len)); 20 | in.read(reinterpret_cast(&ttype), sizeof(ttype)); 21 | 22 | dims.resize(n_dims,1); 23 | for (auto& d : dims) { 24 | in.read(reinterpret_cast(&d), sizeof(d)); 25 | } 26 | 27 | name.resize(name_len,0); 28 | in.read(name.data(), name_len); 29 | 30 | } 31 | void write(QIODevice& out) 32 | { 33 | out.write(reinterpret_cast(&n_dims), sizeof(n_dims)); 34 | out.write(reinterpret_cast(&name_len), sizeof(name_len)); 35 | out.write(reinterpret_cast(&ttype), sizeof(ttype)); 36 | for (auto d : dims) { 37 | out.write(reinterpret_cast(&d), sizeof(d)); 38 | } 39 | out.write(name.constData(), name_len); 40 | 41 | } 42 | }; 43 | 44 | quantizer_func get_quantizer(ggml_type t){ 45 | switch(t){ 46 | case GGML_TYPE_Q4_0: 47 | return ggml_quantize_q4_0; 48 | case GGML_TYPE_Q4_1: 49 | return ggml_quantize_q4_1; 50 | case GGML_TYPE_Q5_0: 51 | return ggml_quantize_q5_0; 52 | case GGML_TYPE_Q5_1: 53 | return ggml_quantize_q5_1; 54 | case GGML_TYPE_Q8_0: 55 | return ggml_quantize_q8_0; 56 | default: 57 | return nullptr; 58 | } 59 | } 60 | 61 | void write_through(QIODevice& in, QIODevice& out, size_t n) 62 | { 63 | auto written = out.write(in.read(n)); 64 | Q_ASSERT(written == n); 65 | } 66 | 67 | int buffer_quantize(QIODevice& in, QIODevice& out, ggml_ftype ftype) 68 | { 69 | // error codes for the function 70 | constexpr int INVALID_MAGIC = 1; 71 | constexpr int INVALID_QUANTIZATION_TYPE = 2; 72 | constexpr int UNSUPPORTED_TENSOR_TYPE = 3; 73 | constexpr int UNSUPPORTED_QUANT_TYPE = 4; 74 | 75 | // verify magic 76 | { 77 | uint32_t magic; 78 | in.read((char *) &magic, sizeof(magic)); 79 | if (magic != GGML_FILE_MAGIC) { 80 | return INVALID_MAGIC; 81 | } 82 | 83 | out.write((char *) &magic, sizeof(magic)); 84 | } 85 | 86 | 87 | // load hparams 88 | { 89 | int32_t hparams[11]; 90 | in.read((char *) hparams, sizeof(hparams)); 91 | 92 | // Change the declared model float type to target 93 | const int32_t ftype_dst = GGML_QNT_VERSION * GGML_QNT_VERSION_FACTOR + ftype; 94 | 95 | out.write((const char *) hparams, 
sizeof(hparams) - sizeof(int32_t)); 96 | out.write((const char *) &ftype_dst, sizeof(ftype_dst)); 97 | } 98 | 99 | // load mel filters 100 | { 101 | int32_t n_mel, n_fft; 102 | 103 | in.read((char *) &n_mel, sizeof(n_mel)); 104 | in.read((char *) &n_fft, sizeof(n_fft)); 105 | 106 | out.write((char *) &n_mel, sizeof(n_mel)); 107 | out.write((char *) &n_fft, sizeof(n_fft)); 108 | 109 | write_through(in,out, n_mel * n_fft * sizeof(float)); 110 | } 111 | 112 | // load vocab 113 | { 114 | int32_t n_vocab = 0; 115 | in.read((char *) &n_vocab, sizeof(n_vocab)); 116 | out.write((char *) &n_vocab, sizeof(n_vocab)); 117 | 118 | for (int i = 0; i < n_vocab; i++) { 119 | uint32_t len; 120 | in.read((char *) &len, sizeof(len)); 121 | out.write((char *) &len, sizeof(len)); 122 | 123 | write_through(in,out,len); 124 | } 125 | } 126 | 127 | // regexes of tensor names to not be quantized 128 | const QList to_skip = { 129 | // "encoder.*", 130 | QRegularExpression{ "encoder.conv1.bias" }, 131 | QRegularExpression{ "encoder.conv2.bias" }, 132 | QRegularExpression{ "encoder.positional_embedding" }, 133 | QRegularExpression{ "decoder.positional_embedding" } 134 | }; 135 | 136 | // regexes of tensor names to be quantized 137 | const QList to_quant = { 138 | QRegularExpression{ ".*" } 139 | }; 140 | // quantization 141 | { 142 | // Map the ggml float type to ggml type 143 | ggml_type qtype = GGML_TYPE_F32; 144 | switch (ftype) { 145 | case GGML_FTYPE_MOSTLY_Q4_0: qtype = GGML_TYPE_Q4_0; 146 | break; 147 | case GGML_FTYPE_MOSTLY_Q4_1: qtype = GGML_TYPE_Q4_1; 148 | break; 149 | case GGML_FTYPE_MOSTLY_Q5_0: qtype = GGML_TYPE_Q5_0; 150 | break; 151 | case GGML_FTYPE_MOSTLY_Q5_1: qtype = GGML_TYPE_Q5_1; 152 | break; 153 | case GGML_FTYPE_MOSTLY_Q8_0: qtype = GGML_TYPE_Q8_0; 154 | break; 155 | default: 156 | return INVALID_QUANTIZATION_TYPE; 157 | } 158 | 159 | 160 | // rest of the file is just tensors 161 | TensorHeader tensor_header; 162 | std::vector weight_buffer; 163 | 164 | while (in.bytesAvailable() > 0) 165 | { 166 | // read tensor header - dimentions, type, name 167 | tensor_header.read(in); 168 | 169 | auto n_elements = std::reduce(tensor_header.dims.begin(), tensor_header.dims.end(), 1, std::multiplies{ }); 170 | Q_ASSERT(n_elements < std::vector{}.max_size()); 171 | 172 | // Decide wheter to quantize a tensor based on white / black lists 173 | bool quantize = std::any_of(to_quant.begin(), to_quant.end(), [&](auto re){ 174 | return re.match(QString::fromUtf8(tensor_header.name)).hasMatch(); 175 | }); 176 | quantize &= std::none_of(to_skip.begin(), to_skip.end(), [&](auto re){ 177 | return re.match(QString::fromUtf8(tensor_header.name)).hasMatch(); 178 | }); 179 | quantize &= (tensor_header.n_dims == 2); 180 | 181 | if (!quantize) { 182 | //If the tensor is not to be quantized - just write it trough 183 | // Write tensor header 184 | tensor_header.write(out); 185 | 186 | // write tensor data 187 | const int bytes_per_elem = (tensor_header.ttype == 0) ? 
188 |                 write_through(in, out, n_elements * bytes_per_elem);
189 |             }
190 |             else
191 |             {
192 |                 if (tensor_header.ttype != GGML_TYPE_F32 && tensor_header.ttype != GGML_TYPE_F16) {
193 |                     return UNSUPPORTED_TENSOR_TYPE;
194 |                 }
195 | 
196 |                 weight_buffer.resize(n_elements);
197 |                 if (tensor_header.ttype == GGML_TYPE_F16) {
198 |                     // if the tensor is in float-16, convert it to float-32
199 |                     std::vector<ggml_fp16_t> buff(n_elements);
200 |                     in.read(reinterpret_cast<char *>(buff.data()), n_elements * sizeof(ggml_fp16_t));
201 |                     std::transform(buff.begin(), buff.end(), weight_buffer.begin(), ggml_fp16_to_fp32);
202 |                 } else {
203 |                     // else just read it normally
204 |                     in.read(reinterpret_cast<char *>(weight_buffer.data()), n_elements * sizeof(float));
205 |                 }
206 |                 // set the tensor type to the target type
207 |                 tensor_header.ttype = qtype;
208 | 
209 |                 std::vector<float> quants(n_elements); // output buffer; the element type was stripped in the original and is assumed here (4 bytes per element is always enough for the quantized data)
210 |                 std::vector<int64_t> hist_cur(1 << 4, 0);
211 |                 size_t cur_size = 0;
212 | 
213 |                 // Select the quantizing function based on the quant type
214 |                 quantizer_func quantizer = get_quantizer(static_cast<ggml_type>(tensor_header.ttype));
215 | 
216 |                 if(!quantizer){
217 |                     return UNSUPPORTED_QUANT_TYPE;
218 |                 }
219 | 
220 |                 cur_size = quantizer(weight_buffer.data(),
221 |                     quants.data(), n_elements, tensor_header.dims[0], hist_cur.data());
222 | 
223 |                 // write the quantized tensor
224 |                 tensor_header.write(out);
225 |                 out.write(reinterpret_cast<const char *>(quants.data()), cur_size);
226 |             }
227 |         }
228 |     }
229 | 
230 |     return 0;
231 | } // qtw::buffer_quantize
232 | } // namespace qtw
233 | #endif // QUANTIZATION_H
234 | 
--------------------------------------------------------------------------------
/tests/CMakeLists.txt:
--------------------------------------------------------------------------------
1 | find_package(Qt6 REQUIRED COMPONENTS Test)
2 | 
3 | enable_testing(true)
4 | 
5 | 
6 | qt_add_executable(quantizer_test MANUAL_FINALIZATION tst_quant.cpp)
7 | set_target_properties(quantizer_test PROPERTIES AUTOMOC ON)
8 | qt_finalize_target(quantizer_test)
9 | 
10 | add_test(NAME quantizer_test COMMAND quantizer_test)
11 | 
12 | target_link_libraries(quantizer_test PRIVATE Qt6::Core Qt6::Quick ${QT_WHISPER_TARGET} Qt::Test)
13 | 
14 | ### Dependencies
15 | file(DOWNLOAD "https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-tiny.bin" ${CMAKE_CURRENT_BINARY_DIR}/ggml-tiny.bin SHOW_PROGRESS EXPECTED_HASH SHA256=be07e048e1e599ad46341c8d2a135645097a538221678b7acdd1b1919c6e1b21)
16 | add_custom_command(
17 |     OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/ggml-tiny-q4_0.bin
18 |     COMMAND ${CMAKE_CURRENT_SOURCE_DIR}/bin/quantize.exe ${CMAKE_CURRENT_BINARY_DIR}/ggml-tiny.bin ${CMAKE_CURRENT_BINARY_DIR}/ggml-tiny-q4_0.bin q4_0
19 |     VERBATIM)
20 | add_custom_command(
21 |     OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/ggml-tiny-q4_1.bin
22 |     COMMAND ${CMAKE_CURRENT_SOURCE_DIR}/bin/quantize.exe ${CMAKE_CURRENT_BINARY_DIR}/ggml-tiny.bin ${CMAKE_CURRENT_BINARY_DIR}/ggml-tiny-q4_1.bin q4_1
23 |     VERBATIM)
24 | add_custom_command(
25 |     OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/ggml-tiny-q5_0.bin
26 |     COMMAND ${CMAKE_CURRENT_SOURCE_DIR}/bin/quantize.exe ${CMAKE_CURRENT_BINARY_DIR}/ggml-tiny.bin ${CMAKE_CURRENT_BINARY_DIR}/ggml-tiny-q5_0.bin q5_0
27 |     VERBATIM)
28 | add_custom_command(
29 |     OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/ggml-tiny-q5_1.bin
30 |     COMMAND ${CMAKE_CURRENT_SOURCE_DIR}/bin/quantize.exe ${CMAKE_CURRENT_BINARY_DIR}/ggml-tiny.bin ${CMAKE_CURRENT_BINARY_DIR}/ggml-tiny-q5_1.bin q5_1
31 |     VERBATIM)
32 | add_custom_command(
33 |     OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/ggml-tiny-q8_0.bin
34 |     COMMAND ${CMAKE_CURRENT_SOURCE_DIR}/bin/quantize.exe ${CMAKE_CURRENT_BINARY_DIR}/ggml-tiny.bin ${CMAKE_CURRENT_BINARY_DIR}/ggml-tiny-q8_0.bin q8_0
35 |     VERBATIM)
36 | add_custom_target(quantized_models
37 |     DEPENDS ${CMAKE_CURRENT_BINARY_DIR}/ggml-tiny-q4_0.bin
38 |             ${CMAKE_CURRENT_BINARY_DIR}/ggml-tiny-q4_1.bin
39 |             ${CMAKE_CURRENT_BINARY_DIR}/ggml-tiny-q5_0.bin
40 |             ${CMAKE_CURRENT_BINARY_DIR}/ggml-tiny-q5_1.bin
41 |             ${CMAKE_CURRENT_BINARY_DIR}/ggml-tiny-q8_0.bin)
42 | 
43 | add_dependencies(quantizer_test quantized_models)
44 | #file(DOWNLOAD "https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-model-whisper-tiny-q5_1.bin" ${CMAKE_CURRENT_BINARY_DIR}/ggml-tiny-q5_1.bin SHOW_PROGRESS EXPECTED_HASH SHA256=818710568da3ca15689e31a743197b520007872ff9576237bda97bd1b469c3d7)
45 | 
--------------------------------------------------------------------------------
/tests/bin/quantize.exe:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/radkoder/qt-whisper/6f0ba6f360c358c6940a61724a429b0b8c136908/tests/bin/quantize.exe
--------------------------------------------------------------------------------
/tests/bin/whisper.dll:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/radkoder/qt-whisper/6f0ba6f360c358c6940a61724a429b0b8c136908/tests/bin/whisper.dll
--------------------------------------------------------------------------------
/tests/tst_quant.cpp:
--------------------------------------------------------------------------------
1 | #include <QtTest>    // NOTE: the original header name was stripped; QtTest is assumed (QTEST_MAIN, QCOMPARE, QVERIFY)
2 | #include "private/quantization.h"
3 | #include "qbuffer.h"
4 | #include "ggml.h"
5 | #include <QtCore>    // NOTE: assumed; QFile and QFileInfo are used below
6 | 
7 | class QuantizerTest : public QObject
8 | {
9 |     Q_OBJECT
10 |     const char *base_model_name = "ggml-tiny.bin";
11 |     const char *q40_model_name = "ggml-tiny-q4_0.bin";
12 |     const char *q41_model_name = "ggml-tiny-q4_1.bin";
13 |     const char *q50_model_name = "ggml-tiny-q5_0.bin";
14 |     const char *q51_model_name = "ggml-tiny-q5_1.bin";
15 |     const char *q80_model_name = "ggml-tiny-q8_0.bin";
16 |     ggml_context* _ctx = nullptr;
17 | 
18 |     void quantize(const char* in, const char* ref_name, ggml_ftype type){
19 | 
20 |         QFile modelFile{ in };
21 |         modelFile.open(QIODeviceBase::ReadOnly);
22 |         QBuffer result;
23 |         result.open(QIODeviceBase::WriteOnly);
24 | 
25 |         auto error_code = qtw::buffer_quantize(modelFile, result, type);
26 |         modelFile.close();
27 |         result.close();
28 | 
29 |         QFile quantized{ ref_name };
30 |         quantized.open(QIODeviceBase::ReadOnly);
31 |         auto ref = quantized.readAll();
32 |         quantized.close();
33 | 
34 |         QCOMPARE(error_code, 0);
35 |         QCOMPARE(ref.size(), result.buffer().size());
36 |         QCOMPARE(ref.compare(result.buffer()), 0);
37 |     }
38 | 
39 | private slots:
40 | 
41 |     void initTestCase()
42 |     {
43 |         QVERIFY(QFileInfo{ base_model_name }.size() > 0);
44 |         QVERIFY(QFileInfo{ q40_model_name }.size() > 0);
45 |         QVERIFY(QFileInfo{ q41_model_name }.size() > 0);
46 |         QVERIFY(QFileInfo{ q50_model_name }.size() > 0);
47 |         QVERIFY(QFileInfo{ q51_model_name }.size() > 0);
48 |         QVERIFY(QFileInfo{ q80_model_name }.size() > 0);
49 | 
50 |         // initializes the float-16 lookup table - critical for quantization
51 |         _ctx = ggml_init({});
52 |         QVERIFY(_ctx);
53 | 
54 |     }
55 |     void q4_0()
56 |     {
57 |         quantize(base_model_name, q40_model_name, GGML_FTYPE_MOSTLY_Q4_0);
58 |     }
59 |     void q4_1()
60 |     {
61 |         quantize(base_model_name, q41_model_name, GGML_FTYPE_MOSTLY_Q4_1);
62 |     }
63 |     void q5_0()
64 |     {
65 |         quantize(base_model_name, q50_model_name, GGML_FTYPE_MOSTLY_Q5_0);
66 |     }
67 |     void q5_1()
68 |     {
69 |         quantize(base_model_name, q51_model_name, GGML_FTYPE_MOSTLY_Q5_1);
70 |     }
71 |     void q8_0()
72 |     {
73 |         quantize(base_model_name, q80_model_name, GGML_FTYPE_MOSTLY_Q8_0);
74 |     }
75 | };
76 | 
77 | QTEST_MAIN(QuantizerTest)
78 | #include "tst_quant.moc"
79 | 
--------------------------------------------------------------------------------