├── .github ├── AAR Source (Android) │ ├── AndroidManifest.xml │ ├── java │ │ └── com │ │ │ └── yasirkula │ │ │ └── unity │ │ │ ├── SpeechToText.java │ │ │ ├── SpeechToTextListener.java │ │ │ ├── SpeechToTextPermissionFragment.java │ │ │ ├── SpeechToTextPermissionReceiver.java │ │ │ └── SpeechToTextRecognitionListener.java │ └── proguard.txt └── README.md ├── LICENSE.txt ├── LICENSE.txt.meta ├── Plugins.meta ├── Plugins ├── SpeechToText.meta └── SpeechToText │ ├── Android.meta │ ├── Android │ ├── STTCallbackHelper.cs │ ├── STTCallbackHelper.cs.meta │ ├── STTInteractionCallbackAndroid.cs │ ├── STTInteractionCallbackAndroid.cs.meta │ ├── STTPermissionCallbackAndroid.cs │ ├── STTPermissionCallbackAndroid.cs.meta │ ├── SpeechToText.aar │ └── SpeechToText.aar.meta │ ├── Editor.meta │ ├── Editor │ ├── STTPostProcessBuild.cs │ ├── STTPostProcessBuild.cs.meta │ ├── SpeechToText.Editor.asmdef │ └── SpeechToText.Editor.asmdef.meta │ ├── ISpeechToTextListener.cs │ ├── ISpeechToTextListener.cs.meta │ ├── README.txt │ ├── README.txt.meta │ ├── SpeechToText.Runtime.asmdef │ ├── SpeechToText.Runtime.asmdef.meta │ ├── SpeechToText.cs │ ├── SpeechToText.cs.meta │ ├── iOS.meta │ └── iOS │ ├── STTInteractionCallbackiOS.cs │ ├── STTInteractionCallbackiOS.cs.meta │ ├── STTPermissionCallbackiOS.cs │ ├── STTPermissionCallbackiOS.cs.meta │ ├── SpeechToText.mm │ └── SpeechToText.mm.meta ├── package.json └── package.json.meta
/.github/AAR Source (Android)/AndroidManifest.xml:
--------------------------------------------------------------------------------
1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 |
--------------------------------------------------------------------------------
/.github/AAR Source (Android)/java/com/yasirkula/unity/SpeechToText.java:
--------------------------------------------------------------------------------
package com.yasirkula.unity;

import android.Manifest;
import android.annotation.TargetApi;
import android.app.Activity;
import android.app.Fragment;
import android.content.BroadcastReceiver;
import android.content.Context;
import android.content.Intent;
import android.content.pm.PackageManager;
import android.net.Uri;
import android.os.Build;
import android.os.Bundle;
import android.os.Looper;
import android.provider.Settings;
import android.speech.RecognizerIntent;
import android.speech.SpeechRecognizer;
import android.util.Log;

import java.util.ArrayList;

/**
 * Static entry points for running a single speech recognition session through
 * Android's {@link SpeechRecognizer}, querying supported languages and handling
 * the RECORD_AUDIO permission.
 *
 * All recognizer calls are made on the UI thread: the public methods either
 * run immediately when already on the main looper or post to it via
 * {@link Activity#runOnUiThread(Runnable)}. Methods that post or request
 * permissions cast the given {@link Context} to {@link Activity}.
 */
public class SpeechToText
{
	// When true, CheckPermission always reports the permission as granted
	public static boolean PermissionFreeMode = false;
	// Values <= 0 are not forwarded to the recognizer intent
	public static long MinimumSessionLength = -1; // Observed default value: 5000 milliseconds
	public static long SpeechSilenceTimeout = -1; // Observed default value: 2000 milliseconds

	// Stays null until the language details broadcast delivers a list
	private static ArrayList<String> supportedLanguages;
	private static SpeechRecognizer speechRecognizer;
	private static SpeechToTextRecognitionListener speechRecognitionListener;

	/**
	 * Starts a new recognition session on the UI thread.
	 *
	 * @param context                  must be an {@link Activity} (it is cast to one)
	 * @param unityInterface           receives the session callbacks
	 * @param language                 language tag; '_' is replaced with '-'. null/empty leaves the language extra unset
	 * @param useFreeFormLanguageModel free-form model if true, web-search model otherwise
	 * @param enablePartialResults     whether partial results are requested
	 * @param preferOfflineRecognition requests offline recognition (extra is set on API 23+, on-device recognizer is used on API 31+)
	 * @return false if the service is unavailable, a session is still running or the permission
	 *         isn't granted (in the last case, a permission request is issued). true means the
	 *         start was posted to the UI thread; a failure there is only logged and the session
	 *         is canceled
	 */
	public static boolean Start( final Context context, final SpeechToTextListener unityInterface, final String language, final boolean useFreeFormLanguageModel, final boolean enablePartialResults, final boolean preferOfflineRecognition )
	{
		if( !IsServiceAvailable( context, preferOfflineRecognition ) || IsBusy() || !RequestPermission( context, null ) )
			return false;

		( (Activity) context ).runOnUiThread( new Runnable()
		{
			@Override
			public void run()
			{
				try
				{
					// Dispose leftover objects from the previous operation
					CancelInternal( false );

					Intent intent = new Intent( RecognizerIntent.ACTION_RECOGNIZE_SPEECH );
					intent.putExtra( RecognizerIntent.EXTRA_LANGUAGE_MODEL, useFreeFormLanguageModel ? RecognizerIntent.LANGUAGE_MODEL_FREE_FORM : RecognizerIntent.LANGUAGE_MODEL_WEB_SEARCH );
					intent.putExtra( RecognizerIntent.EXTRA_MAX_RESULTS, 3 );
					if( language != null && language.length() > 0 )
						intent.putExtra( RecognizerIntent.EXTRA_LANGUAGE, language.replace( '_', '-' ) );
					if( enablePartialResults )
						intent.putExtra( RecognizerIntent.EXTRA_PARTIAL_RESULTS, true );
					if( preferOfflineRecognition && Build.VERSION.SDK_INT >= 23 )
						intent.putExtra( RecognizerIntent.EXTRA_PREFER_OFFLINE, true );
					if( MinimumSessionLength > 0 )
						intent.putExtra( RecognizerIntent.EXTRA_SPEECH_INPUT_MINIMUM_LENGTH_MILLIS, MinimumSessionLength );
					if( SpeechSilenceTimeout > 0 )
					{
						intent.putExtra( RecognizerIntent.EXTRA_SPEECH_INPUT_COMPLETE_SILENCE_LENGTH_MILLIS, SpeechSilenceTimeout );
						intent.putExtra( RecognizerIntent.EXTRA_SPEECH_INPUT_POSSIBLY_COMPLETE_SILENCE_LENGTH_MILLIS, SpeechSilenceTimeout );
					}

					speechRecognizer = preferOfflineRecognition && Build.VERSION.SDK_INT >= 31 ? SpeechRecognizer.createOnDeviceSpeechRecognizer( context ) : SpeechRecognizer.createSpeechRecognizer( context );
					speechRecognitionListener = new SpeechToTextRecognitionListener( unityInterface );
					speechRecognizer.setRecognitionListener( speechRecognitionListener );
					speechRecognizer.startListening( intent );
				}
				catch( Exception e )
				{
					Log.e( "Unity", "Exception:", e );
					CancelInternal( false );
				}
			}
		} );

		return true;
	}

	/**
	 * Asks the active recognizer (if any) to stop listening. Runs on the UI thread.
	 *
	 * @param context must be an {@link Activity} when called from a non-UI thread
	 */
	public static void Stop( Context context )
	{
		if( Looper.myLooper() == Looper.getMainLooper() )
			StopInternal();
		else
		{
			( (Activity) context ).runOnUiThread( new Runnable()
			{
				@Override
				public void run()
				{
					StopInternal();
				}
			} );
		}
	}

	// Must be called on the UI thread
	private static void StopInternal()
	{
		if( speechRecognizer != null )
			speechRecognizer.stopListening();
	}

	/**
	 * Cancels the active session (if any) as a user-initiated cancel. Runs on the UI thread.
	 *
	 * @param context must be an {@link Activity} when called from a non-UI thread
	 */
	public static void Cancel( Context context )
	{
		if( Looper.myLooper() == Looper.getMainLooper() )
			CancelInternal( true );
		else
		{
			( (Activity) context ).runOnUiThread( new Runnable()
			{
				@Override
				public void run()
				{
					CancelInternal( true );
				}
			} );
		}
	}

	/**
	 * Notifies the current listener about the cancellation and destroys the recognizer.
	 * Both steps are attempted independently and both static references are always cleared.
	 * Must be called on the UI thread.
	 */
	private static void CancelInternal( boolean isCanceledByUser )
	{
		if( speechRecognizer != null )
		{
			try
			{
				speechRecognitionListener.OnSpeechRecognizerCanceled( isCanceledByUser );
			}
			catch( Exception e )
			{
				Log.e( "Unity", "Exception:", e );
			}
			finally
			{
				speechRecognitionListener = null;
			}

			try
			{
				speechRecognizer.destroy();
			}
			catch( Exception e )
			{
				Log.e( "Unity", "Exception:", e );
			}
			finally
			{
				speechRecognizer = null;
			}
		}
	}

	/**
	 * Asynchronously fetches the list of supported languages via an ordered broadcast.
	 * Until a list is received, {@link #IsLanguageSupported(String)} returns -1.
	 */
	public static void InitializeSupportedLanguages( final Context context )
	{
		InitializeSupportedLanguagesInternal( context, false );
	}

	/**
	 * @param secondAttempt if true, the broadcast is targeted at the Google app's package.
	 *                      A second attempt is made only when the first one succeeds without
	 *                      returning a language list
	 */
	private static void InitializeSupportedLanguagesInternal( final Context context, final boolean secondAttempt )
	{
		Intent intent = RecognizerIntent.getVoiceDetailsIntent( context );
		if( intent == null )
			intent = new Intent( RecognizerIntent.ACTION_GET_LANGUAGE_DETAILS );

		// In the first attempt, try to fetch the supported languages list without this hack
		// Credit: https://stackoverflow.com/q/48500077
		if( secondAttempt )
			intent.setPackage( "com.google.android.googlequicksearchbox" );

		try
		{
			context.sendOrderedBroadcast( intent, null, new BroadcastReceiver()
			{
				@Override
				public void onReceive( Context context, Intent intent )
				{
					if( getResultCode() == Activity.RESULT_OK )
					{
						Bundle results = getResultExtras( true );
						supportedLanguages = results.getStringArrayList( RecognizerIntent.EXTRA_SUPPORTED_LANGUAGES );
						if( supportedLanguages == null && !secondAttempt )
							InitializeSupportedLanguagesInternal( context, true );
					}
				}
			}, null, Activity.RESULT_OK, null, null );
		}
		catch( Exception e )
		{
			Log.e( "Unity", "Exception:", e );
		}
	}

	/**
	 * Checks the given language against the fetched supported languages list.
	 *
	 * @param language language tag; '_' is treated as '-'
	 * @return -1: Unknown (list not available), 0: No (also for null/empty input), 1: Yes (exact match),
	 *         2: Most likely (a supported language starts with "language-", e.g. "en" vs "en-US")
	 */
	// -1: Unknown, 0: No, 1: Yes, 2: Most likely
	public static int IsLanguageSupported( String language )
	{
		if( language == null || language.length() == 0 )
			return 0;

		if( supportedLanguages != null )
		{
			language = language.replace( '_', '-' );

			if( supportedLanguages.contains( language ) )
				return 1;
			else
			{
				// Match "en" with "en-US" and etc.
				language += "-";

				for( String supportedLanguage : supportedLanguages )
				{
					if( supportedLanguage.startsWith( language ) )
						return 2;
				}
			}

			return 0;
		}

		return -1;
	}

	/**
	 * @return whether a recognition service is available. With preferOfflineRecognition:
	 *         on API 31+ the on-device availability is reported, below API 23 the result is
	 *         always false and in between the regular availability check is used
	 */
	public static boolean IsServiceAvailable( final Context context, boolean preferOfflineRecognition )
	{
		if( preferOfflineRecognition )
		{
			if( Build.VERSION.SDK_INT >= 31 )
				return SpeechRecognizer.isOnDeviceRecognitionAvailable( context );
			else if( Build.VERSION.SDK_INT < 23 )
				return false;
		}

		return SpeechRecognizer.isRecognitionAvailable( context );
	}

	/**
	 * @return true while a listener exists that hasn't sent its final result yet
	 */
	public static boolean IsBusy()
	{
		return speechRecognitionListener != null && !speechRecognitionListener.IsFinished();
	}

	/**
	 * @return true if {@link #PermissionFreeMode} is set, the device runs below Android M
	 *         or RECORD_AUDIO is granted
	 */
	@TargetApi( Build.VERSION_CODES.M )
	public static boolean CheckPermission( final Context context )
	{
		return PermissionFreeMode || Build.VERSION.SDK_INT < Build.VERSION_CODES.M || context.checkSelfPermission( Manifest.permission.RECORD_AUDIO ) == PackageManager.PERMISSION_GRANTED;
	}

	/**
	 * Requests the RECORD_AUDIO permission if it isn't granted yet.
	 *
	 * @param context            must be an {@link Activity} if the permission has to be requested
	 * @param permissionReceiver optional; receives 1 immediately if the permission is already granted.
	 *                           Otherwise the request is made through a {@link SpeechToTextPermissionFragment}
	 *                           that reports to this receiver. If null, the request is made directly on the
	 *                           activity and its outcome isn't reported
	 * @return true only if the permission is already granted at the time of the call
	 */
	@TargetApi( Build.VERSION_CODES.M )
	public static boolean RequestPermission( final Context context, final SpeechToTextPermissionReceiver permissionReceiver )
	{
		if( CheckPermission( context ) )
		{
			if( permissionReceiver != null )
				permissionReceiver.OnPermissionResult( 1 );

			return true;
		}

		if( permissionReceiver == null )
			( (Activity) context ).requestPermissions( new String[] { Manifest.permission.RECORD_AUDIO }, 875621 );
		else
		{
			final Fragment request = new SpeechToTextPermissionFragment( permissionReceiver );
			( (Activity) context ).getFragmentManager().beginTransaction().add( 0, request ).commitAllowingStateLoss();
		}

		return false;
	}

	/**
	 * Opens the application details settings page of the given package.
	 *
	 * @param packageName target package; null/empty means this app's own package
	 */
	// Credit: https://stackoverflow.com/a/35456817/2373034
	public static void OpenSettings( final Context context, String packageName )
	{
		Uri uri = Uri.fromParts( "package", ( packageName == null || packageName.length() == 0 ) ? context.getPackageName() : packageName, null );

		Intent intent = new Intent();
		intent.setAction( Settings.ACTION_APPLICATION_DETAILS_SETTINGS );
		intent.setData( uri );

		try
		{
			context.startActivity( intent );
		}
		catch( Exception e )
		{
			Log.e( "Unity", "Exception:", e );
		}
	}
}
--------------------------------------------------------------------------------
/.github/AAR Source (Android)/java/com/yasirkula/unity/SpeechToTextListener.java:
--------------------------------------------------------------------------------
package com.yasirkula.unity;

/**
 * Callbacks of a speech recognition session. After OnResultReceived is invoked,
 * SpeechToTextRecognitionListener doesn't forward any further callbacks.
 */
public interface SpeechToTextListener
{
	void OnReadyForSpeech();
	void OnBeginningOfSpeech();
	void OnVoiceLevelChanged( float rmsdB );
	void OnPartialResultReceived( String spokenText );
	// errorCode: -1 for a regular result, 0 if canceled by the user, otherwise a SpeechRecognizer error code
	void OnResultReceived( String spokenText, int errorCode );
}
--------------------------------------------------------------------------------
/.github/AAR Source (Android)/java/com/yasirkula/unity/SpeechToTextPermissionFragment.java:
--------------------------------------------------------------------------------
package com.yasirkula.unity;

// Original work Copyright (c) 2017 Yury Habets
// Modified work Copyright 2018 yasirkula
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright
notice and this permission notice shall be included in 14 | // all copies or substantial portions of the Software. 15 | // 16 | // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 | // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 | // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 19 | // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 | // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 | // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 22 | // SOFTWARE. 23 | 24 | import android.Manifest; 25 | import android.annotation.TargetApi; 26 | import android.app.Activity; 27 | import android.app.Fragment; 28 | import android.content.Intent; 29 | import android.content.pm.PackageManager; 30 | import android.os.Build; 31 | import android.os.Bundle; 32 | import android.util.Log; 33 | 34 | @TargetApi( Build.VERSION_CODES.M ) 35 | public class SpeechToTextPermissionFragment extends Fragment 36 | { 37 | private static final int PERMISSIONS_REQUEST_CODE = 875621; 38 | 39 | private final SpeechToTextPermissionReceiver permissionReceiver; 40 | 41 | public SpeechToTextPermissionFragment() 42 | { 43 | permissionReceiver = null; 44 | } 45 | 46 | public SpeechToTextPermissionFragment( final SpeechToTextPermissionReceiver permissionReceiver ) 47 | { 48 | this.permissionReceiver = permissionReceiver; 49 | } 50 | 51 | @Override 52 | public void onCreate( Bundle savedInstanceState ) 53 | { 54 | super.onCreate( savedInstanceState ); 55 | if( permissionReceiver == null ) 56 | onRequestPermissionsResult( PERMISSIONS_REQUEST_CODE, new String[0], new int[0] ); 57 | else 58 | requestPermissions( new String[] { Manifest.permission.RECORD_AUDIO }, PERMISSIONS_REQUEST_CODE ); 59 | } 60 | 61 | @Override 62 | public void onRequestPermissionsResult( int requestCode, String[] permissions, int[] grantResults ) 63 | { 64 | if( 
requestCode != PERMISSIONS_REQUEST_CODE ) 65 | return; 66 | 67 | if( permissionReceiver == null ) 68 | { 69 | Log.e( "Unity", "Fragment data got reset while asking permissions!" ); 70 | 71 | getFragmentManager().beginTransaction().remove( this ).commitAllowingStateLoss(); 72 | return; 73 | } 74 | 75 | // 0 -> denied, must go to settings 76 | // 1 -> granted 77 | // 2 -> denied, can ask again 78 | int result = 1; 79 | if( permissions.length == 0 || grantResults.length == 0 ) 80 | result = 2; 81 | else 82 | { 83 | for( int i = 0; i < permissions.length && i < grantResults.length; ++i ) 84 | { 85 | if( grantResults[i] == PackageManager.PERMISSION_DENIED ) 86 | { 87 | if( !shouldShowRequestPermissionRationale( permissions[i] ) ) 88 | { 89 | result = 0; 90 | break; 91 | } 92 | 93 | result = 2; 94 | } 95 | } 96 | } 97 | 98 | permissionReceiver.OnPermissionResult( result ); 99 | getFragmentManager().beginTransaction().remove( this ).commitAllowingStateLoss(); 100 | 101 | // Resolves a bug in Unity 2019 where the calling activity 102 | // doesn't resume automatically after the fragment finishes 103 | // Credit: https://stackoverflow.com/a/12409215/2373034 104 | try 105 | { 106 | Intent resumeUnityActivity = new Intent( getActivity(), getActivity().getClass() ); 107 | resumeUnityActivity.setFlags( Intent.FLAG_ACTIVITY_REORDER_TO_FRONT ); 108 | getActivity().startActivityIfNeeded( resumeUnityActivity, 0 ); 109 | } 110 | catch( Exception e ) 111 | { 112 | Log.e( "Unity", "Exception (resume):", e ); 113 | } 114 | } 115 | } -------------------------------------------------------------------------------- /.github/AAR Source (Android)/java/com/yasirkula/unity/SpeechToTextPermissionReceiver.java: -------------------------------------------------------------------------------- 1 | package com.yasirkula.unity; 2 | 3 | public interface SpeechToTextPermissionReceiver 4 | { 5 | void OnPermissionResult( int result ); 6 | } 
-------------------------------------------------------------------------------- /.github/AAR Source (Android)/java/com/yasirkula/unity/SpeechToTextRecognitionListener.java: -------------------------------------------------------------------------------- 1 | package com.yasirkula.unity; 2 | 3 | import android.os.Bundle; 4 | import android.speech.RecognitionListener; 5 | import android.speech.RecognizerIntent; 6 | import android.speech.SpeechRecognizer; 7 | import android.util.Log; 8 | 9 | import java.util.ArrayList; 10 | 11 | public class SpeechToTextRecognitionListener implements RecognitionListener 12 | { 13 | private final SpeechToTextListener unityInterface; 14 | private boolean isResultSent; 15 | private String lastResult = ""; 16 | 17 | public SpeechToTextRecognitionListener( SpeechToTextListener unityInterface ) 18 | { 19 | this.unityInterface = unityInterface; 20 | } 21 | 22 | private void SendResult( String result, int errorCode ) 23 | { 24 | if( !isResultSent ) 25 | { 26 | isResultSent = true; 27 | unityInterface.OnResultReceived( result, errorCode ); 28 | } 29 | } 30 | 31 | public boolean IsFinished() 32 | { 33 | return isResultSent; 34 | } 35 | 36 | public void OnSpeechRecognizerCanceled( boolean isCanceledByUser ) 37 | { 38 | SendResult( lastResult, isCanceledByUser ? 
0 : SpeechRecognizer.ERROR_RECOGNIZER_BUSY ); 39 | } 40 | 41 | @Override 42 | public void onReadyForSpeech( Bundle params ) 43 | { 44 | if( !isResultSent ) 45 | unityInterface.OnReadyForSpeech(); 46 | } 47 | 48 | @Override 49 | public void onBeginningOfSpeech() 50 | { 51 | if( !isResultSent ) 52 | unityInterface.OnBeginningOfSpeech(); 53 | } 54 | 55 | @Override 56 | public void onResults( Bundle results ) 57 | { 58 | SendResult( GetMostPromisingResult( results ), -1 ); 59 | } 60 | 61 | @Override 62 | public void onPartialResults( Bundle partialResults ) 63 | { 64 | if( !isResultSent ) 65 | unityInterface.OnPartialResultReceived( GetMostPromisingResult( partialResults ) ); 66 | } 67 | 68 | private String GetMostPromisingResult( Bundle resultsBundle ) 69 | { 70 | ArrayList results = resultsBundle.getStringArrayList( SpeechRecognizer.RESULTS_RECOGNITION ); 71 | if( results != null && results.size() > 0 ) 72 | { 73 | lastResult = results.get( 0 ); 74 | if( results.size() > 1 ) 75 | { 76 | // Try to get the result with the highest confidence score 77 | float[] confidenceScores = resultsBundle.getFloatArray( RecognizerIntent.EXTRA_CONFIDENCE_SCORES ); 78 | if( confidenceScores != null && confidenceScores.length >= results.size() ) 79 | { 80 | float highestConfidenceScore = confidenceScores[0]; 81 | for( int i = 1; i < confidenceScores.length; i++ ) 82 | { 83 | if( confidenceScores[i] > highestConfidenceScore ) 84 | { 85 | highestConfidenceScore = confidenceScores[i]; 86 | lastResult = results.get( i ); 87 | } 88 | } 89 | } 90 | } 91 | } 92 | 93 | if( lastResult == null ) 94 | lastResult = ""; 95 | 96 | return lastResult; 97 | } 98 | 99 | @Override 100 | public void onError( int error ) 101 | { 102 | // Error codes: https://developer.android.com/reference/android/speech/SpeechRecognizer 103 | Log.e( "Unity", "Speech recognition error code: " + error ); 104 | SendResult( lastResult, error ); 105 | } 106 | 107 | @Override 108 | public void onRmsChanged( float rmsdB ) 109 | 
{ 110 | if( !isResultSent ) 111 | unityInterface.OnVoiceLevelChanged( rmsdB ); 112 | } 113 | 114 | @Override 115 | public void onBufferReceived( byte[] buffer ) 116 | { 117 | } 118 | 119 | @Override 120 | public void onEndOfSpeech() 121 | { 122 | } 123 | 124 | @Override 125 | public void onEvent( int eventType, Bundle params ) 126 | { 127 | } 128 | } -------------------------------------------------------------------------------- /.github/AAR Source (Android)/proguard.txt: -------------------------------------------------------------------------------- 1 | -keep class com.yasirkula.unity.* { *; } -------------------------------------------------------------------------------- /.github/README.md: -------------------------------------------------------------------------------- 1 | # Unity Speech to Text Plugin for Android & iOS 2 | 3 | **Discord:** https://discord.gg/UJJt549AaV 4 | 5 | **[GitHub Sponsors ☕](https://github.com/sponsors/yasirkula)** 6 | 7 | This plugin helps you convert speech to text on Android (all versions) and iOS 10+. Offline speech recognition is supported on Android 23+ and iOS 13+ if the target language's speech recognition model is present on the device. 8 | 9 | Note that continuous speech detection isn't supported so the speech recognition sessions automatically end after a short break in the speech or when the OS-determined time limits are reached. 
10 | 11 | ## INSTALLATION 12 | 13 | There are 4 ways to install this plugin: 14 | 15 | - import [SpeechToText.unitypackage](https://github.com/yasirkula/UnitySpeechToText/releases) via *Assets-Import Package* 16 | - clone/[download](https://github.com/yasirkula/UnitySpeechToText/archive/master.zip) this repository and move the *Plugins* folder to your Unity project's *Assets* folder 17 | - *(via Package Manager)* click the + button and install the package from the following git URL: 18 | - `https://github.com/yasirkula/UnitySpeechToText.git` 19 | - *(via [OpenUPM](https://openupm.com))* after installing [openupm-cli](https://github.com/openupm/openupm-cli), run the following command: 20 | - `openupm add com.yasirkula.speechtotext` 21 | 22 | ### iOS Setup 23 | 24 | There are two ways to set up the plugin on iOS: 25 | 26 | **a. Automated Setup for iOS** 27 | 28 | - *(optional)* change the values of **Speech Recognition Usage Description** and **Microphone Usage Description** at *Project Settings/yasirkula/Speech to Text* 29 | 30 | **b. Manual Setup for iOS** 31 | 32 | - see: https://github.com/yasirkula/UnitySpeechToText/wiki/Manual-Setup-for-iOS 33 | 34 | ## KNOWN ISSUES 35 | 36 | - Speech session returned [error code 12](https://developer.android.com/reference/android/speech/SpeechRecognizer#ERROR_LANGUAGE_NOT_SUPPORTED) on a single Android test device (regardless of target language) and couldn't be started 37 | 38 | ## HOW TO 39 | 40 | **NOTE:** The codebase is documented using XML comments so this section will only briefly mention the functions. 41 | 42 | You should first initialize the plugin via `SpeechToText.Initialize( string preferredLanguage = null )`. If you don't provide a preferred language (in the format "*en-US*"), the device's default language is used. You can check if a language is supported via `SpeechToText.IsLanguageSupported( string language )`. 
43 | 44 | After initialization, you can query `SpeechToText.IsServiceAvailable( bool preferOfflineRecognition = false )` and `SpeechToText.IsBusy()` to see if a speech recognition session can be started. Most operations will fail while the service is unavailable or busy. 45 | 46 | Before starting a speech recognition session, you must make sure that the necessary permissions are granted via `SpeechToText.CheckPermission()` and `SpeechToText.RequestPermissionAsync( PermissionCallback callback )` functions. If permission is *Denied*, you can call `SpeechToText.OpenSettings()` to automatically open the app's Settings from where the user can grant the necessary permissions manually (Android: Microphone, iOS: Microphone and Speech Recognition). On Android, the speech recognition system also requires the Google app to have Microphone permission. If not, its result callback will return error code 9. In that scenario, you can notify the user and call `SpeechToText.OpenGoogleAppSettings()` to automatically open the Google app's Settings from where the user can grant it the Microphone permission manually. 47 | 48 | To start a speech recognition session, you can call `SpeechToText.Start( ISpeechToTextListener listener, bool useFreeFormLanguageModel = true, bool preferOfflineRecognition = false )`. Normally, sessions end automatically after a short break in the speech but you can also stop the session manually via `SpeechToText.ForceStop()` (processes the speech input so far) or `SpeechToText.Cancel()` (doesn't process any speech input and immediately invokes the result callback with error code 0). The `ISpeechToTextListener` interface has the following functions: 49 | 50 | - `OnReadyForSpeech()` 51 | - `OnBeginningOfSpeech()` 52 | - `OnVoiceLevelChanged( float normalizedVoiceLevel )` 53 | - `OnPartialResultReceived( string spokenText )` 54 | - `OnResultReceived( string spokenText, int? 
errorCode )` 55 | 56 | ## EXAMPLE CODE 57 | 58 | ```csharp 59 | using UnityEngine; 60 | using UnityEngine.UI; 61 | 62 | public class SpeechToTextDemo : MonoBehaviour, ISpeechToTextListener 63 | { 64 | public Text SpeechText; 65 | public Button StartSpeechToTextButton, StopSpeechToTextButton; 66 | public Slider VoiceLevelSlider; 67 | public bool PreferOfflineRecognition; 68 | 69 | private float normalizedVoiceLevel; 70 | 71 | private void Awake() 72 | { 73 | SpeechToText.Initialize( "en-US" ); 74 | 75 | StartSpeechToTextButton.onClick.AddListener( StartSpeechToText ); 76 | StopSpeechToTextButton.onClick.AddListener( StopSpeechToText ); 77 | } 78 | 79 | private void Update() 80 | { 81 | StartSpeechToTextButton.interactable = SpeechToText.IsServiceAvailable( PreferOfflineRecognition ) && !SpeechToText.IsBusy(); 82 | StopSpeechToTextButton.interactable = SpeechToText.IsBusy(); 83 | 84 | // You may also apply some noise to the voice level for a more fluid animation (e.g. via Mathf.PerlinNoise) 85 | VoiceLevelSlider.value = Mathf.Lerp( VoiceLevelSlider.value, normalizedVoiceLevel, 15f * Time.unscaledDeltaTime ); 86 | } 87 | 88 | public void ChangeLanguage( string preferredLanguage ) 89 | { 90 | if( !SpeechToText.Initialize( preferredLanguage ) ) 91 | SpeechText.text = "Couldn't initialize with language: " + preferredLanguage; 92 | } 93 | 94 | public void StartSpeechToText() 95 | { 96 | SpeechToText.RequestPermissionAsync( ( permission ) => 97 | { 98 | if( permission == SpeechToText.Permission.Granted ) 99 | { 100 | if( SpeechToText.Start( this, preferOfflineRecognition: PreferOfflineRecognition ) ) 101 | SpeechText.text = ""; 102 | else 103 | SpeechText.text = "Couldn't start speech recognition session!"; 104 | } 105 | else 106 | SpeechText.text = "Permission is denied!"; 107 | } ); 108 | } 109 | 110 | public void StopSpeechToText() 111 | { 112 | SpeechToText.ForceStop(); 113 | } 114 | 115 | void ISpeechToTextListener.OnReadyForSpeech() 116 | { 117 | Debug.Log( 
"OnReadyForSpeech" ); 118 | } 119 | 120 | void ISpeechToTextListener.OnBeginningOfSpeech() 121 | { 122 | Debug.Log( "OnBeginningOfSpeech" ); 123 | } 124 | 125 | void ISpeechToTextListener.OnVoiceLevelChanged( float normalizedVoiceLevel ) 126 | { 127 | // Note that On Android, voice detection starts with a beep sound and it can trigger this callback. You may want to ignore this callback for ~0.5s on Android. 128 | this.normalizedVoiceLevel = normalizedVoiceLevel; 129 | } 130 | 131 | void ISpeechToTextListener.OnPartialResultReceived( string spokenText ) 132 | { 133 | Debug.Log( "OnPartialResultReceived: " + spokenText ); 134 | SpeechText.text = spokenText; 135 | } 136 | 137 | void ISpeechToTextListener.OnResultReceived( string spokenText, int? errorCode ) 138 | { 139 | Debug.Log( "OnResultReceived: " + spokenText + ( errorCode.HasValue ? ( " --- Error: " + errorCode ) : "" ) ); 140 | SpeechText.text = spokenText; 141 | normalizedVoiceLevel = 0f; 142 | 143 | // Recommended approach: 144 | // - If errorCode is 0, session was aborted via SpeechToText.Cancel. Handle the case appropriately. 145 | // - If errorCode is 9, notify the user that they must grant Microphone permission to the Google app and call SpeechToText.OpenGoogleAppSettings. 146 | // - If the speech session took shorter than 1 seconds (should be an error) or a null/empty spokenText is returned, prompt the user to try again (note that if 147 | // errorCode is 6, then the user hasn't spoken and the session has timed out as expected). 
148 | } 149 | } 150 | ``` 151 | -------------------------------------------------------------------------------- /LICENSE.txt: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2023 yasirkula 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 |
--------------------------------------------------------------------------------
/LICENSE.txt.meta:
--------------------------------------------------------------------------------
1 | fileFormatVersion: 2 2 | guid: 655736bd183ddce479c4653920b87581 3 | timeCreated: 1697647234 4 | licenseType: Free 5 | TextScriptImporter: 6 | userData: 7 | assetBundleName: 8 | assetBundleVariant: 9 |
--------------------------------------------------------------------------------
/Plugins.meta:
--------------------------------------------------------------------------------
1 | fileFormatVersion: 2 2 | guid: 2b923af336f75444586aae4bb214d417 3 | folderAsset: yes 4 | timeCreated: 1697647261 5 | licenseType: Free 6 | DefaultImporter: 7 | userData: 8 | assetBundleName: 9 | assetBundleVariant: 10 |
--------------------------------------------------------------------------------
/Plugins/SpeechToText.meta:
--------------------------------------------------------------------------------
1 | fileFormatVersion: 2 2 | guid: 1ef4088c266d7ae4296ebe1353696fed 3 | folderAsset: yes 4 | timeCreated: 1694851727 5 | licenseType: Free 6 | DefaultImporter: 7 | userData: 8 | assetBundleName: 9 | assetBundleVariant: 10 |
--------------------------------------------------------------------------------
/Plugins/SpeechToText/Android.meta:
--------------------------------------------------------------------------------
1 | fileFormatVersion: 2 2 | guid: df3f5a4ce8a292a44b3a41db492c7b6a 3 | folderAsset: yes 4 | timeCreated: 1694851764 5 | licenseType: Free 6 | DefaultImporter: 7 | userData: 8 | assetBundleName: 9 | assetBundleVariant: 10 |
--------------------------------------------------------------------------------
/Plugins/SpeechToText/Android/STTCallbackHelper.cs:
--------------------------------------------------------------------------------
#if UNITY_EDITOR || UNITY_ANDROID
using UnityEngine;

namespace SpeechToTextNamespace
{
	/// <summary>
	/// Queues actions via <see cref="CallOnMainThread"/> and invokes them from this component's Update function.
	/// The GameObject survives scene loads (DontDestroyOnLoad).
	/// </summary>
	public class STTCallbackHelper : MonoBehaviour
	{
		// If set, the GameObject is destroyed after the first batch of queued actions is processed
		private bool autoDestroy;
		// Pending actions, combined into a single multicast delegate
		private System.Action mainThreadAction = null;

		private void Awake()
		{
			DontDestroyOnLoad( gameObject );
		}

		private void Update()
		{
			if( mainThreadAction != null )
			{
				try
				{
					lock( this )
					{
						// Clear the queue before invoking so that the same actions aren't invoked again in the next Update
						System.Action temp = mainThreadAction;
						mainThreadAction = null;
						temp();
					}
				}
				finally
				{
					if( autoDestroy )
						Destroy( gameObject );
				}
			}
		}

		/// <summary>Marks this helper to destroy its GameObject after it invokes its queued actions.</summary>
		/// <returns>This instance, to allow call chaining.</returns>
		public STTCallbackHelper AutoDestroy()
		{
			autoDestroy = true;
			return this;
		}

		/// <summary>Queues <paramref name="function"/> to be invoked in an upcoming Update call.</summary>
		public void CallOnMainThread( System.Action function )
		{
			lock( this )
			{
				mainThreadAction += function;
			}
		}
	}
}
#endif
--------------------------------------------------------------------------------
/Plugins/SpeechToText/Android/STTCallbackHelper.cs.meta:
--------------------------------------------------------------------------------
1 | fileFormatVersion: 2 2 | guid: 86700fe55bdd1204994105ae16138f34 3 | timeCreated: 1545147258 4 | licenseType: Free 5 | MonoImporter: 6 | serializedVersion: 2 7 | defaultReferences: [] 8 | executionOrder: 0 9 | icon: {instanceID: 0} 10 | userData: 11 | assetBundleName: 12 | assetBundleVariant: 13 |
--------------------------------------------------------------------------------
/Plugins/SpeechToText/Android/STTInteractionCallbackAndroid.cs:
--------------------------------------------------------------------------------
#if UNITY_EDITOR || UNITY_ANDROID
using UnityEngine;

namespace SpeechToTextNamespace
{
	/// <summary>
	/// Java proxy for com.yasirkula.unity.SpeechToTextListener. Each callback is queued on a
	/// <see cref="STTCallbackHelper"/> and then forwarded to the <see cref="ISpeechToTextListener"/>.
	/// The helper's GameObject is destroyed after the final result is delivered.
	/// </summary>
	public class STTInteractionCallbackAndroid : AndroidJavaProxy
	{
		private readonly ISpeechToTextListener listener;
		private readonly STTCallbackHelper callbackHelper;

		public STTInteractionCallbackAndroid( ISpeechToTextListener listener ) : base( "com.yasirkula.unity.SpeechToTextListener" )
		{
			this.listener = listener;
			// AddComponent needs the component type; there is no parameterless non-generic overload
			callbackHelper = new GameObject( "STTCallbackHelper" ).AddComponent<STTCallbackHelper>();
		}

		[UnityEngine.Scripting.Preserve]
		public void OnReadyForSpeech()
		{
			callbackHelper.CallOnMainThread( listener.OnReadyForSpeech );
		}

		[UnityEngine.Scripting.Preserve]
		public void OnBeginningOfSpeech()
		{
			callbackHelper.CallOnMainThread( listener.OnBeginningOfSpeech );
		}

		[UnityEngine.Scripting.Preserve]
		/// Root Mean Square (RMS) dB between range [-2, 10] (-2: quiet, 10: loud)
		public void OnVoiceLevelChanged( float rmsdB )
		{
			// Convert the dB value to a linear scale and clamp it to [0, 1]
			// Credit: https://stackoverflow.com/a/14124484/2373034
			float normalizedVoiceLevel = Mathf.Clamp01( 0.1f * Mathf.Pow( 10f, rmsdB / 10f ) );
			callbackHelper.CallOnMainThread( () => listener.OnVoiceLevelChanged( normalizedVoiceLevel ) );
		}

		[UnityEngine.Scripting.Preserve]
		public void OnPartialResultReceived( string spokenText )
		{
			// Empty partial results aren't forwarded
			if( !string.IsNullOrEmpty( spokenText ) )
				callbackHelper.CallOnMainThread( () => listener.OnPartialResultReceived( spokenText ) );
		}

		[UnityEngine.Scripting.Preserve]
		public void OnResultReceived( string spokenText, int errorCode )
		{
			// ERROR_NO_MATCH (7) error code is thrown instead of ERROR_SPEECH_TIMEOUT (6) if the user doesn't speak. ERROR_NO_MATCH is also
			// thrown if the system can't understand the user's speech but I unfortunately couldn't find a way to distinguish between
			// these two cases. So, ERROR_NO_MATCH is always considered as ERROR_SPEECH_TIMEOUT for the time being.
			if( errorCode == 7 )
				errorCode = 6;

			callbackHelper.CallOnMainThread( () =>
			{
				try
				{
					// Empty text is reported as null and negative error codes (i.e. no error) are reported as null
					listener.OnResultReceived( !string.IsNullOrEmpty( spokenText ) ? spokenText : null, ( errorCode >= 0 ) ? (int?) errorCode : null );
				}
				finally
				{
					// The session is over, the helper is no longer needed
					Object.DestroyImmediate( callbackHelper.gameObject );
				}
			} );
		}
	}
}
#endif
--------------------------------------------------------------------------------
/Plugins/SpeechToText/Android/STTInteractionCallbackAndroid.cs.meta:
--------------------------------------------------------------------------------
1 | fileFormatVersion: 2 2 | guid: 82274ccc8e66eb243b0ea95903cccd76 3 | timeCreated: 1519060539 4 | licenseType: Free 5 | MonoImporter: 6 | serializedVersion: 2 7 | defaultReferences: [] 8 | executionOrder: 0 9 | icon: {instanceID: 0} 10 | userData: 11 | assetBundleName: 12 | assetBundleVariant: 13 |
--------------------------------------------------------------------------------
/Plugins/SpeechToText/Android/STTPermissionCallbackAndroid.cs:
--------------------------------------------------------------------------------
1 | #if UNITY_EDITOR || UNITY_ANDROID 2 | using System.Threading; 3 | using UnityEngine; 4 | 5 | namespace SpeechToTextNamespace 6 | { 7 | public class STTPermissionCallbackAndroid : AndroidJavaProxy 8 | { 9 | private readonly object threadLock; 10 | public int Result { get; private set; } 11 | 12 | public STTPermissionCallbackAndroid( object threadLock ) : base( "com.yasirkula.unity.SpeechToTextPermissionReceiver" ) 13 | { 14 | Result = -1; 15 | this.threadLock = threadLock; 16 | } 17 | 18 | [UnityEngine.Scripting.Preserve] 19 | public void OnPermissionResult( int result ) 20 | { 21 | Result = result; 22 | 23 | lock( threadLock ) 24 | { 25 | Monitor.Pulse( threadLock ); 26 | } 27 | } 28 | } 29 | 30 | public class STTPermissionCallbackAsyncAndroid : AndroidJavaProxy 31 | { 32 | private readonly SpeechToText.PermissionCallback callback; 33 | private readonly STTCallbackHelper callbackHelper; 34 | 35 | public STTPermissionCallbackAsyncAndroid( SpeechToText.PermissionCallback callback ) : base( "com.yasirkula.unity.SpeechToTextPermissionReceiver" ) 36 | { 37 | 
this.callback = callback; 38 | callbackHelper = new GameObject( "STTCallbackHelper" ).AddComponent().AutoDestroy(); 39 | } 40 | 41 | [UnityEngine.Scripting.Preserve] 42 | public void OnPermissionResult( int result ) 43 | { 44 | callbackHelper.CallOnMainThread( () => callback( (SpeechToText.Permission) result ) ); 45 | } 46 | } 47 | } 48 | #endif -------------------------------------------------------------------------------- /Plugins/SpeechToText/Android/STTPermissionCallbackAndroid.cs.meta: -------------------------------------------------------------------------------- 1 | fileFormatVersion: 2 2 | guid: 884395253a44f734b868612aebdb3c7f 3 | timeCreated: 1519060539 4 | licenseType: Free 5 | MonoImporter: 6 | serializedVersion: 2 7 | defaultReferences: [] 8 | executionOrder: 0 9 | icon: {instanceID: 0} 10 | userData: 11 | assetBundleName: 12 | assetBundleVariant: 13 | -------------------------------------------------------------------------------- /Plugins/SpeechToText/Android/SpeechToText.aar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yasirkula/UnitySpeechToText/511736fbefd300c0ef169689f0851e99357976c3/Plugins/SpeechToText/Android/SpeechToText.aar -------------------------------------------------------------------------------- /Plugins/SpeechToText/Android/SpeechToText.aar.meta: -------------------------------------------------------------------------------- 1 | fileFormatVersion: 2 2 | guid: 8ea5beac9a3c3d74289579f751e2c9c4 3 | timeCreated: 1569764737 4 | licenseType: Free 5 | PluginImporter: 6 | serializedVersion: 2 7 | iconMap: {} 8 | executionOrder: {} 9 | isPreloaded: 0 10 | isOverridable: 0 11 | platformData: 12 | data: 13 | first: 14 | Android: Android 15 | second: 16 | enabled: 1 17 | settings: {} 18 | data: 19 | first: 20 | Any: 21 | second: 22 | enabled: 0 23 | settings: {} 24 | data: 25 | first: 26 | Editor: Editor 27 | second: 28 | enabled: 0 29 | settings: 30 | 
DefaultValueInitialized: true 31 | userData: 32 | assetBundleName: 33 | assetBundleVariant: 34 | -------------------------------------------------------------------------------- /Plugins/SpeechToText/Editor.meta: -------------------------------------------------------------------------------- 1 | fileFormatVersion: 2 2 | guid: bc1e3b18ca26ed4408a127190e9a40a9 3 | folderAsset: yes 4 | timeCreated: 1521452097 5 | licenseType: Free 6 | DefaultImporter: 7 | userData: 8 | assetBundleName: 9 | assetBundleVariant: 10 | -------------------------------------------------------------------------------- /Plugins/SpeechToText/Editor/STTPostProcessBuild.cs: -------------------------------------------------------------------------------- 1 | using System.IO; 2 | using UnityEngine; 3 | using UnityEditor; 4 | #if UNITY_IOS 5 | using UnityEditor.Callbacks; 6 | using UnityEditor.iOS.Xcode; 7 | #endif 8 | 9 | namespace SpeechToTextNamespace 10 | { 11 | [System.Serializable] 12 | public class Settings 13 | { 14 | private const string SAVE_PATH = "ProjectSettings/SpeechToText.json"; 15 | 16 | public bool AutomatedSetup = true; 17 | public string SpeechRecognitionUsageDescription = "Speech recognition will be used for speech-to-text conversion."; 18 | public string MicrophoneUsageDescription = "Microphone will be used with speech recognition."; 19 | 20 | private static Settings m_instance = null; 21 | public static Settings Instance 22 | { 23 | get 24 | { 25 | if( m_instance == null ) 26 | { 27 | try 28 | { 29 | if( File.Exists( SAVE_PATH ) ) 30 | m_instance = JsonUtility.FromJson( File.ReadAllText( SAVE_PATH ) ); 31 | else 32 | m_instance = new Settings(); 33 | } 34 | catch( System.Exception e ) 35 | { 36 | Debug.LogException( e ); 37 | m_instance = new Settings(); 38 | } 39 | } 40 | 41 | return m_instance; 42 | } 43 | } 44 | 45 | public void Save() 46 | { 47 | File.WriteAllText( SAVE_PATH, JsonUtility.ToJson( this, true ) ); 48 | } 49 | 50 | #if UNITY_2018_3_OR_NEWER 51 | 
[SettingsProvider] 52 | public static SettingsProvider CreatePreferencesGUI() 53 | { 54 | return new SettingsProvider( "Project/yasirkula/Speech to Text", SettingsScope.Project ) 55 | { 56 | guiHandler = ( searchContext ) => PreferencesGUI(), 57 | keywords = new System.Collections.Generic.HashSet() { "Speech", "Text", "Android", "iOS" } 58 | }; 59 | } 60 | #endif 61 | 62 | #if !UNITY_2018_3_OR_NEWER 63 | [PreferenceItem( "Speech to Text" )] 64 | #endif 65 | public static void PreferencesGUI() 66 | { 67 | EditorGUI.BeginChangeCheck(); 68 | 69 | Instance.AutomatedSetup = EditorGUILayout.Toggle( "Automated Setup", Instance.AutomatedSetup ); 70 | 71 | EditorGUI.BeginDisabledGroup( !Instance.AutomatedSetup ); 72 | Instance.SpeechRecognitionUsageDescription = EditorGUILayout.DelayedTextField( "Speech Recognition Usage Description", Instance.SpeechRecognitionUsageDescription ); 73 | Instance.MicrophoneUsageDescription = EditorGUILayout.DelayedTextField( "Microphone Usage Description", Instance.MicrophoneUsageDescription ); 74 | EditorGUI.EndDisabledGroup(); 75 | 76 | if( EditorGUI.EndChangeCheck() ) 77 | Instance.Save(); 78 | } 79 | } 80 | 81 | public class STTPostProcessBuild 82 | { 83 | #if UNITY_IOS 84 | [PostProcessBuild] 85 | public static void OnPostprocessBuild( BuildTarget target, string buildPath ) 86 | { 87 | if( !Settings.Instance.AutomatedSetup ) 88 | return; 89 | 90 | if( target == BuildTarget.iOS ) 91 | { 92 | string pbxProjectPath = PBXProject.GetPBXProjectPath( buildPath ); 93 | string plistPath = Path.Combine( buildPath, "Info.plist" ); 94 | 95 | PBXProject pbxProject = new PBXProject(); 96 | pbxProject.ReadFromFile( pbxProjectPath ); 97 | 98 | #if UNITY_2019_3_OR_NEWER 99 | string targetGUID = pbxProject.GetUnityFrameworkTargetGuid(); 100 | #else 101 | string targetGUID = pbxProject.TargetGuidByName( PBXProject.GetUnityTargetName() ); 102 | #endif 103 | 104 | pbxProject.AddBuildProperty( targetGUID, "OTHER_LDFLAGS", "-weak_framework Speech" ); 105 | 
pbxProject.AddBuildProperty( targetGUID, "OTHER_LDFLAGS", "-weak_framework Accelerate" ); 106 | 107 | pbxProject.RemoveFrameworkFromProject( targetGUID, "Speech.framework" ); 108 | pbxProject.RemoveFrameworkFromProject( targetGUID, "Accelerate.framework" ); 109 | 110 | File.WriteAllText( pbxProjectPath, pbxProject.WriteToString() ); 111 | 112 | PlistDocument plist = new PlistDocument(); 113 | plist.ReadFromString( File.ReadAllText( plistPath ) ); 114 | 115 | PlistElementDict rootDict = plist.root; 116 | if( !string.IsNullOrEmpty( Settings.Instance.SpeechRecognitionUsageDescription ) ) 117 | rootDict.SetString( "NSSpeechRecognitionUsageDescription", Settings.Instance.SpeechRecognitionUsageDescription ); 118 | if( !string.IsNullOrEmpty( Settings.Instance.MicrophoneUsageDescription ) ) 119 | rootDict.SetString( "NSMicrophoneUsageDescription", Settings.Instance.MicrophoneUsageDescription ); 120 | 121 | File.WriteAllText( plistPath, plist.WriteToString() ); 122 | } 123 | } 124 | #endif 125 | } 126 | } -------------------------------------------------------------------------------- /Plugins/SpeechToText/Editor/STTPostProcessBuild.cs.meta: -------------------------------------------------------------------------------- 1 | fileFormatVersion: 2 2 | guid: c27ea70fabb400d4d982ee5095c7e706 3 | timeCreated: 1521452119 4 | licenseType: Free 5 | MonoImporter: 6 | serializedVersion: 2 7 | defaultReferences: [] 8 | executionOrder: 0 9 | icon: {instanceID: 0} 10 | userData: 11 | assetBundleName: 12 | assetBundleVariant: 13 | -------------------------------------------------------------------------------- /Plugins/SpeechToText/Editor/SpeechToText.Editor.asmdef: -------------------------------------------------------------------------------- 1 | { 2 | "name": "SpeechToText.Editor", 3 | "references": [], 4 | "includePlatforms": [ 5 | "Editor" 6 | ], 7 | "excludePlatforms": [], 8 | "allowUnsafeCode": false, 9 | "overrideReferences": false, 10 | "precompiledReferences": [], 11 | 
"autoReferenced": true, 12 | "defineConstraints": [], 13 | "versionDefines": [], 14 | "noEngineReferences": false 15 | } -------------------------------------------------------------------------------- /Plugins/SpeechToText/Editor/SpeechToText.Editor.asmdef.meta: -------------------------------------------------------------------------------- 1 | fileFormatVersion: 2 2 | guid: 299584742f797d243aeca7a7cbcf8656 3 | AssemblyDefinitionImporter: 4 | externalObjects: {} 5 | userData: 6 | assetBundleName: 7 | assetBundleVariant: 8 | -------------------------------------------------------------------------------- /Plugins/SpeechToText/ISpeechToTextListener.cs: -------------------------------------------------------------------------------- 1 | public interface ISpeechToTextListener 2 | { 3 | /// <summary> 4 | /// Invoked when speech recognition service starts listening to the user's speech input. On iOS, it's invoked immediately. 5 | /// </summary> 6 | void OnReadyForSpeech(); 7 | 8 | /// <summary> 9 | /// Invoked when speech recognition service detects a speech for the first time. On iOS, it's called just before the first invocation of <see cref="OnPartialResultReceived"/>. 10 | /// </summary> 11 | void OnBeginningOfSpeech(); 12 | 13 | /// <summary> 14 | /// Invoked regularly as the user speaks to report their current voice level. 15 | /// </summary> 16 | /// <param name="normalizedVoiceLevel">User's voice level in [0, 1] range (0: quiet, 1: loud)</param> 17 | void OnVoiceLevelChanged( float normalizedVoiceLevel ); 18 | 19 | /// <summary> 20 | /// Invoked regularly as the user speaks to report their speech input so far. 21 | /// </summary> 22 | void OnPartialResultReceived( string spokenText ); 23 | 24 | /// <summary> 25 | /// Invoked after the speech recognition is finalized. 26 | /// </summary> 27 | /// <param name="errorCode"> 28 | /// If not null, an error has occurred. On Android, all error codes are listed here: https://developer.android.com/reference/android/speech/SpeechRecognizer#constants_1<br />
29 | /// Special error codes:<br />
30 | /// - 0: <see cref="SpeechToText.Cancel"/> is called.<br />
31 | /// - 6: User hasn't spoken and the speech session has timed out.<br />
32 | /// - 9: Google app that processes the speech doesn't have Microphone permission on Android. User can be informed that they should grant the permission 33 | /// from Google app's Settings and, for convenience, that Settings page can be opened programmatically via <see cref="SpeechToText.OpenGoogleAppSettings"/>. 34 | /// See: https://stackoverflow.com/a/48006238/2373034 35 | /// </param> 36 | void OnResultReceived( string spokenText, int? errorCode ); 37 | } -------------------------------------------------------------------------------- /Plugins/SpeechToText/ISpeechToTextListener.cs.meta: -------------------------------------------------------------------------------- 1 | fileFormatVersion: 2 2 | guid: bafcbb19f74998e469561185e3f9d948 3 | timeCreated: 1698694980 4 | licenseType: Free 5 | MonoImporter: 6 | serializedVersion: 2 7 | defaultReferences: [] 8 | executionOrder: 0 9 | icon: {instanceID: 0} 10 | userData: 11 | assetBundleName: 12 | assetBundleVariant: 13 | -------------------------------------------------------------------------------- /Plugins/SpeechToText/README.txt: -------------------------------------------------------------------------------- 1 | = Speech to Text for Android & iOS (v1.1.1) = 2 | 3 | Documentation: https://github.com/yasirkula/UnitySpeechToText 4 | Example code: https://github.com/yasirkula/UnitySpeechToText#example-code 5 | E-mail: yasirkula@gmail.com -------------------------------------------------------------------------------- /Plugins/SpeechToText/README.txt.meta: -------------------------------------------------------------------------------- 1 | fileFormatVersion: 2 2 | guid: 3a69dfa27c431764faf2a189d4cc8a44 3 | timeCreated: 1563308465 4 | licenseType: Free 5 | TextScriptImporter: 6 | userData: 7 | assetBundleName: 8 | assetBundleVariant: 9 | -------------------------------------------------------------------------------- /Plugins/SpeechToText/SpeechToText.Runtime.asmdef: -------------------------------------------------------------------------------- 1 | { 2 | "name": 
"SpeechToText.Runtime" 3 | } 4 | -------------------------------------------------------------------------------- /Plugins/SpeechToText/SpeechToText.Runtime.asmdef.meta: -------------------------------------------------------------------------------- 1 | fileFormatVersion: 2 2 | guid: bec8f24081b3e1145891d23c338072e2 3 | AssemblyDefinitionImporter: 4 | externalObjects: {} 5 | userData: 6 | assetBundleName: 7 | assetBundleVariant: 8 | -------------------------------------------------------------------------------- /Plugins/SpeechToText/SpeechToText.cs: -------------------------------------------------------------------------------- 1 | using UnityEngine; 2 | #if UNITY_2018_4_OR_NEWER && !SPEECH_TO_TEXT_DISABLE_ASYNC_FUNCTIONS 3 | using System.Threading.Tasks; 4 | #endif 5 | #if UNITY_EDITOR || UNITY_ANDROID || UNITY_IOS 6 | using SpeechToTextNamespace; 7 | #endif 8 | 9 | public static class SpeechToText 10 | { 11 | public enum Permission 12 | { 13 | /// <summary> 14 | /// Permission is permanently denied. User must grant the permission from the app's Settings (see <see cref="OpenSettings"/>). 15 | /// </summary> 16 | Denied = 0, 17 | /// <summary> 18 | /// Permission is granted. 19 | /// </summary> 20 | Granted = 1, 21 | /// <summary> 22 | /// Permission isn't granted but it can be asked via <see cref="RequestPermissionAsync(PermissionCallback)"/>. 23 | /// </summary> 24 | ShouldAsk = 2 25 | }; 26 | 27 | public enum LanguageSupport 28 | { 29 | /// <summary> 30 | /// Language support couldn't be determined (Android only). 31 | /// </summary> 32 | Unknown = -1, 33 | /// <summary> 34 | /// Language is not supported. 35 | /// </summary> 36 | NotSupported = 0, 37 | /// <summary> 38 | /// Language is supported. 39 | /// </summary> 40 | Supported = 1, 41 | /// <summary> 42 | /// Happens when e.g. the queried language is "en" but the speech recognition service returns "en-US" instead of "en" (Android only). 
43 | /// </summary> 44 | LikelySupported = 2 45 | }; 46 | 47 | public delegate void PermissionCallback( Permission permission ); 48 | 49 | #region Platform Specific Elements 50 | #if !UNITY_EDITOR && UNITY_ANDROID 51 | private static AndroidJavaClass m_ajc = null; 52 | private static AndroidJavaClass AJC 53 | { 54 | get 55 | { 56 | if( m_ajc == null ) 57 | m_ajc = new AndroidJavaClass( "com.yasirkula.unity.SpeechToText" ); 58 | 59 | return m_ajc; 60 | } 61 | } 62 | 63 | private static AndroidJavaObject m_context = null; 64 | private static AndroidJavaObject Context 65 | { 66 | get 67 | { 68 | if( m_context == null ) 69 | { 70 | using( AndroidJavaObject unityClass = new AndroidJavaClass( "com.unity3d.player.UnityPlayer" ) ) 71 | m_context = unityClass.GetStatic( "currentActivity" ); 72 | } 73 | 74 | return m_context; 75 | } 76 | } 77 | 78 | private static string preferredLanguage; 79 | #elif !UNITY_EDITOR && UNITY_IOS 80 | [System.Runtime.InteropServices.DllImport( "__Internal" )] 81 | private static extern int _SpeechToText_Initialize( string language ); 82 | 83 | [System.Runtime.InteropServices.DllImport( "__Internal" )] 84 | private static extern int _SpeechToText_Start( int useFreeFormLanguageModel, int preferOfflineRecognition ); 85 | 86 | [System.Runtime.InteropServices.DllImport( "__Internal" )] 87 | private static extern void _SpeechToText_Stop(); 88 | 89 | [System.Runtime.InteropServices.DllImport( "__Internal" )] 90 | private static extern void _SpeechToText_Cancel(); 91 | 92 | [System.Runtime.InteropServices.DllImport( "__Internal" )] 93 | private static extern int _SpeechToText_IsLanguageSupported( string language ); 94 | 95 | [System.Runtime.InteropServices.DllImport( "__Internal" )] 96 | private static extern int _SpeechToText_IsServiceAvailable( int preferOfflineRecognition ); 97 | 98 | [System.Runtime.InteropServices.DllImport( "__Internal" )] 99 | private static extern int _SpeechToText_IsBusy(); 100 | 101 | [System.Runtime.InteropServices.DllImport( 
"__Internal" )] 102 | private static extern int _SpeechToText_CheckPermission(); 103 | 104 | [System.Runtime.InteropServices.DllImport( "__Internal" )] 105 | private static extern void _SpeechToText_RequestPermission(); 106 | 107 | [System.Runtime.InteropServices.DllImport( "__Internal" )] 108 | private static extern void _SpeechToText_OpenSettings(); 109 | #elif UNITY_EDITOR 110 | private static STTCallbackHelper speechSessionEmulator; 111 | private static ISpeechToTextListener speechSessionEmulatorListener; 112 | #endif 113 | #endregion 114 | 115 | [RuntimeInitializeOnLoadMethod( RuntimeInitializeLoadType.AfterSceneLoad )] 116 | private static void InitializeOnLoad() 117 | { 118 | #if !UNITY_EDITOR && UNITY_ANDROID 119 | AJC.CallStatic( "InitializeSupportedLanguages", Context ); 120 | #endif 121 | } 122 | 123 | /// 124 | /// Initializes speech recognition service with the preferred language or the default device language. 125 | /// If the preferred language isn't available, the default device language may be used by the system as fallback. 126 | /// 127 | /// Must be in the format: "en-US". 128 | /// True, if the service is initialized successfully. 129 | public static bool Initialize( string preferredLanguage = null ) 130 | { 131 | #if !UNITY_EDITOR && UNITY_ANDROID 132 | SpeechToText.preferredLanguage = preferredLanguage; 133 | return true; 134 | #elif !UNITY_EDITOR && UNITY_IOS 135 | return _SpeechToText_Initialize( preferredLanguage ?? "" ) == 1; 136 | #else 137 | return true; 138 | #endif 139 | } 140 | 141 | /// Must be in the format: "en-US". 142 | public static LanguageSupport IsLanguageSupported( string language ) 143 | { 144 | #if !UNITY_EDITOR && UNITY_ANDROID 145 | return (LanguageSupport) AJC.CallStatic( "IsLanguageSupported", language ?? "" ); 146 | #elif !UNITY_EDITOR && UNITY_IOS 147 | return (LanguageSupport) _SpeechToText_IsLanguageSupported( language ?? 
"" ); 148 | #else 149 | return LanguageSupport.Supported; 150 | #endif 151 | } 152 | 153 | /// 154 | /// Checks if speech recognition service is available. Must be called AFTER . 155 | /// 156 | /// 157 | /// If true, checks if on-device speech recognition is supported. 158 | /// On Android, it isn't guaranteed that offline speech recognition will actually be used, even if this function returns true. 159 | /// Also, there is currently no way to check if the target language is actually downloaded on Android (if not, this function may 160 | /// return true but the speech recognition session will fail). So this function isn't reliable for offline recognition on Android. 161 | /// 162 | public static bool IsServiceAvailable( bool preferOfflineRecognition = false ) 163 | { 164 | #if !UNITY_EDITOR && UNITY_ANDROID 165 | return AJC.CallStatic( "IsServiceAvailable", Context, preferOfflineRecognition ); 166 | #elif !UNITY_EDITOR && UNITY_IOS 167 | return _SpeechToText_IsServiceAvailable( preferOfflineRecognition ? 1 : 0 ) == 1; 168 | #else 169 | return true; 170 | #endif 171 | } 172 | 173 | /// True, if a speech recognition session is currently in progress. Another session can't be started during that time. 174 | public static bool IsBusy() 175 | { 176 | #if !UNITY_EDITOR && UNITY_ANDROID 177 | return AJC.CallStatic( "IsBusy" ); 178 | #elif !UNITY_EDITOR && UNITY_IOS 179 | return _SpeechToText_IsBusy() == 1; 180 | #elif UNITY_EDITOR 181 | return speechSessionEmulator != null; 182 | #else 183 | return false; 184 | #endif 185 | } 186 | 187 | #region Runtime Permissions 188 | /// True, if we have permission to start a speech recognition session. 
189 | public static bool CheckPermission() 190 | { 191 | #if !UNITY_EDITOR && UNITY_ANDROID 192 | return AJC.CallStatic( "CheckPermission", Context ); 193 | #elif !UNITY_EDITOR && UNITY_IOS 194 | return _SpeechToText_CheckPermission() == 1; 195 | #else 196 | return true; 197 | #endif 198 | } 199 | 200 | /// 201 | /// Requests the necessary permission for speech recognition. Without this permission, will fail. 202 | /// 203 | public static void RequestPermissionAsync( PermissionCallback callback ) 204 | { 205 | #if !UNITY_EDITOR && UNITY_ANDROID 206 | STTPermissionCallbackAsyncAndroid nativeCallback = new STTPermissionCallbackAsyncAndroid( callback ); 207 | AJC.CallStatic( "RequestPermission", Context, nativeCallback ); 208 | #elif !UNITY_EDITOR && UNITY_IOS 209 | STTPermissionCallbackiOS.Initialize( callback ); 210 | _SpeechToText_RequestPermission(); 211 | #else 212 | callback( Permission.Granted ); 213 | #endif 214 | } 215 | 216 | #if UNITY_2018_4_OR_NEWER && !SPEECH_TO_TEXT_DISABLE_ASYNC_FUNCTIONS 217 | /// 218 | public static Task RequestPermissionAsync() 219 | { 220 | TaskCompletionSource tcs = new TaskCompletionSource(); 221 | RequestPermissionAsync( ( permission ) => tcs.SetResult( permission ) ); 222 | return tcs.Task; 223 | } 224 | #endif 225 | 226 | /// 227 | /// Opens the app's Settings from where the user can grant the necessary permissions manually 228 | /// (Android: Record Audio, iOS: Speech Recognition and Microphone). 229 | /// 230 | public static void OpenSettings() 231 | { 232 | #if !UNITY_EDITOR && UNITY_ANDROID 233 | AJC.CallStatic( "OpenSettings", Context, "" ); 234 | #elif !UNITY_EDITOR && UNITY_IOS 235 | _SpeechToText_OpenSettings(); 236 | #endif 237 | } 238 | 239 | /// 240 | /// Opens the Google app's Settings from where the user can grant the Microphone permission to it on Android. 241 | /// Can be called if returns error code 9. 
242 | /// 243 | public static void OpenGoogleAppSettings() 244 | { 245 | #if !UNITY_EDITOR && UNITY_ANDROID 246 | AJC.CallStatic( "OpenSettings", Context, "com.google.android.googlequicksearchbox" ); 247 | #endif 248 | } 249 | #endregion 250 | 251 | #region Speech-to-text Functions 252 | /// 253 | /// Attempts to start a speech recognition session. Must be called AFTER . 254 | /// 255 | /// The listener whose callback functions will be invoked. 256 | /// 257 | /// If true, free-form/dictation language model will be used (more suited for general purpose speech). 258 | /// Otherwise, search-focused language model will be used (specialized in search terms). 259 | /// 260 | /// 261 | /// If true and the active language supports on-device speech recognition, it'll be used. 262 | /// Note that offline speech recognition may not be very accurate. Requires Android 23+ or iOS 13+. 263 | /// 264 | /// True, if session is created successfully. If permission isn't granted yet, returns false (see ). 265 | public static bool Start( ISpeechToTextListener listener, bool useFreeFormLanguageModel = true, bool preferOfflineRecognition = false ) 266 | { 267 | #if !UNITY_EDITOR && UNITY_ANDROID 268 | STTInteractionCallbackAndroid nativeCallback = new STTInteractionCallbackAndroid( listener ); 269 | return AJC.CallStatic( "Start", Context, nativeCallback, preferredLanguage ?? "", useFreeFormLanguageModel, true, preferOfflineRecognition ); 270 | #elif !UNITY_EDITOR && UNITY_IOS 271 | if( _SpeechToText_Start( useFreeFormLanguageModel ? 1 : 0, preferOfflineRecognition ? 
1 : 0 ) == 1 ) 272 | { 273 | STTInteractionCallbackiOS.Initialize( listener ); 274 | return true; 275 | } 276 | 277 | return false; 278 | #elif UNITY_EDITOR 279 | speechSessionEmulatorListener = listener; 280 | speechSessionEmulator = new GameObject( "SpeechToText Emulator" ).AddComponent(); 281 | speechSessionEmulator.StartCoroutine( EmulateSpeechOnEditor() ); 282 | 283 | return true; 284 | #else 285 | return true; 286 | #endif 287 | } 288 | 289 | /// 290 | /// If a speech recognition session is in progress, stops it manually. Normally, a session is automatically stopped after the user stops speaking for a short while. 291 | /// Note that on some Android versions, this call may have no effect (welcome to Android ecosystem): https://issuetracker.google.com/issues/158198432 292 | /// 293 | public static void ForceStop() 294 | { 295 | #if !UNITY_EDITOR && UNITY_ANDROID 296 | AJC.CallStatic( "Stop", Context ); 297 | #elif !UNITY_EDITOR && UNITY_IOS 298 | _SpeechToText_Stop(); 299 | #elif UNITY_EDITOR 300 | StopEmulateSpeechOnEditor( "Hello world", null ); 301 | #endif 302 | } 303 | 304 | /// 305 | /// If a speech recognition session is in progress, cancels it. Canceled sessions return an error code of 0 in their callback. 
306 | /// 307 | public static void Cancel() 308 | { 309 | #if !UNITY_EDITOR && UNITY_ANDROID 310 | AJC.CallStatic( "Cancel", Context ); 311 | #elif !UNITY_EDITOR && UNITY_IOS 312 | _SpeechToText_Cancel(); 313 | #elif UNITY_EDITOR 314 | StopEmulateSpeechOnEditor( null, 0 ); 315 | #endif 316 | } 317 | 318 | #if UNITY_EDITOR 319 | private static System.Collections.IEnumerator EmulateSpeechOnEditor() 320 | { 321 | try 322 | { 323 | speechSessionEmulator.StartCoroutine( EmulateVoiceLevelChangeOnEditor() ); 324 | 325 | yield return new WaitForSecondsRealtime( 0.25f ); 326 | speechSessionEmulatorListener.OnReadyForSpeech(); 327 | yield return new WaitForSecondsRealtime( 0.5f ); 328 | speechSessionEmulatorListener.OnBeginningOfSpeech(); 329 | yield return new WaitForSecondsRealtime( 0.33f ); 330 | speechSessionEmulatorListener.OnPartialResultReceived( "Hello" ); 331 | yield return new WaitForSecondsRealtime( 0.33f ); 332 | speechSessionEmulatorListener.OnPartialResultReceived( "Hello world" ); 333 | yield return new WaitForSecondsRealtime( 0.5f ); 334 | } 335 | finally 336 | { 337 | StopEmulateSpeechOnEditor( "Hello world", null ); 338 | } 339 | } 340 | 341 | private static System.Collections.IEnumerator EmulateVoiceLevelChangeOnEditor() 342 | { 343 | yield return new WaitForSecondsRealtime( 0.25f ); 344 | 345 | while( true ) 346 | { 347 | speechSessionEmulatorListener.OnVoiceLevelChanged( Mathf.Clamp01( Mathf.PerlinNoise( Time.unscaledTime * 4f, Time.unscaledTime * -2f ) ) ); 348 | 349 | for( int i = 0; i < 3; i++ ) 350 | yield return null; 351 | } 352 | } 353 | 354 | private static void StopEmulateSpeechOnEditor( string spokenText, int? 
errorCode ) 355 | { 356 | // Ends the emulated Editor session: destroys the emulator object and reports the result to the session's listener. 357 | // Does nothing if no emulated session is in progress. 358 | if( speechSessionEmulator == null ) 359 | return; 360 | 361 | // Capture the listener and clear the session state BEFORE notifying it: the listener may call Start() from within 362 | // OnResultReceived, and resetting the fields afterwards would wipe out (and leak) that newly started session. 363 | ISpeechToTextListener listener = speechSessionEmulatorListener; 364 | Object.DestroyImmediate( speechSessionEmulator.gameObject ); 365 | speechSessionEmulator = null; 366 | speechSessionEmulatorListener = null; 367 | 368 | if( listener != null ) 369 | listener.OnResultReceived( spokenText, errorCode ); 370 | } 371 | #endif 372 | #endregion 373 | } -------------------------------------------------------------------------------- /Plugins/SpeechToText/SpeechToText.cs.meta: -------------------------------------------------------------------------------- 1 | fileFormatVersion: 2 2 | guid: cac27a1df7da40a4391b171fdf482662 3 | timeCreated: 1694881090 4 | licenseType: Free 5 | MonoImporter: 6 | serializedVersion: 2 7 | defaultReferences: [] 8 | executionOrder: 0 9 | icon: {instanceID: 0} 10 | userData: 11 | assetBundleName: 12 | assetBundleVariant: 13 | -------------------------------------------------------------------------------- /Plugins/SpeechToText/iOS.meta: -------------------------------------------------------------------------------- 1 | fileFormatVersion: 2 2 | guid: f190315c6dd85214890b514bb2c598f5 3 | folderAsset: yes 4 | timeCreated: 1697459600 5 | licenseType: Free 6 | DefaultImporter: 7 | userData: 8 | assetBundleName: 9 | assetBundleVariant: 10 | -------------------------------------------------------------------------------- /Plugins/SpeechToText/iOS/STTInteractionCallbackiOS.cs: -------------------------------------------------------------------------------- 1 | #if UNITY_EDITOR || UNITY_IOS 2 | using System.Collections; 3 | using UnityEngine; 4 | 5 | namespace SpeechToTextNamespace 6 | { 7 | public class STTInteractionCallbackiOS : MonoBehaviour 8 | { 9 | private static STTInteractionCallbackiOS instance; 10 | private ISpeechToTextListener listener; 11 | private bool beginningOfSpeechInvoked; 12 | private Coroutine voiceLevelChangeDetectionCoroutine; 13 | 14 | #if !UNITY_EDITOR && UNITY_IOS 15 | [System.Runtime.InteropServices.DllImport( 
"__Internal" )] 16 | private static extern float _SpeechToText_GetAudioRmsdB(); 17 | #endif 18 | 19 | public static void Initialize( ISpeechToTextListener listener ) 20 | { 21 | if( instance == null ) 22 | { 23 | instance = new GameObject( "STTInteractionCallbackiOS" ).AddComponent(); 24 | DontDestroyOnLoad( instance.gameObject ); 25 | } 26 | else if( instance.listener != null ) 27 | instance.listener.OnResultReceived( null, 8 ); 28 | 29 | instance.listener = listener; 30 | instance.beginningOfSpeechInvoked = false; 31 | 32 | if( instance.voiceLevelChangeDetectionCoroutine == null ) 33 | instance.voiceLevelChangeDetectionCoroutine = instance.StartCoroutine( instance.VoiceLevelChangeDetectionCoroutine() ); 34 | 35 | listener.OnReadyForSpeech(); 36 | } 37 | 38 | private IEnumerator VoiceLevelChangeDetectionCoroutine() 39 | { 40 | float lastRmsDB = -1f; 41 | while( listener != null ) 42 | { 43 | #if !UNITY_EDITOR && UNITY_IOS 44 | float rmsDB = _SpeechToText_GetAudioRmsdB(); 45 | #else 46 | float rmsDB = 0f; 47 | #endif 48 | if( rmsDB != lastRmsDB ) 49 | { 50 | lastRmsDB = rmsDB; 51 | OnVoiceLevelChanged( rmsDB ); 52 | } 53 | 54 | yield return null; 55 | } 56 | 57 | voiceLevelChangeDetectionCoroutine = null; 58 | } 59 | 60 | [UnityEngine.Scripting.Preserve] 61 | /// Root Mean Square (RMS) dB between range [0, 160] (0: quiet, 160: loud) 62 | public void OnVoiceLevelChanged( float rmsdB ) 63 | { 64 | // Convert [130, 150] dB range to [0, 1] 65 | if( listener != null ) 66 | listener.OnVoiceLevelChanged( Mathf.Clamp01( ( rmsdB - 130f ) / 20f ) ); 67 | } 68 | 69 | [UnityEngine.Scripting.Preserve] 70 | public void OnPartialResultReceived( string spokenText ) 71 | { 72 | if( listener != null ) 73 | { 74 | // Potentially more accurate way of determining the beginning of speech: https://stackoverflow.com/a/46325305 75 | if( !beginningOfSpeechInvoked ) 76 | { 77 | beginningOfSpeechInvoked = true; 78 | listener.OnBeginningOfSpeech(); 79 | } 80 | 81 | if( !string.IsNullOrEmpty( 
spokenText ) ) 82 | listener.OnPartialResultReceived( spokenText ); 83 | } 84 | } 85 | 86 | /* Final transcription from native code: detaches the listener (which ends the voice level polling loop) and reports the text, or null if it is empty, with a null error code. */ [UnityEngine.Scripting.Preserve] 87 | public void OnResultReceived( string spokenText ) 88 | { 89 | ISpeechToTextListener _listener = listener; 90 | listener = null; 91 | 92 | if( _listener != null ) 93 | _listener.OnResultReceived( !string.IsNullOrEmpty( spokenText ) ? spokenText : null, null ); 94 | } 95 | 96 | /* Error from native code: detaches the listener and reports the error code parsed from the message, or -1 if the message isn't an integer. */ [UnityEngine.Scripting.Preserve] 97 | public void OnError( string error ) 98 | { 99 | ISpeechToTextListener _listener = listener; 100 | listener = null; 101 | 102 | if( _listener != null ) 103 | { 104 | int errorCode; 105 | if( !int.TryParse( error, out errorCode ) ) 106 | errorCode = -1; 107 | 108 | _listener.OnResultReceived( null, errorCode ); 109 | } 110 | } 111 | } 112 | } 113 | #endif -------------------------------------------------------------------------------- /Plugins/SpeechToText/iOS/STTInteractionCallbackiOS.cs.meta: -------------------------------------------------------------------------------- 1 | fileFormatVersion: 2 2 | guid: ed03c4c9c394f1a4bbc7c5e2eb9056bb 3 | timeCreated: 1519060539 4 | licenseType: Free 5 | MonoImporter: 6 | serializedVersion: 2 7 | defaultReferences: [] 8 | executionOrder: 0 9 | icon: {instanceID: 0} 10 | userData: 11 | assetBundleName: 12 | assetBundleVariant: 13 | -------------------------------------------------------------------------------- /Plugins/SpeechToText/iOS/STTPermissionCallbackiOS.cs: -------------------------------------------------------------------------------- 1 | #if UNITY_EDITOR || UNITY_IOS 2 | using UnityEngine; 3 | 4 | namespace SpeechToTextNamespace 5 | { 6 | // Receives the permission result that the native iOS plugin sends to the "STTPermissionCallbackiOS" GameObject and forwards it to the pending callback 7 | public class STTPermissionCallbackiOS : MonoBehaviour 8 | { 9 | private static STTPermissionCallbackiOS instance; 10 | private SpeechToText.PermissionCallback callback; 11 | 12 | // Registers the callback that will receive the next permission result. A callback that is still pending from an earlier request is completed with Permission.ShouldAsk before being replaced 13 | public static void Initialize( SpeechToText.PermissionCallback callback ) 14 | { 15 | if( instance == null ) 16 | { 17 | instance = new GameObject( 
"STTPermissionCallbackiOS" ).AddComponent<STTPermissionCallbackiOS>(); 18 | DontDestroyOnLoad( instance.gameObject ); 19 | } 20 | else if( instance.callback != null ) 21 | instance.callback( SpeechToText.Permission.ShouldAsk ); 22 | 23 | instance.callback = callback; 24 | } 25 | 26 | // Invoked by native code; message is the permission value formatted as an integer 27 | [UnityEngine.Scripting.Preserve] 28 | public void OnPermissionRequested( string message ) 29 | { 30 | // Clear the field before invoking so that the callback runs at most once 31 | SpeechToText.PermissionCallback _callback = callback; 32 | callback = null; 33 | 34 | if( _callback != null ) 35 | { 36 | // int.Parse would throw on a malformed message after the callback was already cleared, leaving the requester without an answer. Fall back to 0, the value native code sends when permission isn't granted 37 | int permission; 38 | if( !int.TryParse( message, out permission ) ) 39 | permission = 0; 40 | 41 | _callback( (SpeechToText.Permission) permission ); 42 | } 43 | } 44 | } 45 | } 46 | #endif -------------------------------------------------------------------------------- /Plugins/SpeechToText/iOS/STTPermissionCallbackiOS.cs.meta: -------------------------------------------------------------------------------- 1 | fileFormatVersion: 2 2 | guid: e3abdba7e0d88ae41903930fae2c0584 3 | timeCreated: 1519060539 4 | licenseType: Free 5 | MonoImporter: 6 | serializedVersion: 2 7 | defaultReferences: [] 8 | executionOrder: 0 9 | icon: {instanceID: 0} 10 | userData: 11 | assetBundleName: 12 | assetBundleVariant: 13 | -------------------------------------------------------------------------------- /Plugins/SpeechToText/iOS/SpeechToText.mm: -------------------------------------------------------------------------------- 1 | #import <Foundation/Foundation.h> 2 | #import <Speech/Speech.h> 3 | #import <AVFoundation/AVFoundation.h> 4 | #import <Accelerate/Accelerate.h> 5 | 6 | @interface USpeechToText:NSObject 7 | + (int)initialize:(NSString *)language; 8 | + (int)start:(BOOL)useFreeFormLanguageModel preferOfflineRecognition:(BOOL)preferOfflineRecognition; 9 | + (void)stop; 10 | + (void)cancel:(BOOL)isCanceledByUser; 11 | + (int)isLanguageSupported:(NSString *)language; 12 | + (int)isServiceAvailable:(BOOL)preferOfflineRecognition; 13 | + (int)isBusy; 14 | + (float)getAudioRmsdB; 15 | + (int)checkPermission; 16 | + (int)requestPermission; 17 | + (void)openSettings; 18 | @end 19 | 20 | // Credit: https://developer.apple.com/documentation/speech/recognizing_speech_in_live_audio?language=objc 21 | 
@implementation USpeechToText 22 | 23 | static NSString *speechRecognizerLanguage; /* locale identifier that speechRecognizer was last created for */ 24 | static SFSpeechRecognizer *speechRecognizer; 25 | static SFSpeechAudioBufferRecognitionRequest *recognitionRequest; /* non-nil while a session is in progress (this is what isBusy tests) */ 26 | static SFSpeechRecognitionTask *recognitionTask; 27 | static int recognitionTaskErrorCode; /* code sent to Unity's OnError if the current task fails: 5 when a session starts, 6 after a timeout, 0 when canceled by the user */ 28 | static NSTimer *recognitionTimeoutTimer; 29 | static AVAudioEngine *audioEngine; 30 | static float audioRmsdB; /* latest microphone level computed by the audio tap */ 31 | 32 | /* Creates the SFSpeechRecognizer for the given locale identifier, or reuses the existing one when the identifier is unchanged. A nil or empty identifier selects the default-initialized recognizer. Returns 1 if a recognizer exists afterwards; returns 0 if a session is in progress, the OS is older than iOS 10, or the recognizer couldn't be created. */ + (int)initialize:(NSString *)language 33 | { 34 | if( @available(iOS 10.0, *) ) 35 | { 36 | if( [self isBusy] == 1 ) 37 | return 0; 38 | } 39 | else 40 | return 0; 41 | 42 | if( speechRecognizerLanguage == nil || ![speechRecognizerLanguage isEqualToString:language] ) 43 | { 44 | speechRecognizerLanguage = language; 45 | 46 | [self cancel:NO]; 47 | 48 | if( language == nil || [language length] == 0 ) 49 | speechRecognizer = [[SFSpeechRecognizer alloc] init]; 50 | else 51 | speechRecognizer = [[SFSpeechRecognizer alloc] initWithLocale:[NSLocale localeWithLocaleIdentifier:language]]; 52 | } 53 | 54 | return ( speechRecognizer != nil ) ? 
1 : 0; 55 | } 56 | 57 | /* Starts a recognition session: switches the shared AVAudioSession to recording, creates the recognition request and task, taps the microphone input to feed the request and to measure the voice level, and arms the timeout timer. Returns 1 on success; returns 0 if the service is unavailable, a session is already running, permission isn't granted or the audio pipeline couldn't be set up. Results are delivered to Unity via UnitySendMessage. */ + (int)start:(BOOL)useFreeFormLanguageModel preferOfflineRecognition:(BOOL)preferOfflineRecognition 58 | { 59 | if( [self isServiceAvailable:preferOfflineRecognition] == 0 || [self isBusy] == 1 || [self requestPermission] != 1 ) 60 | return 0; 61 | 62 | // Cancel the previous task if it's running 63 | [self cancel:NO]; 64 | 65 | // Cache the current AVAudioSession settings so that they can be restored after the microphone session 66 | AVAudioSessionCategory unityAudioSessionCategory = [[AVAudioSession sharedInstance] category]; 67 | NSUInteger unityAudioSessionCategoryOptions = [[AVAudioSession sharedInstance] categoryOptions]; 68 | AVAudioSessionMode unityAudioSessionMode = [[AVAudioSession sharedInstance] mode]; 69 | 70 | AVAudioSession *audioSession = [AVAudioSession sharedInstance]; 71 | [audioSession setCategory:AVAudioSessionCategoryRecord mode:AVAudioSessionModeMeasurement options:AVAudioSessionCategoryOptionDuckOthers error:nil]; 72 | [audioSession setActive:YES withOptions:AVAudioSessionSetActiveOptionNotifyOthersOnDeactivation error:nil]; 73 | 74 | if( audioEngine == nil ) 75 | audioEngine = [[AVAudioEngine alloc] init]; 76 | 77 | AVAudioInputNode *inputNode = audioEngine.inputNode; 78 | if( inputNode == nil ) 79 | { 80 | NSLog( @"Couldn't get AVAudioInputNode for speech recognition" ); 81 | return 0; /* NOTE(review): this return and the one below leave the audio session in the Record category; the cached Unity settings are only restored inside the result handler - confirm whether a restore is needed here */ 82 | } 83 | 84 | audioRmsdB = 0; 85 | 86 | recognitionRequest = [[SFSpeechAudioBufferRecognitionRequest alloc] init]; 87 | if( recognitionRequest == nil ) 88 | { 89 | NSLog( @"Couldn't create an instance of SFSpeechAudioBufferRecognitionRequest for speech recognition" ); 90 | return 0; 91 | } 92 | 93 | speechRecognizer.defaultTaskHint = useFreeFormLanguageModel ? 
SFSpeechRecognitionTaskHintDictation : SFSpeechRecognitionTaskHintSearch; 94 | recognitionRequest.shouldReportPartialResults = YES; 95 | #if __IPHONE_OS_VERSION_MAX_ALLOWED >= 130000 96 | if( @available(iOS 13.0, *) ) 97 | { 98 | if( preferOfflineRecognition ) 99 | recognitionRequest.requiresOnDeviceRecognition = YES; 100 | } 101 | #endif 102 | 103 | recognitionTaskErrorCode = 5; /* default error code; overwritten with 6 on timeout and 0 on user cancel */ 104 | recognitionTask = [speechRecognizer recognitionTaskWithRequest:recognitionRequest resultHandler:^( SFSpeechRecognitionResult *result, NSError *error ) 105 | { 106 | BOOL isFinal = NO; 107 | if( result != nil ) 108 | { 109 | isFinal = result.isFinal; 110 | UnitySendMessage( "STTInteractionCallbackiOS", isFinal ? "OnResultReceived" : "OnPartialResultReceived", [self getCString:result.bestTranscription.formattedString] ); 111 | } 112 | 113 | if( recognitionTimeoutTimer != nil ) 114 | { 115 | [recognitionTimeoutTimer invalidate]; 116 | recognitionTimeoutTimer = nil; 117 | } 118 | 119 | if( error != nil || isFinal ) 120 | { 121 | if( error != nil ) 122 | { 123 | NSLog( @"Error during speech recognition: %@", error ); 124 | 125 | if( !isFinal ) 126 | UnitySendMessage( "STTInteractionCallbackiOS", "OnError", [self getCString:[NSString stringWithFormat:@"%d", recognitionTaskErrorCode]] ); 127 | } 128 | 129 | [audioEngine stop]; 130 | [inputNode removeTapOnBus:0]; 131 | 132 | recognitionRequest = nil; 133 | recognitionTask = nil; 134 | 135 | // Try restoring AVAudioSession settings back to their initial values 136 | NSError *audioSessionError = nil; /* distinct name so that the handler's 'error' parameter isn't shadowed */ 137 | if( ![[AVAudioSession sharedInstance] setCategory:unityAudioSessionCategory mode:unityAudioSessionMode options:unityAudioSessionCategoryOptions error:&audioSessionError] ) 138 | { 139 | NSLog( @"SpeechToText error (1) setting audio session category back to %@ with mode %@ and options %lu: %@", unityAudioSessionCategory, unityAudioSessionMode, (unsigned long) unityAudioSessionCategoryOptions, audioSessionError ); 140 | 141 | // It somehow failed. 
Try restoring AVAudioSession settings back to Unity's default values 142 | if( ![[AVAudioSession sharedInstance] setCategory:AVAudioSessionCategoryAmbient mode:AVAudioSessionModeDefault options:1 error:&audioSessionError] ) 143 | NSLog( @"SpeechToText error (2) setting audio session category back to %@ with mode %@ and options %lu: %@", AVAudioSessionCategoryAmbient, AVAudioSessionModeDefault, (unsigned long) 1, audioSessionError ); /* log the values that were actually attempted, not the cached ones */ 144 | } 145 | } 146 | else 147 | { 148 | // Restart the timeout timer 149 | recognitionTimeoutTimer = [NSTimer scheduledTimerWithTimeInterval:2.0 target:self selector:@selector(onSpeechTimedOut:) userInfo:nil repeats:NO]; 150 | } 151 | }]; 152 | 153 | [inputNode installTapOnBus:0 bufferSize:1024 format:[inputNode outputFormatForBus:0] block:^( AVAudioPCMBuffer *buffer, AVAudioTime *when ) 154 | { 155 | if( [buffer floatChannelData] != nil && buffer.format.channelCount > 0 ) 156 | { 157 | float voiceLevel = 0.0; 158 | vDSP_rmsqv( (float*) buffer.floatChannelData[0], 1, &voiceLevel, vDSP_Length( buffer.frameLength ) ); 159 | audioRmsdB = fmaxf( 0, 10 * log10f( voiceLevel ) + 160 ); // Convert voice level to dB in range [0, 160]; clamped because a silent buffer yields -inf 160 | } 161 | else 162 | audioRmsdB = 0; 163 | 164 | [recognitionRequest appendAudioPCMBuffer:buffer]; 165 | }]; 166 | 167 | NSError *audioEngineError; 168 | [audioEngine prepare]; 169 | if( ![audioEngine startAndReturnError:&audioEngineError] ) 170 | { 171 | if( audioEngineError != nil ) 172 | NSLog( @"Couldn't start AudioEngine for speech recognition: %@", audioEngineError ); 173 | else 174 | NSLog( @"Couldn't start AudioEngine for speech recognition: UnknownError" ); 175 | 176 | [recognitionTask cancel]; 177 | return 0; 178 | } 179 | 180 | recognitionTimeoutTimer = [NSTimer scheduledTimerWithTimeInterval:5.0 target:self selector:@selector(onSpeechTimedOut:) userInfo:nil repeats:NO]; /* initial timeout is 5 seconds; the result handler re-arms it with 2 seconds after each partial result */ 181 | 182 | return 1; 183 | } 184 | 185 | + (void)onSpeechTimedOut:(NSTimer *)timer 186 | { 187 | recognitionTimeoutTimer = nil; 188 | 
recognitionTaskErrorCode = 6; 189 | 190 | [self stop]; 191 | } 192 | 193 | /* Stops capturing audio and marks the end of the request's audio. Does nothing if the audio engine isn't running. */ + (void)stop 194 | { 195 | if( @available(iOS 10.0, *) ) 196 | { 197 | if( audioEngine != nil && audioEngine.isRunning ) 198 | { 199 | [audioEngine stop]; 200 | [recognitionRequest endAudio]; 201 | } 202 | } 203 | } 204 | 205 | /* Cancels the running recognition task, if any. When canceled by the user, the error code reported for the task becomes 0. */ + (void)cancel:(BOOL)isCanceledByUser 206 | { 207 | if( @available(iOS 10.0, *) ) 208 | { 209 | if( recognitionTask != nil ) 210 | { 211 | if( isCanceledByUser ) 212 | recognitionTaskErrorCode = 0; 213 | 214 | [recognitionTask cancel]; 215 | recognitionTask = nil; 216 | } 217 | } 218 | } 219 | 220 | /* Returns 1 if the locale identifier is among SFSpeechRecognizer's supported locales, 0 otherwise (including a nil identifier and iOS versions below 10). */ + (int)isLanguageSupported:(NSString *)language 221 | { 222 | if( @available(iOS 10.0, *) ) 223 | return ( language != nil && [[SFSpeechRecognizer supportedLocales] containsObject:[NSLocale localeWithLocaleIdentifier:language]] ) ? 1 : 0; /* nil must not reach localeWithLocaleIdentifier: */ 224 | 225 | return 0; 226 | } 227 | 228 | /* Returns 1 if a recognizer exists and reports itself available; when offline recognition is preferred, it must also support on-device recognition (iOS 13+). Returns 0 otherwise. */ + (int)isServiceAvailable:(BOOL)preferOfflineRecognition 229 | { 230 | if( @available(iOS 10.0, *) ) 231 | { 232 | if( speechRecognizer != nil && [speechRecognizer isAvailable] ) 233 | { 234 | if( !preferOfflineRecognition ) 235 | return 1; 236 | #if __IPHONE_OS_VERSION_MAX_ALLOWED >= 130000 237 | else if( @available(iOS 13.0, *) ) 238 | return [speechRecognizer supportsOnDeviceRecognition] ? 1 : 0; 239 | #endif 240 | } 241 | } 242 | 243 | return 0; 244 | } 245 | 246 | /* Returns 1 while a recognition request exists, 0 otherwise. */ + (int)isBusy 247 | { 248 | if( @available(iOS 10.0, *) ) 249 | return ( recognitionRequest != nil ) ? 
1 : 0; 250 | 251 | return 0; 252 | } 253 | 254 | /* Returns the most recent voice level stored by the audio tap. */ + (float)getAudioRmsdB 255 | { 256 | return audioRmsdB; 257 | } 258 | 259 | /* Combines the speech recognition and microphone permissions: 1 if both are granted, 2 if neither is denied but at least one is still undetermined, 0 otherwise. */ + (int)checkPermission 260 | { 261 | if( @available(iOS 10.0, *) ) 262 | { 263 | int speechRecognitionPermission = [self checkSpeechRecognitionPermission]; 264 | int microphonePermission = [self checkMicrophonePermission]; 265 | if( speechRecognitionPermission == 1 && microphonePermission == 1 ) 266 | return 1; 267 | else if( speechRecognitionPermission != 0 && microphonePermission != 0 ) 268 | return 2; 269 | } 270 | 271 | return 0; 272 | } 273 | 274 | /* Maps the speech recognition authorization status to 1 (authorized), 2 (not determined) or 0 (anything else). */ + (int)checkSpeechRecognitionPermission 275 | { 276 | SFSpeechRecognizerAuthorizationStatus status = [SFSpeechRecognizer authorizationStatus]; 277 | if( status == SFSpeechRecognizerAuthorizationStatusAuthorized ) 278 | return 1; 279 | else if( status == SFSpeechRecognizerAuthorizationStatusNotDetermined ) 280 | return 2; 281 | else 282 | return 0; 283 | } 284 | 285 | /* Maps the microphone record permission to 1 (granted), 2 (undetermined) or 0 (anything else); uses AVAudioApplication on iOS 17+ and AVAudioSession otherwise. */ + (int)checkMicrophonePermission 286 | { 287 | #if __IPHONE_OS_VERSION_MAX_ALLOWED >= 170000 288 | if( @available(iOS 17.0, *) ) 289 | { 290 | AVAudioApplicationRecordPermission status = [[AVAudioApplication sharedInstance] recordPermission]; 291 | if( status == AVAudioApplicationRecordPermissionGranted ) 292 | return 1; 293 | else if( status == AVAudioApplicationRecordPermissionUndetermined ) 294 | return 2; 295 | } 296 | else 297 | #endif 298 | { 299 | AVAudioSessionRecordPermission status = [[AVAudioSession sharedInstance] recordPermission]; 300 | if( status == AVAudioSessionRecordPermissionGranted ) 301 | return 1; 302 | else if( status == AVAudioSessionRecordPermissionUndetermined ) 303 | return 2; 304 | } 305 | 306 | return 0; 307 | } 308 | 309 | /* Reports the permission state to Unity's STTPermissionCallbackiOS. If the state is already decided, it is sent immediately and returned; otherwise both permissions are requested asynchronously and -1 is returned. */ + (int)requestPermission 310 | { 311 | int currentPermission = [self checkPermission]; 312 | if( currentPermission != 2 ) 313 | { 314 | UnitySendMessage( "STTPermissionCallbackiOS", "OnPermissionRequested", [self getCString:[NSString stringWithFormat:@"%d", currentPermission]] ); 
315 | return currentPermission; 316 | } 317 | 318 | // Request Speech Recognition permission first 319 | [SFSpeechRecognizer requestAuthorization:^( SFSpeechRecognizerAuthorizationStatus status ) 320 | { 321 | // Request Microphone permission immediately afterwards 322 | #if __IPHONE_OS_VERSION_MAX_ALLOWED >= 170000 323 | // requestRecordPermissionWithCompletionHandler is a class method of AVAudioApplication, so it is sent to the class rather than to sharedInstance 324 | if( @available(iOS 17.0, *) ) 325 | { 326 | [AVAudioApplication requestRecordPermissionWithCompletionHandler:^( BOOL granted ) 327 | { 328 | UnitySendMessage( "STTPermissionCallbackiOS", "OnPermissionRequested", ( granted && status == SFSpeechRecognizerAuthorizationStatusAuthorized ) ? "1" : "0" ); 329 | }]; 330 | } 331 | else 332 | #endif 333 | { 334 | [[AVAudioSession sharedInstance] requestRecordPermission:^( BOOL granted ) 335 | { 336 | UnitySendMessage( "STTPermissionCallbackiOS", "OnPermissionRequested", ( granted && status == SFSpeechRecognizerAuthorizationStatusAuthorized ) ? 
"1" : "0" ); 337 | }]; 338 | } 339 | }]; 340 | 341 | return -1; /* the result will arrive asynchronously via OnPermissionRequested */ 342 | } 343 | 344 | #pragma clang diagnostic push 345 | #pragma clang diagnostic ignored "-Wdeprecated-declarations" 346 | /* Opens this app's page in the Settings app. */ + (void)openSettings 347 | { 348 | #if __IPHONE_OS_VERSION_MAX_ALLOWED >= 100000 349 | if( @available(iOS 10.0, *) ) 350 | [[UIApplication sharedApplication] openURL:[NSURL URLWithString:UIApplicationOpenSettingsURLString] options:@{} completionHandler:nil]; 351 | else 352 | #endif 353 | [[UIApplication sharedApplication] openURL:[NSURL URLWithString:UIApplicationOpenSettingsURLString]]; 354 | } 355 | #pragma clang diagnostic pop 356 | 357 | // Credit: https://stackoverflow.com/a/37052118/2373034 358 | /* Returns a malloc'ed UTF-8 copy of source (an empty string for nil). NOTE(review): every caller in this file passes the result to UnitySendMessage and never frees it; if UnitySendMessage copies its arguments (the permission callbacks pass string literals to it), this buffer leaks on each call - confirm. */ + (char *)getCString:(NSString *)source 359 | { 360 | if( source == nil ) 361 | source = @""; 362 | 363 | const char *sourceUTF8 = [source UTF8String]; 364 | char *result = (char*) malloc( strlen( sourceUTF8 ) + 1 ); 365 | strcpy( result, sourceUTF8 ); 366 | 367 | return result; 368 | } 369 | 370 | @end 371 | 372 | /* C entry point for initialize:. A NULL language is forwarded as nil (default recognizer) because stringWithUTF8String: must not be given NULL. */ extern "C" int _SpeechToText_Initialize( const char* language ) 373 | { 374 | return [USpeechToText initialize:( language != NULL ? [NSString stringWithUTF8String:language] : nil )]; 375 | } 376 | 377 | /* C entry point for start:preferOfflineRecognition:; a flag is true only when it equals 1. */ extern "C" int _SpeechToText_Start( int useFreeFormLanguageModel, int preferOfflineRecognition ) 378 | { 379 | return [USpeechToText start:( useFreeFormLanguageModel == 1 ) preferOfflineRecognition:( preferOfflineRecognition == 1 )]; 380 | } 381 | 382 | extern "C" void _SpeechToText_Stop() 383 | { 384 | [USpeechToText stop]; 385 | } 386 | 387 | /* Always counts as a user-initiated cancel. */ extern "C" void _SpeechToText_Cancel() 388 | { 389 | [USpeechToText cancel:YES]; 390 | } 391 | 392 | /* C entry point for isLanguageSupported:. A NULL language is reported as unsupported instead of being passed to stringWithUTF8String:. */ extern "C" int _SpeechToText_IsLanguageSupported( const char* language ) 393 | { 394 | return ( language != NULL ) ? [USpeechToText isLanguageSupported:[NSString stringWithUTF8String:language]] : 0; 395 | } 396 | 397 | extern "C" int _SpeechToText_IsServiceAvailable( int preferOfflineRecognition ) 398 | { 399 | return [USpeechToText isServiceAvailable:( preferOfflineRecognition == 1 )]; 
400 | } 401 | 402 | /* The extern "C" functions below are thin C-linkage wrappers that forward to the USpeechToText class method of the same name. */ extern "C" int _SpeechToText_IsBusy() 403 | { 404 | return [USpeechToText isBusy]; 405 | } 406 | 407 | extern "C" float _SpeechToText_GetAudioRmsdB() 408 | { 409 | return [USpeechToText getAudioRmsdB]; 410 | } 411 | 412 | extern "C" int _SpeechToText_CheckPermission() 413 | { 414 | return [USpeechToText checkPermission]; 415 | } 416 | 417 | /* The int returned by requestPermission is discarded here; the outcome reaches Unity through the OnPermissionRequested message instead. */ extern "C" void _SpeechToText_RequestPermission() 418 | { 419 | [USpeechToText requestPermission]; 420 | } 421 | 422 | extern "C" void _SpeechToText_OpenSettings() 423 | { 424 | [USpeechToText openSettings]; 425 | } -------------------------------------------------------------------------------- /Plugins/SpeechToText/iOS/SpeechToText.mm.meta: -------------------------------------------------------------------------------- 1 | fileFormatVersion: 2 2 | guid: d93e932981dab7a428ad55354139ffa8 3 | PluginImporter: 4 | externalObjects: {} 5 | serializedVersion: 2 6 | iconMap: {} 7 | executionOrder: {} 8 | defineConstraints: [] 9 | isPreloaded: 0 10 | isOverridable: 0 11 | isExplicitlyReferenced: 0 12 | validateReferences: 1 13 | platformData: 14 | - first: 15 | Any: 16 | second: 17 | enabled: 0 18 | settings: {} 19 | - first: 20 | Editor: Editor 21 | second: 22 | enabled: 0 23 | settings: 24 | DefaultValueInitialized: true 25 | - first: 26 | iPhone: iOS 27 | second: 28 | enabled: 1 29 | settings: {} 30 | userData: 31 | assetBundleName: 32 | assetBundleVariant: 33 | -------------------------------------------------------------------------------- /package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "com.yasirkula.speechtotext", 3 | "displayName": "Speech to Text", 4 | "version": "1.1.1", 5 | "documentationUrl": "https://github.com/yasirkula/UnitySpeechToText", 6 | "changelogUrl": "https://github.com/yasirkula/UnitySpeechToText/releases", 7 | "licensesUrl": "https://github.com/yasirkula/UnitySpeechToText/blob/master/LICENSE.txt", 8 | "description": "This 
plugin helps you convert speech to text on Android (all versions) and iOS 10+." 9 | } 10 | -------------------------------------------------------------------------------- /package.json.meta: -------------------------------------------------------------------------------- 1 | fileFormatVersion: 2 2 | guid: d5494445853f3f442a20d5d7338250db 3 | timeCreated: 1697647234 4 | licenseType: Free 5 | TextScriptImporter: 6 | userData: 7 | assetBundleName: 8 | assetBundleVariant: 9 | --------------------------------------------------------------------------------