├── .github
├── AAR Source (Android)
│ ├── AndroidManifest.xml
│ ├── java
│ │ └── com
│ │ │ └── yasirkula
│ │ │ └── unity
│ │ │ ├── SpeechToText.java
│ │ │ ├── SpeechToTextListener.java
│ │ │ ├── SpeechToTextPermissionFragment.java
│ │ │ ├── SpeechToTextPermissionReceiver.java
│ │ │ └── SpeechToTextRecognitionListener.java
│ └── proguard.txt
└── README.md
├── LICENSE.txt
├── LICENSE.txt.meta
├── Plugins.meta
├── Plugins
├── SpeechToText.meta
└── SpeechToText
│ ├── Android.meta
│ ├── Android
│ ├── STTCallbackHelper.cs
│ ├── STTCallbackHelper.cs.meta
│ ├── STTInteractionCallbackAndroid.cs
│ ├── STTInteractionCallbackAndroid.cs.meta
│ ├── STTPermissionCallbackAndroid.cs
│ ├── STTPermissionCallbackAndroid.cs.meta
│ ├── SpeechToText.aar
│ └── SpeechToText.aar.meta
│ ├── Editor.meta
│ ├── Editor
│ ├── STTPostProcessBuild.cs
│ ├── STTPostProcessBuild.cs.meta
│ ├── SpeechToText.Editor.asmdef
│ └── SpeechToText.Editor.asmdef.meta
│ ├── ISpeechToTextListener.cs
│ ├── ISpeechToTextListener.cs.meta
│ ├── README.txt
│ ├── README.txt.meta
│ ├── SpeechToText.Runtime.asmdef
│ ├── SpeechToText.Runtime.asmdef.meta
│ ├── SpeechToText.cs
│ ├── SpeechToText.cs.meta
│ ├── iOS.meta
│ └── iOS
│ ├── STTInteractionCallbackiOS.cs
│ ├── STTInteractionCallbackiOS.cs.meta
│ ├── STTPermissionCallbackiOS.cs
│ ├── STTPermissionCallbackiOS.cs.meta
│ ├── SpeechToText.mm
│ └── SpeechToText.mm.meta
├── package.json
└── package.json.meta
/.github/AAR Source (Android)/AndroidManifest.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
--------------------------------------------------------------------------------
/.github/AAR Source (Android)/java/com/yasirkula/unity/SpeechToText.java:
--------------------------------------------------------------------------------
1 | package com.yasirkula.unity;
2 |
3 | import android.Manifest;
4 | import android.annotation.TargetApi;
5 | import android.app.Activity;
6 | import android.app.Fragment;
7 | import android.content.BroadcastReceiver;
8 | import android.content.Context;
9 | import android.content.Intent;
10 | import android.content.pm.PackageManager;
11 | import android.net.Uri;
12 | import android.os.Build;
13 | import android.os.Bundle;
14 | import android.os.Looper;
15 | import android.provider.Settings;
16 | import android.speech.RecognizerIntent;
17 | import android.speech.SpeechRecognizer;
18 | import android.util.Log;
19 |
20 | import java.util.ArrayList;
21 |
22 | public class SpeechToText
23 | {
24 | public static boolean PermissionFreeMode = false;
25 | public static long MinimumSessionLength = -1; // Observed default value: 5000 milliseconds
26 | public static long SpeechSilenceTimeout = -1; // Observed default value: 2000 milliseconds
27 |
28 | private static ArrayList supportedLanguages;
29 | private static SpeechRecognizer speechRecognizer;
30 | private static SpeechToTextRecognitionListener speechRecognitionListener;
31 |
32 | public static boolean Start( final Context context, final SpeechToTextListener unityInterface, final String language, final boolean useFreeFormLanguageModel, final boolean enablePartialResults, final boolean preferOfflineRecognition )
33 | {
34 | if( !IsServiceAvailable( context, preferOfflineRecognition ) || IsBusy() || !RequestPermission( context, null ) )
35 | return false;
36 |
37 | ( (Activity) context ).runOnUiThread( new Runnable()
38 | {
39 | @Override
40 | public void run()
41 | {
42 | try
43 | {
44 | // Dispose leftover objects from the previous operation
45 | CancelInternal( false );
46 |
47 | Intent intent = new Intent( RecognizerIntent.ACTION_RECOGNIZE_SPEECH );
48 | intent.putExtra( RecognizerIntent.EXTRA_LANGUAGE_MODEL, useFreeFormLanguageModel ? RecognizerIntent.LANGUAGE_MODEL_FREE_FORM : RecognizerIntent.LANGUAGE_MODEL_WEB_SEARCH );
49 | intent.putExtra( RecognizerIntent.EXTRA_MAX_RESULTS, 3 );
50 | if( language != null && language.length() > 0 )
51 | intent.putExtra( RecognizerIntent.EXTRA_LANGUAGE, language.replace( '_', '-' ) );
52 | if( enablePartialResults )
53 | intent.putExtra( RecognizerIntent.EXTRA_PARTIAL_RESULTS, true );
54 | if( preferOfflineRecognition && Build.VERSION.SDK_INT >= 23 )
55 | intent.putExtra( RecognizerIntent.EXTRA_PREFER_OFFLINE, true );
56 | if( MinimumSessionLength > 0 )
57 | intent.putExtra( RecognizerIntent.EXTRA_SPEECH_INPUT_MINIMUM_LENGTH_MILLIS, MinimumSessionLength );
58 | if( SpeechSilenceTimeout > 0 )
59 | {
60 | intent.putExtra( RecognizerIntent.EXTRA_SPEECH_INPUT_COMPLETE_SILENCE_LENGTH_MILLIS, SpeechSilenceTimeout );
61 | intent.putExtra( RecognizerIntent.EXTRA_SPEECH_INPUT_POSSIBLY_COMPLETE_SILENCE_LENGTH_MILLIS, SpeechSilenceTimeout );
62 | }
63 |
64 | speechRecognizer = preferOfflineRecognition && Build.VERSION.SDK_INT >= 31 ? SpeechRecognizer.createOnDeviceSpeechRecognizer( context ) : SpeechRecognizer.createSpeechRecognizer( context );
65 | speechRecognitionListener = new SpeechToTextRecognitionListener( unityInterface );
66 | speechRecognizer.setRecognitionListener( speechRecognitionListener );
67 | speechRecognizer.startListening( intent );
68 | }
69 | catch( Exception e )
70 | {
71 | Log.e( "Unity", "Exception:", e );
72 | CancelInternal( false );
73 | }
74 | }
75 | } );
76 |
77 | return true;
78 | }
79 |
80 | public static void Stop( Context context )
81 | {
82 | if( Looper.myLooper() == Looper.getMainLooper() )
83 | StopInternal();
84 | else
85 | {
86 | ( (Activity) context ).runOnUiThread( new Runnable()
87 | {
88 | @Override
89 | public void run()
90 | {
91 | StopInternal();
92 | }
93 | } );
94 | }
95 | }
96 |
97 | private static void StopInternal()
98 | {
99 | if( speechRecognizer != null )
100 | speechRecognizer.stopListening();
101 | }
102 |
103 | public static void Cancel( Context context )
104 | {
105 | if( Looper.myLooper() == Looper.getMainLooper() )
106 | CancelInternal( true );
107 | else
108 | {
109 | ( (Activity) context ).runOnUiThread( new Runnable()
110 | {
111 | @Override
112 | public void run()
113 | {
114 | CancelInternal( true );
115 | }
116 | } );
117 | }
118 | }
119 |
120 | private static void CancelInternal( boolean isCanceledByUser )
121 | {
122 | if( speechRecognizer != null )
123 | {
124 | try
125 | {
126 | speechRecognitionListener.OnSpeechRecognizerCanceled( isCanceledByUser );
127 | }
128 | catch( Exception e )
129 | {
130 | Log.e( "Unity", "Exception:", e );
131 | }
132 | finally
133 | {
134 | speechRecognitionListener = null;
135 | }
136 |
137 | try
138 | {
139 | speechRecognizer.destroy();
140 | }
141 | catch( Exception e )
142 | {
143 | Log.e( "Unity", "Exception:", e );
144 | }
145 | finally
146 | {
147 | speechRecognizer = null;
148 | }
149 | }
150 | }
151 |
152 | public static void InitializeSupportedLanguages( final Context context )
153 | {
154 | InitializeSupportedLanguagesInternal( context, false );
155 | }
156 |
157 | private static void InitializeSupportedLanguagesInternal( final Context context, final boolean secondAttempt )
158 | {
159 | Intent intent = RecognizerIntent.getVoiceDetailsIntent( context );
160 | if( intent == null )
161 | intent = new Intent( RecognizerIntent.ACTION_GET_LANGUAGE_DETAILS );
162 |
163 | // In the first attempt, try to fetch the supported languages list without this hack
164 | // Credit: https://stackoverflow.com/q/48500077
165 | if( secondAttempt )
166 | intent.setPackage( "com.google.android.googlequicksearchbox" );
167 |
168 | try
169 | {
170 | context.sendOrderedBroadcast( intent, null, new BroadcastReceiver()
171 | {
172 | @Override
173 | public void onReceive( Context context, Intent intent )
174 | {
175 | if( getResultCode() == Activity.RESULT_OK )
176 | {
177 | Bundle results = getResultExtras( true );
178 | supportedLanguages = results.getStringArrayList( RecognizerIntent.EXTRA_SUPPORTED_LANGUAGES );
179 | if( supportedLanguages == null && !secondAttempt )
180 | InitializeSupportedLanguagesInternal( context, true );
181 | }
182 | }
183 | }, null, Activity.RESULT_OK, null, null );
184 | }
185 | catch( Exception e )
186 | {
187 | Log.e( "Unity", "Exception:", e );
188 | }
189 | }
190 |
191 | // -1: Unknown, 0: No, 1: Yes, 2: Most likely
192 | public static int IsLanguageSupported( String language )
193 | {
194 | if( language == null || language.length() == 0 )
195 | return 0;
196 |
197 | if( supportedLanguages != null )
198 | {
199 | language = language.replace( '_', '-' );
200 |
201 | if( supportedLanguages.contains( language ) )
202 | return 1;
203 | else
204 | {
205 | // Match "en" with "en-US" and etc.
206 | language += "-";
207 |
208 | for( String supportedLanguage : supportedLanguages )
209 | {
210 | if( supportedLanguage.startsWith( language ) )
211 | return 2;
212 | }
213 | }
214 |
215 | return 0;
216 | }
217 |
218 | return -1;
219 | }
220 |
221 | public static boolean IsServiceAvailable( final Context context, boolean preferOfflineRecognition )
222 | {
223 | if( preferOfflineRecognition )
224 | {
225 | if( Build.VERSION.SDK_INT >= 31 )
226 | return SpeechRecognizer.isOnDeviceRecognitionAvailable( context );
227 | else if( Build.VERSION.SDK_INT < 23 )
228 | return false;
229 | }
230 |
231 | return SpeechRecognizer.isRecognitionAvailable( context );
232 | }
233 |
234 | public static boolean IsBusy()
235 | {
236 | return speechRecognitionListener != null && !speechRecognitionListener.IsFinished();
237 | }
238 |
239 | @TargetApi( Build.VERSION_CODES.M )
240 | public static boolean CheckPermission( final Context context )
241 | {
242 | return PermissionFreeMode || Build.VERSION.SDK_INT < Build.VERSION_CODES.M || context.checkSelfPermission( Manifest.permission.RECORD_AUDIO ) == PackageManager.PERMISSION_GRANTED;
243 | }
244 |
245 | @TargetApi( Build.VERSION_CODES.M )
246 | public static boolean RequestPermission( final Context context, final SpeechToTextPermissionReceiver permissionReceiver )
247 | {
248 | if( CheckPermission( context ) )
249 | {
250 | if( permissionReceiver != null )
251 | permissionReceiver.OnPermissionResult( 1 );
252 |
253 | return true;
254 | }
255 |
256 | if( permissionReceiver == null )
257 | ( (Activity) context ).requestPermissions( new String[] { Manifest.permission.RECORD_AUDIO }, 875621 );
258 | else
259 | {
260 | final Fragment request = new SpeechToTextPermissionFragment( permissionReceiver );
261 | ( (Activity) context ).getFragmentManager().beginTransaction().add( 0, request ).commitAllowingStateLoss();
262 | }
263 |
264 | return false;
265 | }
266 |
267 | // Credit: https://stackoverflow.com/a/35456817/2373034
268 | public static void OpenSettings( final Context context, String packageName )
269 | {
270 | Uri uri = Uri.fromParts( "package", ( packageName == null || packageName.length() == 0 ) ? context.getPackageName() : packageName, null );
271 |
272 | Intent intent = new Intent();
273 | intent.setAction( Settings.ACTION_APPLICATION_DETAILS_SETTINGS );
274 | intent.setData( uri );
275 |
276 | try
277 | {
278 | context.startActivity( intent );
279 | }
280 | catch( Exception e )
281 | {
282 | Log.e( "Unity", "Exception:", e );
283 | }
284 | }
285 | }
--------------------------------------------------------------------------------
/.github/AAR Source (Android)/java/com/yasirkula/unity/SpeechToTextListener.java:
--------------------------------------------------------------------------------
1 | package com.yasirkula.unity;
2 |
/**
 * Callbacks of a speech recognition session. SpeechToTextRecognitionListener forwards
 * the recognizer's events to an implementation of this interface; once the final result
 * has been delivered, it forwards no further events.
 */
public interface SpeechToTextListener
{
    /** Forwarded from the recognizer's onReadyForSpeech event. */
    void OnReadyForSpeech();

    /** Forwarded from the recognizer's onBeginningOfSpeech event. */
    void OnBeginningOfSpeech();

    /**
     * Forwarded from the recognizer's onRmsChanged event.
     *
     * @param rmsdB the value reported by the recognizer, passed through unmodified
     */
    void OnVoiceLevelChanged( float rmsdB );

    /**
     * Delivers an intermediate recognition result.
     *
     * @param spokenText the most promising text so far
     */
    void OnPartialResultReceived( String spokenText );

    /**
     * Delivers the final outcome of the session; sent at most once per session.
     *
     * @param spokenText the most promising text, or the last known text if the session failed or was canceled
     * @param errorCode  -1 for a regular result, 0 if the session was canceled by the user, otherwise a
     *                   SpeechRecognizer error code
     */
    void OnResultReceived( String spokenText, int errorCode );
}
--------------------------------------------------------------------------------
/.github/AAR Source (Android)/java/com/yasirkula/unity/SpeechToTextPermissionFragment.java:
--------------------------------------------------------------------------------
1 | package com.yasirkula.unity;
2 |
3 | // Original work Copyright (c) 2017 Yury Habets
4 | // Modified work Copyright 2018 yasirkula
5 | //
6 | // Permission is hereby granted, free of charge, to any person obtaining a copy
7 | // of this software and associated documentation files (the "Software"), to deal
8 | // in the Software without restriction, including without limitation the rights
9 | // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10 | // copies of the Software, and to permit persons to whom the Software is
11 | // furnished to do so, subject to the following conditions:
12 | //
13 | // The above copyright notice and this permission notice shall be included in
14 | // all copies or substantial portions of the Software.
15 | //
16 | // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 | // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 | // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19 | // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 | // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 | // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22 | // SOFTWARE.
23 |
24 | import android.Manifest;
25 | import android.annotation.TargetApi;
26 | import android.app.Activity;
27 | import android.app.Fragment;
28 | import android.content.Intent;
29 | import android.content.pm.PackageManager;
30 | import android.os.Build;
31 | import android.os.Bundle;
32 | import android.util.Log;
33 |
34 | @TargetApi( Build.VERSION_CODES.M )
35 | public class SpeechToTextPermissionFragment extends Fragment
36 | {
37 | private static final int PERMISSIONS_REQUEST_CODE = 875621;
38 |
39 | private final SpeechToTextPermissionReceiver permissionReceiver;
40 |
41 | public SpeechToTextPermissionFragment()
42 | {
43 | permissionReceiver = null;
44 | }
45 |
46 | public SpeechToTextPermissionFragment( final SpeechToTextPermissionReceiver permissionReceiver )
47 | {
48 | this.permissionReceiver = permissionReceiver;
49 | }
50 |
51 | @Override
52 | public void onCreate( Bundle savedInstanceState )
53 | {
54 | super.onCreate( savedInstanceState );
55 | if( permissionReceiver == null )
56 | onRequestPermissionsResult( PERMISSIONS_REQUEST_CODE, new String[0], new int[0] );
57 | else
58 | requestPermissions( new String[] { Manifest.permission.RECORD_AUDIO }, PERMISSIONS_REQUEST_CODE );
59 | }
60 |
61 | @Override
62 | public void onRequestPermissionsResult( int requestCode, String[] permissions, int[] grantResults )
63 | {
64 | if( requestCode != PERMISSIONS_REQUEST_CODE )
65 | return;
66 |
67 | if( permissionReceiver == null )
68 | {
69 | Log.e( "Unity", "Fragment data got reset while asking permissions!" );
70 |
71 | getFragmentManager().beginTransaction().remove( this ).commitAllowingStateLoss();
72 | return;
73 | }
74 |
75 | // 0 -> denied, must go to settings
76 | // 1 -> granted
77 | // 2 -> denied, can ask again
78 | int result = 1;
79 | if( permissions.length == 0 || grantResults.length == 0 )
80 | result = 2;
81 | else
82 | {
83 | for( int i = 0; i < permissions.length && i < grantResults.length; ++i )
84 | {
85 | if( grantResults[i] == PackageManager.PERMISSION_DENIED )
86 | {
87 | if( !shouldShowRequestPermissionRationale( permissions[i] ) )
88 | {
89 | result = 0;
90 | break;
91 | }
92 |
93 | result = 2;
94 | }
95 | }
96 | }
97 |
98 | permissionReceiver.OnPermissionResult( result );
99 | getFragmentManager().beginTransaction().remove( this ).commitAllowingStateLoss();
100 |
101 | // Resolves a bug in Unity 2019 where the calling activity
102 | // doesn't resume automatically after the fragment finishes
103 | // Credit: https://stackoverflow.com/a/12409215/2373034
104 | try
105 | {
106 | Intent resumeUnityActivity = new Intent( getActivity(), getActivity().getClass() );
107 | resumeUnityActivity.setFlags( Intent.FLAG_ACTIVITY_REORDER_TO_FRONT );
108 | getActivity().startActivityIfNeeded( resumeUnityActivity, 0 );
109 | }
110 | catch( Exception e )
111 | {
112 | Log.e( "Unity", "Exception (resume):", e );
113 | }
114 | }
115 | }
--------------------------------------------------------------------------------
/.github/AAR Source (Android)/java/com/yasirkula/unity/SpeechToTextPermissionReceiver.java:
--------------------------------------------------------------------------------
1 | package com.yasirkula.unity;
2 |
/** Receives the outcome of a RECORD_AUDIO permission request. */
public interface SpeechToTextPermissionReceiver
{
    /**
     * @param result 0: denied, the user must go to Settings; 1: granted; 2: denied, can ask again
     */
    void OnPermissionResult( int result );
}
--------------------------------------------------------------------------------
/.github/AAR Source (Android)/java/com/yasirkula/unity/SpeechToTextRecognitionListener.java:
--------------------------------------------------------------------------------
1 | package com.yasirkula.unity;
2 |
3 | import android.os.Bundle;
4 | import android.speech.RecognitionListener;
5 | import android.speech.RecognizerIntent;
6 | import android.speech.SpeechRecognizer;
7 | import android.util.Log;
8 |
9 | import java.util.ArrayList;
10 |
11 | public class SpeechToTextRecognitionListener implements RecognitionListener
12 | {
13 | private final SpeechToTextListener unityInterface;
14 | private boolean isResultSent;
15 | private String lastResult = "";
16 |
17 | public SpeechToTextRecognitionListener( SpeechToTextListener unityInterface )
18 | {
19 | this.unityInterface = unityInterface;
20 | }
21 |
22 | private void SendResult( String result, int errorCode )
23 | {
24 | if( !isResultSent )
25 | {
26 | isResultSent = true;
27 | unityInterface.OnResultReceived( result, errorCode );
28 | }
29 | }
30 |
31 | public boolean IsFinished()
32 | {
33 | return isResultSent;
34 | }
35 |
36 | public void OnSpeechRecognizerCanceled( boolean isCanceledByUser )
37 | {
38 | SendResult( lastResult, isCanceledByUser ? 0 : SpeechRecognizer.ERROR_RECOGNIZER_BUSY );
39 | }
40 |
41 | @Override
42 | public void onReadyForSpeech( Bundle params )
43 | {
44 | if( !isResultSent )
45 | unityInterface.OnReadyForSpeech();
46 | }
47 |
48 | @Override
49 | public void onBeginningOfSpeech()
50 | {
51 | if( !isResultSent )
52 | unityInterface.OnBeginningOfSpeech();
53 | }
54 |
55 | @Override
56 | public void onResults( Bundle results )
57 | {
58 | SendResult( GetMostPromisingResult( results ), -1 );
59 | }
60 |
61 | @Override
62 | public void onPartialResults( Bundle partialResults )
63 | {
64 | if( !isResultSent )
65 | unityInterface.OnPartialResultReceived( GetMostPromisingResult( partialResults ) );
66 | }
67 |
68 | private String GetMostPromisingResult( Bundle resultsBundle )
69 | {
70 | ArrayList results = resultsBundle.getStringArrayList( SpeechRecognizer.RESULTS_RECOGNITION );
71 | if( results != null && results.size() > 0 )
72 | {
73 | lastResult = results.get( 0 );
74 | if( results.size() > 1 )
75 | {
76 | // Try to get the result with the highest confidence score
77 | float[] confidenceScores = resultsBundle.getFloatArray( RecognizerIntent.EXTRA_CONFIDENCE_SCORES );
78 | if( confidenceScores != null && confidenceScores.length >= results.size() )
79 | {
80 | float highestConfidenceScore = confidenceScores[0];
81 | for( int i = 1; i < confidenceScores.length; i++ )
82 | {
83 | if( confidenceScores[i] > highestConfidenceScore )
84 | {
85 | highestConfidenceScore = confidenceScores[i];
86 | lastResult = results.get( i );
87 | }
88 | }
89 | }
90 | }
91 | }
92 |
93 | if( lastResult == null )
94 | lastResult = "";
95 |
96 | return lastResult;
97 | }
98 |
99 | @Override
100 | public void onError( int error )
101 | {
102 | // Error codes: https://developer.android.com/reference/android/speech/SpeechRecognizer
103 | Log.e( "Unity", "Speech recognition error code: " + error );
104 | SendResult( lastResult, error );
105 | }
106 |
107 | @Override
108 | public void onRmsChanged( float rmsdB )
109 | {
110 | if( !isResultSent )
111 | unityInterface.OnVoiceLevelChanged( rmsdB );
112 | }
113 |
114 | @Override
115 | public void onBufferReceived( byte[] buffer )
116 | {
117 | }
118 |
119 | @Override
120 | public void onEndOfSpeech()
121 | {
122 | }
123 |
124 | @Override
125 | public void onEvent( int eventType, Bundle params )
126 | {
127 | }
128 | }
--------------------------------------------------------------------------------
/.github/AAR Source (Android)/proguard.txt:
--------------------------------------------------------------------------------
1 | -keep class com.yasirkula.unity.* { *; }
--------------------------------------------------------------------------------
/.github/README.md:
--------------------------------------------------------------------------------
1 | # Unity Speech to Text Plugin for Android & iOS
2 |
3 | **Discord:** https://discord.gg/UJJt549AaV
4 |
5 | **[GitHub Sponsors ☕](https://github.com/sponsors/yasirkula)**
6 |
7 | This plugin helps you convert speech to text on Android (all versions) and iOS 10+. Offline speech recognition is supported on Android 6.0+ (API level 23+) and iOS 13+ if the target language's speech recognition model is present on the device.
8 |
9 | Note that continuous speech detection isn't supported so the speech recognition sessions automatically end after a short break in the speech or when the OS-determined time limits are reached.
10 |
11 | ## INSTALLATION
12 |
13 | There are 4 ways to install this plugin:
14 |
15 | - import [SpeechToText.unitypackage](https://github.com/yasirkula/UnitySpeechToText/releases) via *Assets-Import Package*
16 | - clone/[download](https://github.com/yasirkula/UnitySpeechToText/archive/master.zip) this repository and move the *Plugins* folder to your Unity project's *Assets* folder
17 | - *(via Package Manager)* click the + button and install the package from the following git URL:
18 | - `https://github.com/yasirkula/UnitySpeechToText.git`
19 | - *(via [OpenUPM](https://openupm.com))* after installing [openupm-cli](https://github.com/openupm/openupm-cli), run the following command:
20 | - `openupm add com.yasirkula.speechtotext`
21 |
22 | ### iOS Setup
23 |
24 | There are two ways to set up the plugin on iOS:
25 |
26 | **a. Automated Setup for iOS**
27 |
28 | - *(optional)* change the values of **Speech Recognition Usage Description** and **Microphone Usage Description** at *Project Settings/yasirkula/Speech to Text*
29 |
30 | **b. Manual Setup for iOS**
31 |
32 | - see: https://github.com/yasirkula/UnitySpeechToText/wiki/Manual-Setup-for-iOS
33 |
34 | ## KNOWN ISSUES
35 |
36 | - Speech session returned [error code 12](https://developer.android.com/reference/android/speech/SpeechRecognizer#ERROR_LANGUAGE_NOT_SUPPORTED) on a single Android test device (regardless of target language) and couldn't be started
37 |
38 | ## HOW TO
39 |
40 | **NOTE:** The codebase is documented using XML comments so this section will only briefly mention the functions.
41 |
42 | You should first initialize the plugin via `SpeechToText.Initialize( string preferredLanguage = null )`. If you don't provide a preferred language (in the format "*en-US*"), the device's default language is used. You can check if a language is supported via `SpeechToText.IsLanguageSupported( string language )`.
43 |
44 | After initialization, you can query `SpeechToText.IsServiceAvailable( bool preferOfflineRecognition = false )` and `SpeechToText.IsBusy()` to see if a speech recognition session can be started. Most operations will fail while the service is unavailable or busy.
45 |
46 | Before starting a speech recognition session, you must make sure that the necessary permissions are granted via `SpeechToText.CheckPermission()` and `SpeechToText.RequestPermissionAsync( PermissionCallback callback )` functions. If permission is *Denied*, you can call `SpeechToText.OpenSettings()` to automatically open the app's Settings from where the user can grant the necessary permissions manually (Android: Microphone, iOS: Microphone and Speech Recognition). On Android, the speech recognition system also requires the Google app to have Microphone permission. If not, its result callback will return error code 9. In that scenario, you can notify the user and call `SpeechToText.OpenGoogleAppSettings()` to automatically open the Google app's Settings from where the user can grant it the Microphone permission manually.
47 |
48 | To start a speech recognition session, you can call `SpeechToText.Start( ISpeechToTextListener listener, bool useFreeFormLanguageModel = true, bool preferOfflineRecognition = false )`. Normally, sessions end automatically after a short break in the speech but you can also stop the session manually via `SpeechToText.ForceStop()` (processes the speech input so far) or `SpeechToText.Cancel()` (doesn't process any speech input and immediately invokes the result callback with error code 0). The `ISpeechToTextListener` interface has the following functions:
49 |
50 | - `OnReadyForSpeech()`
51 | - `OnBeginningOfSpeech()`
52 | - `OnVoiceLevelChanged( float normalizedVoiceLevel )`
53 | - `OnPartialResultReceived( string spokenText )`
54 | - `OnResultReceived( string spokenText, int? errorCode )`
55 |
56 | ## EXAMPLE CODE
57 |
58 | ```csharp
59 | using UnityEngine;
60 | using UnityEngine.UI;
61 |
62 | public class SpeechToTextDemo : MonoBehaviour, ISpeechToTextListener
63 | {
64 | public Text SpeechText;
65 | public Button StartSpeechToTextButton, StopSpeechToTextButton;
66 | public Slider VoiceLevelSlider;
67 | public bool PreferOfflineRecognition;
68 |
69 | private float normalizedVoiceLevel;
70 |
71 | private void Awake()
72 | {
73 | SpeechToText.Initialize( "en-US" );
74 |
75 | StartSpeechToTextButton.onClick.AddListener( StartSpeechToText );
76 | StopSpeechToTextButton.onClick.AddListener( StopSpeechToText );
77 | }
78 |
79 | private void Update()
80 | {
81 | StartSpeechToTextButton.interactable = SpeechToText.IsServiceAvailable( PreferOfflineRecognition ) && !SpeechToText.IsBusy();
82 | StopSpeechToTextButton.interactable = SpeechToText.IsBusy();
83 |
84 | // You may also apply some noise to the voice level for a more fluid animation (e.g. via Mathf.PerlinNoise)
85 | VoiceLevelSlider.value = Mathf.Lerp( VoiceLevelSlider.value, normalizedVoiceLevel, 15f * Time.unscaledDeltaTime );
86 | }
87 |
88 | public void ChangeLanguage( string preferredLanguage )
89 | {
90 | if( !SpeechToText.Initialize( preferredLanguage ) )
91 | SpeechText.text = "Couldn't initialize with language: " + preferredLanguage;
92 | }
93 |
94 | public void StartSpeechToText()
95 | {
96 | SpeechToText.RequestPermissionAsync( ( permission ) =>
97 | {
98 | if( permission == SpeechToText.Permission.Granted )
99 | {
100 | if( SpeechToText.Start( this, preferOfflineRecognition: PreferOfflineRecognition ) )
101 | SpeechText.text = "";
102 | else
103 | SpeechText.text = "Couldn't start speech recognition session!";
104 | }
105 | else
106 | SpeechText.text = "Permission is denied!";
107 | } );
108 | }
109 |
110 | public void StopSpeechToText()
111 | {
112 | SpeechToText.ForceStop();
113 | }
114 |
115 | void ISpeechToTextListener.OnReadyForSpeech()
116 | {
117 | Debug.Log( "OnReadyForSpeech" );
118 | }
119 |
120 | void ISpeechToTextListener.OnBeginningOfSpeech()
121 | {
122 | Debug.Log( "OnBeginningOfSpeech" );
123 | }
124 |
125 | void ISpeechToTextListener.OnVoiceLevelChanged( float normalizedVoiceLevel )
126 | {
127 | // Note that on Android, voice detection starts with a beep sound and it can trigger this callback. You may want to ignore this callback for ~0.5s on Android.
128 | this.normalizedVoiceLevel = normalizedVoiceLevel;
129 | }
130 |
131 | void ISpeechToTextListener.OnPartialResultReceived( string spokenText )
132 | {
133 | Debug.Log( "OnPartialResultReceived: " + spokenText );
134 | SpeechText.text = spokenText;
135 | }
136 |
137 | void ISpeechToTextListener.OnResultReceived( string spokenText, int? errorCode )
138 | {
139 | Debug.Log( "OnResultReceived: " + spokenText + ( errorCode.HasValue ? ( " --- Error: " + errorCode ) : "" ) );
140 | SpeechText.text = spokenText;
141 | normalizedVoiceLevel = 0f;
142 |
143 | // Recommended approach:
144 | // - If errorCode is 0, session was aborted via SpeechToText.Cancel. Handle the case appropriately.
145 | // - If errorCode is 9, notify the user that they must grant Microphone permission to the Google app and call SpeechToText.OpenGoogleAppSettings.
146 | // - If the speech session took less than 1 second (should be an error) or a null/empty spokenText is returned, prompt the user to try again (note that if
147 | // errorCode is 6, then the user hasn't spoken and the session has timed out as expected).
148 | }
149 | }
150 | ```
151 |
--------------------------------------------------------------------------------
/LICENSE.txt:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) 2023 yasirkula
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 |
--------------------------------------------------------------------------------
/LICENSE.txt.meta:
--------------------------------------------------------------------------------
1 | fileFormatVersion: 2
2 | guid: 655736bd183ddce479c4653920b87581
3 | timeCreated: 1697647234
4 | licenseType: Free
5 | TextScriptImporter:
6 | userData:
7 | assetBundleName:
8 | assetBundleVariant:
9 |
--------------------------------------------------------------------------------
/Plugins.meta:
--------------------------------------------------------------------------------
1 | fileFormatVersion: 2
2 | guid: 2b923af336f75444586aae4bb214d417
3 | folderAsset: yes
4 | timeCreated: 1697647261
5 | licenseType: Free
6 | DefaultImporter:
7 | userData:
8 | assetBundleName:
9 | assetBundleVariant:
10 |
--------------------------------------------------------------------------------
/Plugins/SpeechToText.meta:
--------------------------------------------------------------------------------
1 | fileFormatVersion: 2
2 | guid: 1ef4088c266d7ae4296ebe1353696fed
3 | folderAsset: yes
4 | timeCreated: 1694851727
5 | licenseType: Free
6 | DefaultImporter:
7 | userData:
8 | assetBundleName:
9 | assetBundleVariant:
10 |
--------------------------------------------------------------------------------
/Plugins/SpeechToText/Android.meta:
--------------------------------------------------------------------------------
1 | fileFormatVersion: 2
2 | guid: df3f5a4ce8a292a44b3a41db492c7b6a
3 | folderAsset: yes
4 | timeCreated: 1694851764
5 | licenseType: Free
6 | DefaultImporter:
7 | userData:
8 | assetBundleName:
9 | assetBundleVariant:
10 |
--------------------------------------------------------------------------------
/Plugins/SpeechToText/Android/STTCallbackHelper.cs:
--------------------------------------------------------------------------------
#if UNITY_EDITOR || UNITY_ANDROID
using UnityEngine;

namespace SpeechToTextNamespace
{
    /// <summary>
    /// Queues actions and invokes them from this component's Update, i.e. on Unity's main thread.
    /// The GameObject survives scene loads and can optionally destroy itself after the first batch of actions.
    /// </summary>
    public class STTCallbackHelper : MonoBehaviour
    {
        // Private lock object so that no outside code can contend on (or deadlock with) this component's lock
        private readonly object actionLock = new object();

        private bool autoDestroy;
        private System.Action mainThreadAction = null;

        private void Awake()
        {
            DontDestroyOnLoad( gameObject );
        }

        private void Update()
        {
            if( mainThreadAction == null )
                return;

            try
            {
                // Detach the queued actions while holding the lock but invoke them after releasing it,
                // so that threads calling CallOnMainThread aren't blocked while the callbacks execute
                System.Action pendingAction;
                lock( actionLock )
                {
                    pendingAction = mainThreadAction;
                    mainThreadAction = null;
                }

                if( pendingAction != null )
                    pendingAction();
            }
            finally
            {
                if( autoDestroy )
                    Destroy( gameObject );
            }
        }

        /// <summary>
        /// Makes this helper destroy its GameObject after the next Update that finds queued actions.
        /// </summary>
        /// <returns>This instance, to allow call chaining.</returns>
        public STTCallbackHelper AutoDestroy()
        {
            autoDestroy = true;
            return this;
        }

        /// <summary>
        /// Queues <paramref name="function"/> to be invoked from Update. Actions queued before the same
        /// Update are invoked together, in the order they were added.
        /// </summary>
        /// <remarks>
        /// NOTE(review): the locking suggests that this is called from threads other than Unity's
        /// main thread (e.g. Java proxy callbacks) - confirm against the callers.
        /// </remarks>
        public void CallOnMainThread( System.Action function )
        {
            lock( actionLock )
            {
                mainThreadAction += function;
            }
        }
    }
}
#endif
--------------------------------------------------------------------------------
/Plugins/SpeechToText/Android/STTCallbackHelper.cs.meta:
--------------------------------------------------------------------------------
1 | fileFormatVersion: 2
2 | guid: 86700fe55bdd1204994105ae16138f34
3 | timeCreated: 1545147258
4 | licenseType: Free
5 | MonoImporter:
6 | serializedVersion: 2
7 | defaultReferences: []
8 | executionOrder: 0
9 | icon: {instanceID: 0}
10 | userData:
11 | assetBundleName:
12 | assetBundleVariant:
13 |
--------------------------------------------------------------------------------
/Plugins/SpeechToText/Android/STTInteractionCallbackAndroid.cs:
--------------------------------------------------------------------------------
1 | #if UNITY_EDITOR || UNITY_ANDROID
2 | using UnityEngine;
3 |
4 | namespace SpeechToTextNamespace
5 | {
/// <summary>
/// Proxy for the Java interface <c>com.yasirkula.unity.SpeechToTextListener</c>. Each event is forwarded to the
/// wrapped <see cref="ISpeechToTextListener"/> through <see cref="STTCallbackHelper.CallOnMainThread"/>.
/// </summary>
public class STTInteractionCallbackAndroid : AndroidJavaProxy
{
    private readonly ISpeechToTextListener listener;
    private readonly STTCallbackHelper callbackHelper;

    /// <param name="listener">Listener that receives the forwarded events.</param>
    public STTInteractionCallbackAndroid( ISpeechToTextListener listener ) : base( "com.yasirkula.unity.SpeechToTextListener" )
    {
        this.listener = listener;

        // AddComponent needs an explicit type argument; the helper lives until OnResultReceived destroys it
        callbackHelper = new GameObject( "STTCallbackHelper" ).AddComponent<STTCallbackHelper>();
    }

    [UnityEngine.Scripting.Preserve]
    public void OnReadyForSpeech()
    {
        callbackHelper.CallOnMainThread( listener.OnReadyForSpeech );
    }

    [UnityEngine.Scripting.Preserve]
    public void OnBeginningOfSpeech()
    {
        callbackHelper.CallOnMainThread( listener.OnBeginningOfSpeech );
    }

    /// <summary>
    /// Converts the reported level to the [0, 1] range and forwards it to the listener.
    /// </summary>
    /// <param name="rmsdB">Root Mean Square (RMS) dB between range [-2, 10] (-2: quiet, 10: loud)</param>
    [UnityEngine.Scripting.Preserve]
    public void OnVoiceLevelChanged( float rmsdB )
    {
        // Credit: https://stackoverflow.com/a/14124484/2373034
        float normalizedVoiceLevel = Mathf.Clamp01( 0.1f * Mathf.Pow( 10f, rmsdB / 10f ) );
        callbackHelper.CallOnMainThread( () => listener.OnVoiceLevelChanged( normalizedVoiceLevel ) );
    }

    /// <summary>
    /// Forwards a partial result to the listener; null or empty texts are dropped.
    /// </summary>
    [UnityEngine.Scripting.Preserve]
    public void OnPartialResultReceived( string spokenText )
    {
        if( !string.IsNullOrEmpty( spokenText ) )
            callbackHelper.CallOnMainThread( () => listener.OnPartialResultReceived( spokenText ) );
    }

    /// <summary>
    /// Forwards the final result to the listener and then destroys the helper GameObject.
    /// </summary>
    /// <param name="spokenText">Recognized text; an empty text is reported to the listener as null.</param>
    /// <param name="errorCode">Native error code; a negative value is reported to the listener as null (no error).</param>
    [UnityEngine.Scripting.Preserve]
    public void OnResultReceived( string spokenText, int errorCode )
    {
        // ERROR_NO_MATCH (7) error code is thrown instead of ERROR_SPEECH_TIMEOUT (6) if the user doesn't speak. ERROR_NO_MATCH is also
        // thrown if the system can't understand the user's speech but I unfortunately couldn't find a way to distinguish between
        // these two cases. So, ERROR_NO_MATCH is always considered as ERROR_SPEECH_TIMEOUT for the time being.
        if( errorCode == 7 )
            errorCode = 6;

        callbackHelper.CallOnMainThread( () =>
        {
            try
            {
                listener.OnResultReceived( !string.IsNullOrEmpty( spokenText ) ? spokenText : null, ( errorCode >= 0 ) ? (int?) errorCode : null );
            }
            finally
            {
                // The session is over: release the helper even if the listener throws
                Object.DestroyImmediate( callbackHelper.gameObject );
            }
        } );
    }
}
67 | }
68 | #endif
--------------------------------------------------------------------------------
/Plugins/SpeechToText/Android/STTInteractionCallbackAndroid.cs.meta:
--------------------------------------------------------------------------------
1 | fileFormatVersion: 2
2 | guid: 82274ccc8e66eb243b0ea95903cccd76
3 | timeCreated: 1519060539
4 | licenseType: Free
5 | MonoImporter:
6 | serializedVersion: 2
7 | defaultReferences: []
8 | executionOrder: 0
9 | icon: {instanceID: 0}
10 | userData:
11 | assetBundleName:
12 | assetBundleVariant:
13 |
--------------------------------------------------------------------------------
/Plugins/SpeechToText/Android/STTPermissionCallbackAndroid.cs:
--------------------------------------------------------------------------------
1 | #if UNITY_EDITOR || UNITY_ANDROID
2 | using System.Threading;
3 | using UnityEngine;
4 |
5 | namespace SpeechToTextNamespace
6 | {
/// <summary>
/// Proxy for the Java interface <c>com.yasirkula.unity.SpeechToTextPermissionReceiver</c> that stores the reported
/// result and pulses the supplied lock object so that a thread waiting on it can continue.
/// </summary>
public class STTPermissionCallbackAndroid : AndroidJavaProxy
{
    private readonly object threadLock;

    /// <summary>Reported permission result; stays -1 until <see cref="OnPermissionResult"/> is called.</summary>
    public int Result { get; private set; }

    /// <param name="threadLock">Object that is locked and pulsed when the result arrives. Must not be null.</param>
    public STTPermissionCallbackAndroid( object threadLock ) : base( "com.yasirkula.unity.SpeechToTextPermissionReceiver" )
    {
        // Fail here instead of inside the native callback, where lock( null ) would throw far away from the cause
        if( threadLock == null )
            throw new System.ArgumentNullException( "threadLock" );

        Result = -1;
        this.threadLock = threadLock;
    }

    [UnityEngine.Scripting.Preserve]
    public void OnPermissionResult( int result )
    {
        lock( threadLock )
        {
            // Publish the result under the same monitor the waiter uses, then wake the waiter
            Result = result;
            Monitor.Pulse( threadLock );
        }
    }
}
29 |
/// <summary>
/// Proxy for the Java interface <c>com.yasirkula.unity.SpeechToTextPermissionReceiver</c> that relays the reported
/// result to a <see cref="SpeechToText.PermissionCallback"/> through an auto-destroying <see cref="STTCallbackHelper"/>.
/// </summary>
public class STTPermissionCallbackAsyncAndroid : AndroidJavaProxy
{
    private readonly SpeechToText.PermissionCallback callback;
    private readonly STTCallbackHelper callbackHelper;

    /// <param name="callback">Invoked with the permission result; may be null if the caller isn't interested in it.</param>
    public STTPermissionCallbackAsyncAndroid( SpeechToText.PermissionCallback callback ) : base( "com.yasirkula.unity.SpeechToTextPermissionReceiver" )
    {
        this.callback = callback;

        // AddComponent needs an explicit type argument
        callbackHelper = new GameObject( "STTCallbackHelper" ).AddComponent<STTCallbackHelper>().AutoDestroy();
    }

    [UnityEngine.Scripting.Preserve]
    public void OnPermissionResult( int result )
    {
        // Always queue an action so that the auto-destroying helper gets cleaned up, even when there is no callback to invoke
        callbackHelper.CallOnMainThread( () =>
        {
            if( callback != null )
                callback( (SpeechToText.Permission) result );
        } );
    }
}
47 | }
48 | #endif
--------------------------------------------------------------------------------
/Plugins/SpeechToText/Android/STTPermissionCallbackAndroid.cs.meta:
--------------------------------------------------------------------------------
1 | fileFormatVersion: 2
2 | guid: 884395253a44f734b868612aebdb3c7f
3 | timeCreated: 1519060539
4 | licenseType: Free
5 | MonoImporter:
6 | serializedVersion: 2
7 | defaultReferences: []
8 | executionOrder: 0
9 | icon: {instanceID: 0}
10 | userData:
11 | assetBundleName:
12 | assetBundleVariant:
13 |
--------------------------------------------------------------------------------
/Plugins/SpeechToText/Android/SpeechToText.aar:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yasirkula/UnitySpeechToText/511736fbefd300c0ef169689f0851e99357976c3/Plugins/SpeechToText/Android/SpeechToText.aar
--------------------------------------------------------------------------------
/Plugins/SpeechToText/Android/SpeechToText.aar.meta:
--------------------------------------------------------------------------------
1 | fileFormatVersion: 2
2 | guid: 8ea5beac9a3c3d74289579f751e2c9c4
3 | timeCreated: 1569764737
4 | licenseType: Free
5 | PluginImporter:
6 | serializedVersion: 2
7 | iconMap: {}
8 | executionOrder: {}
9 | isPreloaded: 0
10 | isOverridable: 0
11 | platformData:
12 | data:
13 | first:
14 | Android: Android
15 | second:
16 | enabled: 1
17 | settings: {}
18 | data:
19 | first:
20 | Any:
21 | second:
22 | enabled: 0
23 | settings: {}
24 | data:
25 | first:
26 | Editor: Editor
27 | second:
28 | enabled: 0
29 | settings:
30 | DefaultValueInitialized: true
31 | userData:
32 | assetBundleName:
33 | assetBundleVariant:
34 |
--------------------------------------------------------------------------------
/Plugins/SpeechToText/Editor.meta:
--------------------------------------------------------------------------------
1 | fileFormatVersion: 2
2 | guid: bc1e3b18ca26ed4408a127190e9a40a9
3 | folderAsset: yes
4 | timeCreated: 1521452097
5 | licenseType: Free
6 | DefaultImporter:
7 | userData:
8 | assetBundleName:
9 | assetBundleVariant:
10 |
--------------------------------------------------------------------------------
/Plugins/SpeechToText/Editor/STTPostProcessBuild.cs:
--------------------------------------------------------------------------------
1 | using System.IO;
2 | using UnityEngine;
3 | using UnityEditor;
4 | #if UNITY_IOS
5 | using UnityEditor.Callbacks;
6 | using UnityEditor.iOS.Xcode;
7 | #endif
8 |
9 | namespace SpeechToTextNamespace
10 | {
/// <summary>
/// Editor-side plugin settings, stored as JSON at <c>ProjectSettings/SpeechToText.json</c>.
/// </summary>
[System.Serializable]
public class Settings
{
    private const string SAVE_PATH = "ProjectSettings/SpeechToText.json";

    public bool AutomatedSetup = true;
    public string SpeechRecognitionUsageDescription = "Speech recognition will be used for speech-to-text conversion.";
    public string MicrophoneUsageDescription = "Microphone will be used with speech recognition.";

    private static Settings m_instance = null;

    /// <summary>
    /// Lazily loaded settings. Falls back to default values if the save file is missing or can't be parsed.
    /// </summary>
    public static Settings Instance
    {
        get
        {
            if( m_instance == null )
            {
                try
                {
                    if( File.Exists( SAVE_PATH ) )
                        m_instance = JsonUtility.FromJson<Settings>( File.ReadAllText( SAVE_PATH ) );
                }
                catch( System.Exception e )
                {
                    Debug.LogException( e );
                }

                // Covers a missing file, a failed read/parse and FromJson handing back null (e.g. for an empty file)
                if( m_instance == null )
                    m_instance = new Settings();
            }

            return m_instance;
        }
    }

    /// <summary>
    /// Writes the current values to the save file as pretty-printed JSON.
    /// </summary>
    public void Save()
    {
        File.WriteAllText( SAVE_PATH, JsonUtility.ToJson( this, true ) );
    }

#if UNITY_2018_3_OR_NEWER
    [SettingsProvider]
    public static SettingsProvider CreatePreferencesGUI()
    {
        return new SettingsProvider( "Project/yasirkula/Speech to Text", SettingsScope.Project )
        {
            guiHandler = ( searchContext ) => PreferencesGUI(),
            keywords = new System.Collections.Generic.HashSet<string>() { "Speech", "Text", "Android", "iOS" }
        };
    }
#endif

    /// <summary>
    /// Draws the settings UI and saves the settings whenever a value is changed.
    /// </summary>
#if !UNITY_2018_3_OR_NEWER
    [PreferenceItem( "Speech to Text" )]
#endif
    public static void PreferencesGUI()
    {
        EditorGUI.BeginChangeCheck();

        Instance.AutomatedSetup = EditorGUILayout.Toggle( "Automated Setup", Instance.AutomatedSetup );

        // The usage descriptions are only editable while automated setup is enabled
        EditorGUI.BeginDisabledGroup( !Instance.AutomatedSetup );
        Instance.SpeechRecognitionUsageDescription = EditorGUILayout.DelayedTextField( "Speech Recognition Usage Description", Instance.SpeechRecognitionUsageDescription );
        Instance.MicrophoneUsageDescription = EditorGUILayout.DelayedTextField( "Microphone Usage Description", Instance.MicrophoneUsageDescription );
        EditorGUI.EndDisabledGroup();

        if( EditorGUI.EndChangeCheck() )
            Instance.Save();
    }
}
80 |
/// <summary>
/// Post-build step for iOS: weak-links the Speech and Accelerate frameworks in the generated Xcode project and
/// writes the usage descriptions from <see cref="Settings"/> to Info.plist. Does nothing unless Automated Setup is enabled.
/// </summary>
public class STTPostProcessBuild
{
#if UNITY_IOS
    [PostProcessBuild]
    public static void OnPostprocessBuild( BuildTarget target, string buildPath )
    {
        if( !Settings.Instance.AutomatedSetup || target != BuildTarget.iOS )
            return;

        string projectFilePath = PBXProject.GetPBXProjectPath( buildPath );
        string infoPlistPath = Path.Combine( buildPath, "Info.plist" );

        WeakLinkFrameworks( projectFilePath );
        WriteUsageDescriptions( infoPlistPath );
    }

    // Replaces the Speech and Accelerate framework references with -weak_framework linker flags
    private static void WeakLinkFrameworks( string projectFilePath )
    {
        PBXProject project = new PBXProject();
        project.ReadFromFile( projectFilePath );

#if UNITY_2019_3_OR_NEWER
        string targetGUID = project.GetUnityFrameworkTargetGuid();
#else
        string targetGUID = project.TargetGuidByName( PBXProject.GetUnityTargetName() );
#endif

        project.AddBuildProperty( targetGUID, "OTHER_LDFLAGS", "-weak_framework Speech" );
        project.AddBuildProperty( targetGUID, "OTHER_LDFLAGS", "-weak_framework Accelerate" );

        project.RemoveFrameworkFromProject( targetGUID, "Speech.framework" );
        project.RemoveFrameworkFromProject( targetGUID, "Accelerate.framework" );

        File.WriteAllText( projectFilePath, project.WriteToString() );
    }

    // Adds the non-empty usage descriptions to Info.plist
    private static void WriteUsageDescriptions( string infoPlistPath )
    {
        PlistDocument document = new PlistDocument();
        document.ReadFromString( File.ReadAllText( infoPlistPath ) );

        PlistElementDict root = document.root;

        string speechDescription = Settings.Instance.SpeechRecognitionUsageDescription;
        if( !string.IsNullOrEmpty( speechDescription ) )
            root.SetString( "NSSpeechRecognitionUsageDescription", speechDescription );

        string microphoneDescription = Settings.Instance.MicrophoneUsageDescription;
        if( !string.IsNullOrEmpty( microphoneDescription ) )
            root.SetString( "NSMicrophoneUsageDescription", microphoneDescription );

        File.WriteAllText( infoPlistPath, document.WriteToString() );
    }
#endif
}
126 | }
--------------------------------------------------------------------------------
/Plugins/SpeechToText/Editor/STTPostProcessBuild.cs.meta:
--------------------------------------------------------------------------------
1 | fileFormatVersion: 2
2 | guid: c27ea70fabb400d4d982ee5095c7e706
3 | timeCreated: 1521452119
4 | licenseType: Free
5 | MonoImporter:
6 | serializedVersion: 2
7 | defaultReferences: []
8 | executionOrder: 0
9 | icon: {instanceID: 0}
10 | userData:
11 | assetBundleName:
12 | assetBundleVariant:
13 |
--------------------------------------------------------------------------------
/Plugins/SpeechToText/Editor/SpeechToText.Editor.asmdef:
--------------------------------------------------------------------------------
1 | {
2 | "name": "SpeechToText.Editor",
3 | "references": [],
4 | "includePlatforms": [
5 | "Editor"
6 | ],
7 | "excludePlatforms": [],
8 | "allowUnsafeCode": false,
9 | "overrideReferences": false,
10 | "precompiledReferences": [],
11 | "autoReferenced": true,
12 | "defineConstraints": [],
13 | "versionDefines": [],
14 | "noEngineReferences": false
15 | }
--------------------------------------------------------------------------------
/Plugins/SpeechToText/Editor/SpeechToText.Editor.asmdef.meta:
--------------------------------------------------------------------------------
1 | fileFormatVersion: 2
2 | guid: 299584742f797d243aeca7a7cbcf8656
3 | AssemblyDefinitionImporter:
4 | externalObjects: {}
5 | userData:
6 | assetBundleName:
7 | assetBundleVariant:
8 |
--------------------------------------------------------------------------------
/Plugins/SpeechToText/ISpeechToTextListener.cs:
--------------------------------------------------------------------------------
/// <summary>
/// Receives the events of a speech recognition session started via <see cref="SpeechToText.Start"/>.
/// </summary>
public interface ISpeechToTextListener
{
    /// <summary>
    /// Invoked when speech recognition service starts listening to the user's speech input. On iOS, it's invoked immediately.
    /// </summary>
    void OnReadyForSpeech();

    /// <summary>
    /// Invoked when speech recognition service detects a speech for the first time. On iOS, it's called just before the first invocation of <see cref="OnPartialResultReceived"/>.
    /// </summary>
    void OnBeginningOfSpeech();

    /// <summary>
    /// Invoked regularly as the user speaks to report their current voice level.
    /// </summary>
    /// <param name="normalizedVoiceLevel">User's voice level in [0, 1] range (0: quiet, 1: loud)</param>
    void OnVoiceLevelChanged( float normalizedVoiceLevel );

    /// <summary>
    /// Invoked regularly as the user speaks to report their speech input so far.
    /// </summary>
    void OnPartialResultReceived( string spokenText );

    /// <summary>
    /// Invoked after the speech recognition is finalized.
    /// </summary>
    /// <param name="errorCode">
    /// If not null, an error has occurred. On Android, all error codes are listed here: https://developer.android.com/reference/android/speech/SpeechRecognizer#constants_1
    /// Special error codes:
    /// - 0: <see cref="SpeechToText.Cancel"/> is called.
    /// - 6: User hasn't spoken and the speech session has timed out.
    /// - 9: Google app that processes the speech doesn't have Microphone permission on Android. User can be informed that they should grant the permission
    /// from Google app's Settings and, for convenience, that Settings page can be opened programmatically via <see cref="SpeechToText.OpenGoogleAppSettings"/>.
    /// See: https://stackoverflow.com/a/48006238/2373034
    /// </param>
    void OnResultReceived( string spokenText, int? errorCode );
}
--------------------------------------------------------------------------------
/Plugins/SpeechToText/ISpeechToTextListener.cs.meta:
--------------------------------------------------------------------------------
1 | fileFormatVersion: 2
2 | guid: bafcbb19f74998e469561185e3f9d948
3 | timeCreated: 1698694980
4 | licenseType: Free
5 | MonoImporter:
6 | serializedVersion: 2
7 | defaultReferences: []
8 | executionOrder: 0
9 | icon: {instanceID: 0}
10 | userData:
11 | assetBundleName:
12 | assetBundleVariant:
13 |
--------------------------------------------------------------------------------
/Plugins/SpeechToText/README.txt:
--------------------------------------------------------------------------------
1 | = Speech to Text for Android & iOS (v1.1.1) =
2 |
3 | Documentation: https://github.com/yasirkula/UnitySpeechToText
4 | Example code: https://github.com/yasirkula/UnitySpeechToText#example-code
5 | E-mail: yasirkula@gmail.com
--------------------------------------------------------------------------------
/Plugins/SpeechToText/README.txt.meta:
--------------------------------------------------------------------------------
1 | fileFormatVersion: 2
2 | guid: 3a69dfa27c431764faf2a189d4cc8a44
3 | timeCreated: 1563308465
4 | licenseType: Free
5 | TextScriptImporter:
6 | userData:
7 | assetBundleName:
8 | assetBundleVariant:
9 |
--------------------------------------------------------------------------------
/Plugins/SpeechToText/SpeechToText.Runtime.asmdef:
--------------------------------------------------------------------------------
1 | {
2 | "name": "SpeechToText.Runtime"
3 | }
4 |
--------------------------------------------------------------------------------
/Plugins/SpeechToText/SpeechToText.Runtime.asmdef.meta:
--------------------------------------------------------------------------------
1 | fileFormatVersion: 2
2 | guid: bec8f24081b3e1145891d23c338072e2
3 | AssemblyDefinitionImporter:
4 | externalObjects: {}
5 | userData:
6 | assetBundleName:
7 | assetBundleVariant:
8 |
--------------------------------------------------------------------------------
/Plugins/SpeechToText/SpeechToText.cs:
--------------------------------------------------------------------------------
1 | using UnityEngine;
2 | #if UNITY_2018_4_OR_NEWER && !SPEECH_TO_TEXT_DISABLE_ASYNC_FUNCTIONS
3 | using System.Threading.Tasks;
4 | #endif
5 | #if UNITY_EDITOR || UNITY_ANDROID || UNITY_IOS
6 | using SpeechToTextNamespace;
7 | #endif
8 |
9 | public static class SpeechToText
10 | {
/// <summary>
/// State of the permission that is needed to start a speech recognition session.
/// </summary>
public enum Permission
{
    /// <summary>
    /// Permission is permanently denied. User must grant the permission from the app's Settings (see <see cref="OpenSettings"/>).
    /// </summary>
    Denied = 0,
    /// <summary>
    /// Permission is granted.
    /// </summary>
    Granted = 1,
    /// <summary>
    /// Permission isn't granted but it can be asked via <see cref="RequestPermissionAsync(PermissionCallback)"/>.
    /// </summary>
    ShouldAsk = 2
};
26 |
/// <summary>
/// Result of a language support query (see IsLanguageSupported).
/// </summary>
public enum LanguageSupport
{
    /// <summary>
    /// Language support couldn't be determined (Android only).
    /// </summary>
    Unknown = -1,
    /// <summary>
    /// Language is not supported.
    /// </summary>
    NotSupported = 0,
    /// <summary>
    /// Language is supported.
    /// </summary>
    Supported = 1,
    /// <summary>
    /// Happens when e.g. the queried language is "en" but the speech recognition service returns "en-US" instead of "en" (Android only).
    /// </summary>
    LikelySupported = 2
};
46 |
/// <summary>
/// Callback signature used to report the result of a permission request.
/// </summary>
/// <param name="permission">The reported permission state.</param>
public delegate void PermissionCallback( Permission permission );
48 |
#region Platform Specific Elements
#if !UNITY_EDITOR && UNITY_ANDROID
// Lazily created handle to the plugin's Java class
private static AndroidJavaClass m_ajc = null;
private static AndroidJavaClass AJC
{
    get
    {
        if( m_ajc == null )
            m_ajc = new AndroidJavaClass( "com.yasirkula.unity.SpeechToText" );

        return m_ajc;
    }
}

// Lazily fetched UnityPlayer.currentActivity, passed to the Java functions as their Context argument
private static AndroidJavaObject m_context = null;
private static AndroidJavaObject Context
{
    get
    {
        if( m_context == null )
        {
            // GetStatic needs an explicit type argument to know which kind of field it is reading
            using( AndroidJavaObject unityClass = new AndroidJavaClass( "com.unity3d.player.UnityPlayer" ) )
                m_context = unityClass.GetStatic<AndroidJavaObject>( "currentActivity" );
        }

        return m_context;
    }
}

// Language passed to Initialize; forwarded to the Java side when a session is started
private static string preferredLanguage;
#elif !UNITY_EDITOR && UNITY_IOS
[System.Runtime.InteropServices.DllImport( "__Internal" )]
private static extern int _SpeechToText_Initialize( string language );

[System.Runtime.InteropServices.DllImport( "__Internal" )]
private static extern int _SpeechToText_Start( int useFreeFormLanguageModel, int preferOfflineRecognition );

[System.Runtime.InteropServices.DllImport( "__Internal" )]
private static extern void _SpeechToText_Stop();

[System.Runtime.InteropServices.DllImport( "__Internal" )]
private static extern void _SpeechToText_Cancel();

[System.Runtime.InteropServices.DllImport( "__Internal" )]
private static extern int _SpeechToText_IsLanguageSupported( string language );

[System.Runtime.InteropServices.DllImport( "__Internal" )]
private static extern int _SpeechToText_IsServiceAvailable( int preferOfflineRecognition );

[System.Runtime.InteropServices.DllImport( "__Internal" )]
private static extern int _SpeechToText_IsBusy();

[System.Runtime.InteropServices.DllImport( "__Internal" )]
private static extern int _SpeechToText_CheckPermission();

[System.Runtime.InteropServices.DllImport( "__Internal" )]
private static extern void _SpeechToText_RequestPermission();

[System.Runtime.InteropServices.DllImport( "__Internal" )]
private static extern void _SpeechToText_OpenSettings();
#elif UNITY_EDITOR
// State of the emulated speech session that is used inside the Editor
private static STTCallbackHelper speechSessionEmulator;
private static ISpeechToTextListener speechSessionEmulatorListener;
#endif
#endregion
114 |
// Runs once after the first scene is loaded; on Android, asks the Java side to initialize its supported languages
[RuntimeInitializeOnLoadMethod( RuntimeInitializeLoadType.AfterSceneLoad )]
private static void InitializeOnLoad()
{
#if !UNITY_EDITOR && UNITY_ANDROID
    AndroidJavaClass plugin = AJC;
    plugin.CallStatic( "InitializeSupportedLanguages", Context );
#endif
}
122 |
/// <summary>
/// Initializes speech recognition service with the preferred language or the default device language.
/// If the preferred language isn't available, the default device language may be used by the system as fallback.
/// </summary>
/// <param name="preferredLanguage">Must be in the format: "en-US".</param>
/// <returns>True, if the service is initialized successfully.</returns>
public static bool Initialize( string preferredLanguage = null )
{
#if !UNITY_EDITOR && UNITY_ANDROID
    // On Android, the language is only stored here; it is passed to the native side when a session is started
    SpeechToText.preferredLanguage = preferredLanguage;
    return true;
#elif !UNITY_EDITOR && UNITY_IOS
    return _SpeechToText_Initialize( preferredLanguage ?? "" ) == 1;
#else
    return true;
#endif
}
140 |
/// <summary>
/// Checks whether the speech recognition service supports the specified language.
/// </summary>
/// <param name="language">Must be in the format: "en-US".</param>
public static LanguageSupport IsLanguageSupported( string language )
{
#if !UNITY_EDITOR && UNITY_ANDROID
    // The Java function's int result maps directly onto the LanguageSupport values
    return (LanguageSupport) AJC.CallStatic<int>( "IsLanguageSupported", language ?? "" );
#elif !UNITY_EDITOR && UNITY_IOS
    return (LanguageSupport) _SpeechToText_IsLanguageSupported( language ?? "" );
#else
    return LanguageSupport.Supported;
#endif
}
152 |
/// <summary>
/// Checks if speech recognition service is available. Must be called AFTER <see cref="Initialize"/>.
/// </summary>
/// <param name="preferOfflineRecognition">
/// If true, checks if on-device speech recognition is supported.
/// On Android, it isn't guaranteed that offline speech recognition will actually be used, even if this function returns true.
/// Also, there is currently no way to check if the target language is actually downloaded on Android (if not, this function may
/// return true but the speech recognition session will fail). So this function isn't reliable for offline recognition on Android.
/// </param>
public static bool IsServiceAvailable( bool preferOfflineRecognition = false )
{
#if !UNITY_EDITOR && UNITY_ANDROID
    return AJC.CallStatic<bool>( "IsServiceAvailable", Context, preferOfflineRecognition );
#elif !UNITY_EDITOR && UNITY_IOS
    return _SpeechToText_IsServiceAvailable( preferOfflineRecognition ? 1 : 0 ) == 1;
#else
    return true;
#endif
}
172 |
/// <returns>True, if a speech recognition session is currently in progress. Another session can't be started during that time.</returns>
public static bool IsBusy()
{
#if !UNITY_EDITOR && UNITY_ANDROID
    return AJC.CallStatic<bool>( "IsBusy" );
#elif !UNITY_EDITOR && UNITY_IOS
    return _SpeechToText_IsBusy() == 1;
#elif UNITY_EDITOR
    // In the Editor, a session is in progress for as long as the emulator object exists
    return speechSessionEmulator != null;
#else
    return false;
#endif
}
186 |
187 | #region Runtime Permissions
/// <returns>True, if we have permission to start a speech recognition session.</returns>
public static bool CheckPermission()
{
#if !UNITY_EDITOR && UNITY_ANDROID
    return AJC.CallStatic<bool>( "CheckPermission", Context );
#elif !UNITY_EDITOR && UNITY_IOS
    return _SpeechToText_CheckPermission() == 1;
#else
    return true;
#endif
}
199 |
/// <summary>
/// Requests the necessary permission for speech recognition. Without this permission, <see cref="Start"/> will fail.
/// </summary>
/// <param name="callback">Invoked with the result of the request.</param>
public static void RequestPermissionAsync( PermissionCallback callback )
{
#if !UNITY_EDITOR && UNITY_ANDROID
    STTPermissionCallbackAsyncAndroid nativeCallback = new STTPermissionCallbackAsyncAndroid( callback );
    AJC.CallStatic( "RequestPermission", Context, nativeCallback );
#elif !UNITY_EDITOR && UNITY_IOS
    STTPermissionCallbackiOS.Initialize( callback );
    _SpeechToText_RequestPermission();
#else
    // No permission to ask for here: report success right away (a null callback is simply ignored)
    if( callback != null )
        callback( Permission.Granted );
#endif
}
215 |
216 | #if UNITY_2018_4_OR_NEWER && !SPEECH_TO_TEXT_DISABLE_ASYNC_FUNCTIONS
/// <summary>
/// Task-based variant of <see cref="RequestPermissionAsync(PermissionCallback)"/>.
/// </summary>
/// <returns>A task that completes with the result of the permission request.</returns>
public static Task<Permission> RequestPermissionAsync()
{
    // Bridge the callback-based API to a Task; the type argument carries the reported Permission
    TaskCompletionSource<Permission> tcs = new TaskCompletionSource<Permission>();
    RequestPermissionAsync( ( permission ) => tcs.SetResult( permission ) );
    return tcs.Task;
}
224 | #endif
225 |
/// <summary>
/// Opens the app's Settings from where the user can grant the necessary permissions manually
/// (Android: Record Audio, iOS: Speech Recognition and Microphone).
/// </summary>
public static void OpenSettings()
{
#if !UNITY_EDITOR && UNITY_ANDROID
    // Same Java function as OpenGoogleAppSettings, called with an empty string instead of a package name
    AJC.CallStatic( "OpenSettings", Context, "" );
#elif !UNITY_EDITOR && UNITY_IOS
    _SpeechToText_OpenSettings();
#endif
}
238 |
/// <summary>
/// Opens the Google app's Settings from where the user can grant the Microphone permission to it on Android.
/// Can be called if <see cref="ISpeechToTextListener.OnResultReceived"/> returns error code 9.
/// </summary>
public static void OpenGoogleAppSettings()
{
#if !UNITY_EDITOR && UNITY_ANDROID
    AJC.CallStatic( "OpenSettings", Context, "com.google.android.googlequicksearchbox" );
#endif
}
249 | #endregion
250 |
251 | #region Speech-to-text Functions
/// <summary>
/// Attempts to start a speech recognition session. Must be called AFTER <see cref="Initialize"/>.
/// </summary>
/// <param name="listener">The listener whose callback functions will be invoked.</param>
/// <param name="useFreeFormLanguageModel">
/// If true, free-form/dictation language model will be used (more suited for general purpose speech).
/// Otherwise, search-focused language model will be used (specialized in search terms).
/// </param>
/// <param name="preferOfflineRecognition">
/// If true and the active language supports on-device speech recognition, it'll be used.
/// Note that offline speech recognition may not be very accurate. Requires Android 23+ or iOS 13+.
/// </param>
/// <returns>True, if session is created successfully. If permission isn't granted yet, returns false (see <see cref="RequestPermissionAsync(PermissionCallback)"/>).</returns>
public static bool Start( ISpeechToTextListener listener, bool useFreeFormLanguageModel = true, bool preferOfflineRecognition = false )
{
#if !UNITY_EDITOR && UNITY_ANDROID
    // NOTE(review): STTInteractionCallbackAndroid creates its helper GameObject in its constructor and only destroys it in
    // OnResultReceived. Confirm that the Java side still reports a result when "Start" returns false; otherwise that helper leaks.
    STTInteractionCallbackAndroid nativeCallback = new STTInteractionCallbackAndroid( listener );
    return AJC.CallStatic<bool>( "Start", Context, nativeCallback, preferredLanguage ?? "", useFreeFormLanguageModel, true, preferOfflineRecognition );
#elif !UNITY_EDITOR && UNITY_IOS
    if( _SpeechToText_Start( useFreeFormLanguageModel ? 1 : 0, preferOfflineRecognition ? 1 : 0 ) == 1 )
    {
        STTInteractionCallbackiOS.Initialize( listener );
        return true;
    }

    return false;
#elif UNITY_EDITOR
    // A session is already being emulated: starting another one would overwrite (and orphan) the running emulator
    // and its listener would never receive a result. This matches IsBusy(): another session can't be started meanwhile.
    if( speechSessionEmulator != null )
        return false;

    speechSessionEmulatorListener = listener;
    speechSessionEmulator = new GameObject( "SpeechToText Emulator" ).AddComponent<STTCallbackHelper>();
    speechSessionEmulator.StartCoroutine( EmulateSpeechOnEditor() );

    return true;
#else
    return true;
#endif
}
288 |
/// <summary>
/// If a speech recognition session is in progress, stops it manually. Normally, a session is automatically stopped after the user stops speaking for a short while.
/// Note that on some Android versions, this call may have no effect (welcome to Android ecosystem): https://issuetracker.google.com/issues/158198432
/// </summary>
public static void ForceStop()
{
#if !UNITY_EDITOR && UNITY_ANDROID
    AJC.CallStatic( "Stop", Context );
#elif !UNITY_EDITOR && UNITY_IOS
    _SpeechToText_Stop();
#elif UNITY_EDITOR
    // The emulated session finishes successfully with its canned text
    StopEmulateSpeechOnEditor( "Hello world", null );
#endif
}
303 |
/// <summary>
/// If a speech recognition session is in progress, cancels it. Canceled sessions return an error code of 0 in their callback.
/// </summary>
public static void Cancel()
{
#if !UNITY_EDITOR && UNITY_ANDROID
    AJC.CallStatic( "Cancel", Context );
#elif !UNITY_EDITOR && UNITY_IOS
    _SpeechToText_Cancel();
#elif UNITY_EDITOR
    // The emulated session finishes with no text and error code 0
    StopEmulateSpeechOnEditor( null, 0 );
#endif
}
317 |
318 | #if UNITY_EDITOR
// Coroutine that plays a scripted speech session inside the Editor: ready -> beginning of speech -> two partial results.
// The final result is delivered from the finally block via StopEmulateSpeechOnEditor.
private static System.Collections.IEnumerator EmulateSpeechOnEditor()
{
    const float readyDelay = 0.25f;
    const float speechBeginDelay = 0.5f;
    const float partialResultDelay = 0.33f;
    const float finalResultDelay = 0.5f;
    const string firstWord = "Hello";
    const string wholeSentence = "Hello world";

    try
    {
        // Voice level changes are reported by a separate coroutine running on the same emulator object
        speechSessionEmulator.StartCoroutine( EmulateVoiceLevelChangeOnEditor() );

        yield return new WaitForSecondsRealtime( readyDelay );
        speechSessionEmulatorListener.OnReadyForSpeech();

        yield return new WaitForSecondsRealtime( speechBeginDelay );
        speechSessionEmulatorListener.OnBeginningOfSpeech();

        yield return new WaitForSecondsRealtime( partialResultDelay );
        speechSessionEmulatorListener.OnPartialResultReceived( firstWord );

        yield return new WaitForSecondsRealtime( partialResultDelay );
        speechSessionEmulatorListener.OnPartialResultReceived( wholeSentence );

        yield return new WaitForSecondsRealtime( finalResultDelay );
    }
    finally
    {
        StopEmulateSpeechOnEditor( wholeSentence, null );
    }
}
340 |
// Coroutine that, after an initial 0.25 second delay, reports a Perlin noise based voice level to the listener
// and then waits 3 frames before reporting the next one. It has no exit condition of its own.
private static System.Collections.IEnumerator EmulateVoiceLevelChangeOnEditor()
{
    const int framesBetweenReports = 3;

    yield return new WaitForSecondsRealtime( 0.25f );

    for( ; ; )
    {
        float noise = Mathf.PerlinNoise( Time.unscaledTime * 4f, Time.unscaledTime * -2f );
        speechSessionEmulatorListener.OnVoiceLevelChanged( Mathf.Clamp01( noise ) );

        int skippedFrames = 0;
        while( skippedFrames < framesBetweenReports )
        {
            skippedFrames++;
            yield return null;
        }
    }
}
353 |
// Ends the emulated session (if any): destroys the emulator object and reports the final result to the listener.
// spokenText and errorCode are passed to ISpeechToTextListener.OnResultReceived as-is.
private static void StopEmulateSpeechOnEditor( string spokenText, int? errorCode )
{
    if( speechSessionEmulator == null )
        return;

    STTCallbackHelper finishedEmulator = speechSessionEmulator;
    ISpeechToTextListener finishedListener = speechSessionEmulatorListener;

    // Reset the session state BEFORE notifying the listener. The listener may call Start() from inside
    // OnResultReceived; clearing the fields afterwards would wipe out that newly started session. It also
    // guarantees that the state is cleared even if the listener throws.
    speechSessionEmulator = null;
    speechSessionEmulatorListener = null;

    Object.DestroyImmediate( finishedEmulator.gameObject );
    finishedListener.OnResultReceived( spokenText, errorCode );
}
364 | #endif
365 | #endregion
366 | }
--------------------------------------------------------------------------------
/Plugins/SpeechToText/SpeechToText.cs.meta:
--------------------------------------------------------------------------------
1 | fileFormatVersion: 2
2 | guid: cac27a1df7da40a4391b171fdf482662
3 | timeCreated: 1694881090
4 | licenseType: Free
5 | MonoImporter:
6 | serializedVersion: 2
7 | defaultReferences: []
8 | executionOrder: 0
9 | icon: {instanceID: 0}
10 | userData:
11 | assetBundleName:
12 | assetBundleVariant:
13 |
--------------------------------------------------------------------------------
/Plugins/SpeechToText/iOS.meta:
--------------------------------------------------------------------------------
1 | fileFormatVersion: 2
2 | guid: f190315c6dd85214890b514bb2c598f5
3 | folderAsset: yes
4 | timeCreated: 1697459600
5 | licenseType: Free
6 | DefaultImporter:
7 | userData:
8 | assetBundleName:
9 | assetBundleVariant:
10 |
--------------------------------------------------------------------------------
/Plugins/SpeechToText/iOS/STTInteractionCallbackiOS.cs:
--------------------------------------------------------------------------------
1 | #if UNITY_EDITOR || UNITY_IOS
2 | using System.Collections;
3 | using UnityEngine;
4 |
namespace SpeechToTextNamespace
{
	/// <summary>
	/// Receives the messages that the native iOS code sends to the "STTInteractionCallbackiOS" GameObject
	/// and forwards them to the listener of the active speech recognition session.
	/// </summary>
	public class STTInteractionCallbackiOS : MonoBehaviour
	{
		private static STTInteractionCallbackiOS instance;
		private ISpeechToTextListener listener;
		private bool beginningOfSpeechInvoked;
		private Coroutine voiceLevelChangeDetectionCoroutine;

#if !UNITY_EDITOR && UNITY_IOS
		[System.Runtime.InteropServices.DllImport( "__Internal" )]
		private static extern float _SpeechToText_GetAudioRmsdB();
#endif

		/// <summary>
		/// Registers <paramref name="listener"/> as the receiver of the speech recognition events, creating the
		/// persistent callback GameObject on first use, and immediately invokes its OnReadyForSpeech.
		/// </summary>
		/// <param name="listener">Listener of the new speech recognition session</param>
		public static void Initialize( ISpeechToTextListener listener )
		{
			if( instance == null )
			{
				// The GameObject's name is the target name that the native code passes to UnitySendMessage
				instance = new GameObject( "STTInteractionCallbackiOS" ).AddComponent<STTInteractionCallbackiOS>();
				DontDestroyOnLoad( instance.gameObject );
			}
			else if( instance.listener != null )
			{
				// The previous listener hasn't received a result yet; finish it with error code 8
				// (presumably mirrors Android's ERROR_RECOGNIZER_BUSY - TODO confirm)
				instance.listener.OnResultReceived( null, 8 );
			}

			instance.listener = listener;
			instance.beginningOfSpeechInvoked = false;

			// The coroutine stops itself when there is no listener, so it is restarted only if it isn't running
			if( instance.voiceLevelChangeDetectionCoroutine == null )
				instance.voiceLevelChangeDetectionCoroutine = instance.StartCoroutine( instance.VoiceLevelChangeDetectionCoroutine() );

			listener.OnReadyForSpeech();
		}

		/// <summary>
		/// Polls the native voice level every frame while there is a listener and reports it whenever it changes.
		/// </summary>
		private IEnumerator VoiceLevelChangeDetectionCoroutine()
		{
			float lastRmsDB = -1f;
			while( listener != null )
			{
#if !UNITY_EDITOR && UNITY_IOS
				float rmsDB = _SpeechToText_GetAudioRmsdB();
#else
				float rmsDB = 0f;
#endif
				if( rmsDB != lastRmsDB )
				{
					lastRmsDB = rmsDB;
					OnVoiceLevelChanged( rmsDB );
				}

				yield return null;
			}

			// Allow Initialize to start the coroutine again for the next session
			voiceLevelChangeDetectionCoroutine = null;
		}

		/// <summary>Forwards the voice level to the listener after normalizing it to [0, 1].</summary>
		/// <param name="rmsdB">Root Mean Square (RMS) dB between range [0, 160] (0: quiet, 160: loud)</param>
		[UnityEngine.Scripting.Preserve]
		public void OnVoiceLevelChanged( float rmsdB )
		{
			// Convert [130, 150] dB range to [0, 1]
			if( listener != null )
				listener.OnVoiceLevelChanged( Mathf.Clamp01( ( rmsdB - 130f ) / 20f ) );
		}

		/// <summary>
		/// Forwards a partial result to the listener. The first partial result of a session also triggers OnBeginningOfSpeech.
		/// </summary>
		/// <param name="spokenText">Text recognized so far; it isn't forwarded if it is null or empty</param>
		[UnityEngine.Scripting.Preserve]
		public void OnPartialResultReceived( string spokenText )
		{
			if( listener != null )
			{
				// Potentially more accurate way of determining the beginning of speech: https://stackoverflow.com/a/46325305
				if( !beginningOfSpeechInvoked )
				{
					beginningOfSpeechInvoked = true;
					listener.OnBeginningOfSpeech();
				}

				if( !string.IsNullOrEmpty( spokenText ) )
					listener.OnPartialResultReceived( spokenText );
			}
		}

		/// <summary>Ends the session by forwarding the final result to the listener without an error code.</summary>
		/// <param name="spokenText">Final text; an empty text is reported as null</param>
		[UnityEngine.Scripting.Preserve]
		public void OnResultReceived( string spokenText )
		{
			// Clear the listener first so that it can safely start a new session inside its callback
			ISpeechToTextListener _listener = listener;
			listener = null;

			if( _listener != null )
				_listener.OnResultReceived( !string.IsNullOrEmpty( spokenText ) ? spokenText : null, null );
		}

		/// <summary>Ends the session by forwarding an error to the listener.</summary>
		/// <param name="error">Error code as text; -1 is reported if it can't be parsed as an integer</param>
		[UnityEngine.Scripting.Preserve]
		public void OnError( string error )
		{
			// Clear the listener first so that it can safely start a new session inside its callback
			ISpeechToTextListener _listener = listener;
			listener = null;

			if( _listener != null )
			{
				int errorCode;
				if( !int.TryParse( error, out errorCode ) )
					errorCode = -1;

				_listener.OnResultReceived( null, errorCode );
			}
		}
	}
}
113 | #endif
--------------------------------------------------------------------------------
/Plugins/SpeechToText/iOS/STTInteractionCallbackiOS.cs.meta:
--------------------------------------------------------------------------------
1 | fileFormatVersion: 2
2 | guid: ed03c4c9c394f1a4bbc7c5e2eb9056bb
3 | timeCreated: 1519060539
4 | licenseType: Free
5 | MonoImporter:
6 | serializedVersion: 2
7 | defaultReferences: []
8 | executionOrder: 0
9 | icon: {instanceID: 0}
10 | userData:
11 | assetBundleName:
12 | assetBundleVariant:
13 |
--------------------------------------------------------------------------------
/Plugins/SpeechToText/iOS/STTPermissionCallbackiOS.cs:
--------------------------------------------------------------------------------
1 | #if UNITY_EDITOR || UNITY_IOS
2 | using UnityEngine;
3 |
namespace SpeechToTextNamespace
{
	/// <summary>
	/// Receives the permission result that the native iOS code sends to the "STTPermissionCallbackiOS" GameObject
	/// and forwards it to the pending permission callback.
	/// </summary>
	public class STTPermissionCallbackiOS : MonoBehaviour
	{
		private static STTPermissionCallbackiOS instance;
		private SpeechToText.PermissionCallback callback;

		/// <summary>
		/// Registers <paramref name="callback"/> as the receiver of the next permission result, creating the
		/// persistent callback GameObject on first use.
		/// </summary>
		/// <param name="callback">Callback to invoke when the native code reports the permission result</param>
		public static void Initialize( SpeechToText.PermissionCallback callback )
		{
			if( instance == null )
			{
				// The GameObject's name is the target name that the native code passes to UnitySendMessage
				instance = new GameObject( "STTPermissionCallbackiOS" ).AddComponent<STTPermissionCallbackiOS>();
				DontDestroyOnLoad( instance.gameObject );
			}
			else if( instance.callback != null )
			{
				// A previous callback is still waiting for its result; finish it with ShouldAsk before replacing it
				instance.callback( SpeechToText.Permission.ShouldAsk );
			}

			instance.callback = callback;
		}

		/// <summary>Forwards the permission result to the pending callback (if any) and clears the callback.</summary>
		/// <param name="message">Permission value as an integer in text form</param>
		[UnityEngine.Scripting.Preserve]
		public void OnPermissionRequested( string message )
		{
			SpeechToText.PermissionCallback _callback = callback;
			callback = null;

			if( _callback != null )
			{
				// The callback is already cleared at this point, so a parse failure must not throw (the result
				// would be lost). Fall back to 0, which is the value the native code sends when the permission
				// isn't granted. Same approach as STTInteractionCallbackiOS.OnError
				int permission;
				if( !int.TryParse( message, out permission ) )
					permission = 0;

				_callback( (SpeechToText.Permission) permission );
			}
		}
	}
}
35 | #endif
--------------------------------------------------------------------------------
/Plugins/SpeechToText/iOS/STTPermissionCallbackiOS.cs.meta:
--------------------------------------------------------------------------------
1 | fileFormatVersion: 2
2 | guid: e3abdba7e0d88ae41903930fae2c0584
3 | timeCreated: 1519060539
4 | licenseType: Free
5 | MonoImporter:
6 | serializedVersion: 2
7 | defaultReferences: []
8 | executionOrder: 0
9 | icon: {instanceID: 0}
10 | userData:
11 | assetBundleName:
12 | assetBundleVariant:
13 |
--------------------------------------------------------------------------------
/Plugins/SpeechToText/iOS/SpeechToText.mm:
--------------------------------------------------------------------------------
1 | #import
2 | #import
3 | #import
4 | #import
5 |
// Speech recognition entry points. All of them are class methods; they are called by the
// extern "C" _SpeechToText_* functions at the bottom of this file.
@interface USpeechToText:NSObject
// Recognizer and session lifecycle
+ (int)initialize:(NSString *)language;
+ (int)start:(BOOL)useFreeFormLanguageModel preferOfflineRecognition:(BOOL)preferOfflineRecognition;
+ (void)stop;
+ (void)cancel:(BOOL)isCanceledByUser;

// State queries
+ (int)isLanguageSupported:(NSString *)language;
+ (int)isServiceAvailable:(BOOL)preferOfflineRecognition;
+ (int)isBusy;
+ (float)getAudioRmsdB;

// Permissions
+ (int)checkPermission;
+ (int)requestPermission;
+ (void)openSettings;
@end
19 |
20 | // Credit: https://developer.apple.com/documentation/speech/recognizing_speech_in_live_audio?language=objc
21 | @implementation USpeechToText
22 |
// Language that speechRecognizer was created with (see initialize:)
static NSString *speechRecognizerLanguage = nil;
static SFSpeechRecognizer *speechRecognizer = nil;

// Active session's request and task. isBusy reports 1 while recognitionRequest is not nil
static SFSpeechAudioBufferRecognitionRequest *recognitionRequest = nil;
static SFSpeechRecognitionTask *recognitionTask = nil;

// Error code that is sent to Unity if the recognition task fails: start: resets it to 5,
// onSpeechTimedOut: sets it to 6 and a user-initiated cancel: sets it to 0
static int recognitionTaskErrorCode = 0;

// Stops the session when it fires (see onSpeechTimedOut:)
static NSTimer *recognitionTimeoutTimer = nil;

static AVAudioEngine *audioEngine = nil;

// Latest microphone level, updated by the audio tap that start: installs
static float audioRmsdB = 0;
31 |
// Makes sure that speechRecognizer exists for the given language. A nil or empty language uses
// SFSpeechRecognizer's default initializer instead of an explicit locale.
// Returns 1 if a recognizer is available afterwards; 0 if it couldn't be created, if a session is
// in progress or if the OS is older than iOS 10
+ (int)initialize:(NSString *)language
{
	if( @available(iOS 10.0, *) )
	{
		// Don't touch the recognizer in the middle of a session
		if( [self isBusy] == 1 )
			return 0;

		// The recognizer is recreated only when the language changes
		BOOL isSameLanguage = ( speechRecognizerLanguage != nil && [speechRecognizerLanguage isEqualToString:language] );
		if( !isSameLanguage )
		{
			speechRecognizerLanguage = language;

			[self cancel:NO];

			BOOL hasLanguage = ( language != nil && [language length] > 0 );
			if( hasLanguage )
				speechRecognizer = [[SFSpeechRecognizer alloc] initWithLocale:[NSLocale localeWithLocaleIdentifier:language]];
			else
				speechRecognizer = [[SFSpeechRecognizer alloc] init];
		}

		return ( speechRecognizer != nil ) ? 1 : 0;
	}

	return 0;
}
56 |
// Tries restoring AVAudioSession settings back to the given values; if that fails, falls back to Unity's default values
+ (void)restoreAudioSessionCategory:(AVAudioSessionCategory)category mode:(AVAudioSessionMode)mode options:(NSUInteger)options
{
	NSError *sessionError = nil;
	if( ![[AVAudioSession sharedInstance] setCategory:category mode:mode options:options error:&sessionError] )
	{
		NSLog( @"SpeechToText error (1) setting audio session category back to %@ with mode %@ and options %lu: %@", category, mode, (unsigned long) options, sessionError );

		// It somehow failed. Try restoring AVAudioSession settings back to Unity's default values
		sessionError = nil;
		if( ![[AVAudioSession sharedInstance] setCategory:AVAudioSessionCategoryAmbient mode:AVAudioSessionModeDefault options:AVAudioSessionCategoryOptionMixWithOthers error:&sessionError] )
			NSLog( @"SpeechToText error (2) setting audio session category back to %@ with mode %@ and options %lu: %@", AVAudioSessionCategoryAmbient, AVAudioSessionModeDefault, (unsigned long) AVAudioSessionCategoryOptionMixWithOthers, sessionError );
	}
}

// Starts a speech recognition session with the recognizer created by initialize:. Partial results, the final
// result and errors are sent to Unity's "STTInteractionCallbackiOS" object via UnitySendMessage.
// useFreeFormLanguageModel: YES to use the dictation task hint, NO to use the search task hint
// preferOfflineRecognition: YES to require on-device recognition (applied on iOS 13+ only)
// Returns 1 if the session has started, 0 otherwise
+ (int)start:(BOOL)useFreeFormLanguageModel preferOfflineRecognition:(BOOL)preferOfflineRecognition
{
	if( [self isServiceAvailable:preferOfflineRecognition] == 0 || [self isBusy] == 1 || [self requestPermission] != 1 )
		return 0;

	// Cancel the previous task if it's running
	[self cancel:NO];

	// Cache the current AVAudioSession settings so that they can be restored after the microphone session
	AVAudioSession *audioSession = [AVAudioSession sharedInstance];
	AVAudioSessionCategory unityAudioSessionCategory = [audioSession category];
	NSUInteger unityAudioSessionCategoryOptions = [audioSession categoryOptions];
	AVAudioSessionMode unityAudioSessionMode = [audioSession mode];

	// Failures are logged but don't abort the session (the session was attempted regardless of these results before, too)
	NSError *audioSessionError = nil;
	if( ![audioSession setCategory:AVAudioSessionCategoryRecord mode:AVAudioSessionModeMeasurement options:AVAudioSessionCategoryOptionDuckOthers error:&audioSessionError] )
		NSLog( @"SpeechToText error setting audio session category for speech recognition: %@", audioSessionError );

	audioSessionError = nil;
	if( ![audioSession setActive:YES withOptions:AVAudioSessionSetActiveOptionNotifyOthersOnDeactivation error:&audioSessionError] )
		NSLog( @"SpeechToText error activating audio session for speech recognition: %@", audioSessionError );

	if( audioEngine == nil )
		audioEngine = [[AVAudioEngine alloc] init];

	AVAudioInputNode *inputNode = audioEngine.inputNode;
	if( inputNode == nil )
	{
		NSLog( @"Couldn't get AVAudioInputNode for speech recognition" );

		// The session won't start, so don't leave the audio session in recording mode
		[self restoreAudioSessionCategory:unityAudioSessionCategory mode:unityAudioSessionMode options:unityAudioSessionCategoryOptions];
		return 0;
	}

	audioRmsdB = 0;

	recognitionRequest = [[SFSpeechAudioBufferRecognitionRequest alloc] init];
	if( recognitionRequest == nil )
	{
		NSLog( @"Couldn't create an instance of SFSpeechAudioBufferRecognitionRequest for speech recognition" );

		// The session won't start, so don't leave the audio session in recording mode
		[self restoreAudioSessionCategory:unityAudioSessionCategory mode:unityAudioSessionMode options:unityAudioSessionCategoryOptions];
		return 0;
	}

	speechRecognizer.defaultTaskHint = useFreeFormLanguageModel ? SFSpeechRecognitionTaskHintDictation : SFSpeechRecognitionTaskHintSearch;
	recognitionRequest.shouldReportPartialResults = YES;
#if __IPHONE_OS_VERSION_MAX_ALLOWED >= 130000
	if( @available(iOS 13.0, *) )
	{
		if( preferOfflineRecognition )
			recognitionRequest.requiresOnDeviceRecognition = YES;
	}
#endif

	// Default error code; onSpeechTimedOut: and a user-initiated cancel: replace it with their own codes
	recognitionTaskErrorCode = 5;
	recognitionTask = [speechRecognizer recognitionTaskWithRequest:recognitionRequest resultHandler:^( SFSpeechRecognitionResult *result, NSError *error )
	{
		// Messages are passed to UnitySendMessage as autoreleased UTF8 strings; a malloc'ed copy (getCString:)
		// would leak on each call because UnitySendMessage doesn't take ownership of the message
		BOOL isFinal = NO;
		if( result != nil )
		{
			isFinal = result.isFinal;

			NSString *spokenText = result.bestTranscription.formattedString;
			UnitySendMessage( "STTInteractionCallbackiOS", isFinal ? "OnResultReceived" : "OnPartialResultReceived", [( spokenText != nil ? spokenText : @"" ) UTF8String] );
		}

		if( recognitionTimeoutTimer != nil )
		{
			[recognitionTimeoutTimer invalidate];
			recognitionTimeoutTimer = nil;
		}

		if( error != nil || isFinal )
		{
			if( error != nil )
			{
				NSLog( @"Error during speech recognition: %@", error );

				// If a final result was received, it's already sent to Unity and the error isn't reported
				if( !isFinal )
					UnitySendMessage( "STTInteractionCallbackiOS", "OnError", [[NSString stringWithFormat:@"%d", recognitionTaskErrorCode] UTF8String] );
			}

			[audioEngine stop];
			[inputNode removeTapOnBus:0];

			// isBusy reports 0 from this point on
			recognitionRequest = nil;
			recognitionTask = nil;

			// Try restoring AVAudioSession settings back to their initial values
			[self restoreAudioSessionCategory:unityAudioSessionCategory mode:unityAudioSessionMode options:unityAudioSessionCategoryOptions];
		}
		else
		{
			// Restart the timeout timer
			recognitionTimeoutTimer = [NSTimer scheduledTimerWithTimeInterval:2.0 target:self selector:@selector(onSpeechTimedOut:) userInfo:nil repeats:NO];
		}
	}];

	[inputNode installTapOnBus:0 bufferSize:1024 format:[inputNode outputFormatForBus:0] block:^( AVAudioPCMBuffer *buffer, AVAudioTime *when )
	{
		float voiceLevel = 0.0;
		if( [buffer floatChannelData] != nil && buffer.format.channelCount > 0 && buffer.frameLength > 0 )
			vDSP_rmsqv( (float*) buffer.floatChannelData[0], 1, &voiceLevel, vDSP_Length( buffer.frameLength ) );

		// Convert voice level to dB in range [0, 160]. log10f( 0 ) is -infinity, so complete silence (or an
		// invalid level) is reported as 0 and the result is never allowed to drop below 0
		audioRmsdB = ( voiceLevel > 0 ) ? fmaxf( 10 * log10f( voiceLevel ) + 160, 0 ) : 0;

		[recognitionRequest appendAudioPCMBuffer:buffer];
	}];

	NSError *audioEngineError;
	[audioEngine prepare];
	if( ![audioEngine startAndReturnError:&audioEngineError] )
	{
		if( audioEngineError != nil )
			NSLog( @"Couldn't start AudioEngine for speech recognition: %@", audioEngineError );
		else
			NSLog( @"Couldn't start AudioEngine for speech recognition: UnknownError" );

		// Cleanup (removing the tap, restoring the audio session) is left to the result handler above
		// NOTE(review): this assumes that canceling the task invokes the result handler with an error - confirm
		[recognitionTask cancel];
		return 0;
	}

	// Give the user 5 seconds to start speaking; the result handler shortens this to 2 seconds after each partial result
	recognitionTimeoutTimer = [NSTimer scheduledTimerWithTimeInterval:5.0 target:self selector:@selector(onSpeechTimedOut:) userInfo:nil repeats:NO];

	return 1;
}
184 |
// Called by recognitionTimeoutTimer when it fires: ends the audio input of the session
+ (void)onSpeechTimedOut:(NSTimer *)timer
{
	// Error code 6 is sent to Unity if the recognition task fails from now on
	recognitionTaskErrorCode = 6;

	// The timer doesn't repeat, so it's finished at this point
	recognitionTimeoutTimer = nil;

	[self stop];
}
192 |
// Stops the audio engine (if it's running) and tells the recognition request that there will be no more audio
+ (void)stop
{
	if( @available(iOS 10.0, *) )
	{
		BOOL isRecording = ( audioEngine != nil && audioEngine.isRunning );
		if( !isRecording )
			return;

		[audioEngine stop];
		[recognitionRequest endAudio];
	}
}
204 |
// Cancels the active recognition task, if there is one
// isCanceledByUser: YES to set the error code that will be sent to Unity to 0
+ (void)cancel:(BOOL)isCanceledByUser
{
	if( @available(iOS 10.0, *) )
	{
		// Nothing to cancel
		if( recognitionTask == nil )
			return;

		if( isCanceledByUser )
			recognitionTaskErrorCode = 0;

		[recognitionTask cancel];
		recognitionTask = nil;
	}
}
219 |
// Returns 1 if SFSpeechRecognizer's supported locales contain the locale with the given identifier, 0 otherwise
// (always 0 on versions older than iOS 10)
+ (int)isLanguageSupported:(NSString *)language
{
	if( @available(iOS 10.0, *) )
	{
		NSSet<NSLocale *> *supportedLocales = [SFSpeechRecognizer supportedLocales];
		NSLocale *locale = [NSLocale localeWithLocaleIdentifier:language];
		if( [supportedLocales containsObject:locale] )
			return 1;
	}

	return 0;
}
227 |
// Returns 1 if the recognizer created by initialize: is currently available, 0 otherwise.
// preferOfflineRecognition: YES to also require on-device recognition support, which can be
// satisfied only on iOS 13+ (and only if the plugin is built with the iOS 13+ SDK)
+ (int)isServiceAvailable:(BOOL)preferOfflineRecognition
{
	if( @available(iOS 10.0, *) )
	{
		if( speechRecognizer == nil || ![speechRecognizer isAvailable] )
			return 0;

		if( !preferOfflineRecognition )
			return 1;

#if __IPHONE_OS_VERSION_MAX_ALLOWED >= 130000
		if( @available(iOS 13.0, *) )
			return [speechRecognizer supportsOnDeviceRecognition] ? 1 : 0;
#endif
	}

	return 0;
}
245 |
// Returns 1 while there is a recognition request (i.e. a session is in progress), 0 otherwise
+ (int)isBusy
{
	if( @available(iOS 10.0, *) )
	{
		if( recognitionRequest != nil )
			return 1;
	}

	return 0;
}

// Returns the latest microphone level calculated by the audio tap that start: installs
+ (float)getAudioRmsdB
{
	return audioRmsdB;
}
258 |
// Combines the speech recognition and microphone permissions into a single value:
// 1: both are granted, 0: at least one of them is denied, 2: otherwise (i.e. at least one
// of them isn't determined yet and none is denied). Always 0 on versions older than iOS 10
+ (int)checkPermission
{
	if( @available(iOS 10.0, *) )
	{
		int speechRecognitionPermission = [self checkSpeechRecognitionPermission];
		int microphonePermission = [self checkMicrophonePermission];

		if( speechRecognitionPermission == 0 || microphonePermission == 0 )
			return 0;

		BOOL isFullyGranted = ( speechRecognitionPermission == 1 && microphonePermission == 1 );
		return isFullyGranted ? 1 : 2;
	}

	return 0;
}
273 |
// Returns 1 if speech recognition is authorized, 2 if the user hasn't been asked yet and 0 otherwise
+ (int)checkSpeechRecognitionPermission
{
	switch( [SFSpeechRecognizer authorizationStatus] )
	{
		case SFSpeechRecognizerAuthorizationStatusAuthorized:
			return 1;
		case SFSpeechRecognizerAuthorizationStatusNotDetermined:
			return 2;
		default:
			return 0;
	}
}
284 |
// Returns 1 if the microphone permission is granted, 2 if the user hasn't been asked yet and 0 otherwise.
// AVAudioApplication is queried on iOS 17+ (if built with the iOS 17+ SDK), AVAudioSession is queried otherwise
+ (int)checkMicrophonePermission
{
#if __IPHONE_OS_VERSION_MAX_ALLOWED >= 170000
	if( @available(iOS 17.0, *) )
	{
		switch( [[AVAudioApplication sharedInstance] recordPermission] )
		{
			case AVAudioApplicationRecordPermissionGranted:
				return 1;
			case AVAudioApplicationRecordPermissionUndetermined:
				return 2;
			default:
				return 0;
		}
	}
#endif

	switch( [[AVAudioSession sharedInstance] recordPermission] )
	{
		case AVAudioSessionRecordPermissionGranted:
			return 1;
		case AVAudioSessionRecordPermissionUndetermined:
			return 2;
		default:
			return 0;
	}
}
308 |
// Reports the combined speech recognition + microphone permission to Unity's "STTPermissionCallbackiOS" object.
// - If checkPermission doesn't return 2, that value is sent to Unity immediately and also returned
// - Otherwise, the permissions are requested from the user one after the other, -1 is returned and
//   "1" (both are granted) or "0" is sent to Unity when the requests are completed
+ (int)requestPermission
{
	int currentPermission = [self checkPermission];
	if( currentPermission != 2 )
	{
		// The message is passed as an autoreleased UTF8 string; a malloc'ed copy (getCString:) would leak
		// because UnitySendMessage doesn't take ownership of the message (note the string literals below)
		UnitySendMessage( "STTPermissionCallbackiOS", "OnPermissionRequested", [[NSString stringWithFormat:@"%d", currentPermission] UTF8String] );
		return currentPermission;
	}

	// Request Speech Recognition permission first
	[SFSpeechRecognizer requestAuthorization:^( SFSpeechRecognizerAuthorizationStatus status )
	{
		// Request Microphone permission immediately afterwards
#if __IPHONE_OS_VERSION_MAX_ALLOWED >= 170000
		// For some reason, requestRecordPermissionWithCompletionHandler function couldn't be found in AVAudioApplication while writing this code. Uncomment when it's fixed by Apple.
		/*if( @available(iOS 17.0, *) )
		{
			[[AVAudioApplication sharedInstance] requestRecordPermissionWithCompletionHandler:^( BOOL granted )
			{
				UnitySendMessage( "STTPermissionCallbackiOS", "OnPermissionRequested", ( granted && status == SFSpeechRecognizerAuthorizationStatusAuthorized ) ? "1" : "0" );
			}];
		}
		else*/
#endif
		{
			[[AVAudioSession sharedInstance] requestRecordPermission:^( BOOL granted )
			{
				UnitySendMessage( "STTPermissionCallbackiOS", "OnPermissionRequested", ( granted && status == SFSpeechRecognizerAuthorizationStatusAuthorized ) ? "1" : "0" );
			}];
		}
	}];

	// The result will be reported asynchronously
	return -1;
}
343 |
#pragma clang diagnostic push
#pragma clang diagnostic ignored "-Wdeprecated-declarations"
// Opens the URL in UIApplicationOpenSettingsURLString. The deprecated single-argument openURL: is
// used only when the newer variant isn't available, hence the suppressed deprecation warning
+ (void)openSettings
{
	UIApplication *application = [UIApplication sharedApplication];
	NSURL *settingsURL = [NSURL URLWithString:UIApplicationOpenSettingsURLString];

#if __IPHONE_OS_VERSION_MAX_ALLOWED >= 100000
	if( @available(iOS 10.0, *) )
	{
		[application openURL:settingsURL options:@{} completionHandler:nil];
		return;
	}
#endif

	[application openURL:settingsURL];
}
#pragma clang diagnostic pop
356 |
// Credit: https://stackoverflow.com/a/37052118/2373034
// Returns a malloc'ed, null-terminated UTF8 copy of the string (an empty C string if source is nil)
+ (char *)getCString:(NSString *)source
{
	const char *utf8Source = ( source != nil ) ? [source UTF8String] : "";

	// +1 for the null terminator, which is copied together with the characters
	size_t bufferSize = strlen( utf8Source ) + 1;
	char *copy = (char*) malloc( bufferSize );
	memcpy( copy, utf8Source, bufferSize );

	return copy;
}
369 |
370 | @end
371 |
// C entry point of [USpeechToText initialize:]
// language: UTF8 locale identifier. NULL (or a string that isn't valid UTF8) is forwarded as nil,
// which initialize: treats the same way as an empty language
extern "C" int _SpeechToText_Initialize( const char* language )
{
	// +stringWithUTF8String: raises an exception if it's passed NULL, so NULL must be handled here
	NSString *languageStr = ( language != NULL ) ? [NSString stringWithUTF8String:language] : nil;
	return [USpeechToText initialize:languageStr];
}
376 |
// C entry point of [USpeechToText start:preferOfflineRecognition:]; only the value 1 is treated as YES
extern "C" int _SpeechToText_Start( int useFreeFormLanguageModel, int preferOfflineRecognition )
{
	BOOL isFreeForm = ( useFreeFormLanguageModel == 1 );
	BOOL isOfflinePreferred = ( preferOfflineRecognition == 1 );

	return [USpeechToText start:isFreeForm preferOfflineRecognition:isOfflinePreferred];
}

// C entry point of [USpeechToText stop]
extern "C" void _SpeechToText_Stop()
{
	[USpeechToText stop];
}

// C entry point of [USpeechToText cancel:]; the cancellation is always reported as user-initiated
extern "C" void _SpeechToText_Cancel()
{
	BOOL isCanceledByUser = YES;
	[USpeechToText cancel:isCanceledByUser];
}
391 |
// C entry point of [USpeechToText isLanguageSupported:]
// language: UTF8 locale identifier. NULL (or a string that isn't valid UTF8) is reported as unsupported (0)
extern "C" int _SpeechToText_IsLanguageSupported( const char* language )
{
	// +stringWithUTF8String: raises an exception if it's passed NULL, so NULL must be handled here
	NSString *languageStr = ( language != NULL ) ? [NSString stringWithUTF8String:language] : nil;

	// A locale can't be created from a nil identifier
	if( languageStr == nil )
		return 0;

	return [USpeechToText isLanguageSupported:languageStr];
}
396 |
// C entry point of [USpeechToText isServiceAvailable:]; only the value 1 is treated as YES
extern "C" int _SpeechToText_IsServiceAvailable( int preferOfflineRecognition )
{
	BOOL isOfflinePreferred = ( preferOfflineRecognition == 1 );
	return [USpeechToText isServiceAvailable:isOfflinePreferred];
}

// C entry point of [USpeechToText isBusy]
extern "C" int _SpeechToText_IsBusy()
{
	return [USpeechToText isBusy];
}

// C entry point of [USpeechToText getAudioRmsdB]
extern "C" float _SpeechToText_GetAudioRmsdB()
{
	return [USpeechToText getAudioRmsdB];
}

// C entry point of [USpeechToText checkPermission]
extern "C" int _SpeechToText_CheckPermission()
{
	return [USpeechToText checkPermission];
}

// C entry point of [USpeechToText requestPermission]; its return value is discarded
extern "C" void _SpeechToText_RequestPermission()
{
	(void) [USpeechToText requestPermission];
}

// C entry point of [USpeechToText openSettings]
extern "C" void _SpeechToText_OpenSettings()
{
	[USpeechToText openSettings];
}
--------------------------------------------------------------------------------
/Plugins/SpeechToText/iOS/SpeechToText.mm.meta:
--------------------------------------------------------------------------------
1 | fileFormatVersion: 2
2 | guid: d93e932981dab7a428ad55354139ffa8
3 | PluginImporter:
4 | externalObjects: {}
5 | serializedVersion: 2
6 | iconMap: {}
7 | executionOrder: {}
8 | defineConstraints: []
9 | isPreloaded: 0
10 | isOverridable: 0
11 | isExplicitlyReferenced: 0
12 | validateReferences: 1
13 | platformData:
14 | - first:
15 | Any:
16 | second:
17 | enabled: 0
18 | settings: {}
19 | - first:
20 | Editor: Editor
21 | second:
22 | enabled: 0
23 | settings:
24 | DefaultValueInitialized: true
25 | - first:
26 | iPhone: iOS
27 | second:
28 | enabled: 1
29 | settings: {}
30 | userData:
31 | assetBundleName:
32 | assetBundleVariant:
33 |
--------------------------------------------------------------------------------
/package.json:
--------------------------------------------------------------------------------
1 | {
2 | "name": "com.yasirkula.speechtotext",
3 | "displayName": "Speech to Text",
4 | "version": "1.1.1",
5 | "documentationUrl": "https://github.com/yasirkula/UnitySpeechToText",
6 | "changelogUrl": "https://github.com/yasirkula/UnitySpeechToText/releases",
7 | "licensesUrl": "https://github.com/yasirkula/UnitySpeechToText/blob/master/LICENSE.txt",
8 | "description": "This plugin helps you convert speech to text on Android (all versions) and iOS 10+."
9 | }
10 |
--------------------------------------------------------------------------------
/package.json.meta:
--------------------------------------------------------------------------------
1 | fileFormatVersion: 2
2 | guid: d5494445853f3f442a20d5d7338250db
3 | timeCreated: 1697647234
4 | licenseType: Free
5 | TextScriptImporter:
6 | userData:
7 | assetBundleName:
8 | assetBundleVariant:
9 |
--------------------------------------------------------------------------------