├── plugin ├── tsconfig.tsbuildinfo ├── tsconfig.json └── src │ └── withSpeechTranscriber.ts ├── example ├── assets │ ├── icon.png │ ├── favicon.png │ ├── splash-icon.png │ └── adaptive-icon.png ├── babel.config.js ├── tsconfig.json ├── index.ts ├── webpack.config.js ├── .gitignore ├── package.json ├── metro.config.js ├── app.json ├── BufferTranscriptionExample.tsx ├── RecordRealTimeAndTranscribe.tsx └── App.tsx ├── expo-speech-transcriber-0.1.0.tgz ├── app.plugin.js ├── .eslintrc.js ├── expo-module.config.json ├── tsconfig.json ├── .npmignore ├── android ├── src │ └── main │ │ ├── AndroidManifest.xml │ │ └── java │ │ └── expo │ │ └── modules │ │ └── speechtranscriber │ │ └── ExpoSpeechTranscriberModule.kt └── build.gradle ├── src ├── ExpoSpeechTranscriberModule.web.ts ├── ExpoSpeechTranscriberModule.ts ├── ExpoSpeechTranscriber.types.ts └── index.ts ├── .gitignore ├── ios ├── ExpoSpeechTranscriber.podspec └── ExpoSpeechTranscriberModule.swift ├── LICENSE ├── package.json └── README.md /plugin/tsconfig.tsbuildinfo: -------------------------------------------------------------------------------- 1 | {"root":["./src/withspeechtranscriber.ts"],"version":"5.9.3"} -------------------------------------------------------------------------------- /example/assets/icon.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DaveyEke/expo-speech-transcriber/HEAD/example/assets/icon.png -------------------------------------------------------------------------------- /example/assets/favicon.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DaveyEke/expo-speech-transcriber/HEAD/example/assets/favicon.png -------------------------------------------------------------------------------- /example/assets/splash-icon.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DaveyEke/expo-speech-transcriber/HEAD/example/assets/splash-icon.png -------------------------------------------------------------------------------- /example/assets/adaptive-icon.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DaveyEke/expo-speech-transcriber/HEAD/example/assets/adaptive-icon.png -------------------------------------------------------------------------------- /expo-speech-transcriber-0.1.0.tgz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DaveyEke/expo-speech-transcriber/HEAD/expo-speech-transcriber-0.1.0.tgz -------------------------------------------------------------------------------- /app.plugin.js: -------------------------------------------------------------------------------- 1 | // This file configures the entry file for your plugin. 
2 | module.exports = require('./plugin/build/withSpeechTranscriber'); 3 | -------------------------------------------------------------------------------- /.eslintrc.js: -------------------------------------------------------------------------------- 1 | module.exports = { 2 | root: true, 3 | extends: ['universe/native', 'universe/web'], 4 | ignorePatterns: ['build'], 5 | }; 6 | -------------------------------------------------------------------------------- /example/babel.config.js: -------------------------------------------------------------------------------- 1 | module.exports = function (api) { 2 | api.cache(true); 3 | return { 4 | presets: ['babel-preset-expo'], 5 | }; 6 | }; 7 | -------------------------------------------------------------------------------- /plugin/tsconfig.json: -------------------------------------------------------------------------------- 1 | { 2 | "extends": "expo-module-scripts/tsconfig.plugin", 3 | "compilerOptions": { 4 | "outDir": "build", 5 | "rootDir": "src" 6 | }, 7 | "include": ["./src"], 8 | "exclude": ["**/__mocks__/*", "**/__tests__/*"] 9 | } 10 | -------------------------------------------------------------------------------- /example/tsconfig.json: -------------------------------------------------------------------------------- 1 | { 2 | "extends": "expo/tsconfig.base", 3 | "compilerOptions": { 4 | "strict": true, 5 | "paths": { 6 | "expo-speech-transcriber": ["../src/index"], 7 | "expo-speech-transcriber/*": ["../src/*"] 8 | } 9 | } 10 | } 11 | -------------------------------------------------------------------------------- /expo-module.config.json: -------------------------------------------------------------------------------- 1 | { 2 | "platforms": ["apple", "android", "web"], 3 | "apple": { 4 | "modules": ["ExpoSpeechTranscriberModule"] 5 | }, 6 | "android": { 7 | "modules": ["expo.modules.speechtranscriber.ExpoSpeechTranscriberModule"] 8 | } 9 | } 10 | -------------------------------------------------------------------------------- /tsconfig.json: -------------------------------------------------------------------------------- 1 | // @generated by expo-module-scripts 2 | { 3 | "extends": "expo-module-scripts/tsconfig.base", 4 | "compilerOptions": { 5 | "outDir": "./build" 6 | }, 7 | "include": ["./src"], 8 | "exclude": ["**/__mocks__/*", "**/__tests__/*", "**/__rsc_tests__/*"] 9 | } 10 | -------------------------------------------------------------------------------- /.npmignore: -------------------------------------------------------------------------------- 1 | # Exclude all top-level hidden directories by convention 2 | /.*/ 3 | 4 | # Exclude tarballs generated by `npm pack` 5 | /*.tgz 6 | 7 | __mocks__ 8 | __tests__ 9 | 10 | /babel.config.js 11 | /android/src/androidTest/ 12 | /android/src/test/ 13 | /android/build/ 14 | /example/ 15 | -------------------------------------------------------------------------------- /android/src/main/AndroidManifest.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | -------------------------------------------------------------------------------- /example/index.ts: -------------------------------------------------------------------------------- 1 | import { registerRootComponent } from 'expo'; 2 | 3 | import App from './RecordRealTimeAndTranscribe'; //To see the demo for buffer based transcription, import BufferTranscriptionExample. 
That is, import App from './BufferTranscriptionExample 4 | 5 | 6 | // registerRootComponent calls AppRegistry.registerComponent('main', () => App); 7 | // It also ensures that whether you load the app in Expo Go or in a native build, 8 | // the environment is set up appropriately 9 | registerRootComponent(App); 10 | -------------------------------------------------------------------------------- /example/webpack.config.js: -------------------------------------------------------------------------------- 1 | const createConfigAsync = require('@expo/webpack-config'); 2 | const path = require('path'); 3 | 4 | module.exports = async (env, argv) => { 5 | const config = await createConfigAsync( 6 | { 7 | ...env, 8 | babel: { 9 | dangerouslyAddModulePathsToTranspile: ['expo-speech-transcriber'], 10 | }, 11 | }, 12 | argv 13 | ); 14 | config.resolve.modules = [ 15 | path.resolve(__dirname, './node_modules'), 16 | path.resolve(__dirname, '../node_modules'), 17 | ]; 18 | 19 | return config; 20 | }; 21 | -------------------------------------------------------------------------------- /src/ExpoSpeechTranscriberModule.web.ts: -------------------------------------------------------------------------------- 1 | // import { registerWebModule, NativeModule } from 'expo'; 2 | // import { ExpoSpeechTranscriberModuleEvents } from './ExpoSpeechTranscriber.types'; 3 | 4 | // class ExpoSpeechTranscriberModule extends NativeModule { 5 | // PI = Math.PI; 6 | // async setValueAsync(value: string): Promise { 7 | // this.emit('onChange', { value }); 8 | // } 9 | // hello() { 10 | // return 'Hello world! 👋'; 11 | // } 12 | // } 13 | 14 | // export default registerWebModule(ExpoSpeechTranscriberModule, 'ExpoSpeechTranscriberModule'); 15 | -------------------------------------------------------------------------------- /example/.gitignore: -------------------------------------------------------------------------------- 1 | # Learn more https://docs.github.com/en/get-started/getting-started-with-git/ignoring-files 2 | 3 | # dependencies 4 | node_modules/ 5 | 6 | # Expo 7 | .expo/ 8 | dist/ 9 | web-build/ 10 | expo-env.d.ts 11 | 12 | # Native 13 | .kotlin/ 14 | *.orig.* 15 | *.jks 16 | *.p8 17 | *.p12 18 | *.key 19 | *.mobileprovision 20 | 21 | # Metro 22 | .metro-health-check* 23 | 24 | # debug 25 | npm-debug.* 26 | yarn-debug.* 27 | yarn-error.* 28 | 29 | # macOS 30 | .DS_Store 31 | *.pem 32 | 33 | # local env files 34 | .env*.local 35 | 36 | # typescript 37 | *.tsbuildinfo 38 | 39 | # generated native folders 40 | /ios 41 | /android 42 | -------------------------------------------------------------------------------- /example/package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "expo-speech-transcriber-example", 3 | "version": "1.0.0", 4 | "main": "index.ts", 5 | "scripts": { 6 | "start": "expo start", 7 | "android": "expo run:android", 8 | "ios": "expo run:ios", 9 | "web": "expo start --web" 10 | }, 11 | "dependencies": { 12 | "expo": "~54.0.25", 13 | "expo-audio": "~1.0.15", 14 | "expo-linking": "^8.0.9", 15 | "react": "19.1.0", 16 | "react-native": "0.81.5", 17 | "react-native-audio-api": "^0.10.1" 18 | }, 19 | "devDependencies": { 20 | "@types/react": "~19.1.0", 21 | "typescript": "~5.9.2" 22 | }, 23 | "private": true, 24 | "expo": { 25 | "autolinking": { 26 | "nativeModulesDir": ".." 
27 | } 28 | } 29 | } 30 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # OSX 2 | # 3 | .DS_Store 4 | 5 | # VSCode 6 | .vscode/ 7 | jsconfig.json 8 | 9 | # Xcode 10 | # 11 | build/ 12 | *.pbxuser 13 | !default.pbxuser 14 | *.mode1v3 15 | !default.mode1v3 16 | *.mode2v3 17 | !default.mode2v3 18 | *.perspectivev3 19 | !default.perspectivev3 20 | xcuserdata 21 | *.xccheckout 22 | *.moved-aside 23 | DerivedData 24 | *.hmap 25 | *.ipa 26 | *.xcuserstate 27 | project.xcworkspace 28 | 29 | # Android/IJ 30 | # 31 | .classpath 32 | .cxx 33 | .gradle 34 | .idea 35 | .project 36 | .settings 37 | local.properties 38 | android.iml 39 | android/app/libs 40 | android/keystores/debug.keystore 41 | 42 | # Cocoapods 43 | # 44 | example/ios/Pods 45 | 46 | # Ruby 47 | example/vendor/ 48 | 49 | # node.js 50 | # 51 | node_modules/ 52 | npm-debug.log 53 | yarn-debug.log 54 | yarn-error.log 55 | 56 | # Expo 57 | .expo/* 58 | -------------------------------------------------------------------------------- /ios/ExpoSpeechTranscriber.podspec: -------------------------------------------------------------------------------- 1 | require 'json' 2 | 3 | package = JSON.parse(File.read(File.join(__dir__, '..', 'package.json'))) 4 | 5 | Pod::Spec.new do |s| 6 | s.name = 'ExpoSpeechTranscriber' 7 | s.version = package['version'] 8 | s.summary = package['description'] 9 | s.description = package['description'] 10 | s.license = package['license'] 11 | s.author = package['author'] 12 | s.homepage = package['homepage'] 13 | s.platforms = { 14 | :ios => '15.1', 15 | :tvos => '15.1' 16 | } 17 | s.swift_version = '5.9' 18 | s.source = { git: 'https://github.com/DaveyEke/expo-speech-transcriber' } 19 | s.static_framework = true 20 | 21 | s.dependency 'ExpoModulesCore' 22 | 23 | # Swift/Objective-C compatibility 24 | s.pod_target_xcconfig = { 25 | 'DEFINES_MODULE' => 'YES', 26 | } 27 | 28 | s.source_files = "**/*.{h,m,mm,swift,hpp,cpp}" 29 | end 30 | -------------------------------------------------------------------------------- /src/ExpoSpeechTranscriberModule.ts: -------------------------------------------------------------------------------- 1 | import { requireNativeModule, NativeModule } from 'expo-modules-core'; 2 | import type { 3 | ExpoSpeechTranscriberModuleEvents, 4 | PermissionTypes, 5 | MicrophonePermissionTypes 6 | } from './ExpoSpeechTranscriber.types'; 7 | 8 | declare class ExpoSpeechTranscriberNative extends NativeModule { 9 | recordRealTimeAndTranscribe(): Promise; 10 | stopListening(): void; 11 | transcribeAudioWithSFRecognizer(audioFilePath: string): Promise; 12 | transcribeAudioWithAnalyzer(audioFilePath: string): Promise; 13 | requestPermissions(): Promise; 14 | requestMicrophonePermissions(): Promise; 15 | isRecording(): boolean; 16 | isAnalyzerAvailable(): boolean; 17 | } 18 | 19 | const ExpoSpeechTranscriberModule = 20 | requireNativeModule('ExpoSpeechTranscriber'); 21 | 22 | export default ExpoSpeechTranscriberModule; 23 | -------------------------------------------------------------------------------- /src/ExpoSpeechTranscriber.types.ts: -------------------------------------------------------------------------------- 1 | import type { StyleProp, ViewStyle } from 'react-native'; 2 | 3 | export type OnLoadEventPayload = { 4 | url: string; 5 | }; 6 | 7 | export type TranscriptionProgressPayload = { 8 | text: string; 9 | isFinal: boolean; 10 | }; 11 | 12 | 13 | export type 
TranscriptionErrorPayload = { 14 | error: string; 15 | }; 16 | 17 | export type ExpoSpeechTranscriberModuleEvents = { 18 | onTranscriptionProgress(payload: TranscriptionProgressPayload): void; 19 | onTranscriptionError(payload: TranscriptionErrorPayload): void; 20 | }; 21 | 22 | export type ChangeEventPayload = { 23 | value: string; 24 | }; 25 | 26 | 27 | export type PermissionTypes = 'authorized' | 'denied' | 'restricted' | 'notDetermined'; 28 | 29 | export type MicrophonePermissionTypes = 'granted' | 'denied' 30 | 31 | export type ExpoSpeechTranscriberViewProps = { 32 | url: string; 33 | onLoad: (event: { nativeEvent: OnLoadEventPayload }) => void; 34 | style?: StyleProp; 35 | }; 36 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2025 Dave Mkpa Eke 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
-------------------------------------------------------------------------------- /plugin/src/withSpeechTranscriber.ts: -------------------------------------------------------------------------------- 1 | import { ConfigPlugin, IOSConfig, AndroidConfig } from 'expo/config-plugins'; 2 | 3 | const SPEECH_RECOGNITION_USAGE = 'Allow $(PRODUCT_NAME) to use speech recognition to transcribe audio'; 4 | const MICROPHONE_USAGE = 'Allow $(PRODUCT_NAME) to access your microphone'; 5 | 6 | const withSpeechTranscriber: ConfigPlugin<{ speechRecognitionPermission?: string | false; microphonePermission?: string | false } | void> = ( 7 | config, 8 | { speechRecognitionPermission, microphonePermission } = {} 9 | ) => { 10 | config = IOSConfig.Permissions.createPermissionsPlugin({ 11 | NSSpeechRecognitionUsageDescription: SPEECH_RECOGNITION_USAGE, 12 | NSMicrophoneUsageDescription: MICROPHONE_USAGE, 13 | })(config, { 14 | NSSpeechRecognitionUsageDescription: speechRecognitionPermission, 15 | NSMicrophoneUsageDescription: microphonePermission, 16 | }); 17 | 18 | if (microphonePermission !== false) { 19 | config = AndroidConfig.Permissions.withPermissions(config, [ 20 | 'android.permission.RECORD_AUDIO', 21 | ]); 22 | } 23 | 24 | return config; 25 | }; 26 | 27 | export default withSpeechTranscriber; -------------------------------------------------------------------------------- /example/metro.config.js: -------------------------------------------------------------------------------- 1 | // Learn more https://docs.expo.io/guides/customizing-metro 2 | const { getDefaultConfig } = require('expo/metro-config'); 3 | const path = require('path'); 4 | 5 | const config = getDefaultConfig(__dirname); 6 | 7 | // npm v7+ will install ../node_modules/react and ../node_modules/react-native because of peerDependencies. 8 | // To prevent the incompatible react-native between ./node_modules/react-native and ../node_modules/react-native, 9 | // excludes the one from the parent folder when bundling. 10 | config.resolver.blockList = [ 11 | ...Array.from(config.resolver.blockList ?? 
[]), 12 | new RegExp(path.resolve('..', 'node_modules', 'react')), 13 | new RegExp(path.resolve('..', 'node_modules', 'react-native')), 14 | ]; 15 | 16 | config.resolver.nodeModulesPaths = [ 17 | path.resolve(__dirname, './node_modules'), 18 | path.resolve(__dirname, '../node_modules'), 19 | ]; 20 | 21 | config.resolver.extraNodeModules = { 22 | 'expo-speech-transcriber': '..', 23 | }; 24 | 25 | config.watchFolders = [path.resolve(__dirname, '..')]; 26 | 27 | config.transformer.getTransformOptions = async () => ({ 28 | transform: { 29 | experimentalImportSupport: false, 30 | inlineRequires: true, 31 | }, 32 | }); 33 | 34 | module.exports = config; 35 | -------------------------------------------------------------------------------- /example/app.json: -------------------------------------------------------------------------------- 1 | { 2 | "expo": { 3 | "name": "expo-speech-transcriber-example", 4 | "slug": "expo-speech-transcriber-example", 5 | "version": "1.0.0", 6 | "orientation": "portrait", 7 | "icon": "./assets/icon.png", 8 | "userInterfaceStyle": "light", 9 | "newArchEnabled": true, 10 | "splash": { 11 | "image": "./assets/splash-icon.png", 12 | "resizeMode": "contain", 13 | "backgroundColor": "#ffffff" 14 | }, 15 | "ios": { 16 | "supportsTablet": true, 17 | "bundleIdentifier": "expo.modules.speechtranscriber.example" 18 | }, 19 | "android": { 20 | "adaptiveIcon": { 21 | "foregroundImage": "./assets/adaptive-icon.png", 22 | "backgroundColor": "#ffffff" 23 | }, 24 | "edgeToEdgeEnabled": true, 25 | "predictiveBackGestureEnabled": false, 26 | "package": "expo.modules.speechtranscriber.example" 27 | }, 28 | "web": { 29 | "favicon": "./assets/favicon.png" 30 | }, 31 | "plugins": [ 32 | "../app.plugin.js", 33 | "expo-audio", 34 | [ 35 | "react-native-audio-api", 36 | { 37 | "iosBackgroundMode": true, 38 | "iosMicrophonePermission": "This app requires access to the microphone to record audio." 39 | } 40 | ] 41 | ] 42 | } 43 | } 44 | -------------------------------------------------------------------------------- /android/build.gradle: -------------------------------------------------------------------------------- 1 | apply plugin: 'com.android.library' 2 | 3 | group = 'expo.modules.speechtranscriber' 4 | version = '0.1.0' 5 | 6 | def expoModulesCorePlugin = new File(project(":expo-modules-core").projectDir.absolutePath, "ExpoModulesCorePlugin.gradle") 7 | apply from: expoModulesCorePlugin 8 | applyKotlinExpoModulesCorePlugin() 9 | useCoreDependencies() 10 | useExpoPublishing() 11 | 12 | // If you want to use the managed Android SDK versions from expo-modules-core, set this to true. 13 | // The Android SDK versions will be bumped from time to time in SDK releases and may introduce breaking changes in your module code. 14 | // Most of the time, you may like to manage the Android SDK versions yourself. 15 | def useManagedAndroidSdkVersions = false 16 | if (useManagedAndroidSdkVersions) { 17 | useDefaultAndroidSdkVersions() 18 | } else { 19 | buildscript { 20 | // Simple helper that allows the root project to override versions declared by this library. 21 | ext.safeExtGet = { prop, fallback -> 22 | rootProject.ext.has(prop) ? 
rootProject.ext.get(prop) : fallback 23 | } 24 | } 25 | project.android { 26 | compileSdkVersion safeExtGet("compileSdkVersion", 36) 27 | defaultConfig { 28 | minSdkVersion safeExtGet("minSdkVersion", 24) 29 | targetSdkVersion safeExtGet("targetSdkVersion", 36) 30 | } 31 | } 32 | } 33 | 34 | android { 35 | namespace "expo.modules.speechtranscriber" 36 | defaultConfig { 37 | versionCode 1 38 | versionName "0.1.0" 39 | } 40 | lintOptions { 41 | abortOnError false 42 | } 43 | } 44 | -------------------------------------------------------------------------------- /package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "expo-speech-transcriber", 3 | "version": "0.1.9", 4 | "description": "An iOS only on-device transcription library for React Native and Expo apps.", 5 | "main": "build/index.js", 6 | "types": "build/index.d.ts", 7 | "scripts": { 8 | "build": "expo-module build", 9 | "clean": "expo-module clean", 10 | "lint": "expo-module lint", 11 | "test": "expo-module test", 12 | "prepare": "expo-module prepare", 13 | "prepublishOnly": "expo-module prepublishOnly", 14 | "expo-module": "expo-module", 15 | "open:ios": "xed example/ios", 16 | "open:android": "open -a \"Android Studio\" example/android" 17 | }, 18 | "keywords": [ 19 | "react-native", 20 | "expo", 21 | "expo-speech-transcriber", 22 | "ExpoSpeechTranscriber", 23 | "transcription", 24 | "speech-to-text", 25 | "ios", 26 | "siri", 27 | "voice", 28 | "audio", 29 | "recognition" 30 | ], 31 | "repository": "https://github.com/DaveyEke/expo-speech-transcriber", 32 | "bugs": { 33 | "url": "https://github.com/DaveyEke/expo-speech-transcriber/issues" 34 | }, 35 | "author": "Dave Mkpa Eke (https://github.com/DaveyEke)", 36 | "license": "MIT", 37 | "homepage": "https://github.com/DaveyEke/expo-speech-transcriber#readme", 38 | "dependencies": {}, 39 | "devDependencies": { 40 | "@types/react": "~19.1.0", 41 | "expo-module-scripts": "^5.0.7", 42 | "expo": "^54.0.18", 43 | "react-native": "0.81.5" 44 | }, 45 | "peerDependencies": { 46 | "expo": "*", 47 | "react": "*", 48 | "react-native": "*" 49 | } 50 | } 51 | -------------------------------------------------------------------------------- /src/index.ts: -------------------------------------------------------------------------------- 1 | // Reexport the native module. 
On web, it will be resolved to ExpoSpeechTranscriberModule.web.ts 2 | // and on native platforms to ExpoSpeechTranscriberModule.ts 3 | import ExpoSpeechTranscriberModule from './ExpoSpeechTranscriberModule'; 4 | import type { 5 | TranscriptionProgressPayload, 6 | TranscriptionErrorPayload, 7 | MicrophonePermissionTypes, 8 | PermissionTypes 9 | } from './ExpoSpeechTranscriber.types'; 10 | import { useState, useEffect } from 'react'; 11 | 12 | export function recordRealTimeAndTranscribe(): Promise { 13 | return ExpoSpeechTranscriberModule.recordRealTimeAndTranscribe(); 14 | } 15 | 16 | export { default as ExpoSpeechTranscriberModule } from './ExpoSpeechTranscriberModule'; 17 | export * from './ExpoSpeechTranscriber.types'; 18 | 19 | export function transcribeAudioWithSFRecognizer(audioFilePath: string): Promise { 20 | return ExpoSpeechTranscriberModule.transcribeAudioWithSFRecognizer(audioFilePath); 21 | } 22 | 23 | export function stopListening(): void { 24 | return ExpoSpeechTranscriberModule.stopListening(); 25 | } 26 | 27 | export function transcribeAudioWithAnalyzer(audioFilePath: string): Promise { 28 | return ExpoSpeechTranscriberModule.transcribeAudioWithAnalyzer(audioFilePath); 29 | } 30 | 31 | export function requestPermissions(): Promise { 32 | return ExpoSpeechTranscriberModule.requestPermissions(); 33 | } 34 | 35 | export function requestMicrophonePermissions(): Promise { 36 | return ExpoSpeechTranscriberModule.requestMicrophonePermissions(); 37 | } 38 | 39 | export function isRecording(): boolean { 40 | return ExpoSpeechTranscriberModule.isRecording(); 41 | } 42 | 43 | export function isAnalyzerAvailable(): boolean { 44 | return ExpoSpeechTranscriberModule.isAnalyzerAvailable(); 45 | } 46 | 47 | export function realtimeBufferTranscribe( 48 | buffer: number[] | Float32Array, 49 | sampleRate: number, 50 | ): Promise { 51 | const bufferArray = Array.isArray(buffer) ? buffer : Array.from(buffer); 52 | return ExpoSpeechTranscriberModule.realtimeBufferTranscribe( 53 | bufferArray, 54 | sampleRate, 55 | ); 56 | } 57 | 58 | export function stopBufferTranscription(): void { 59 | return ExpoSpeechTranscriberModule.stopBufferTranscription(); 60 | } 61 | 62 | export function useRealTimeTranscription() { 63 | const [text, setText] = useState(''); 64 | const [isFinal, setIsFinal] = useState(false); 65 | const [error, setError] = useState(null); 66 | const [isRecording, setIsRecording] = useState(false); 67 | 68 | useEffect(() => { 69 | const progressListener = ExpoSpeechTranscriberModule.addListener('onTranscriptionProgress', (payload: TranscriptionProgressPayload) => { 70 | setText(payload.text); 71 | setIsFinal(payload.isFinal); 72 | }); 73 | 74 | const errorListener = ExpoSpeechTranscriberModule.addListener('onTranscriptionError', (payload: TranscriptionErrorPayload) => { 75 | setError(payload.error); 76 | }) 77 | 78 | 79 | const interval = setInterval(() => { 80 | const newIsRecording = ExpoSpeechTranscriberModule.isRecording(); 81 | setIsRecording(prev => (prev !== newIsRecording ? 
newIsRecording : prev)); 82 | }, 100); 83 | 84 | return () => { 85 | clearInterval(interval); 86 | progressListener.remove(); 87 | errorListener.remove(); 88 | }; 89 | }, []); 90 | 91 | 92 | useEffect(() => { 93 | if (isRecording) { 94 | setText(''); 95 | setIsFinal(false); 96 | setError(null); 97 | } 98 | }, [isRecording]); 99 | 100 | return { text, isFinal, error, isRecording }; 101 | } 102 | -------------------------------------------------------------------------------- /android/src/main/java/expo/modules/speechtranscriber/ExpoSpeechTranscriberModule.kt: -------------------------------------------------------------------------------- 1 | package expo.modules.speechtranscriber 2 | 3 | import android.Manifest 4 | import android.content.Intent 5 | import android.content.pm.PackageManager 6 | import android.os.Bundle 7 | import android.os.Handler 8 | import android.os.Looper 9 | import android.speech.RecognitionListener 10 | import android.speech.RecognizerIntent 11 | import android.speech.SpeechRecognizer 12 | import android.util.Log 13 | import androidx.core.app.ActivityCompat 14 | import androidx.core.content.ContextCompat 15 | import expo.modules.kotlin.Promise 16 | import expo.modules.kotlin.modules.Module 17 | import expo.modules.kotlin.modules.ModuleDefinition 18 | import java.util.Locale 19 | 20 | class ExpoSpeechTranscriberModule : Module() { 21 | private var speechRecognizer: SpeechRecognizer? = null 22 | private val mainHandler by lazy { Handler(Looper.getMainLooper()) } 23 | 24 | private var isRecording = false 25 | private var permissionPromise: Promise? = null 26 | private val PERMISSION_REQUEST_CODE = 1001 27 | 28 | override fun definition() = ModuleDefinition { 29 | Name("ExpoSpeechTranscriber") 30 | Events("onTranscriptionProgress", "onTranscriptionError") 31 | 32 | AsyncFunction("recordRealTimeAndTranscribe") { promise: Promise -> 33 | mainHandler.post { 34 | startListening(promise) 35 | } 36 | } 37 | 38 | AsyncFunction("stopListening") { 39 | mainHandler.post { 40 | stopListening() 41 | } 42 | } 43 | 44 | Function("isRecording") { 45 | return@Function isRecording 46 | } 47 | 48 | AsyncFunction("requestMicrophonePermissions") { promise: Promise -> 49 | mainHandler.post { 50 | requestMicrophonePermissionsInternal(promise) 51 | } 52 | } 53 | 54 | OnDestroy { 55 | mainHandler.post { 56 | cleanup() 57 | } 58 | } 59 | } 60 | 61 | private fun startListening(promise: Promise) { 62 | val context = appContext.reactContext ?: run { 63 | sendEvent("onTranscriptionError", mapOf("message" to "Context is not available")) 64 | promise.resolve(false) 65 | return 66 | } 67 | 68 | if (!SpeechRecognizer.isRecognitionAvailable(context)) { 69 | val message = "Speech recognition is not available on this device." 
70 | Log.e("ExpoSpeechTranscriber", message) 71 | sendEvent("onTranscriptionError", mapOf("message" to message)) 72 | promise.resolve(false) 73 | return 74 | } 75 | 76 | if (ContextCompat.checkSelfPermission(context, Manifest.permission.RECORD_AUDIO) != PackageManager.PERMISSION_GRANTED) { 77 | sendEvent("onTranscriptionError", mapOf("message" to "Missing RECORD_AUDIO permission.")) 78 | promise.resolve(false) 79 | return 80 | } 81 | 82 | speechRecognizer?.destroy() 83 | speechRecognizer = null 84 | 85 | speechRecognizer = SpeechRecognizer.createSpeechRecognizer(context) 86 | 87 | val intent = Intent(RecognizerIntent.ACTION_RECOGNIZE_SPEECH).apply { 88 | putExtra(RecognizerIntent.EXTRA_LANGUAGE_MODEL, RecognizerIntent.LANGUAGE_MODEL_FREE_FORM) 89 | putExtra(RecognizerIntent.EXTRA_LANGUAGE, Locale.getDefault()) 90 | putExtra(RecognizerIntent.EXTRA_PARTIAL_RESULTS, true) 91 | putExtra(RecognizerIntent.EXTRA_MAX_RESULTS, 1) 92 | } 93 | 94 | speechRecognizer?.setRecognitionListener(createRecognitionListener()) 95 | speechRecognizer?.startListening(intent) 96 | isRecording = true 97 | promise.resolve(true) 98 | } 99 | 100 | private fun stopListening() { 101 | try { 102 | speechRecognizer?.stopListening() 103 | speechRecognizer?.destroy() 104 | } catch (e: Exception) { 105 | Log.e("ExpoSpeechTranscriber", "Error stopping recognizer: ${e.message}") 106 | } finally { 107 | speechRecognizer = null 108 | isRecording = false 109 | } 110 | } 111 | 112 | private fun cleanup() { 113 | stopListening() 114 | } 115 | 116 | private fun createRecognitionListener(): RecognitionListener { 117 | return object : RecognitionListener { 118 | override fun onReadyForSpeech(params: Bundle?) { 119 | Log.d("ExpoSpeechTranscriber", "Ready for speech") 120 | } 121 | 122 | override fun onBeginningOfSpeech() { 123 | Log.d("ExpoSpeechTranscriber", "Speech started") 124 | } 125 | 126 | override fun onRmsChanged(rmsdB: Float) {} 127 | override fun onBufferReceived(buffer: ByteArray?) {} 128 | 129 | override fun onEndOfSpeech() { 130 | Log.d("ExpoSpeechTranscriber", "Speech ended") 131 | } 132 | 133 | override fun onPartialResults(partialResults: Bundle?) { 134 | val matches = partialResults?.getStringArrayList(SpeechRecognizer.RESULTS_RECOGNITION) 135 | if (!matches.isNullOrEmpty()) { 136 | Log.d("ExpoSpeechTranscriber", "Text: ${matches[0]}") 137 | sendEvent("onTranscriptionProgress", mapOf( 138 | "text" to matches[0], 139 | "isFinal" to false 140 | )) 141 | } 142 | } 143 | 144 | override fun onResults(results: Bundle?) { 145 | val matches = results?.getStringArrayList(SpeechRecognizer.RESULTS_RECOGNITION) 146 | if (!matches.isNullOrEmpty()) { 147 | Log.d("ExpoSpeechTranscriber", "Text: ${matches[0]}") 148 | sendEvent("onTranscriptionProgress", mapOf( 149 | "text" to matches[0], 150 | "isFinal" to true 151 | )) 152 | } 153 | stopListening() 154 | } 155 | 156 | override fun onError(error: Int) { 157 | val errorMessage = getErrorMessage(error) 158 | sendEvent("onTranscriptionError", mapOf("message" to errorMessage)) 159 | stopListening() 160 | } 161 | 162 | override fun onEvent(eventType: Int, params: Bundle?) 
{} 163 | } 164 | } 165 | 166 | private fun getErrorMessage(errorCode: Int): String { 167 | return when (errorCode) { 168 | SpeechRecognizer.ERROR_AUDIO -> "Audio recording error" 169 | SpeechRecognizer.ERROR_CLIENT -> "Client side error" 170 | SpeechRecognizer.ERROR_INSUFFICIENT_PERMISSIONS -> "Insufficient permissions" 171 | SpeechRecognizer.ERROR_NETWORK -> "Network error" 172 | SpeechRecognizer.ERROR_NETWORK_TIMEOUT -> "Network timeout" 173 | SpeechRecognizer.ERROR_NO_MATCH -> "No match found" 174 | SpeechRecognizer.ERROR_RECOGNIZER_BUSY -> "Recognizer is busy" 175 | SpeechRecognizer.ERROR_SERVER -> "Error from server" 176 | SpeechRecognizer.ERROR_SPEECH_TIMEOUT -> "No speech input" 177 | else -> "An unknown error occurred" 178 | } 179 | } 180 | 181 | private fun requestMicrophonePermissionsInternal(promise: Promise) { 182 | val context = appContext.reactContext ?: run { 183 | promise.resolve("denied") 184 | return 185 | } 186 | 187 | if (ContextCompat.checkSelfPermission(context, Manifest.permission.RECORD_AUDIO) 188 | == PackageManager.PERMISSION_GRANTED) { 189 | promise.resolve("granted") 190 | return 191 | } 192 | 193 | val activity = appContext.currentActivity 194 | if (activity != null) { 195 | permissionPromise = promise 196 | ActivityCompat.requestPermissions( 197 | activity, 198 | arrayOf(Manifest.permission.RECORD_AUDIO), 199 | PERMISSION_REQUEST_CODE 200 | ) 201 | } else { 202 | promise.resolve("denied") 203 | } 204 | } 205 | } 206 | -------------------------------------------------------------------------------- /example/BufferTranscriptionExample.tsx: -------------------------------------------------------------------------------- 1 | import React, { useState } from 'react'; 2 | import { View, Text, TouchableOpacity, StyleSheet, Alert, ScrollView } from 'react-native'; 3 | import { Ionicons } from '@expo/vector-icons'; 4 | import * as SpeechTranscriber from 'expo-speech-transcriber'; 5 | import { AudioManager, AudioRecorder } from 'react-native-audio-api'; 6 | 7 | const BufferTranscriptionExample = () => { 8 | const { text, isFinal, error } = SpeechTranscriber.useRealTimeTranscription(); 9 | const [isTranscribing, setIsTranscribing] = useState(false); 10 | const [permissionsGranted, setPermissionsGranted] = useState(false); 11 | const [recorder, setRecorder] = useState(null); 12 | 13 | const initializeRecorder = () => { 14 | const audioRecorder = new AudioRecorder({ 15 | sampleRate: 16000, 16 | bufferLengthInSamples: 1600, 17 | }); 18 | 19 | AudioManager.setAudioSessionOptions({ 20 | iosCategory: 'playAndRecord', 21 | iosMode: 'spokenAudio', 22 | iosOptions: ['allowBluetooth', 'defaultToSpeaker'], 23 | }); 24 | 25 | audioRecorder.onAudioReady(({ buffer }) => { 26 | const channelData = buffer.getChannelData(0); 27 | 28 | SpeechTranscriber.realtimeBufferTranscribe( 29 | channelData, 30 | 16000 31 | ); 32 | }); 33 | 34 | setRecorder(audioRecorder); 35 | }; 36 | 37 | const requestAllPermissions = async () => { 38 | try { 39 | const speechPermission = await SpeechTranscriber.requestPermissions(); 40 | const micPermission = await AudioManager.requestRecordingPermissions(); 41 | 42 | if (speechPermission === 'authorized' && micPermission) { 43 | initializeRecorder(); 44 | setPermissionsGranted(true); 45 | Alert.alert('Permissions Granted', 'All permissions are now available.'); 46 | } else { 47 | Alert.alert('Permissions Required', 'Speech and microphone permissions are needed.'); 48 | } 49 | } catch (err) { 50 | Alert.alert('Error', 'Failed to request permissions'); 51 | } 52 | 
}; 53 | 54 | const handleStartTranscribing = async () => { 55 | if (!permissionsGranted || !recorder) { 56 | await requestAllPermissions(); 57 | return; 58 | } 59 | 60 | if (isTranscribing) { 61 | return; 62 | } 63 | 64 | setIsTranscribing(true); 65 | try { 66 | recorder.start(); 67 | } catch (e) { 68 | console.error('Transcription failed', e); 69 | Alert.alert('Error', 'Failed to start transcription'); 70 | setIsTranscribing(false); 71 | } 72 | }; 73 | 74 | const handleStopTranscribing = () => { 75 | if (!isTranscribing || !recorder) { 76 | return; 77 | } 78 | 79 | recorder.stop(); 80 | SpeechTranscriber.stopBufferTranscription(); 81 | setIsTranscribing(false); 82 | }; 83 | 84 | return ( 85 | 86 | Buffer Transcription Demo 87 | Using react-native-audio-api 88 | 89 | {!permissionsGranted && ( 90 | 91 | 92 | Request Permissions 93 | 94 | )} 95 | 96 | Buffer-Based Transcription 97 | 98 | 103 | 104 | Start Buffer Transcription 105 | 106 | 107 | 112 | 113 | Stop Buffer Transcription 114 | 115 | 116 | {isTranscribing && ( 117 | 118 | 119 | Transcribing from buffer... 120 | 121 | )} 122 | 123 | {error && ( 124 | 125 | Error: {error} 126 | 127 | )} 128 | 129 | {text && ( 130 | 131 | Transcription: 132 | {text} 133 | {isFinal && Final!} 134 | 135 | )} 136 | 137 | {!isTranscribing && !text && ( 138 | 139 | Request permissions, then start buffer transcription to stream audio data for real-time speech recognition. 140 | 141 | )} 142 | 143 | ); 144 | }; 145 | 146 | const styles = StyleSheet.create({ 147 | container: { 148 | flexGrow: 1, 149 | justifyContent: 'center', 150 | alignItems: 'center', 151 | padding: 20, 152 | backgroundColor: '#f5f5f5', 153 | }, 154 | title: { 155 | fontSize: 28, 156 | fontWeight: 'bold', 157 | marginBottom: 4, 158 | color: '#333', 159 | }, 160 | subtitle: { 161 | fontSize: 16, 162 | color: '#666', 163 | marginBottom: 20, 164 | }, 165 | sectionTitle: { 166 | fontSize: 20, 167 | fontWeight: 'bold', 168 | marginTop: 20, 169 | marginBottom: 10, 170 | color: '#333', 171 | }, 172 | button: { 173 | flexDirection: 'row', 174 | alignItems: 'center', 175 | justifyContent: 'center', 176 | paddingVertical: 15, 177 | paddingHorizontal: 30, 178 | borderRadius: 12, 179 | marginVertical: 8, 180 | minWidth: 280, 181 | shadowColor: '#000', 182 | shadowOffset: { width: 0, height: 2 }, 183 | shadowOpacity: 0.1, 184 | shadowRadius: 4, 185 | elevation: 3, 186 | }, 187 | permissionButton: { 188 | backgroundColor: '#6c757d', 189 | }, 190 | recordButton: { 191 | backgroundColor: '#007bff', 192 | }, 193 | stopButton: { 194 | backgroundColor: '#dc3545', 195 | }, 196 | disabled: { 197 | backgroundColor: '#ccc', 198 | opacity: 0.6, 199 | }, 200 | buttonText: { 201 | color: '#fff', 202 | fontSize: 18, 203 | fontWeight: '600', 204 | marginLeft: 10, 205 | }, 206 | recordingIndicator: { 207 | flexDirection: 'row', 208 | alignItems: 'center', 209 | marginTop: 20, 210 | padding: 15, 211 | backgroundColor: '#fff', 212 | borderRadius: 12, 213 | shadowColor: '#000', 214 | shadowOffset: { width: 0, height: 2 }, 215 | shadowOpacity: 0.1, 216 | shadowRadius: 4, 217 | elevation: 3, 218 | }, 219 | recordingText: { 220 | fontSize: 16, 221 | color: '#dc3545', 222 | marginLeft: 10, 223 | fontWeight: '600', 224 | }, 225 | errorContainer: { 226 | marginTop: 20, 227 | padding: 15, 228 | backgroundColor: '#f8d7da', 229 | borderRadius: 12, 230 | width: '100%', 231 | maxWidth: 400, 232 | }, 233 | errorText: { 234 | fontSize: 16, 235 | color: '#721c24', 236 | }, 237 | transcriptionContainer: { 238 | marginTop: 30, 239 | 
padding: 20, 240 | backgroundColor: '#fff', 241 | borderRadius: 12, 242 | width: '100%', 243 | maxWidth: 400, 244 | shadowColor: '#000', 245 | shadowOffset: { width: 0, height: 2 }, 246 | shadowOpacity: 0.1, 247 | shadowRadius: 8, 248 | elevation: 5, 249 | }, 250 | transcriptionTitle: { 251 | fontSize: 18, 252 | fontWeight: 'bold', 253 | marginBottom: 12, 254 | color: '#333', 255 | }, 256 | transcriptionText: { 257 | fontSize: 16, 258 | color: '#555', 259 | lineHeight: 24, 260 | }, 261 | finalText: { 262 | fontSize: 14, 263 | color: '#28a745', 264 | fontWeight: 'bold', 265 | marginTop: 10, 266 | }, 267 | hintText: { 268 | fontSize: 14, 269 | color: '#999', 270 | marginTop: 20, 271 | textAlign: 'center', 272 | }, 273 | }); 274 | 275 | export default BufferTranscriptionExample; 276 | -------------------------------------------------------------------------------- /example/RecordRealTimeAndTranscribe.tsx: -------------------------------------------------------------------------------- 1 | import React, { useState, useEffect } from 'react'; 2 | import { View, Text, StyleSheet, TouchableOpacity, ScrollView, Platform, Alert } from 'react-native'; 3 | import { Ionicons } from '@expo/vector-icons'; 4 | import * as SpeechTranscriber from 'expo-speech-transcriber'; 5 | 6 | /** 7 | * RecordRealTimeAndTranscribe Example 8 | * 9 | * Demonstrates real-time speech transcription using expo-speech-transcriber. 10 | * This example follows the API documented in README.md. 11 | * 12 | * Requirements: 13 | * - Android 13+ (API 33) for Android 14 | * - iOS 13+ for iOS 15 | * - Microphone and speech recognition permissions 16 | */ 17 | export default function RecordRealTimeAndTranscribe() { 18 | const [permissionStatus, setPermissionStatus] = useState({ 19 | speech: 'notDetermined', 20 | microphone: 'denied', 21 | }); 22 | 23 | // Use the built-in hook for real-time transcription 24 | const { text, isFinal, error, isRecording } = SpeechTranscriber.useRealTimeTranscription(); 25 | 26 | /** 27 | * Request all necessary permissions 28 | */ 29 | const requestAllPermissions = async () => { 30 | try { 31 | console.log('🔐 Requesting permissions...'); 32 | 33 | let speechPermission = 'notDetermined'; 34 | 35 | // Request speech recognition permission only on iOS 36 | if (Platform.OS === 'ios') { 37 | speechPermission = await SpeechTranscriber.requestPermissions(); 38 | console.log('Speech permission:', speechPermission); 39 | } 40 | 41 | // Request microphone permission 42 | const micPermission = await SpeechTranscriber.requestMicrophonePermissions(); 43 | console.log('Microphone permission:', micPermission); 44 | 45 | // Both `requestPermissions` and `requestMicrophonePermissions` return 46 | // string union types (e.g. 'authorized' | 'denied'), not objects with 47 | // `.status` or `.granted` properties. Store them directly and check 48 | // their values accordingly. 
49 | setPermissionStatus({ 50 | speech: speechPermission, 51 | microphone: micPermission, 52 | }); 53 | 54 | if (Platform.OS === 'ios' && speechPermission !== 'authorized') { 55 | Alert.alert('Permission Denied', 'Speech recognition permission is required.'); 56 | } else if (micPermission !== 'granted') { 57 | Alert.alert('Permission Denied', 'Microphone permission is required.'); 58 | } 59 | } catch (err) { 60 | console.error('❌ Permission error:', err); 61 | Alert.alert('Error', `Failed to request permissions: ${err}`); 62 | } 63 | }; 64 | 65 | /** 66 | * Start real-time transcription 67 | */ 68 | const startTranscription = async () => { 69 | try { 70 | console.log('🎤 Starting transcription...'); 71 | await SpeechTranscriber.recordRealTimeAndTranscribe(); 72 | console.log('✅ Transcription started'); 73 | } catch (err) { 74 | console.error('❌ Start error:', err); 75 | Alert.alert('Error', `Failed to start transcription: ${err}`); 76 | } 77 | }; 78 | 79 | /** 80 | * Stop real-time transcription 81 | */ 82 | const stopTranscription = () => { 83 | try { 84 | console.log('⏹️ Stopping transcription...'); 85 | SpeechTranscriber.stopListening(); 86 | console.log('✅ Transcription stopped'); 87 | } catch (err) { 88 | console.error('❌ Stop error:', err); 89 | Alert.alert('Error', `Failed to stop transcription: ${err}`); 90 | } 91 | }; 92 | 93 | // Check permissions on mount 94 | useEffect(() => { 95 | requestAllPermissions(); 96 | }, []); 97 | 98 | const hasPermissions = Platform.OS === 'ios' 99 | ? permissionStatus.speech === 'authorized' && permissionStatus.microphone === 'granted' 100 | : permissionStatus.microphone === 'granted'; 101 | 102 | return ( 103 | 104 | Real-Time Transcription 105 | Platform: {Platform.OS} 106 | 107 | {!hasPermissions && ( 108 | 109 | 110 | Request Permissions 111 | 112 | )} 113 | 114 | Recording Controls 115 | 116 | 121 | 122 | Start Transcription 123 | 124 | 125 | 130 | 131 | Stop Transcription 132 | 133 | 134 | {isRecording && ( 135 | 136 | 137 | Recording and Transcribing... 138 | 139 | )} 140 | 141 | {error && ( 142 | 143 | Error: {error} 144 | 145 | )} 146 | 147 | {text && ( 148 | 149 | Transcription: 150 | {text} 151 | {isFinal && Final!} 152 | 153 | )} 154 | 155 | {!isRecording && !text && hasPermissions && ( 156 | 157 | Press "Start Transcription" to begin real-time speech recognition. 158 | 159 | )} 160 | 161 | 162 | How to use: 163 | 1. Grant permissions when prompted 164 | 2. Press "Start Transcription" to begin 165 | 3. Speak clearly into your device 166 | 4. Watch the transcription appear in real-time 167 | 5. 
Press "Stop Transcription" when finished 168 | 169 | 170 | ); 171 | } 172 | 173 | const styles = StyleSheet.create({ 174 | container: { 175 | flexGrow: 1, 176 | justifyContent: 'center', 177 | alignItems: 'center', 178 | padding: 20, 179 | backgroundColor: '#f5f5f5', 180 | }, 181 | title: { 182 | fontSize: 28, 183 | fontWeight: 'bold', 184 | marginBottom: 8, 185 | color: '#333', 186 | }, 187 | subtitle: { 188 | fontSize: 14, 189 | color: '#999', 190 | marginBottom: 10, 191 | }, 192 | sectionTitle: { 193 | fontSize: 20, 194 | fontWeight: 'bold', 195 | marginTop: 20, 196 | marginBottom: 10, 197 | color: '#333', 198 | }, 199 | button: { 200 | flexDirection: 'row', 201 | alignItems: 'center', 202 | justifyContent: 'center', 203 | paddingVertical: 15, 204 | paddingHorizontal: 30, 205 | borderRadius: 12, 206 | marginVertical: 8, 207 | minWidth: 280, 208 | shadowColor: '#000', 209 | shadowOffset: { width: 0, height: 2 }, 210 | shadowOpacity: 0.1, 211 | shadowRadius: 4, 212 | elevation: 3, 213 | }, 214 | permissionButton: { 215 | backgroundColor: '#6c757d', 216 | }, 217 | recordButton: { 218 | backgroundColor: '#007bff', 219 | }, 220 | stopButton: { 221 | backgroundColor: '#dc3545', 222 | }, 223 | disabled: { 224 | backgroundColor: '#ccc', 225 | opacity: 0.6, 226 | }, 227 | buttonText: { 228 | color: '#fff', 229 | fontSize: 18, 230 | fontWeight: '600', 231 | marginLeft: 10, 232 | }, 233 | recordingIndicator: { 234 | flexDirection: 'row', 235 | alignItems: 'center', 236 | marginTop: 20, 237 | padding: 15, 238 | backgroundColor: '#fff', 239 | borderRadius: 12, 240 | shadowColor: '#000', 241 | shadowOffset: { width: 0, height: 2 }, 242 | shadowOpacity: 0.1, 243 | shadowRadius: 4, 244 | elevation: 3, 245 | }, 246 | recordingText: { 247 | fontSize: 16, 248 | color: '#dc3545', 249 | marginLeft: 10, 250 | fontWeight: '600', 251 | }, 252 | errorContainer: { 253 | marginTop: 20, 254 | padding: 15, 255 | backgroundColor: '#f8d7da', 256 | borderRadius: 12, 257 | width: '100%', 258 | maxWidth: 400, 259 | }, 260 | errorText: { 261 | fontSize: 16, 262 | color: '#721c24', 263 | }, 264 | transcriptionContainer: { 265 | marginTop: 30, 266 | padding: 20, 267 | backgroundColor: '#fff', 268 | borderRadius: 12, 269 | width: '100%', 270 | maxWidth: 400, 271 | shadowColor: '#000', 272 | shadowOffset: { width: 0, height: 2 }, 273 | shadowOpacity: 0.1, 274 | shadowRadius: 8, 275 | elevation: 5, 276 | }, 277 | transcriptionTitle: { 278 | fontSize: 18, 279 | fontWeight: 'bold', 280 | marginBottom: 12, 281 | color: '#333', 282 | }, 283 | transcriptionText: { 284 | fontSize: 16, 285 | color: '#555', 286 | lineHeight: 24, 287 | }, 288 | finalText: { 289 | fontSize: 14, 290 | color: '#28a745', 291 | fontWeight: 'bold', 292 | marginTop: 10, 293 | }, 294 | hintText: { 295 | fontSize: 14, 296 | color: '#999', 297 | marginTop: 20, 298 | textAlign: 'center', 299 | }, 300 | instructionsContainer: { 301 | marginTop: 30, 302 | padding: 20, 303 | backgroundColor: '#e7f3ff', 304 | borderRadius: 12, 305 | width: '100%', 306 | maxWidth: 400, 307 | }, 308 | instructionsTitle: { 309 | fontSize: 16, 310 | fontWeight: 'bold', 311 | marginBottom: 12, 312 | color: '#333', 313 | }, 314 | instructionText: { 315 | fontSize: 14, 316 | color: '#555', 317 | marginBottom: 6, 318 | lineHeight: 20, 319 | }, 320 | }); -------------------------------------------------------------------------------- /example/App.tsx: -------------------------------------------------------------------------------- 1 | import React, { useState, useEffect } from 'react'; 2 | 
import { View, Text, TouchableOpacity, StyleSheet, Alert, ScrollView } from 'react-native'; 3 | import { Ionicons } from '@expo/vector-icons'; 4 | import * as SpeechTranscriber from 'expo-speech-transcriber'; 5 | import { useAudioRecorder, RecordingPresets, setAudioModeAsync, useAudioRecorderState } from 'expo-audio'; 6 | 7 | const App = () => { 8 | const { text, isFinal, error, isRecording } = SpeechTranscriber.useRealTimeTranscription(); 9 | const [recordedUri, setRecordedUri] = useState(null); 10 | const [sfTranscription, setSfTranscription] = useState(''); 11 | const [analyzerTranscription, setAnalyzerTranscription] = useState(''); 12 | const [permissionsGranted, setPermissionsGranted] = useState(false); 13 | 14 | const audioRecorder = useAudioRecorder(RecordingPresets.HIGH_QUALITY); 15 | const recorderState = useAudioRecorderState(audioRecorder); 16 | 17 | useEffect(() => { 18 | if (isFinal) { 19 | // Optionally handle final transcription 20 | } 21 | }, [isFinal]); 22 | 23 | const requestAllPermissions = async () => { 24 | try { 25 | const speechPermission = await SpeechTranscriber.requestPermissions(); 26 | const micPermission = await SpeechTranscriber.requestMicrophonePermissions(); 27 | if (speechPermission === "authorized" && micPermission === 'granted') { 28 | // Set audio mode for recording 29 | await setAudioModeAsync({ 30 | playsInSilentMode: true, 31 | allowsRecording: true, 32 | }); 33 | setPermissionsGranted(true); 34 | Alert.alert('Permissions Granted', 'All permissions are now available.'); 35 | } else { 36 | Alert.alert('Permissions Required', 'Speech and microphone permissions are needed.'); 37 | } 38 | } catch (err) { 39 | Alert.alert('Error', 'Failed to request permissions'); 40 | } 41 | }; 42 | 43 | const handleStartTranscription = async () => { 44 | if (!permissionsGranted) { 45 | await requestAllPermissions(); 46 | return; 47 | } 48 | try { 49 | await SpeechTranscriber.recordRealTimeAndTranscribe(); 50 | } catch (err) { 51 | Alert.alert('Error', 'Failed to start transcription'); 52 | } 53 | }; 54 | 55 | const handleStopTranscription = () => { 56 | SpeechTranscriber.stopListening(); 57 | }; 58 | 59 | const startRecording = async () => { 60 | if (!permissionsGranted) { 61 | await requestAllPermissions(); 62 | return; 63 | } 64 | try { 65 | await audioRecorder.prepareToRecordAsync(); 66 | audioRecorder.record(); 67 | } catch (err) { 68 | Alert.alert('Error', 'Failed to start recording'); 69 | } 70 | }; 71 | 72 | const stopRecording = async () => { 73 | try { 74 | await audioRecorder.stop(); 75 | if (audioRecorder.uri) { 76 | setRecordedUri(audioRecorder.uri); 77 | Alert.alert('Recording Complete', `Audio saved at: ${audioRecorder.uri}`); 78 | } 79 | } catch (err) { 80 | Alert.alert('Error', 'Failed to stop recording'); 81 | } 82 | }; 83 | 84 | const transcribeWithSF = async () => { 85 | if (!recordedUri) { 86 | Alert.alert('No Recording', 'Please record audio first.'); 87 | return; 88 | } 89 | try { 90 | const transcription = await SpeechTranscriber.transcribeAudioWithSFRecognizer(recordedUri); 91 | setSfTranscription(transcription); 92 | } catch (err) { 93 | Alert.alert('Error', 'Failed to transcribe with SF Recognizer'); 94 | } 95 | }; 96 | 97 | const transcribeWithAnalyzer = async () => { 98 | if (!recordedUri) { 99 | Alert.alert('No Recording', 'Please record audio first.'); 100 | return; 101 | } 102 | if (!SpeechTranscriber.isAnalyzerAvailable()) { 103 | Alert.alert('Not Available', 'SpeechAnalyzer is not available on this device.'); 104 | return; 105 | } 106 | 
try { 107 | const transcription = await SpeechTranscriber.transcribeAudioWithAnalyzer(recordedUri); 108 | setAnalyzerTranscription(transcription); 109 | } catch (err) { 110 | Alert.alert('Error', 'Failed to transcribe with Analyzer'); 111 | } 112 | }; 113 | 114 | return ( 115 | 116 | Speech Transcriber Demo 117 | 118 | {!permissionsGranted && ( 119 | 120 | 121 | Request Permissions 122 | 123 | )} 124 | 125 | Realtime Transcription 126 | 131 | 132 | Start Realtime Transcription 133 | 134 | 135 | 140 | 141 | Stop Realtime Transcription 142 | 143 | 144 | {isRecording && ( 145 | 146 | 147 | Recording and Transcribing... 148 | 149 | )} 150 | 151 | {error && ( 152 | 153 | Realtime Error: {error} 154 | 155 | )} 156 | 157 | {text && ( 158 | 159 | Realtime Transcription: 160 | {text} 161 | {isFinal && Final!} 162 | 163 | )} 164 | 165 | File Transcription 166 | 167 | 168 | Start Recording 169 | 170 | 171 | 172 | 173 | Stop Recording 174 | 175 | 176 | {recorderState.isRecording && ( 177 | 178 | 179 | Recording... 180 | 181 | )} 182 | 183 | {recordedUri && ( 184 | <> 185 | 186 | 187 | Transcribe with SF Recognizer 188 | 189 | 190 | {SpeechTranscriber.isAnalyzerAvailable() && ( 191 | 192 | 193 | Transcribe with Analyzer 194 | 195 | )} 196 | 197 | {sfTranscription && ( 198 | 199 | SF Recognizer Result: 200 | {sfTranscription} 201 | 202 | )} 203 | 204 | {analyzerTranscription && ( 205 | 206 | Analyzer Result: 207 | {analyzerTranscription} 208 | 209 | )} 210 | 211 | )} 212 | 213 | {!isRecording && !text && !recordedUri && ( 214 | 215 | Request permissions, then try realtime transcription or record audio for file transcription. 216 | 217 | )} 218 | 219 | ); 220 | }; 221 | 222 | const styles = StyleSheet.create({ 223 | container: { 224 | flexGrow: 1, 225 | justifyContent: 'center', 226 | alignItems: 'center', 227 | padding: 20, 228 | backgroundColor: '#f5f5f5', 229 | }, 230 | title: { 231 | fontSize: 28, 232 | fontWeight: 'bold', 233 | marginBottom: 8, 234 | color: '#333', 235 | }, 236 | sectionTitle: { 237 | fontSize: 20, 238 | fontWeight: 'bold', 239 | marginTop: 20, 240 | marginBottom: 10, 241 | color: '#333', 242 | }, 243 | button: { 244 | flexDirection: 'row', 245 | alignItems: 'center', 246 | justifyContent: 'center', 247 | paddingVertical: 15, 248 | paddingHorizontal: 30, 249 | borderRadius: 12, 250 | marginVertical: 8, 251 | minWidth: 280, 252 | shadowColor: '#000', 253 | shadowOffset: { width: 0, height: 2 }, 254 | shadowOpacity: 0.1, 255 | shadowRadius: 4, 256 | elevation: 3, 257 | }, 258 | permissionButton: { 259 | backgroundColor: '#6c757d', 260 | }, 261 | recordButton: { 262 | backgroundColor: '#007bff', 263 | }, 264 | stopButton: { 265 | backgroundColor: '#dc3545', 266 | }, 267 | transcribeButton: { 268 | backgroundColor: '#28a745', 269 | }, 270 | disabled: { 271 | backgroundColor: '#ccc', 272 | opacity: 0.6, 273 | }, 274 | buttonText: { 275 | color: '#fff', 276 | fontSize: 18, 277 | fontWeight: '600', 278 | marginLeft: 10, 279 | }, 280 | recordingIndicator: { 281 | flexDirection: 'row', 282 | alignItems: 'center', 283 | marginTop: 20, 284 | padding: 15, 285 | backgroundColor: '#fff', 286 | borderRadius: 12, 287 | shadowColor: '#000', 288 | shadowOffset: { width: 0, height: 2 }, 289 | shadowOpacity: 0.1, 290 | shadowRadius: 4, 291 | elevation: 3, 292 | }, 293 | recordingText: { 294 | fontSize: 16, 295 | color: '#dc3545', 296 | marginLeft: 10, 297 | fontWeight: '600', 298 | }, 299 | errorContainer: { 300 | marginTop: 20, 301 | padding: 15, 302 | backgroundColor: '#f8d7da', 303 | borderRadius: 
12, 304 | width: '100%', 305 | maxWidth: 400, 306 | }, 307 | errorText: { 308 | fontSize: 16, 309 | color: '#721c24', 310 | }, 311 | transcriptionContainer: { 312 | marginTop: 30, 313 | padding: 20, 314 | backgroundColor: '#fff', 315 | borderRadius: 12, 316 | width: '100%', 317 | maxWidth: 400, 318 | shadowColor: '#000', 319 | shadowOffset: { width: 0, height: 2 }, 320 | shadowOpacity: 0.1, 321 | shadowRadius: 8, 322 | elevation: 5, 323 | }, 324 | transcriptionTitle: { 325 | fontSize: 18, 326 | fontWeight: 'bold', 327 | marginBottom: 12, 328 | color: '#333', 329 | }, 330 | transcriptionText: { 331 | fontSize: 16, 332 | color: '#555', 333 | lineHeight: 24, 334 | }, 335 | finalText: { 336 | fontSize: 14, 337 | color: '#28a745', 338 | fontWeight: 'bold', 339 | marginTop: 10, 340 | }, 341 | hintText: { 342 | fontSize: 14, 343 | color: '#999', 344 | marginTop: 20, 345 | textAlign: 'center', 346 | }, 347 | }); 348 | 349 | export default App; -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # expo-speech-transcriber 2 | 3 | On-device speech transcription for Expo apps. Supports iOS (Apple Speech framework) and Android (SpeechRecognizer API). 4 | 5 | ## Features 6 | 7 | - 🎯 On-device transcription - Works offline, privacy-focused 8 | - 📱 Cross-platform - iOS 13+ and Android 13+ (API 33) 9 | - 🚀 Multiple APIs - SFSpeechRecognizer (iOS 13+), SpeechAnalyzer (iOS 26+), and Android SpeechRecognizer 10 | - 📦 Easy integration - Auto-configures permissions 11 | - 🔒 Secure - All processing happens on device 12 | - ⚡ Realtime transcription - Get live speech-to-text updates with built-in audio capture 13 | - 📁 File transcription - Transcribe pre-recorded audio files 14 | - 🎤 Buffer-based transcription - Stream audio buffers from external sources for real-time transcription 15 | 16 | ## Installation 17 | 18 | ```bash 19 | npx expo install expo-speech-transcriber expo-audio 20 | ``` 21 | 22 | Add the plugin to your `app.json`: 23 | 24 | ```json 25 | { 26 | "expo": { 27 | "plugins": ["expo-audio", "expo-speech-transcriber"] 28 | } 29 | } 30 | ``` 31 | 32 | ### Custom permission message (recommended): 33 | 34 | Apple requires a clear purpose string for speech recognition and microphone permissions. Without it, your app may be rejected during App Store review. Provide a descriptive message explaining why your app needs access. 35 | 36 | ```json 37 | { 38 | "expo": { 39 | "plugins": [ 40 | "expo-audio", 41 | [ 42 | "expo-speech-transcriber", 43 | { 44 | "speechRecognitionPermission": "We need speech recognition to transcribe your recordings", 45 | "microphonePermission": "We need microphone access to record audio for transcription" 46 | } 47 | ] 48 | ] 49 | } 50 | } 51 | ``` 52 | 53 | For more details, see Apple's guidelines on [requesting access to protected resources](https://developer.apple.com/documentation/uikit/requesting-access-to-protected-resources). 54 | 55 | > **Note for Android:** The plugin automatically adds the `RECORD_AUDIO` permission to your Android manifest. No additional configuration is required. 56 | 57 | ## Usage 58 | 59 | ### Realtime Transcription 60 | 61 | Start transcribing speech in real-time. This does not require `expo-audio`. 
62 | 
63 | ```typescript 
64 | import { Platform } from "react-native"; 
65 | import * as SpeechTranscriber from "expo-speech-transcriber"; 
66 | 
67 | // Request permissions 
68 | // Note: requestPermissions() is only needed on iOS 
69 | if (Platform.OS === "ios") { 
70 | const speechPermission = await SpeechTranscriber.requestPermissions(); 
71 | if (speechPermission !== "authorized") { 
72 | console.log("Speech permission denied"); 
73 | return; 
74 | } 
75 | } 
76 | 
77 | const micPermission = await SpeechTranscriber.requestMicrophonePermissions(); 
78 | if (micPermission !== "granted") { 
79 | console.log("Microphone permission denied"); 
80 | return; 
81 | } 
82 | 
83 | // Use the hook for realtime updates 
84 | const { text, isFinal, error, isRecording } = 
85 | SpeechTranscriber.useRealTimeTranscription(); 
86 | 
87 | // Start transcription 
88 | await SpeechTranscriber.recordRealTimeAndTranscribe(); 
89 | 
90 | // Stop when done 
91 | SpeechTranscriber.stopListening(); 
92 | ``` 
93 | **NOTE**: See [RecordRealTimeAndTranscribe](example/RecordRealTimeAndTranscribe.tsx) for an example of how to use real-time transcription on Android. 
94 | 
95 | ### File Transcription 
96 | 
97 | Transcribe pre-recorded audio files. Our library handles transcription but not recording—use `expo-audio` to record audio (see [expo-audio documentation](https://docs.expo.dev/versions/latest/sdk/audio/)), or implement your own recording logic with microphone access via `requestMicrophonePermissions()`. 
98 | 
99 | ```typescript 
100 | import * as SpeechTranscriber from "expo-speech-transcriber"; 
101 | import { useAudioRecorder, RecordingPresets } from "expo-audio"; 
102 | 
103 | // Record audio with expo-audio 
104 | const audioRecorder = useAudioRecorder(RecordingPresets.HIGH_QUALITY); 
105 | await audioRecorder.prepareToRecordAsync(); 
106 | audioRecorder.record(); 
107 | // ... user speaks ... 
108 | await audioRecorder.stop(); 
109 | const audioUri = audioRecorder.uri; 
110 | 
111 | // Transcribe with SFSpeechRecognizer (preferred) 
112 | const text = await SpeechTranscriber.transcribeAudioWithSFRecognizer(audioUri); 
113 | console.log("Transcription:", text); 
114 | 
115 | // Or with SpeechAnalyzer if available 
116 | if (SpeechTranscriber.isAnalyzerAvailable()) { 
117 | const text = await SpeechTranscriber.transcribeAudioWithAnalyzer(audioUri); 
118 | console.log("Transcription:", text); 
119 | } 
120 | ``` 
121 | 
122 | For custom recording without `expo-audio`: 
123 | 
124 | ```typescript 
125 | // Request microphone permission for your custom recording implementation 
126 | const micPermission = await SpeechTranscriber.requestMicrophonePermissions(); 
127 | // Implement your own audio recording logic here to save a file 
128 | // Then transcribe the resulting audio file URI 
129 | ``` 
130 | 
131 | ### Buffer-Based Transcription 
132 | 
133 | Stream audio buffers directly to the transcriber for real-time processing. This is ideal for integrating with audio processing libraries like [react-native-audio-api](https://docs.swmansion.com/react-native-audio-api/).
134 | 
135 | ```typescript 
136 | import * as SpeechTranscriber from "expo-speech-transcriber"; 
137 | import { AudioManager, AudioRecorder } from "react-native-audio-api"; 
138 | 
139 | // Set up audio recorder 
140 | const recorder = new AudioRecorder({ 
141 | sampleRate: 16000, 
142 | bufferLengthInSamples: 1600, 
143 | }); 
144 | 
145 | AudioManager.setAudioSessionOptions({ 
146 | iosCategory: "playAndRecord", 
147 | iosMode: "spokenAudio", 
148 | iosOptions: ["allowBluetooth", "defaultToSpeaker"], 
149 | }); 
150 | 
151 | // Request permissions 
152 | const speechPermission = await SpeechTranscriber.requestPermissions(); 
153 | const micPermission = await AudioManager.requestRecordingPermissions(); 
154 | 
155 | // Stream audio buffers to transcriber 
156 | recorder.onAudioReady(({ buffer }) => { 
157 | const channelData = buffer.getChannelData(0); 
158 | SpeechTranscriber.realtimeBufferTranscribe( 
159 | channelData, // Float32Array or number[] 
160 | 16000, // sample rate 
161 | ); 
162 | }); 
163 | 
164 | // Use the hook to get transcription updates 
165 | const { text, isFinal, error } = SpeechTranscriber.useRealTimeTranscription(); 
166 | 
167 | // Start streaming 
168 | recorder.start(); 
169 | 
170 | // Stop when done 
171 | recorder.stop(); 
172 | SpeechTranscriber.stopBufferTranscription(); 
173 | ``` 
174 | 
175 | See the [BufferTranscriptionExample](./example/BufferTranscriptionExample.tsx) for a complete implementation. 
176 | 
177 | 
178 | 
179 | ## API Reference 
180 | 
181 | ### `requestPermissions()` 
182 | Request speech recognition permission. 
183 | 
184 | **Platform:** iOS only. On Android, speech recognition permission is handled through `requestMicrophonePermissions()`. 
185 | 
186 | **Returns:** `Promise<string>` - One of: `'authorized'`, `'denied'`, `'restricted'`, or `'notDetermined'` 
187 | 
188 | **Example:** 
189 | 
190 | ```typescript 
191 | import { Platform } from "react-native"; 
192 | 
193 | if (Platform.OS === "ios") { 
194 | const status = await SpeechTranscriber.requestPermissions(); 
195 | } 
196 | ``` 
197 | 
198 | ### `requestMicrophonePermissions()` 
199 | 
200 | Request microphone permission. 
201 | 
202 | **Returns:** `Promise<string>` - One of: `'granted'` or `'denied'` 
203 | 
204 | **Example:** 
205 | 
206 | ```typescript 
207 | const status = await SpeechTranscriber.requestMicrophonePermissions(); 
208 | ``` 
209 | 
210 | ### `recordRealTimeAndTranscribe()` 
211 | 
212 | Start real-time speech transcription. Listen for events via the `useRealTimeTranscription` hook. 
213 | 
214 | **Returns:** `Promise<void>` 
215 | 
216 | **Example:** 
217 | 
218 | ```typescript 
219 | await SpeechTranscriber.recordRealTimeAndTranscribe(); 
220 | ``` 
221 | 
222 | ### `stopListening()` 
223 | 
224 | Stop real-time transcription. 
225 | 
226 | **Returns:** `void` 
227 | 
228 | **Example:** 
229 | 
230 | ```typescript 
231 | SpeechTranscriber.stopListening(); 
232 | ``` 
233 | 
234 | ### `isRecording()` 
235 | 
236 | Check if real-time transcription is currently recording. 
237 | 
238 | **Returns:** `boolean` 
239 | 
240 | **Example:** 
241 | 
242 | ```typescript 
243 | const recording = SpeechTranscriber.isRecording(); 
244 | ``` 
245 | 
246 | ### `transcribeAudioWithSFRecognizer(audioFilePath: string)` 
247 | 
248 | Transcribe audio from a pre-recorded file using SFSpeechRecognizer. This API is preferred for its reliability.
249 | 
250 | **Platform:** iOS only 
251 | 
252 | **Requires:** iOS 13+, pre-recorded audio file URI (record with `expo-audio` or your own implementation) 
253 | 
254 | **Returns:** `Promise<string>` - Transcribed text 
255 | 
256 | **Example:** 
257 | 
258 | ```typescript 
259 | const transcription = await SpeechTranscriber.transcribeAudioWithSFRecognizer( 
260 | "file://path/to/audio.m4a" 
261 | ); 
262 | ``` 
263 | 
264 | ### `transcribeAudioWithAnalyzer(audioFilePath: string)` 
265 | 
266 | Transcribe audio from a pre-recorded file using SpeechAnalyzer. 
267 | 
268 | **Platform:** iOS only 
269 | 
270 | **Requires:** iOS 26+, pre-recorded audio file URI (record with `expo-audio` or your own implementation) 
271 | 
272 | **Returns:** `Promise<string>` - Transcribed text 
273 | 
274 | **Example:** 
275 | 
276 | ```typescript 
277 | const transcription = await SpeechTranscriber.transcribeAudioWithAnalyzer( 
278 | "file://path/to/audio.m4a" 
279 | ); 
280 | ``` 
281 | 
282 | ### `isAnalyzerAvailable()` 
283 | 
284 | Check if SpeechAnalyzer API is available. 
285 | 
286 | **Platform:** iOS only. Always returns `false` on Android. 
287 | 
288 | **Returns:** `boolean` - `true` if iOS 26+, `false` otherwise 
289 | 
290 | **Example:** 
291 | 
292 | ```typescript 
293 | if (SpeechTranscriber.isAnalyzerAvailable()) { 
294 | // Use SpeechAnalyzer 
295 | } 
296 | ``` 
297 | 
298 | ### `useRealTimeTranscription()` 
299 | 
300 | React hook for real-time transcription state. 
301 | 
302 | **Returns:** `{ text: string, isFinal: boolean, error: string | null, isRecording: boolean }` 
303 | 
304 | **Example:** 
305 | 
306 | ```typescript 
307 | const { text, isFinal, error, isRecording } = 
308 | SpeechTranscriber.useRealTimeTranscription(); 
309 | ``` 
310 | 
311 | ### `realtimeBufferTranscribe(buffer, sampleRate)` 
312 | 
313 | Stream audio buffers for real-time transcription. Ideal for integration with audio processing libraries. 
314 | 
315 | **Parameters:** 
316 | 
317 | - `buffer: Float32Array | number[]` - Audio samples 
318 | - `sampleRate: number` - Sample rate in Hz (e.g., 16000) 
319 | 
320 | **NOTE:** Only mono audio is currently supported; the channel count is fixed to 1 natively. 
321 | 
322 | **Returns:** `Promise<void>` 
323 | 
324 | **Example:** 
325 | 
326 | ```typescript 
327 | const audioBuffer = new Float32Array([...]); 
328 | await SpeechTranscriber.realtimeBufferTranscribe(audioBuffer, 16000); 
329 | ``` 
330 | 
331 | ### `stopBufferTranscription()` 
332 | 
333 | Stop buffer-based transcription and clean up resources. 
334 | 
335 | **Returns:** `void` 
336 | 
337 | **Example:** 
338 | 
339 | ```typescript 
340 | SpeechTranscriber.stopBufferTranscription(); 
341 | ``` 
342 | 
343 | ## Example 
344 | 
345 | See the [example app](./example) for a complete implementation demonstrating all APIs.
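For orientation, here is a minimal component sketch showing how the realtime API and the `useRealTimeTranscription` hook fit together inside a React component. The component and handler names (`RealtimeDemo`, `handleStart`) are illustrative and not part of the library; only the `SpeechTranscriber` calls are the ones documented above.

```tsx
import React from "react";
import { Button, Platform, Text, View } from "react-native";
import * as SpeechTranscriber from "expo-speech-transcriber";

export default function RealtimeDemo() {
  // Re-renders as transcription progress events arrive
  const { text, isFinal, error, isRecording } =
    SpeechTranscriber.useRealTimeTranscription();

  const handleStart = async () => {
    // Speech recognition permission is only required on iOS (see API Reference)
    if (Platform.OS === "ios") {
      const speech = await SpeechTranscriber.requestPermissions();
      if (speech !== "authorized") return;
    }
    const mic = await SpeechTranscriber.requestMicrophonePermissions();
    if (mic !== "granted") return;

    await SpeechTranscriber.recordRealTimeAndTranscribe();
  };

  return (
    <View>
      <Button title="Start" onPress={handleStart} disabled={isRecording} />
      <Button title="Stop" onPress={() => SpeechTranscriber.stopListening()} disabled={!isRecording} />
      {error ? <Text>Error: {error}</Text> : <Text>{text}{isFinal ? " (final)" : ""}</Text>}
    </View>
  );
}
```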
346 | 347 | ## Requirements 348 | 349 | ### iOS 350 | - iOS 13.0+ 351 | - Expo SDK 52+ 352 | - Development build (Expo Go not supported - [why?](https://expo.dev/blog/expo-go-vs-development-builds)) 353 | 354 | ### Android 355 | - Android 13+ (API level 33) 356 | - Expo SDK 52+ 357 | - Development build (Expo Go not supported) 358 | 359 | ## Limitations 360 | 361 | - **English only** - Currently hardcoded to `en_US` locale 362 | - **File size** - Best for short recordings (< 1 minute) 363 | - **Recording not included** - Real-time transcription captures audio internally; file transcription requires pre-recorded audio files (use `expo-audio` or implement your own recording with `requestMicrophonePermissions()`) 364 | - **Android file transcription** - File-based transcription (`transcribeAudioWithSFRecognizer`, `transcribeAudioWithAnalyzer`) is iOS only. Android supports real-time transcription 365 | - **Android API level** - Android requires API level 33+ (Android 13) 366 | 367 | ## License 368 | 369 | MIT 370 | 371 | ## Contributing 372 | 373 | Contributions welcome! Please open an issue or PR on [GitHub](https://github.com/daveyeke). 374 | 375 | ## Author 376 | 377 | Dave Mkpa Eke - [GitHub](https://github.com/daveyeke) | [X](https://x.com/1804davey) 378 | -------------------------------------------------------------------------------- /ios/ExpoSpeechTranscriberModule.swift: -------------------------------------------------------------------------------- 1 | import ExpoModulesCore 2 | import Speech 3 | import AVFoundation 4 | 5 | public class ExpoSpeechTranscriberModule: Module { 6 | private var audioEngine = AVAudioEngine() 7 | private var recognitionRequest: SFSpeechAudioBufferRecognitionRequest? 8 | private var bufferRecognitionRequest: SFSpeechAudioBufferRecognitionRequest? 9 | private var recognitionTask: SFSpeechRecognitionTask? 10 | private var bufferRecognitionTask: SFSpeechRecognitionTask? 11 | private var startedListening = false 12 | 13 | public func definition() -> ModuleDefinition { 14 | Name("ExpoSpeechTranscriber") 15 | 16 | Events("onTranscriptionProgress", "onTranscriptionError") 17 | 18 | // expose realtime recording/transcription 19 | AsyncFunction("recordRealTimeAndTranscribe") { () async -> Void in 20 | await self.recordRealTimeAndTranscribe() 21 | } 22 | 23 | // Method 2: Transcribe from URL using SFSpeechRecognizer (iOS 13+) 24 | AsyncFunction("transcribeAudioWithSFRecognizer") { (audioFilePath: String) async throws -> String in 25 | 26 | let url: URL 27 | if audioFilePath.hasPrefix("file://") { 28 | url = URL(string: audioFilePath)! 29 | } else { 30 | url = URL(fileURLWithPath: audioFilePath) 31 | } 32 | 33 | let transcription = await self.transcribeAudio(url: url) 34 | return transcription 35 | } 36 | 37 | // Method 3: Transcribe from URL using SpeechAnalyzer (iOS 26+) 38 | AsyncFunction("transcribeAudioWithAnalyzer") { (audioFilePath: String) async throws -> String in 39 | 40 | if #available(iOS 26.0, *) { 41 | let url: URL 42 | if audioFilePath.hasPrefix("file://") { 43 | url = URL(string: audioFilePath)! 
44 | } else { 45 | url = URL(fileURLWithPath: audioFilePath) 46 | } 47 | 48 | let transcription = try await self.transcribeAudioWithAnalyzer(url: url) 49 | return transcription 50 | } else { 51 | throw NSError(domain: "ExpoSpeechTranscriber", code: 501, 52 | userInfo: [NSLocalizedDescriptionKey: "SpeechAnalyzer requires iOS 26.0 or later"]) 53 | } 54 | } 55 | 56 | AsyncFunction("requestPermissions") { () async -> String in 57 | return await self.requestTranscribePermissions() 58 | } 59 | 60 | AsyncFunction("requestMicrophonePermissions") { () async -> String in 61 | return await self.requestMicrophonePermissions() 62 | } 63 | 64 | 65 | Function("stopListening"){ () -> Void in 66 | return self.stopListening() 67 | } 68 | 69 | Function("isRecording") { () -> Bool in 70 | return self.isRecording() 71 | } 72 | 73 | Function("isAnalyzerAvailable") { () -> Bool in 74 | if #available(iOS 26.0, *) { 75 | return true 76 | } 77 | return false 78 | } 79 | 80 | AsyncFunction("realtimeBufferTranscribe") { (buffer: [Float32], sampleRate: Double) async -> Void in 81 | await self.realtimeBufferTranscribe(buffer: buffer, sampleRate: sampleRate) 82 | } 83 | 84 | Function("stopBufferTranscription") { () -> Void in 85 | return self.stopBufferTranscription() 86 | } 87 | } 88 | 89 | // MARK: - Private Implementation Methods 90 | 91 | private func realtimeBufferTranscribe(buffer: [Float32], sampleRate: Double) async -> Void { 92 | if bufferRecognitionRequest == nil { 93 | let speechRecognizer = SFSpeechRecognizer()! 94 | bufferRecognitionRequest = SFSpeechAudioBufferRecognitionRequest() 95 | 96 | guard let recognitionRequest = bufferRecognitionRequest else { 97 | self.sendEvent("onTranscriptionError", ["message": "Unable to create recognition request"]) 98 | return 99 | } 100 | recognitionRequest.shouldReportPartialResults = true 101 | 102 | bufferRecognitionTask = speechRecognizer.recognitionTask(with: recognitionRequest) { result, error in 103 | if let error = error { 104 | self.sendEvent("onTranscriptionError", ["message": error.localizedDescription]) 105 | return 106 | } 107 | 108 | guard let result = result else { 109 | return 110 | } 111 | 112 | let recognizedText = result.bestTranscription.formattedString 113 | self.sendEvent( 114 | "onTranscriptionProgress", 115 | ["text": recognizedText, "isFinal": result.isFinal] 116 | ) 117 | } 118 | } 119 | 120 | let format = AVAudioFormat(standardFormatWithSampleRate: sampleRate, channels: AVAudioChannelCount(1))! 
// hardcode channel to 1 since we only support mono audio 
121 | guard let pcmBuffer = AVAudioPCMBuffer(pcmFormat: format, frameCapacity: AVAudioFrameCount(buffer.count)) else { 
122 | self.sendEvent("onTranscriptionError", ["message": "Unable to create PCM buffer"]) 
123 | return 
124 | } 
125 | 
126 | pcmBuffer.frameLength = AVAudioFrameCount(buffer.count) 
127 | if let channelData = pcmBuffer.floatChannelData { 
128 | buffer.withUnsafeBufferPointer { bufferPointer in 
129 | guard let sourceAddress = bufferPointer.baseAddress else { return } 
130 | 
131 | let destination = channelData[0] 
132 | let byteCount = buffer.count * MemoryLayout<Float32>.size 
133 | 
134 | memcpy(destination, sourceAddress, byteCount) 
135 | } 
136 | } 
137 | 
138 | // Append buffer to recognition request 
139 | bufferRecognitionRequest?.append(pcmBuffer) 
140 | } 
141 | 
142 | private func stopBufferTranscription() { 
143 | bufferRecognitionRequest?.endAudio() 
144 | bufferRecognitionRequest = nil 
145 | 
146 | bufferRecognitionTask?.cancel() 
147 | bufferRecognitionTask = nil 
148 | } 
149 | 
150 | // Realtime recording and transcription using SFSpeechRecognizer 
151 | private func recordRealTimeAndTranscribe() async -> Void { 
152 | let speechRecognizer = SFSpeechRecognizer()! 
153 | recognitionRequest = SFSpeechAudioBufferRecognitionRequest() 
154 | guard let recognitionRequest = recognitionRequest else { 
155 | self.sendEvent("onTranscriptionError", ["message": "Unable to create recognition request"]) 
156 | return 
157 | } 
158 | recognitionRequest.shouldReportPartialResults = true 
159 | 
160 | let inputNode = audioEngine.inputNode 
161 | let recordingFormat = inputNode.outputFormat(forBus: 0) 
162 | inputNode.removeTap(onBus: 0) 
163 | 
164 | inputNode.installTap(onBus: 0, bufferSize: 1024, format: recordingFormat) { buffer, when in 
165 | recognitionRequest.append(buffer) 
166 | } 
167 | 
168 | audioEngine.prepare() 
169 | do { 
170 | try audioEngine.start() 
171 | startedListening = true 
172 | } catch { 
173 | self.sendEvent("onTranscriptionError", ["message": error.localizedDescription]) 
174 | return 
175 | } 
176 | 
177 | recognitionTask = speechRecognizer.recognitionTask(with: recognitionRequest) { result, error in 
178 | if let error = error { 
179 | self.stopListening() 
180 | self.sendEvent("onTranscriptionError", ["message": error.localizedDescription]) 
181 | return 
182 | } 
183 | 
184 | guard let result = result else { 
185 | return 
186 | } 
187 | 
188 | let recognizedText = result.bestTranscription.formattedString 
189 | self.sendEvent( 
190 | "onTranscriptionProgress", 
191 | ["text": recognizedText, "isFinal": result.isFinal] 
192 | ) 
193 | 
194 | if result.isFinal { 
195 | self.stopListening() 
196 | } 
197 | } 
198 | } 
199 | 
200 | private func stopListening() { 
201 | audioEngine.stop() 
202 | audioEngine.inputNode.removeTap(onBus: 0) 
203 | //recognitionRequest?.endAudio() 
204 | recognitionRequest = nil 
205 | recognitionTask?.cancel() 
206 | recognitionTask = nil 
207 | } 
208 | 
209 | 
210 | private func isRecording() -> Bool { 
211 | return audioEngine.isRunning 
212 | } 
213 | 
214 | 
215 | 
216 | // Implementation for URL transcription with SFSpeechRecognizer 
217 | private func transcribeAudio(url: URL) async -> String { 
218 | 
219 | guard FileManager.default.fileExists(atPath: url.path) else { 
220 | let err = "Error: Audio file not found at \(url.path)" 
221 | return err 
222 | } 
223 | 
224 | return await withCheckedContinuation { continuation in 
225 | guard let recognizer = SFSpeechRecognizer() else { 
226 | let err = "Error: Speech recognizer not available for current locale" 
227 |
continuation.resume(returning: err) 228 | return 229 | } 230 | 231 | guard recognizer.isAvailable else { 232 | let err = "Error: Speech recognizer not available at this time" 233 | continuation.resume(returning: err) 234 | return 235 | } 236 | 237 | let request = SFSpeechURLRecognitionRequest(url: url) 238 | request.shouldReportPartialResults = false 239 | recognizer.recognitionTask(with: request) { (result, error) in 240 | if let error = error { 241 | let errorMsg = "Error: \(error.localizedDescription)" 242 | continuation.resume(returning: errorMsg) 243 | return 244 | } 245 | 246 | guard let result = result else { 247 | let errorMsg = "Error: No transcription available" 248 | continuation.resume(returning: errorMsg) 249 | return 250 | } 251 | 252 | if result.isFinal { 253 | let text = result.bestTranscription.formattedString 254 | let finalResult = text.isEmpty ? "No speech detected" : text 255 | continuation.resume(returning: finalResult) 256 | } 257 | } 258 | } 259 | } 260 | 261 | // Implementation for URL transcription with SpeechAnalyzer (iOS 26+) 262 | @available(iOS 26.0, *) 263 | private func transcribeAudioWithAnalyzer(url: URL) async throws -> String { 264 | 265 | guard FileManager.default.fileExists(atPath: url.path) else { 266 | throw NSError(domain: "ExpoSpeechTranscriber", code: 404, 267 | userInfo: [NSLocalizedDescriptionKey: "Audio file not found at \(url.path)"]) 268 | } 269 | 270 | let locale = Locale(identifier: "en_US") 271 | 272 | guard await isLocaleSupported(locale: locale) else { 273 | throw NSError(domain: "ExpoSpeechTranscriber", code: 400, 274 | userInfo: [NSLocalizedDescriptionKey: "English locale not supported"]) 275 | } 276 | 277 | let transcriber = SpeechTranscriber( 278 | locale: locale, 279 | transcriptionOptions: [], 280 | reportingOptions: [.volatileResults], 281 | attributeOptions: [.audioTimeRange] 282 | ) 283 | 284 | try await ensureModel(transcriber: transcriber, locale: locale) 285 | 286 | let analyzer = SpeechAnalyzer(modules: [transcriber]) 287 | 288 | let audioFile = try AVAudioFile(forReading: url) 289 | if let lastSample = try await analyzer.analyzeSequence(from: audioFile) { 290 | try await analyzer.finalizeAndFinish(through: lastSample) 291 | } else { 292 | await analyzer.cancelAndFinishNow() 293 | } 294 | 295 | var finalText = "" 296 | for try await recResponse in transcriber.results { 297 | if recResponse.isFinal { 298 | finalText += String(recResponse.text.characters) 299 | } 300 | } 301 | 302 | let result = finalText.isEmpty ? 
"No speech detected" : finalText 303 | return result 304 | } 305 | 306 | @available(iOS 26.0, *) 307 | private func isLocaleSupported(locale: Locale) async -> Bool { 308 | guard SpeechTranscriber.isAvailable else { return false } 309 | let supported = await DictationTranscriber.supportedLocales 310 | return supported.map { $0.identifier(.bcp47) }.contains(locale.identifier(.bcp47)) 311 | } 312 | 313 | @available(iOS 26.0, *) 314 | private func isLocaleInstalled(locale: Locale) async -> Bool { 315 | let installed = await Set(SpeechTranscriber.installedLocales) 316 | return installed.map { $0.identifier(.bcp47) }.contains(locale.identifier(.bcp47)) 317 | } 318 | 319 | @available(iOS 26.0, *) 320 | private func ensureModel(transcriber: SpeechTranscriber, locale: Locale) async throws { 321 | guard await isLocaleSupported(locale: locale) else { 322 | throw NSError(domain: "ExpoSpeechTranscriber", code: 400, 323 | userInfo: [NSLocalizedDescriptionKey: "Locale not supported"]) 324 | } 325 | 326 | if await isLocaleInstalled(locale: locale) { 327 | return 328 | } else { 329 | try await downloadModelIfNeeded(for: transcriber) 330 | } 331 | } 332 | 333 | @available(iOS 26.0, *) 334 | private func downloadModelIfNeeded(for module: SpeechTranscriber) async throws { 335 | if let downloader = try await AssetInventory.assetInstallationRequest(supporting: [module]) { 336 | try await downloader.downloadAndInstall() 337 | } 338 | } 339 | 340 | private func requestTranscribePermissions() async -> String { 341 | return await withCheckedContinuation { continuation in 342 | SFSpeechRecognizer.requestAuthorization { authStatus in 343 | let result: String 344 | switch authStatus { 345 | case .authorized: 346 | result = "authorized" 347 | case .denied: 348 | result = "denied" 349 | case .restricted: 350 | result = "restricted" 351 | case .notDetermined: 352 | result = "notDetermined" 353 | @unknown default: 354 | result = "unknown" 355 | } 356 | continuation.resume(returning: result) 357 | } 358 | } 359 | } 360 | 361 | private func requestMicrophonePermissions() async -> String { 362 | return await withCheckedContinuation { continuation in 363 | AVAudioSession.sharedInstance().requestRecordPermission { granted in 364 | let result = granted ? "granted" : "denied" 365 | continuation.resume(returning: result) 366 | } 367 | } 368 | } 369 | } 370 | 371 | --------------------------------------------------------------------------------