├── plugin
│   ├── tsconfig.tsbuildinfo
│   ├── tsconfig.json
│   └── src
│       └── withSpeechTranscriber.ts
├── example
│   ├── assets
│   │   ├── icon.png
│   │   ├── favicon.png
│   │   ├── splash-icon.png
│   │   └── adaptive-icon.png
│   ├── babel.config.js
│   ├── tsconfig.json
│   ├── index.ts
│   ├── webpack.config.js
│   ├── .gitignore
│   ├── package.json
│   ├── metro.config.js
│   ├── app.json
│   ├── BufferTranscriptionExample.tsx
│   ├── RecordRealTimeAndTranscribe.tsx
│   └── App.tsx
├── expo-speech-transcriber-0.1.0.tgz
├── app.plugin.js
├── .eslintrc.js
├── expo-module.config.json
├── tsconfig.json
├── .npmignore
├── android
│   ├── src
│   │   └── main
│   │       ├── AndroidManifest.xml
│   │       └── java
│   │           └── expo
│   │               └── modules
│   │                   └── speechtranscriber
│   │                       └── ExpoSpeechTranscriberModule.kt
│   └── build.gradle
├── src
│   ├── ExpoSpeechTranscriberModule.web.ts
│   ├── ExpoSpeechTranscriberModule.ts
│   ├── ExpoSpeechTranscriber.types.ts
│   └── index.ts
├── .gitignore
├── ios
│   ├── ExpoSpeechTranscriber.podspec
│   └── ExpoSpeechTranscriberModule.swift
├── LICENSE
├── package.json
└── README.md
/plugin/tsconfig.tsbuildinfo:
--------------------------------------------------------------------------------
1 | {"root":["./src/withspeechtranscriber.ts"],"version":"5.9.3"}
--------------------------------------------------------------------------------
/example/assets/icon.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DaveyEke/expo-speech-transcriber/HEAD/example/assets/icon.png
--------------------------------------------------------------------------------
/example/assets/favicon.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DaveyEke/expo-speech-transcriber/HEAD/example/assets/favicon.png
--------------------------------------------------------------------------------
/example/assets/splash-icon.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DaveyEke/expo-speech-transcriber/HEAD/example/assets/splash-icon.png
--------------------------------------------------------------------------------
/example/assets/adaptive-icon.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DaveyEke/expo-speech-transcriber/HEAD/example/assets/adaptive-icon.png
--------------------------------------------------------------------------------
/expo-speech-transcriber-0.1.0.tgz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DaveyEke/expo-speech-transcriber/HEAD/expo-speech-transcriber-0.1.0.tgz
--------------------------------------------------------------------------------
/app.plugin.js:
--------------------------------------------------------------------------------
1 | // This file configures the entry file for your plugin.
2 | module.exports = require('./plugin/build/withSpeechTranscriber');
3 |
--------------------------------------------------------------------------------
/.eslintrc.js:
--------------------------------------------------------------------------------
1 | module.exports = {
2 | root: true,
3 | extends: ['universe/native', 'universe/web'],
4 | ignorePatterns: ['build'],
5 | };
6 |
--------------------------------------------------------------------------------
/example/babel.config.js:
--------------------------------------------------------------------------------
1 | module.exports = function (api) {
2 | api.cache(true);
3 | return {
4 | presets: ['babel-preset-expo'],
5 | };
6 | };
7 |
--------------------------------------------------------------------------------
/plugin/tsconfig.json:
--------------------------------------------------------------------------------
1 | {
2 | "extends": "expo-module-scripts/tsconfig.plugin",
3 | "compilerOptions": {
4 | "outDir": "build",
5 | "rootDir": "src"
6 | },
7 | "include": ["./src"],
8 | "exclude": ["**/__mocks__/*", "**/__tests__/*"]
9 | }
10 |
--------------------------------------------------------------------------------
/example/tsconfig.json:
--------------------------------------------------------------------------------
1 | {
2 | "extends": "expo/tsconfig.base",
3 | "compilerOptions": {
4 | "strict": true,
5 | "paths": {
6 | "expo-speech-transcriber": ["../src/index"],
7 | "expo-speech-transcriber/*": ["../src/*"]
8 | }
9 | }
10 | }
11 |
--------------------------------------------------------------------------------
/expo-module.config.json:
--------------------------------------------------------------------------------
1 | {
2 | "platforms": ["apple", "android", "web"],
3 | "apple": {
4 | "modules": ["ExpoSpeechTranscriberModule"]
5 | },
6 | "android": {
7 | "modules": ["expo.modules.speechtranscriber.ExpoSpeechTranscriberModule"]
8 | }
9 | }
10 |
--------------------------------------------------------------------------------
/tsconfig.json:
--------------------------------------------------------------------------------
1 | // @generated by expo-module-scripts
2 | {
3 | "extends": "expo-module-scripts/tsconfig.base",
4 | "compilerOptions": {
5 | "outDir": "./build"
6 | },
7 | "include": ["./src"],
8 | "exclude": ["**/__mocks__/*", "**/__tests__/*", "**/__rsc_tests__/*"]
9 | }
10 |
--------------------------------------------------------------------------------
/.npmignore:
--------------------------------------------------------------------------------
1 | # Exclude all top-level hidden directories by convention
2 | /.*/
3 |
4 | # Exclude tarballs generated by `npm pack`
5 | /*.tgz
6 |
7 | __mocks__
8 | __tests__
9 |
10 | /babel.config.js
11 | /android/src/androidTest/
12 | /android/src/test/
13 | /android/build/
14 | /example/
15 |
--------------------------------------------------------------------------------
/android/src/main/AndroidManifest.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
--------------------------------------------------------------------------------
/example/index.ts:
--------------------------------------------------------------------------------
1 | import { registerRootComponent } from 'expo';
2 |
3 | import App from './RecordRealTimeAndTranscribe'; // To see the demo for buffer-based transcription, import BufferTranscriptionExample instead, i.e. import App from './BufferTranscriptionExample';
4 |
5 |
6 | // registerRootComponent calls AppRegistry.registerComponent('main', () => App);
7 | // It also ensures that whether you load the app in Expo Go or in a native build,
8 | // the environment is set up appropriately
9 | registerRootComponent(App);
10 |
--------------------------------------------------------------------------------
/example/webpack.config.js:
--------------------------------------------------------------------------------
1 | const createConfigAsync = require('@expo/webpack-config');
2 | const path = require('path');
3 |
4 | module.exports = async (env, argv) => {
5 | const config = await createConfigAsync(
6 | {
7 | ...env,
8 | babel: {
9 | dangerouslyAddModulePathsToTranspile: ['expo-speech-transcriber'],
10 | },
11 | },
12 | argv
13 | );
14 | config.resolve.modules = [
15 | path.resolve(__dirname, './node_modules'),
16 | path.resolve(__dirname, '../node_modules'),
17 | ];
18 |
19 | return config;
20 | };
21 |
--------------------------------------------------------------------------------
/src/ExpoSpeechTranscriberModule.web.ts:
--------------------------------------------------------------------------------
1 | // import { registerWebModule, NativeModule } from 'expo';
2 | // import { ExpoSpeechTranscriberModuleEvents } from './ExpoSpeechTranscriber.types';
3 |
4 | // class ExpoSpeechTranscriberModule extends NativeModule<ExpoSpeechTranscriberModuleEvents> {
5 | // PI = Math.PI;
6 | // async setValueAsync(value: string): Promise<void> {
7 | // this.emit('onChange', { value });
8 | // }
9 | // hello() {
10 | // return 'Hello world! 👋';
11 | // }
12 | // }
13 |
14 | // export default registerWebModule(ExpoSpeechTranscriberModule, 'ExpoSpeechTranscriberModule');
15 |
--------------------------------------------------------------------------------
/example/.gitignore:
--------------------------------------------------------------------------------
1 | # Learn more https://docs.github.com/en/get-started/getting-started-with-git/ignoring-files
2 |
3 | # dependencies
4 | node_modules/
5 |
6 | # Expo
7 | .expo/
8 | dist/
9 | web-build/
10 | expo-env.d.ts
11 |
12 | # Native
13 | .kotlin/
14 | *.orig.*
15 | *.jks
16 | *.p8
17 | *.p12
18 | *.key
19 | *.mobileprovision
20 |
21 | # Metro
22 | .metro-health-check*
23 |
24 | # debug
25 | npm-debug.*
26 | yarn-debug.*
27 | yarn-error.*
28 |
29 | # macOS
30 | .DS_Store
31 | *.pem
32 |
33 | # local env files
34 | .env*.local
35 |
36 | # typescript
37 | *.tsbuildinfo
38 |
39 | # generated native folders
40 | /ios
41 | /android
42 |
--------------------------------------------------------------------------------
/example/package.json:
--------------------------------------------------------------------------------
1 | {
2 | "name": "expo-speech-transcriber-example",
3 | "version": "1.0.0",
4 | "main": "index.ts",
5 | "scripts": {
6 | "start": "expo start",
7 | "android": "expo run:android",
8 | "ios": "expo run:ios",
9 | "web": "expo start --web"
10 | },
11 | "dependencies": {
12 | "expo": "~54.0.25",
13 | "expo-audio": "~1.0.15",
14 | "expo-linking": "^8.0.9",
15 | "react": "19.1.0",
16 | "react-native": "0.81.5",
17 | "react-native-audio-api": "^0.10.1"
18 | },
19 | "devDependencies": {
20 | "@types/react": "~19.1.0",
21 | "typescript": "~5.9.2"
22 | },
23 | "private": true,
24 | "expo": {
25 | "autolinking": {
26 | "nativeModulesDir": ".."
27 | }
28 | }
29 | }
30 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | # OSX
2 | #
3 | .DS_Store
4 |
5 | # VSCode
6 | .vscode/
7 | jsconfig.json
8 |
9 | # Xcode
10 | #
11 | build/
12 | *.pbxuser
13 | !default.pbxuser
14 | *.mode1v3
15 | !default.mode1v3
16 | *.mode2v3
17 | !default.mode2v3
18 | *.perspectivev3
19 | !default.perspectivev3
20 | xcuserdata
21 | *.xccheckout
22 | *.moved-aside
23 | DerivedData
24 | *.hmap
25 | *.ipa
26 | *.xcuserstate
27 | project.xcworkspace
28 |
29 | # Android/IJ
30 | #
31 | .classpath
32 | .cxx
33 | .gradle
34 | .idea
35 | .project
36 | .settings
37 | local.properties
38 | android.iml
39 | android/app/libs
40 | android/keystores/debug.keystore
41 |
42 | # Cocoapods
43 | #
44 | example/ios/Pods
45 |
46 | # Ruby
47 | example/vendor/
48 |
49 | # node.js
50 | #
51 | node_modules/
52 | npm-debug.log
53 | yarn-debug.log
54 | yarn-error.log
55 |
56 | # Expo
57 | .expo/*
58 |
--------------------------------------------------------------------------------
/ios/ExpoSpeechTranscriber.podspec:
--------------------------------------------------------------------------------
1 | require 'json'
2 |
3 | package = JSON.parse(File.read(File.join(__dir__, '..', 'package.json')))
4 |
5 | Pod::Spec.new do |s|
6 | s.name = 'ExpoSpeechTranscriber'
7 | s.version = package['version']
8 | s.summary = package['description']
9 | s.description = package['description']
10 | s.license = package['license']
11 | s.author = package['author']
12 | s.homepage = package['homepage']
13 | s.platforms = {
14 | :ios => '15.1',
15 | :tvos => '15.1'
16 | }
17 | s.swift_version = '5.9'
18 | s.source = { git: 'https://github.com/DaveyEke/expo-speech-transcriber' }
19 | s.static_framework = true
20 |
21 | s.dependency 'ExpoModulesCore'
22 |
23 | # Swift/Objective-C compatibility
24 | s.pod_target_xcconfig = {
25 | 'DEFINES_MODULE' => 'YES',
26 | }
27 |
28 | s.source_files = "**/*.{h,m,mm,swift,hpp,cpp}"
29 | end
30 |
--------------------------------------------------------------------------------
/src/ExpoSpeechTranscriberModule.ts:
--------------------------------------------------------------------------------
1 | import { requireNativeModule, NativeModule } from 'expo-modules-core';
2 | import type {
3 | ExpoSpeechTranscriberModuleEvents,
4 | PermissionTypes,
5 | MicrophonePermissionTypes
6 | } from './ExpoSpeechTranscriber.types';
7 |
8 | declare class ExpoSpeechTranscriberNative extends NativeModule<ExpoSpeechTranscriberModuleEvents> {
9 | recordRealTimeAndTranscribe(): Promise<boolean>;
10 | stopListening(): void;
11 | transcribeAudioWithSFRecognizer(audioFilePath: string): Promise<string>;
12 | transcribeAudioWithAnalyzer(audioFilePath: string): Promise<string>;
13 | requestPermissions(): Promise<PermissionTypes>;
14 | requestMicrophonePermissions(): Promise<MicrophonePermissionTypes>;
15 | isRecording(): boolean;
16 | isAnalyzerAvailable(): boolean;
17 | }
18 |
19 | const ExpoSpeechTranscriberModule =
20 | requireNativeModule<ExpoSpeechTranscriberNative>('ExpoSpeechTranscriber');
21 |
22 | export default ExpoSpeechTranscriberModule;
23 |
--------------------------------------------------------------------------------
/src/ExpoSpeechTranscriber.types.ts:
--------------------------------------------------------------------------------
1 | import type { StyleProp, ViewStyle } from 'react-native';
2 |
3 | export type OnLoadEventPayload = {
4 | url: string;
5 | };
6 |
7 | export type TranscriptionProgressPayload = {
8 | text: string;
9 | isFinal: boolean;
10 | };
11 |
12 |
13 | export type TranscriptionErrorPayload = {
14 | error: string;
15 | };
16 |
17 | export type ExpoSpeechTranscriberModuleEvents = {
18 | onTranscriptionProgress(payload: TranscriptionProgressPayload): void;
19 | onTranscriptionError(payload: TranscriptionErrorPayload): void;
20 | };
21 |
22 | export type ChangeEventPayload = {
23 | value: string;
24 | };
25 |
26 |
27 | export type PermissionTypes = 'authorized' | 'denied' | 'restricted' | 'notDetermined';
28 |
29 | export type MicrophonePermissionTypes = 'granted' | 'denied'
30 |
31 | export type ExpoSpeechTranscriberViewProps = {
32 | url: string;
33 | onLoad: (event: { nativeEvent: OnLoadEventPayload }) => void;
34 | style?: StyleProp<ViewStyle>;
35 | };
36 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) 2025 Dave Mkpa Eke
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
--------------------------------------------------------------------------------
/plugin/src/withSpeechTranscriber.ts:
--------------------------------------------------------------------------------
1 | import { ConfigPlugin, IOSConfig, AndroidConfig } from 'expo/config-plugins';
2 |
3 | const SPEECH_RECOGNITION_USAGE = 'Allow $(PRODUCT_NAME) to use speech recognition to transcribe audio';
4 | const MICROPHONE_USAGE = 'Allow $(PRODUCT_NAME) to access your microphone';
5 |
6 | const withSpeechTranscriber: ConfigPlugin<{ speechRecognitionPermission?: string | false; microphonePermission?: string | false } | void> = (
7 | config,
8 | { speechRecognitionPermission, microphonePermission } = {}
9 | ) => {
10 | config = IOSConfig.Permissions.createPermissionsPlugin({
11 | NSSpeechRecognitionUsageDescription: SPEECH_RECOGNITION_USAGE,
12 | NSMicrophoneUsageDescription: MICROPHONE_USAGE,
13 | })(config, {
14 | NSSpeechRecognitionUsageDescription: speechRecognitionPermission,
15 | NSMicrophoneUsageDescription: microphonePermission,
16 | });
17 |
18 | if (microphonePermission !== false) {
19 | config = AndroidConfig.Permissions.withPermissions(config, [
20 | 'android.permission.RECORD_AUDIO',
21 | ]);
22 | }
23 |
24 | return config;
25 | };
26 |
27 | export default withSpeechTranscriber;
--------------------------------------------------------------------------------
/example/metro.config.js:
--------------------------------------------------------------------------------
1 | // Learn more https://docs.expo.io/guides/customizing-metro
2 | const { getDefaultConfig } = require('expo/metro-config');
3 | const path = require('path');
4 |
5 | const config = getDefaultConfig(__dirname);
6 |
7 | // npm v7+ will install ../node_modules/react and ../node_modules/react-native because of peerDependencies.
8 | // To prevent the incompatible react-native between ./node_modules/react-native and ../node_modules/react-native,
9 | // excludes the one from the parent folder when bundling.
10 | config.resolver.blockList = [
11 | ...Array.from(config.resolver.blockList ?? []),
12 | new RegExp(path.resolve('..', 'node_modules', 'react')),
13 | new RegExp(path.resolve('..', 'node_modules', 'react-native')),
14 | ];
15 |
16 | config.resolver.nodeModulesPaths = [
17 | path.resolve(__dirname, './node_modules'),
18 | path.resolve(__dirname, '../node_modules'),
19 | ];
20 |
21 | config.resolver.extraNodeModules = {
22 | 'expo-speech-transcriber': '..',
23 | };
24 |
25 | config.watchFolders = [path.resolve(__dirname, '..')];
26 |
27 | config.transformer.getTransformOptions = async () => ({
28 | transform: {
29 | experimentalImportSupport: false,
30 | inlineRequires: true,
31 | },
32 | });
33 |
34 | module.exports = config;
35 |
--------------------------------------------------------------------------------
/example/app.json:
--------------------------------------------------------------------------------
1 | {
2 | "expo": {
3 | "name": "expo-speech-transcriber-example",
4 | "slug": "expo-speech-transcriber-example",
5 | "version": "1.0.0",
6 | "orientation": "portrait",
7 | "icon": "./assets/icon.png",
8 | "userInterfaceStyle": "light",
9 | "newArchEnabled": true,
10 | "splash": {
11 | "image": "./assets/splash-icon.png",
12 | "resizeMode": "contain",
13 | "backgroundColor": "#ffffff"
14 | },
15 | "ios": {
16 | "supportsTablet": true,
17 | "bundleIdentifier": "expo.modules.speechtranscriber.example"
18 | },
19 | "android": {
20 | "adaptiveIcon": {
21 | "foregroundImage": "./assets/adaptive-icon.png",
22 | "backgroundColor": "#ffffff"
23 | },
24 | "edgeToEdgeEnabled": true,
25 | "predictiveBackGestureEnabled": false,
26 | "package": "expo.modules.speechtranscriber.example"
27 | },
28 | "web": {
29 | "favicon": "./assets/favicon.png"
30 | },
31 | "plugins": [
32 | "../app.plugin.js",
33 | "expo-audio",
34 | [
35 | "react-native-audio-api",
36 | {
37 | "iosBackgroundMode": true,
38 | "iosMicrophonePermission": "This app requires access to the microphone to record audio."
39 | }
40 | ]
41 | ]
42 | }
43 | }
44 |
--------------------------------------------------------------------------------
/android/build.gradle:
--------------------------------------------------------------------------------
1 | apply plugin: 'com.android.library'
2 |
3 | group = 'expo.modules.speechtranscriber'
4 | version = '0.1.0'
5 |
6 | def expoModulesCorePlugin = new File(project(":expo-modules-core").projectDir.absolutePath, "ExpoModulesCorePlugin.gradle")
7 | apply from: expoModulesCorePlugin
8 | applyKotlinExpoModulesCorePlugin()
9 | useCoreDependencies()
10 | useExpoPublishing()
11 |
12 | // If you want to use the managed Android SDK versions from expo-modules-core, set this to true.
13 | // The Android SDK versions will be bumped from time to time in SDK releases and may introduce breaking changes in your module code.
14 | // Most of the time, you may like to manage the Android SDK versions yourself.
15 | def useManagedAndroidSdkVersions = false
16 | if (useManagedAndroidSdkVersions) {
17 | useDefaultAndroidSdkVersions()
18 | } else {
19 | buildscript {
20 | // Simple helper that allows the root project to override versions declared by this library.
21 | ext.safeExtGet = { prop, fallback ->
22 | rootProject.ext.has(prop) ? rootProject.ext.get(prop) : fallback
23 | }
24 | }
25 | project.android {
26 | compileSdkVersion safeExtGet("compileSdkVersion", 36)
27 | defaultConfig {
28 | minSdkVersion safeExtGet("minSdkVersion", 24)
29 | targetSdkVersion safeExtGet("targetSdkVersion", 36)
30 | }
31 | }
32 | }
33 |
34 | android {
35 | namespace "expo.modules.speechtranscriber"
36 | defaultConfig {
37 | versionCode 1
38 | versionName "0.1.0"
39 | }
40 | lintOptions {
41 | abortOnError false
42 | }
43 | }
44 |
--------------------------------------------------------------------------------
/package.json:
--------------------------------------------------------------------------------
1 | {
2 | "name": "expo-speech-transcriber",
3 | "version": "0.1.9",
4 | "description": "An iOS only on-device transcription library for React Native and Expo apps.",
5 | "main": "build/index.js",
6 | "types": "build/index.d.ts",
7 | "scripts": {
8 | "build": "expo-module build",
9 | "clean": "expo-module clean",
10 | "lint": "expo-module lint",
11 | "test": "expo-module test",
12 | "prepare": "expo-module prepare",
13 | "prepublishOnly": "expo-module prepublishOnly",
14 | "expo-module": "expo-module",
15 | "open:ios": "xed example/ios",
16 | "open:android": "open -a \"Android Studio\" example/android"
17 | },
18 | "keywords": [
19 | "react-native",
20 | "expo",
21 | "expo-speech-transcriber",
22 | "ExpoSpeechTranscriber",
23 | "transcription",
24 | "speech-to-text",
25 | "ios",
26 | "siri",
27 | "voice",
28 | "audio",
29 | "recognition"
30 | ],
31 | "repository": "https://github.com/DaveyEke/expo-speech-transcriber",
32 | "bugs": {
33 | "url": "https://github.com/DaveyEke/expo-speech-transcriber/issues"
34 | },
35 | "author": "Dave Mkpa Eke (https://github.com/DaveyEke)",
36 | "license": "MIT",
37 | "homepage": "https://github.com/DaveyEke/expo-speech-transcriber#readme",
38 | "dependencies": {},
39 | "devDependencies": {
40 | "@types/react": "~19.1.0",
41 | "expo-module-scripts": "^5.0.7",
42 | "expo": "^54.0.18",
43 | "react-native": "0.81.5"
44 | },
45 | "peerDependencies": {
46 | "expo": "*",
47 | "react": "*",
48 | "react-native": "*"
49 | }
50 | }
51 |
--------------------------------------------------------------------------------
/src/index.ts:
--------------------------------------------------------------------------------
1 | // Reexport the native module. On web, it will be resolved to ExpoSpeechTranscriberModule.web.ts
2 | // and on native platforms to ExpoSpeechTranscriberModule.ts
3 | import ExpoSpeechTranscriberModule from './ExpoSpeechTranscriberModule';
4 | import type {
5 | TranscriptionProgressPayload,
6 | TranscriptionErrorPayload,
7 | MicrophonePermissionTypes,
8 | PermissionTypes
9 | } from './ExpoSpeechTranscriber.types';
10 | import { useState, useEffect } from 'react';
11 |
12 | export function recordRealTimeAndTranscribe(): Promise<boolean> {
13 | return ExpoSpeechTranscriberModule.recordRealTimeAndTranscribe();
14 | }
15 |
16 | export { default as ExpoSpeechTranscriberModule } from './ExpoSpeechTranscriberModule';
17 | export * from './ExpoSpeechTranscriber.types';
18 |
19 | export function transcribeAudioWithSFRecognizer(audioFilePath: string): Promise<string> {
20 | return ExpoSpeechTranscriberModule.transcribeAudioWithSFRecognizer(audioFilePath);
21 | }
22 |
23 | export function stopListening(): void {
24 | return ExpoSpeechTranscriberModule.stopListening();
25 | }
26 |
27 | export function transcribeAudioWithAnalyzer(audioFilePath: string): Promise<string> {
28 | return ExpoSpeechTranscriberModule.transcribeAudioWithAnalyzer(audioFilePath);
29 | }
30 |
31 | export function requestPermissions(): Promise<PermissionTypes> {
32 | return ExpoSpeechTranscriberModule.requestPermissions();
33 | }
34 |
35 | export function requestMicrophonePermissions(): Promise<MicrophonePermissionTypes> {
36 | return ExpoSpeechTranscriberModule.requestMicrophonePermissions();
37 | }
38 |
39 | export function isRecording(): boolean {
40 | return ExpoSpeechTranscriberModule.isRecording();
41 | }
42 |
43 | export function isAnalyzerAvailable(): boolean {
44 | return ExpoSpeechTranscriberModule.isAnalyzerAvailable();
45 | }
46 |
47 | export function realtimeBufferTranscribe(
48 | buffer: number[] | Float32Array,
49 | sampleRate: number,
50 | ): Promise<void> {
51 | const bufferArray = Array.isArray(buffer) ? buffer : Array.from(buffer);
52 | return ExpoSpeechTranscriberModule.realtimeBufferTranscribe(
53 | bufferArray,
54 | sampleRate,
55 | );
56 | }
57 |
58 | export function stopBufferTranscription(): void {
59 | return ExpoSpeechTranscriberModule.stopBufferTranscription();
60 | }
61 |
62 | export function useRealTimeTranscription() {
63 | const [text, setText] = useState('');
64 | const [isFinal, setIsFinal] = useState(false);
65 | const [error, setError] = useState<string | null>(null);
66 | const [isRecording, setIsRecording] = useState(false);
67 |
68 | useEffect(() => {
69 | const progressListener = ExpoSpeechTranscriberModule.addListener('onTranscriptionProgress', (payload: TranscriptionProgressPayload) => {
70 | setText(payload.text);
71 | setIsFinal(payload.isFinal);
72 | });
73 |
74 | const errorListener = ExpoSpeechTranscriberModule.addListener('onTranscriptionError', (payload: TranscriptionErrorPayload) => {
75 | setError(payload.error);
76 | })
77 |
78 |
79 | const interval = setInterval(() => {
80 | const newIsRecording = ExpoSpeechTranscriberModule.isRecording();
81 | setIsRecording(prev => (prev !== newIsRecording ? newIsRecording : prev));
82 | }, 100);
83 |
84 | return () => {
85 | clearInterval(interval);
86 | progressListener.remove();
87 | errorListener.remove();
88 | };
89 | }, []);
90 |
91 |
92 | useEffect(() => {
93 | if (isRecording) {
94 | setText('');
95 | setIsFinal(false);
96 | setError(null);
97 | }
98 | }, [isRecording]);
99 |
100 | return { text, isFinal, error, isRecording };
101 | }
102 |
--------------------------------------------------------------------------------
/android/src/main/java/expo/modules/speechtranscriber/ExpoSpeechTranscriberModule.kt:
--------------------------------------------------------------------------------
1 | package expo.modules.speechtranscriber
2 |
3 | import android.Manifest
4 | import android.content.Intent
5 | import android.content.pm.PackageManager
6 | import android.os.Bundle
7 | import android.os.Handler
8 | import android.os.Looper
9 | import android.speech.RecognitionListener
10 | import android.speech.RecognizerIntent
11 | import android.speech.SpeechRecognizer
12 | import android.util.Log
13 | import androidx.core.app.ActivityCompat
14 | import androidx.core.content.ContextCompat
15 | import expo.modules.kotlin.Promise
16 | import expo.modules.kotlin.modules.Module
17 | import expo.modules.kotlin.modules.ModuleDefinition
18 | import java.util.Locale
19 |
20 | class ExpoSpeechTranscriberModule : Module() {
21 | private var speechRecognizer: SpeechRecognizer? = null
22 | private val mainHandler by lazy { Handler(Looper.getMainLooper()) }
23 |
24 | private var isRecording = false
25 | private var permissionPromise: Promise? = null
26 | private val PERMISSION_REQUEST_CODE = 1001
27 |
28 | override fun definition() = ModuleDefinition {
29 | Name("ExpoSpeechTranscriber")
30 | Events("onTranscriptionProgress", "onTranscriptionError")
31 |
32 | AsyncFunction("recordRealTimeAndTranscribe") { promise: Promise ->
33 | mainHandler.post {
34 | startListening(promise)
35 | }
36 | }
37 |
38 | AsyncFunction("stopListening") {
39 | mainHandler.post {
40 | stopListening()
41 | }
42 | }
43 |
44 | Function("isRecording") {
45 | return@Function isRecording
46 | }
47 |
48 | AsyncFunction("requestMicrophonePermissions") { promise: Promise ->
49 | mainHandler.post {
50 | requestMicrophonePermissionsInternal(promise)
51 | }
52 | }
53 |
54 | OnDestroy {
55 | mainHandler.post {
56 | cleanup()
57 | }
58 | }
59 | }
60 |
61 | private fun startListening(promise: Promise) {
62 | val context = appContext.reactContext ?: run {
63 | sendEvent("onTranscriptionError", mapOf("message" to "Context is not available"))
64 | promise.resolve(false)
65 | return
66 | }
67 |
68 | if (!SpeechRecognizer.isRecognitionAvailable(context)) {
69 | val message = "Speech recognition is not available on this device."
70 | Log.e("ExpoSpeechTranscriber", message)
71 | sendEvent("onTranscriptionError", mapOf("message" to message))
72 | promise.resolve(false)
73 | return
74 | }
75 |
76 | if (ContextCompat.checkSelfPermission(context, Manifest.permission.RECORD_AUDIO) != PackageManager.PERMISSION_GRANTED) {
77 | sendEvent("onTranscriptionError", mapOf("message" to "Missing RECORD_AUDIO permission."))
78 | promise.resolve(false)
79 | return
80 | }
81 |
82 | speechRecognizer?.destroy()
83 | speechRecognizer = null
84 |
85 | speechRecognizer = SpeechRecognizer.createSpeechRecognizer(context)
86 |
87 | val intent = Intent(RecognizerIntent.ACTION_RECOGNIZE_SPEECH).apply {
88 | putExtra(RecognizerIntent.EXTRA_LANGUAGE_MODEL, RecognizerIntent.LANGUAGE_MODEL_FREE_FORM)
89 | putExtra(RecognizerIntent.EXTRA_LANGUAGE, Locale.getDefault())
90 | putExtra(RecognizerIntent.EXTRA_PARTIAL_RESULTS, true)
91 | putExtra(RecognizerIntent.EXTRA_MAX_RESULTS, 1)
92 | }
93 |
94 | speechRecognizer?.setRecognitionListener(createRecognitionListener())
95 | speechRecognizer?.startListening(intent)
96 | isRecording = true
97 | promise.resolve(true)
98 | }
99 |
100 | private fun stopListening() {
101 | try {
102 | speechRecognizer?.stopListening()
103 | speechRecognizer?.destroy()
104 | } catch (e: Exception) {
105 | Log.e("ExpoSpeechTranscriber", "Error stopping recognizer: ${e.message}")
106 | } finally {
107 | speechRecognizer = null
108 | isRecording = false
109 | }
110 | }
111 |
112 | private fun cleanup() {
113 | stopListening()
114 | }
115 |
116 | private fun createRecognitionListener(): RecognitionListener {
117 | return object : RecognitionListener {
118 | override fun onReadyForSpeech(params: Bundle?) {
119 | Log.d("ExpoSpeechTranscriber", "Ready for speech")
120 | }
121 |
122 | override fun onBeginningOfSpeech() {
123 | Log.d("ExpoSpeechTranscriber", "Speech started")
124 | }
125 |
126 | override fun onRmsChanged(rmsdB: Float) {}
127 | override fun onBufferReceived(buffer: ByteArray?) {}
128 |
129 | override fun onEndOfSpeech() {
130 | Log.d("ExpoSpeechTranscriber", "Speech ended")
131 | }
132 |
133 | override fun onPartialResults(partialResults: Bundle?) {
134 | val matches = partialResults?.getStringArrayList(SpeechRecognizer.RESULTS_RECOGNITION)
135 | if (!matches.isNullOrEmpty()) {
136 | Log.d("ExpoSpeechTranscriber", "Text: ${matches[0]}")
137 | sendEvent("onTranscriptionProgress", mapOf(
138 | "text" to matches[0],
139 | "isFinal" to false
140 | ))
141 | }
142 | }
143 |
144 | override fun onResults(results: Bundle?) {
145 | val matches = results?.getStringArrayList(SpeechRecognizer.RESULTS_RECOGNITION)
146 | if (!matches.isNullOrEmpty()) {
147 | Log.d("ExpoSpeechTranscriber", "Text: ${matches[0]}")
148 | sendEvent("onTranscriptionProgress", mapOf(
149 | "text" to matches[0],
150 | "isFinal" to true
151 | ))
152 | }
153 | stopListening()
154 | }
155 |
156 | override fun onError(error: Int) {
157 | val errorMessage = getErrorMessage(error)
158 | sendEvent("onTranscriptionError", mapOf("message" to errorMessage))
159 | stopListening()
160 | }
161 |
162 | override fun onEvent(eventType: Int, params: Bundle?) {}
163 | }
164 | }
165 |
166 | private fun getErrorMessage(errorCode: Int): String {
167 | return when (errorCode) {
168 | SpeechRecognizer.ERROR_AUDIO -> "Audio recording error"
169 | SpeechRecognizer.ERROR_CLIENT -> "Client side error"
170 | SpeechRecognizer.ERROR_INSUFFICIENT_PERMISSIONS -> "Insufficient permissions"
171 | SpeechRecognizer.ERROR_NETWORK -> "Network error"
172 | SpeechRecognizer.ERROR_NETWORK_TIMEOUT -> "Network timeout"
173 | SpeechRecognizer.ERROR_NO_MATCH -> "No match found"
174 | SpeechRecognizer.ERROR_RECOGNIZER_BUSY -> "Recognizer is busy"
175 | SpeechRecognizer.ERROR_SERVER -> "Error from server"
176 | SpeechRecognizer.ERROR_SPEECH_TIMEOUT -> "No speech input"
177 | else -> "An unknown error occurred"
178 | }
179 | }
180 |
181 | private fun requestMicrophonePermissionsInternal(promise: Promise) {
182 | val context = appContext.reactContext ?: run {
183 | promise.resolve("denied")
184 | return
185 | }
186 |
187 | if (ContextCompat.checkSelfPermission(context, Manifest.permission.RECORD_AUDIO)
188 | == PackageManager.PERMISSION_GRANTED) {
189 | promise.resolve("granted")
190 | return
191 | }
192 |
193 | val activity = appContext.currentActivity
194 | if (activity != null) {
195 | permissionPromise = promise
196 | ActivityCompat.requestPermissions(
197 | activity,
198 | arrayOf(Manifest.permission.RECORD_AUDIO),
199 | PERMISSION_REQUEST_CODE
200 | )
201 | } else {
202 | promise.resolve("denied")
203 | }
204 | }
205 | }
206 |
--------------------------------------------------------------------------------
/example/BufferTranscriptionExample.tsx:
--------------------------------------------------------------------------------
1 | import React, { useState } from 'react';
2 | import { View, Text, TouchableOpacity, StyleSheet, Alert, ScrollView } from 'react-native';
3 | import { Ionicons } from '@expo/vector-icons';
4 | import * as SpeechTranscriber from 'expo-speech-transcriber';
5 | import { AudioManager, AudioRecorder } from 'react-native-audio-api';
6 |
7 | const BufferTranscriptionExample = () => {
8 | const { text, isFinal, error } = SpeechTranscriber.useRealTimeTranscription();
9 | const [isTranscribing, setIsTranscribing] = useState(false);
10 | const [permissionsGranted, setPermissionsGranted] = useState(false);
11 | const [recorder, setRecorder] = useState<AudioRecorder | null>(null);
12 |
13 | const initializeRecorder = () => {
14 | const audioRecorder = new AudioRecorder({
15 | sampleRate: 16000,
16 | bufferLengthInSamples: 1600,
17 | });
18 |
19 | AudioManager.setAudioSessionOptions({
20 | iosCategory: 'playAndRecord',
21 | iosMode: 'spokenAudio',
22 | iosOptions: ['allowBluetooth', 'defaultToSpeaker'],
23 | });
24 |
25 | audioRecorder.onAudioReady(({ buffer }) => {
26 | const channelData = buffer.getChannelData(0);
27 |
28 | SpeechTranscriber.realtimeBufferTranscribe(
29 | channelData,
30 | 16000
31 | );
32 | });
33 |
34 | setRecorder(audioRecorder);
35 | };
36 |
37 | const requestAllPermissions = async () => {
38 | try {
39 | const speechPermission = await SpeechTranscriber.requestPermissions();
40 | const micPermission = await AudioManager.requestRecordingPermissions();
41 |
42 | if (speechPermission === 'authorized' && micPermission) {
43 | initializeRecorder();
44 | setPermissionsGranted(true);
45 | Alert.alert('Permissions Granted', 'All permissions are now available.');
46 | } else {
47 | Alert.alert('Permissions Required', 'Speech and microphone permissions are needed.');
48 | }
49 | } catch (err) {
50 | Alert.alert('Error', 'Failed to request permissions');
51 | }
52 | };
53 |
54 | const handleStartTranscribing = async () => {
55 | if (!permissionsGranted || !recorder) {
56 | await requestAllPermissions();
57 | return;
58 | }
59 |
60 | if (isTranscribing) {
61 | return;
62 | }
63 |
64 | setIsTranscribing(true);
65 | try {
66 | recorder.start();
67 | } catch (e) {
68 | console.error('Transcription failed', e);
69 | Alert.alert('Error', 'Failed to start transcription');
70 | setIsTranscribing(false);
71 | }
72 | };
73 |
74 | const handleStopTranscribing = () => {
75 | if (!isTranscribing || !recorder) {
76 | return;
77 | }
78 |
79 | recorder.stop();
80 | SpeechTranscriber.stopBufferTranscription();
81 | setIsTranscribing(false);
82 | };
83 |
84 | return (
85 |
86 | Buffer Transcription Demo
87 | Using react-native-audio-api
88 |
89 | {!permissionsGranted && (
90 |
91 |
92 | Request Permissions
93 |
94 | )}
95 |
96 | Buffer-Based Transcription
97 |
98 |
103 |
104 | Start Buffer Transcription
105 |
106 |
107 |
112 |
113 | Stop Buffer Transcription
114 |
115 |
116 | {isTranscribing && (
117 |
118 |
119 | Transcribing from buffer...
120 |
121 | )}
122 |
123 | {error && (
124 |
125 | Error: {error}
126 |
127 | )}
128 |
129 | {text && (
130 |
131 | Transcription:
132 | {text}
133 | {isFinal && Final!}
134 |
135 | )}
136 |
137 | {!isTranscribing && !text && (
138 |
139 | Request permissions, then start buffer transcription to stream audio data for real-time speech recognition.
140 |
141 | )}
142 |
143 | );
144 | };
145 |
146 | const styles = StyleSheet.create({
147 | container: {
148 | flexGrow: 1,
149 | justifyContent: 'center',
150 | alignItems: 'center',
151 | padding: 20,
152 | backgroundColor: '#f5f5f5',
153 | },
154 | title: {
155 | fontSize: 28,
156 | fontWeight: 'bold',
157 | marginBottom: 4,
158 | color: '#333',
159 | },
160 | subtitle: {
161 | fontSize: 16,
162 | color: '#666',
163 | marginBottom: 20,
164 | },
165 | sectionTitle: {
166 | fontSize: 20,
167 | fontWeight: 'bold',
168 | marginTop: 20,
169 | marginBottom: 10,
170 | color: '#333',
171 | },
172 | button: {
173 | flexDirection: 'row',
174 | alignItems: 'center',
175 | justifyContent: 'center',
176 | paddingVertical: 15,
177 | paddingHorizontal: 30,
178 | borderRadius: 12,
179 | marginVertical: 8,
180 | minWidth: 280,
181 | shadowColor: '#000',
182 | shadowOffset: { width: 0, height: 2 },
183 | shadowOpacity: 0.1,
184 | shadowRadius: 4,
185 | elevation: 3,
186 | },
187 | permissionButton: {
188 | backgroundColor: '#6c757d',
189 | },
190 | recordButton: {
191 | backgroundColor: '#007bff',
192 | },
193 | stopButton: {
194 | backgroundColor: '#dc3545',
195 | },
196 | disabled: {
197 | backgroundColor: '#ccc',
198 | opacity: 0.6,
199 | },
200 | buttonText: {
201 | color: '#fff',
202 | fontSize: 18,
203 | fontWeight: '600',
204 | marginLeft: 10,
205 | },
206 | recordingIndicator: {
207 | flexDirection: 'row',
208 | alignItems: 'center',
209 | marginTop: 20,
210 | padding: 15,
211 | backgroundColor: '#fff',
212 | borderRadius: 12,
213 | shadowColor: '#000',
214 | shadowOffset: { width: 0, height: 2 },
215 | shadowOpacity: 0.1,
216 | shadowRadius: 4,
217 | elevation: 3,
218 | },
219 | recordingText: {
220 | fontSize: 16,
221 | color: '#dc3545',
222 | marginLeft: 10,
223 | fontWeight: '600',
224 | },
225 | errorContainer: {
226 | marginTop: 20,
227 | padding: 15,
228 | backgroundColor: '#f8d7da',
229 | borderRadius: 12,
230 | width: '100%',
231 | maxWidth: 400,
232 | },
233 | errorText: {
234 | fontSize: 16,
235 | color: '#721c24',
236 | },
237 | transcriptionContainer: {
238 | marginTop: 30,
239 | padding: 20,
240 | backgroundColor: '#fff',
241 | borderRadius: 12,
242 | width: '100%',
243 | maxWidth: 400,
244 | shadowColor: '#000',
245 | shadowOffset: { width: 0, height: 2 },
246 | shadowOpacity: 0.1,
247 | shadowRadius: 8,
248 | elevation: 5,
249 | },
250 | transcriptionTitle: {
251 | fontSize: 18,
252 | fontWeight: 'bold',
253 | marginBottom: 12,
254 | color: '#333',
255 | },
256 | transcriptionText: {
257 | fontSize: 16,
258 | color: '#555',
259 | lineHeight: 24,
260 | },
261 | finalText: {
262 | fontSize: 14,
263 | color: '#28a745',
264 | fontWeight: 'bold',
265 | marginTop: 10,
266 | },
267 | hintText: {
268 | fontSize: 14,
269 | color: '#999',
270 | marginTop: 20,
271 | textAlign: 'center',
272 | },
273 | });
274 |
275 | export default BufferTranscriptionExample;
276 |
--------------------------------------------------------------------------------
/example/RecordRealTimeAndTranscribe.tsx:
--------------------------------------------------------------------------------
1 | import React, { useState, useEffect } from 'react';
2 | import { View, Text, StyleSheet, TouchableOpacity, ScrollView, Platform, Alert } from 'react-native';
3 | import { Ionicons } from '@expo/vector-icons';
4 | import * as SpeechTranscriber from 'expo-speech-transcriber';
5 |
6 | /**
7 | * RecordRealTimeAndTranscribe Example
8 | *
9 | * Demonstrates real-time speech transcription using expo-speech-transcriber.
10 | * This example follows the API documented in README.md.
11 | *
12 | * Requirements:
13 | * - Android 13+ (API 33) for Android
14 | * - iOS 13+ for iOS
15 | * - Microphone and speech recognition permissions
16 | */
17 | export default function RecordRealTimeAndTranscribe() {
18 | const [permissionStatus, setPermissionStatus] = useState({
19 | speech: 'notDetermined',
20 | microphone: 'denied',
21 | });
22 |
23 | // Use the built-in hook for real-time transcription
24 | const { text, isFinal, error, isRecording } = SpeechTranscriber.useRealTimeTranscription();
25 |
26 | /**
27 | * Request all necessary permissions
28 | */
29 | const requestAllPermissions = async () => {
30 | try {
31 | console.log('🔐 Requesting permissions...');
32 |
33 | let speechPermission = 'notDetermined';
34 |
35 | // Request speech recognition permission only on iOS
36 | if (Platform.OS === 'ios') {
37 | speechPermission = await SpeechTranscriber.requestPermissions();
38 | console.log('Speech permission:', speechPermission);
39 | }
40 |
41 | // Request microphone permission
42 | const micPermission = await SpeechTranscriber.requestMicrophonePermissions();
43 | console.log('Microphone permission:', micPermission);
44 |
45 | // Both `requestPermissions` and `requestMicrophonePermissions` return
46 | // string union types (e.g. 'authorized' | 'denied'), not objects with
47 | // `.status` or `.granted` properties. Store them directly and check
48 | // their values accordingly.
49 | setPermissionStatus({
50 | speech: speechPermission,
51 | microphone: micPermission,
52 | });
53 |
54 | if (Platform.OS === 'ios' && speechPermission !== 'authorized') {
55 | Alert.alert('Permission Denied', 'Speech recognition permission is required.');
56 | } else if (micPermission !== 'granted') {
57 | Alert.alert('Permission Denied', 'Microphone permission is required.');
58 | }
59 | } catch (err) {
60 | console.error('❌ Permission error:', err);
61 | Alert.alert('Error', `Failed to request permissions: ${err}`);
62 | }
63 | };
64 |
65 | /**
66 | * Start real-time transcription
67 | */
68 | const startTranscription = async () => {
69 | try {
70 | console.log('🎤 Starting transcription...');
71 | await SpeechTranscriber.recordRealTimeAndTranscribe();
72 | console.log('✅ Transcription started');
73 | } catch (err) {
74 | console.error('❌ Start error:', err);
75 | Alert.alert('Error', `Failed to start transcription: ${err}`);
76 | }
77 | };
78 |
79 | /**
80 | * Stop real-time transcription
81 | */
82 | const stopTranscription = () => {
83 | try {
84 | console.log('⏹️ Stopping transcription...');
85 | SpeechTranscriber.stopListening();
86 | console.log('✅ Transcription stopped');
87 | } catch (err) {
88 | console.error('❌ Stop error:', err);
89 | Alert.alert('Error', `Failed to stop transcription: ${err}`);
90 | }
91 | };
92 |
93 | // Check permissions on mount
94 | useEffect(() => {
95 | requestAllPermissions();
96 | }, []);
97 |
98 | const hasPermissions = Platform.OS === 'ios'
99 | ? permissionStatus.speech === 'authorized' && permissionStatus.microphone === 'granted'
100 | : permissionStatus.microphone === 'granted';
101 |
102 | return (
103 |
104 | Real-Time Transcription
105 | Platform: {Platform.OS}
106 |
107 | {!hasPermissions && (
108 |
109 |
110 | Request Permissions
111 |
112 | )}
113 |
114 | Recording Controls
115 |
116 |
121 |
122 | Start Transcription
123 |
124 |
125 |
130 |
131 | Stop Transcription
132 |
133 |
134 | {isRecording && (
135 |
136 |
137 | Recording and Transcribing...
138 |
139 | )}
140 |
141 | {error && (
142 |
143 | Error: {error}
144 |
145 | )}
146 |
147 | {text && (
148 |
149 | Transcription:
150 | {text}
151 | {isFinal && Final!}
152 |
153 | )}
154 |
155 | {!isRecording && !text && hasPermissions && (
156 |
157 | Press "Start Transcription" to begin real-time speech recognition.
158 |
159 | )}
160 |
161 |
162 | How to use:
163 | 1. Grant permissions when prompted
164 | 2. Press "Start Transcription" to begin
165 | 3. Speak clearly into your device
166 | 4. Watch the transcription appear in real-time
167 | 5. Press "Stop Transcription" when finished
168 |
169 |
170 | );
171 | }
172 |
173 | const styles = StyleSheet.create({
174 | container: {
175 | flexGrow: 1,
176 | justifyContent: 'center',
177 | alignItems: 'center',
178 | padding: 20,
179 | backgroundColor: '#f5f5f5',
180 | },
181 | title: {
182 | fontSize: 28,
183 | fontWeight: 'bold',
184 | marginBottom: 8,
185 | color: '#333',
186 | },
187 | subtitle: {
188 | fontSize: 14,
189 | color: '#999',
190 | marginBottom: 10,
191 | },
192 | sectionTitle: {
193 | fontSize: 20,
194 | fontWeight: 'bold',
195 | marginTop: 20,
196 | marginBottom: 10,
197 | color: '#333',
198 | },
199 | button: {
200 | flexDirection: 'row',
201 | alignItems: 'center',
202 | justifyContent: 'center',
203 | paddingVertical: 15,
204 | paddingHorizontal: 30,
205 | borderRadius: 12,
206 | marginVertical: 8,
207 | minWidth: 280,
208 | shadowColor: '#000',
209 | shadowOffset: { width: 0, height: 2 },
210 | shadowOpacity: 0.1,
211 | shadowRadius: 4,
212 | elevation: 3,
213 | },
214 | permissionButton: {
215 | backgroundColor: '#6c757d',
216 | },
217 | recordButton: {
218 | backgroundColor: '#007bff',
219 | },
220 | stopButton: {
221 | backgroundColor: '#dc3545',
222 | },
223 | disabled: {
224 | backgroundColor: '#ccc',
225 | opacity: 0.6,
226 | },
227 | buttonText: {
228 | color: '#fff',
229 | fontSize: 18,
230 | fontWeight: '600',
231 | marginLeft: 10,
232 | },
233 | recordingIndicator: {
234 | flexDirection: 'row',
235 | alignItems: 'center',
236 | marginTop: 20,
237 | padding: 15,
238 | backgroundColor: '#fff',
239 | borderRadius: 12,
240 | shadowColor: '#000',
241 | shadowOffset: { width: 0, height: 2 },
242 | shadowOpacity: 0.1,
243 | shadowRadius: 4,
244 | elevation: 3,
245 | },
246 | recordingText: {
247 | fontSize: 16,
248 | color: '#dc3545',
249 | marginLeft: 10,
250 | fontWeight: '600',
251 | },
252 | errorContainer: {
253 | marginTop: 20,
254 | padding: 15,
255 | backgroundColor: '#f8d7da',
256 | borderRadius: 12,
257 | width: '100%',
258 | maxWidth: 400,
259 | },
260 | errorText: {
261 | fontSize: 16,
262 | color: '#721c24',
263 | },
264 | transcriptionContainer: {
265 | marginTop: 30,
266 | padding: 20,
267 | backgroundColor: '#fff',
268 | borderRadius: 12,
269 | width: '100%',
270 | maxWidth: 400,
271 | shadowColor: '#000',
272 | shadowOffset: { width: 0, height: 2 },
273 | shadowOpacity: 0.1,
274 | shadowRadius: 8,
275 | elevation: 5,
276 | },
277 | transcriptionTitle: {
278 | fontSize: 18,
279 | fontWeight: 'bold',
280 | marginBottom: 12,
281 | color: '#333',
282 | },
283 | transcriptionText: {
284 | fontSize: 16,
285 | color: '#555',
286 | lineHeight: 24,
287 | },
288 | finalText: {
289 | fontSize: 14,
290 | color: '#28a745',
291 | fontWeight: 'bold',
292 | marginTop: 10,
293 | },
294 | hintText: {
295 | fontSize: 14,
296 | color: '#999',
297 | marginTop: 20,
298 | textAlign: 'center',
299 | },
300 | instructionsContainer: {
301 | marginTop: 30,
302 | padding: 20,
303 | backgroundColor: '#e7f3ff',
304 | borderRadius: 12,
305 | width: '100%',
306 | maxWidth: 400,
307 | },
308 | instructionsTitle: {
309 | fontSize: 16,
310 | fontWeight: 'bold',
311 | marginBottom: 12,
312 | color: '#333',
313 | },
314 | instructionText: {
315 | fontSize: 14,
316 | color: '#555',
317 | marginBottom: 6,
318 | lineHeight: 20,
319 | },
320 | });
--------------------------------------------------------------------------------
/example/App.tsx:
--------------------------------------------------------------------------------
1 | import React, { useState, useEffect } from 'react';
2 | import { View, Text, TouchableOpacity, StyleSheet, Alert, ScrollView } from 'react-native';
3 | import { Ionicons } from '@expo/vector-icons';
4 | import * as SpeechTranscriber from 'expo-speech-transcriber';
5 | import { useAudioRecorder, RecordingPresets, setAudioModeAsync, useAudioRecorderState } from 'expo-audio';
6 |
7 | const App = () => {
8 | const { text, isFinal, error, isRecording } = SpeechTranscriber.useRealTimeTranscription();
9 | const [recordedUri, setRecordedUri] = useState<string | null>(null);
10 | const [sfTranscription, setSfTranscription] = useState('');
11 | const [analyzerTranscription, setAnalyzerTranscription] = useState('');
12 | const [permissionsGranted, setPermissionsGranted] = useState(false);
13 |
14 | const audioRecorder = useAudioRecorder(RecordingPresets.HIGH_QUALITY);
15 | const recorderState = useAudioRecorderState(audioRecorder);
16 |
17 | useEffect(() => {
18 | if (isFinal) {
19 | // Optionally handle final transcription
20 | }
21 | }, [isFinal]);
22 |
23 | const requestAllPermissions = async () => {
24 | try {
25 | const speechPermission = await SpeechTranscriber.requestPermissions();
26 | const micPermission = await SpeechTranscriber.requestMicrophonePermissions();
27 | if (speechPermission === "authorized" && micPermission === 'granted') {
28 | // Set audio mode for recording
29 | await setAudioModeAsync({
30 | playsInSilentMode: true,
31 | allowsRecording: true,
32 | });
33 | setPermissionsGranted(true);
34 | Alert.alert('Permissions Granted', 'All permissions are now available.');
35 | } else {
36 | Alert.alert('Permissions Required', 'Speech and microphone permissions are needed.');
37 | }
38 | } catch (err) {
39 | Alert.alert('Error', 'Failed to request permissions');
40 | }
41 | };
42 |
43 | const handleStartTranscription = async () => {
44 | if (!permissionsGranted) {
45 | await requestAllPermissions();
46 | return;
47 | }
48 | try {
49 | await SpeechTranscriber.recordRealTimeAndTranscribe();
50 | } catch (err) {
51 | Alert.alert('Error', 'Failed to start transcription');
52 | }
53 | };
54 |
55 | const handleStopTranscription = () => {
56 | SpeechTranscriber.stopListening();
57 | };
58 |
59 | const startRecording = async () => {
60 | if (!permissionsGranted) {
61 | await requestAllPermissions();
62 | return;
63 | }
64 | try {
65 | await audioRecorder.prepareToRecordAsync();
66 | audioRecorder.record();
67 | } catch (err) {
68 | Alert.alert('Error', 'Failed to start recording');
69 | }
70 | };
71 |
72 | const stopRecording = async () => {
73 | try {
74 | await audioRecorder.stop();
75 | if (audioRecorder.uri) {
76 | setRecordedUri(audioRecorder.uri);
77 | Alert.alert('Recording Complete', `Audio saved at: ${audioRecorder.uri}`);
78 | }
79 | } catch (err) {
80 | Alert.alert('Error', 'Failed to stop recording');
81 | }
82 | };
83 |
84 | const transcribeWithSF = async () => {
85 | if (!recordedUri) {
86 | Alert.alert('No Recording', 'Please record audio first.');
87 | return;
88 | }
89 | try {
90 | const transcription = await SpeechTranscriber.transcribeAudioWithSFRecognizer(recordedUri);
91 | setSfTranscription(transcription);
92 | } catch (err) {
93 | Alert.alert('Error', 'Failed to transcribe with SF Recognizer');
94 | }
95 | };
96 |
97 | const transcribeWithAnalyzer = async () => {
98 | if (!recordedUri) {
99 | Alert.alert('No Recording', 'Please record audio first.');
100 | return;
101 | }
102 | if (!SpeechTranscriber.isAnalyzerAvailable()) {
103 | Alert.alert('Not Available', 'SpeechAnalyzer is not available on this device.');
104 | return;
105 | }
106 | try {
107 | const transcription = await SpeechTranscriber.transcribeAudioWithAnalyzer(recordedUri);
108 | setAnalyzerTranscription(transcription);
109 | } catch (err) {
110 | Alert.alert('Error', 'Failed to transcribe with Analyzer');
111 | }
112 | };
113 |
114 | return (
115 |
116 | Speech Transcriber Demo
117 |
118 | {!permissionsGranted && (
119 |
120 |
121 | Request Permissions
122 |
123 | )}
124 |
125 | Realtime Transcription
126 |
131 |
132 | Start Realtime Transcription
133 |
134 |
135 |
140 |
141 | Stop Realtime Transcription
142 |
143 |
144 | {isRecording && (
145 |
146 |
147 | Recording and Transcribing...
148 |
149 | )}
150 |
151 | {error && (
152 |
153 | Realtime Error: {error}
154 |
155 | )}
156 |
157 | {text && (
158 |
159 | Realtime Transcription:
160 | {text}
161 | {isFinal && Final!}
162 |
163 | )}
164 |
165 | File Transcription
166 |
167 |
168 | Start Recording
169 |
170 |
171 |
172 |
173 | Stop Recording
174 |
175 |
176 | {recorderState.isRecording && (
177 |
178 |
179 | Recording...
180 |
181 | )}
182 |
183 | {recordedUri && (
184 | <>
185 |
186 |
187 | Transcribe with SF Recognizer
188 |
189 |
190 | {SpeechTranscriber.isAnalyzerAvailable() && (
191 |
192 |
193 | Transcribe with Analyzer
194 |
195 | )}
196 |
197 | {sfTranscription && (
198 |
199 | SF Recognizer Result:
200 | {sfTranscription}
201 |
202 | )}
203 |
204 | {analyzerTranscription && (
205 |
206 | Analyzer Result:
207 | {analyzerTranscription}
208 |
209 | )}
210 | >
211 | )}
212 |
213 | {!isRecording && !text && !recordedUri && (
214 |
215 | Request permissions, then try realtime transcription or record audio for file transcription.
216 |
217 | )}
218 |
219 | );
220 | };
221 |
222 | const styles = StyleSheet.create({
223 | container: {
224 | flexGrow: 1,
225 | justifyContent: 'center',
226 | alignItems: 'center',
227 | padding: 20,
228 | backgroundColor: '#f5f5f5',
229 | },
230 | title: {
231 | fontSize: 28,
232 | fontWeight: 'bold',
233 | marginBottom: 8,
234 | color: '#333',
235 | },
236 | sectionTitle: {
237 | fontSize: 20,
238 | fontWeight: 'bold',
239 | marginTop: 20,
240 | marginBottom: 10,
241 | color: '#333',
242 | },
243 | button: {
244 | flexDirection: 'row',
245 | alignItems: 'center',
246 | justifyContent: 'center',
247 | paddingVertical: 15,
248 | paddingHorizontal: 30,
249 | borderRadius: 12,
250 | marginVertical: 8,
251 | minWidth: 280,
252 | shadowColor: '#000',
253 | shadowOffset: { width: 0, height: 2 },
254 | shadowOpacity: 0.1,
255 | shadowRadius: 4,
256 | elevation: 3,
257 | },
258 | permissionButton: {
259 | backgroundColor: '#6c757d',
260 | },
261 | recordButton: {
262 | backgroundColor: '#007bff',
263 | },
264 | stopButton: {
265 | backgroundColor: '#dc3545',
266 | },
267 | transcribeButton: {
268 | backgroundColor: '#28a745',
269 | },
270 | disabled: {
271 | backgroundColor: '#ccc',
272 | opacity: 0.6,
273 | },
274 | buttonText: {
275 | color: '#fff',
276 | fontSize: 18,
277 | fontWeight: '600',
278 | marginLeft: 10,
279 | },
280 | recordingIndicator: {
281 | flexDirection: 'row',
282 | alignItems: 'center',
283 | marginTop: 20,
284 | padding: 15,
285 | backgroundColor: '#fff',
286 | borderRadius: 12,
287 | shadowColor: '#000',
288 | shadowOffset: { width: 0, height: 2 },
289 | shadowOpacity: 0.1,
290 | shadowRadius: 4,
291 | elevation: 3,
292 | },
293 | recordingText: {
294 | fontSize: 16,
295 | color: '#dc3545',
296 | marginLeft: 10,
297 | fontWeight: '600',
298 | },
299 | errorContainer: {
300 | marginTop: 20,
301 | padding: 15,
302 | backgroundColor: '#f8d7da',
303 | borderRadius: 12,
304 | width: '100%',
305 | maxWidth: 400,
306 | },
307 | errorText: {
308 | fontSize: 16,
309 | color: '#721c24',
310 | },
311 | transcriptionContainer: {
312 | marginTop: 30,
313 | padding: 20,
314 | backgroundColor: '#fff',
315 | borderRadius: 12,
316 | width: '100%',
317 | maxWidth: 400,
318 | shadowColor: '#000',
319 | shadowOffset: { width: 0, height: 2 },
320 | shadowOpacity: 0.1,
321 | shadowRadius: 8,
322 | elevation: 5,
323 | },
324 | transcriptionTitle: {
325 | fontSize: 18,
326 | fontWeight: 'bold',
327 | marginBottom: 12,
328 | color: '#333',
329 | },
330 | transcriptionText: {
331 | fontSize: 16,
332 | color: '#555',
333 | lineHeight: 24,
334 | },
335 | finalText: {
336 | fontSize: 14,
337 | color: '#28a745',
338 | fontWeight: 'bold',
339 | marginTop: 10,
340 | },
341 | hintText: {
342 | fontSize: 14,
343 | color: '#999',
344 | marginTop: 20,
345 | textAlign: 'center',
346 | },
347 | });
348 |
349 | export default App;
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # expo-speech-transcriber
2 |
3 | On-device speech transcription for Expo apps. Supports iOS (Apple Speech framework) and Android (SpeechRecognizer API).
4 |
5 | ## Features
6 |
7 | - 🎯 On-device transcription - Works offline, privacy-focused
8 | - 📱 Cross-platform - iOS 13+ and Android 13+ (API 33)
9 | - 🚀 Multiple APIs - SFSpeechRecognizer (iOS 13+), SpeechAnalyzer (iOS 26+), and Android SpeechRecognizer
10 | - 📦 Easy integration - Auto-configures permissions
11 | - 🔒 Secure - All processing happens on device
12 | - ⚡ Realtime transcription - Get live speech-to-text updates with built-in audio capture
13 | - 📁 File transcription - Transcribe pre-recorded audio files
14 | - 🎤 Buffer-based transcription - Stream audio buffers from external sources for real-time transcription
15 |
16 | ## Installation
17 |
18 | ```bash
19 | npx expo install expo-speech-transcriber expo-audio
20 | ```
21 |
22 | Add the plugin to your `app.json`:
23 |
24 | ```json
25 | {
26 | "expo": {
27 | "plugins": ["expo-audio", "expo-speech-transcriber"]
28 | }
29 | }
30 | ```
31 |
32 | ### Custom permission message (recommended):
33 |
34 | Apple requires a clear purpose string for speech recognition and microphone permissions. Without it, your app may be rejected during App Store review. Provide a descriptive message explaining why your app needs access.
35 |
36 | ```json
37 | {
38 | "expo": {
39 | "plugins": [
40 | "expo-audio",
41 | [
42 | "expo-speech-transcriber",
43 | {
44 | "speechRecognitionPermission": "We need speech recognition to transcribe your recordings",
45 | "microphonePermission": "We need microphone access to record audio for transcription"
46 | }
47 | ]
48 | ]
49 | }
50 | }
51 | ```
52 |
53 | For more details, see Apple's guidelines on [requesting access to protected resources](https://developer.apple.com/documentation/uikit/requesting-access-to-protected-resources).
54 |
55 | > **Note for Android:** The plugin automatically adds the `RECORD_AUDIO` permission to your Android manifest. No additional configuration is required.
56 |
57 | ## Usage
58 |
59 | ### Realtime Transcription
60 |
61 | Start transcribing speech in real-time. This does not require `expo-audio`.
62 |
63 | ```typescript
64 | import { Platform } from "react-native";
65 | import * as SpeechTranscriber from "expo-speech-transcriber";
66 |
67 | // Request permissions
68 | // Note: requestPermissions() is only needed on iOS
69 | if (Platform.OS === "ios") {
70 | const speechPermission = await SpeechTranscriber.requestPermissions();
71 | if (speechPermission !== "authorized") {
72 | console.log("Speech permission denied");
73 | return;
74 | }
75 | }
76 |
77 | const micPermission = await SpeechTranscriber.requestMicrophonePermissions();
78 | if (micPermission !== "granted") {
79 | console.log("Microphone permission denied");
80 | return;
81 | }
82 |
83 | // Use the hook for realtime updates
84 | const { text, isFinal, error, isRecording } =
85 | SpeechTranscriber.useRealTimeTranscription();
86 |
87 | // Start transcription
88 | await SpeechTranscriber.recordRealTimeAndTranscribe();
89 |
90 | // Stop when done
91 | SpeechTranscriber.stopListening();
92 | ```
93 | **NOTE**: See [RecordRealTimeAndTranscribe](example/RecordRealTimeAndTranscribe.tsx) for an example of how to use real-time transcription on Android.
94 |
95 | ### File Transcription
96 |
97 | Transcribe pre-recorded audio files. Our library handles transcription but not recording—use `expo-audio` to record audio (see [expo-audio documentation](https://docs.expo.dev/versions/latest/sdk/audio/)), or implement your own recording logic with microphone access via `requestMicrophonePermissions()`.
98 |
99 | ```typescript
100 | import * as SpeechTranscriber from "expo-speech-transcriber";
101 | import { useAudioRecorder, RecordingPresets } from "expo-audio";
102 |
103 | // Record audio with expo-audio
104 | const audioRecorder = useAudioRecorder(RecordingPresets.HIGH_QUALITY);
105 | await audioRecorder.prepareToRecordAsync();
106 | audioRecorder.record();
107 | // ... user speaks ...
108 | await audioRecorder.stop();
109 | const audioUri = audioRecorder.uri;
110 |
111 | // Transcribe with SFSpeechRecognizer (preferred)
112 | const text = await SpeechTranscriber.transcribeAudioWithSFRecognizer(audioUri);
113 | console.log("Transcription:", text);
114 |
115 | // Or with SpeechAnalyzer if available
116 | if (SpeechTranscriber.isAnalyzerAvailable()) {
117 | const text = await SpeechTranscriber.transcribeAudioWithAnalyzer(audioUri);
118 | console.log("Transcription:", text);
119 | }
120 | ```
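
If you want a single call site that automatically uses the newest API when available, one possible pattern is a small wrapper around the two file-transcription functions (the `transcribeFile` helper below is illustrative, not part of the library):

```typescript
import * as SpeechTranscriber from "expo-speech-transcriber";

// Prefer SpeechAnalyzer (iOS 26+) when the device supports it,
// otherwise fall back to SFSpeechRecognizer (iOS 13+).
async function transcribeFile(audioUri: string): Promise<string> {
  if (SpeechTranscriber.isAnalyzerAvailable()) {
    return SpeechTranscriber.transcribeAudioWithAnalyzer(audioUri);
  }
  return SpeechTranscriber.transcribeAudioWithSFRecognizer(audioUri);
}
```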
121 |
122 | For custom recording without `expo-audio`:
123 |
124 | ```typescript
125 | // Request microphone permission for your custom recording implementation
126 | const micPermission = await SpeechTranscriber.requestMicrophonePermissions();
127 | // Implement your own audio recording logic here to save a file
128 | // Then transcribe the resulting audio file URI
129 | ```
130 |
131 | ### Buffer-Based Transcription
132 |
133 | Stream audio buffers directly to the transcriber for real-time processing. This is ideal for integrating with audio processing libraries like [react-native-audio-api](https://docs.swmansion.com/react-native-audio-api/).
134 |
135 | ```typescript
136 | import * as SpeechTranscriber from "expo-speech-transcriber";
137 | import { AudioManager, AudioRecorder } from "react-native-audio-api";
138 |
139 | // Set up audio recorder
140 | const recorder = new AudioRecorder({
141 | sampleRate: 16000,
142 | bufferLengthInSamples: 1600,
143 | });
144 |
145 | AudioManager.setAudioSessionOptions({
146 | iosCategory: "playAndRecord",
147 | iosMode: "spokenAudio",
148 | iosOptions: ["allowBluetooth", "defaultToSpeaker"],
149 | });
150 |
151 | // Request permissions
152 | const speechPermission = await SpeechTranscriber.requestPermissions();
153 | const micPermission = await AudioManager.requestRecordingPermissions();
154 |
155 | // Stream audio buffers to transcriber
156 | recorder.onAudioReady(({ buffer }) => {
157 | const channelData = buffer.getChannelData(0);
158 | SpeechTranscriber.realtimeBufferTranscribe(
159 | channelData, // Float32Array or number[]
160 | 16000, // sample rate
161 | );
162 | });
163 |
164 | // Use the hook to get transcription updates
165 | const { text, isFinal, error } = SpeechTranscriber.useRealTimeTranscription();
166 |
167 | // Start streaming
168 | recorder.start();
169 |
170 | // Stop when done
171 | recorder.stop();
172 | SpeechTranscriber.stopBufferTranscription();
173 | ```
174 |
175 | See the [BufferTranscriptionExample](./example/BufferTranscriptionExample.tsx) for a complete implementation.
176 |
177 |
178 |
179 | ## API Reference
180 |
181 | ### `requestPermissions()`
182 | Request speech recognition permission.
183 |
184 | **Platform:** iOS only. On Android, speech recognition permission is handled through `requestMicrophonePermissions()`.
185 |
186 | **Returns:** `Promise<string>` - One of: `'authorized'`, `'denied'`, `'restricted'`, or `'notDetermined'`
187 |
188 | **Example:**
189 |
190 | ```typescript
191 | import { Platform } from "react-native";
192 |
193 | if (Platform.OS === "ios") {
194 | const status = await SpeechTranscriber.requestPermissions();
195 | }
196 | ```
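
Each status can then be mapped to app behavior; a minimal sketch (the branch handling is illustrative):

```typescript
const status = await SpeechTranscriber.requestPermissions();
switch (status) {
  case "authorized":
    // proceed with transcription
    break;
  case "denied":
  case "restricted":
    // explain that the feature is unavailable and point the user to Settings
    break;
  case "notDetermined":
    // the user has not been prompted yet; the request can be retried later
    break;
}
```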
197 |
198 | ### `requestMicrophonePermissions()`
199 |
200 | Request microphone permission.
201 |
202 | **Returns:** `Promise<string>` - One of: `'granted'` or `'denied'`
203 |
204 | **Example:**
205 |
206 | ```typescript
207 | const status = await SpeechTranscriber.requestMicrophonePermissions();
208 | ```
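
If the result is `'denied'`, you may want to point the user to the system settings. One option, using React Native's `Linking` API (this flow is a suggestion, not required by the library):

```typescript
import { Linking } from "react-native";

const micStatus = await SpeechTranscriber.requestMicrophonePermissions();
if (micStatus === "denied") {
  // Optionally guide the user to the app's settings screen.
  await Linking.openSettings();
}
```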
209 |
210 | ### `recordRealTimeAndTranscribe()`
211 |
212 | Start real-time speech transcription. Listen for updates via the `useRealTimeTranscription` hook.
213 |
214 | **Returns:** `Promise<void>`
215 |
216 | **Example:**
217 |
218 | ```typescript
219 | await SpeechTranscriber.recordRealTimeAndTranscribe();
220 | ```
221 |
222 | ### `stopListening()`
223 |
224 | Stop real-time transcription.
225 |
226 | **Returns:** `void`
227 |
228 | **Example:**
229 |
230 | ```typescript
231 | SpeechTranscriber.stopListening();
232 | ```
233 |
234 | ### `isRecording()`
235 |
236 | Check if real-time transcription is currently recording.
237 |
238 | **Returns:** `boolean`
239 |
240 | **Example:**
241 |
242 | ```typescript
243 | const recording = SpeechTranscriber.isRecording();
244 | ```
245 |
246 | ### `transcribeAudioWithSFRecognizer(audioFilePath: string)`
247 |
248 | Transcribe audio from a pre-recorded file using SFSpeechRecognizer. This is the preferred API for its reliability.
249 |
250 | **Platform:** iOS only
251 |
252 | **Requires:** iOS 13+, pre-recorded audio file URI (record with `expo-audio` or your own implementation)
253 |
254 | **Returns:** `Promise<string>` - Transcribed text
255 |
256 | **Example:**
257 |
258 | ```typescript
259 | const transcription = await SpeechTranscriber.transcribeAudioWithSFRecognizer(
260 | "file://path/to/audio.m4a"
261 | );
262 | ```
263 |
264 | ### `transcribeAudioWithAnalyzer(audioFilePath: string)`
265 |
266 | Transcribe audio from a pre-recorded file using SpeechAnalyzer.
267 |
268 | **Platform:** iOS only
269 |
270 | **Requires:** iOS 26+, pre-recorded audio file URI (record with `expo-audio` or your own implementation)
271 |
272 | **Returns:** `Promise<string>` - Transcribed text
273 |
274 | **Example:**
275 |
276 | ```typescript
277 | const transcription = await SpeechTranscriber.transcribeAudioWithAnalyzer(
278 | "file://path/to/audio.m4a"
279 | );
280 | ```
281 |
282 | ### `isAnalyzerAvailable()`
283 |
284 | Check if SpeechAnalyzer API is available.
285 |
286 | **Platform:** iOS only. Always returns `false` on Android.
287 |
288 | **Returns:** `boolean` - `true` if iOS 26+, `false` otherwise
289 |
290 | **Example:**
291 |
292 | ```typescript
293 | if (SpeechTranscriber.isAnalyzerAvailable()) {
294 | // Use SpeechAnalyzer
295 | }
296 | ```
297 |
298 | ### `useRealTimeTranscription()`
299 |
300 | React hook for real-time transcription state.
301 |
302 | **Returns:** `{ text: string, isFinal: boolean, error: string | null, isRecording: boolean }`
303 |
304 | **Example:**
305 |
306 | ```typescript
307 | const { text, isFinal, error, isRecording } =
308 | SpeechTranscriber.useRealTimeTranscription();
309 | ```
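
In a component, the hook is typically combined with `recordRealTimeAndTranscribe()` and `stopListening()`. A minimal sketch (component name and layout are illustrative):

```tsx
import { Button, Text, View } from "react-native";
import * as SpeechTranscriber from "expo-speech-transcriber";

function LiveTranscription() {
  const { text, isFinal, error, isRecording } =
    SpeechTranscriber.useRealTimeTranscription();

  return (
    <View>
      <Button
        title={isRecording ? "Stop" : "Start"}
        onPress={() =>
          isRecording
            ? SpeechTranscriber.stopListening()
            : SpeechTranscriber.recordRealTimeAndTranscribe()
        }
      />
      <Text>{text}</Text>
      {isFinal && <Text>Final result received</Text>}
      {error && <Text>{error}</Text>}
    </View>
  );
}
```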
310 |
311 | ### `realtimeBufferTranscribe(buffer, sampleRate)`
312 |
313 | Stream audio buffers for real-time transcription. Ideal for integration with audio processing libraries.
314 |
315 | **Parameters:**
316 |
317 | - `buffer: Float32Array | number[]` - Audio samples
318 | - `sampleRate: number` - Sample rate in Hz (e.g., 16000)
319 |
320 | **NOTE**: Only mono audio is currently supported; the native channel count is fixed at 1.
321 |
322 | **Returns:** `Promise<void>`
323 |
324 | **Example:**
325 |
326 | ```typescript
327 | const audioBuffer = new Float32Array([...]);
328 | await SpeechTranscriber.realtimeBufferTranscribe(audioBuffer, 16000);
329 | ```
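
Samples are passed to the native layer as Float32 PCM, so they should already be in the normalized `[-1, 1]` range. If your audio source delivers signed 16-bit PCM instead, a conversion step like the following (an illustrative helper, not part of the library) can be applied before streaming:

```typescript
// Convert signed 16-bit PCM samples to normalized Float32 samples.
function int16ToFloat32(pcm: Int16Array): Float32Array {
  const out = new Float32Array(pcm.length);
  for (let i = 0; i < pcm.length; i++) {
    out[i] = pcm[i] / 32768;
  }
  return out;
}

// e.g. await SpeechTranscriber.realtimeBufferTranscribe(int16ToFloat32(chunk), 16000);
```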
330 |
331 | ### `stopBufferTranscription()`
332 |
333 | Stop buffer-based transcription and clean up resources.
334 |
335 | **Returns:** `void`
336 |
337 | **Example:**
338 |
339 | ```typescript
340 | SpeechTranscriber.stopBufferTranscription();
341 | ```
342 |
343 | ## Example
344 |
345 | See the [example app](./example) for a complete implementation demonstrating all APIs.
346 |
347 | ## Requirements
348 |
349 | ### iOS
350 | - iOS 13.0+
351 | - Expo SDK 52+
352 | - Development build (Expo Go not supported - [why?](https://expo.dev/blog/expo-go-vs-development-builds))
353 |
354 | ### Android
355 | - Android 13+ (API level 33)
356 | - Expo SDK 52+
357 | - Development build (Expo Go not supported)
358 |
359 | ## Limitations
360 |
361 | - **English only** - Currently hardcoded to `en_US` locale
362 | - **File size** - Best for short recordings (< 1 minute)
363 | - **Recording not included** - Real-time transcription captures audio internally; file transcription requires pre-recorded audio files (use `expo-audio` or implement your own recording with `requestMicrophonePermissions()`)
364 | - **Android file transcription** - File-based transcription (`transcribeAudioWithSFRecognizer`, `transcribeAudioWithAnalyzer`) is iOS only. Android supports real-time transcription
365 | - **Android API level** - Android requires API level 33+ (Android 13)
366 |
367 | ## License
368 |
369 | MIT
370 |
371 | ## Contributing
372 |
373 | Contributions welcome! Please open an issue or PR on [GitHub](https://github.com/DaveyEke/expo-speech-transcriber).
374 |
375 | ## Author
376 |
377 | Dave Mkpa Eke - [GitHub](https://github.com/daveyeke) | [X](https://x.com/1804davey)
378 |
--------------------------------------------------------------------------------
/ios/ExpoSpeechTranscriberModule.swift:
--------------------------------------------------------------------------------
1 | import ExpoModulesCore
2 | import Speech
3 | import AVFoundation
4 |
5 | public class ExpoSpeechTranscriberModule: Module {
6 | private var audioEngine = AVAudioEngine()
7 | private var recognitionRequest: SFSpeechAudioBufferRecognitionRequest?
8 | private var bufferRecognitionRequest: SFSpeechAudioBufferRecognitionRequest?
9 | private var recognitionTask: SFSpeechRecognitionTask?
10 | private var bufferRecognitionTask: SFSpeechRecognitionTask?
11 | private var startedListening = false
12 |
13 | public func definition() -> ModuleDefinition {
14 | Name("ExpoSpeechTranscriber")
15 |
16 | Events("onTranscriptionProgress", "onTranscriptionError")
17 |
18 | // expose realtime recording/transcription
19 | AsyncFunction("recordRealTimeAndTranscribe") { () async -> Void in
20 | await self.recordRealTimeAndTranscribe()
21 | }
22 |
23 | // Method 2: Transcribe from URL using SFSpeechRecognizer (iOS 13+)
24 | AsyncFunction("transcribeAudioWithSFRecognizer") { (audioFilePath: String) async throws -> String in
25 |
26 | let url: URL
27 | if audioFilePath.hasPrefix("file://") {
28 | url = URL(string: audioFilePath)!
29 | } else {
30 | url = URL(fileURLWithPath: audioFilePath)
31 | }
32 |
33 | let transcription = await self.transcribeAudio(url: url)
34 | return transcription
35 | }
36 |
37 | // Method 3: Transcribe from URL using SpeechAnalyzer (iOS 26+)
38 | AsyncFunction("transcribeAudioWithAnalyzer") { (audioFilePath: String) async throws -> String in
39 |
40 | if #available(iOS 26.0, *) {
41 | let url: URL
42 | if audioFilePath.hasPrefix("file://") {
43 | url = URL(string: audioFilePath)!
44 | } else {
45 | url = URL(fileURLWithPath: audioFilePath)
46 | }
47 |
48 | let transcription = try await self.transcribeAudioWithAnalyzer(url: url)
49 | return transcription
50 | } else {
51 | throw NSError(domain: "ExpoSpeechTranscriber", code: 501,
52 | userInfo: [NSLocalizedDescriptionKey: "SpeechAnalyzer requires iOS 26.0 or later"])
53 | }
54 | }
55 |
56 | AsyncFunction("requestPermissions") { () async -> String in
57 | return await self.requestTranscribePermissions()
58 | }
59 |
60 | AsyncFunction("requestMicrophonePermissions") { () async -> String in
61 | return await self.requestMicrophonePermissions()
62 | }
63 |
64 |
65 | Function("stopListening"){ () -> Void in
66 | return self.stopListening()
67 | }
68 |
69 | Function("isRecording") { () -> Bool in
70 | return self.isRecording()
71 | }
72 |
73 | Function("isAnalyzerAvailable") { () -> Bool in
74 | if #available(iOS 26.0, *) {
75 | return true
76 | }
77 | return false
78 | }
79 |
80 | AsyncFunction("realtimeBufferTranscribe") { (buffer: [Float32], sampleRate: Double) async -> Void in
81 | await self.realtimeBufferTranscribe(buffer: buffer, sampleRate: sampleRate)
82 | }
83 |
84 | Function("stopBufferTranscription") { () -> Void in
85 | return self.stopBufferTranscription()
86 | }
87 | }
88 |
89 | // MARK: - Private Implementation Methods
90 |
91 | private func realtimeBufferTranscribe(buffer: [Float32], sampleRate: Double) async -> Void {
92 | if bufferRecognitionRequest == nil {
93 | let speechRecognizer = SFSpeechRecognizer()!
94 | bufferRecognitionRequest = SFSpeechAudioBufferRecognitionRequest()
95 |
96 | guard let recognitionRequest = bufferRecognitionRequest else {
97 | self.sendEvent("onTranscriptionError", ["message": "Unable to create recognition request"])
98 | return
99 | }
100 | recognitionRequest.shouldReportPartialResults = true
101 |
102 | bufferRecognitionTask = speechRecognizer.recognitionTask(with: recognitionRequest) { result, error in
103 | if let error = error {
104 | self.sendEvent("onTranscriptionError", ["message": error.localizedDescription])
105 | return
106 | }
107 |
108 | guard let result = result else {
109 | return
110 | }
111 |
112 | let recognizedText = result.bestTranscription.formattedString
113 | self.sendEvent(
114 | "onTranscriptionProgress",
115 | ["text": recognizedText, "isFinal": result.isFinal]
116 | )
117 | }
118 | }
119 |
120 | let format = AVAudioFormat(standardFormatWithSampleRate: sampleRate, channels: AVAudioChannelCount(1))! // hardcode channel to 1 since we only support mono audio
121 | guard let pcmBuffer = AVAudioPCMBuffer(pcmFormat: format, frameCapacity: AVAudioFrameCount(buffer.count)) else {
122 | self.sendEvent("onTranscriptionError", ["message": "Unable to create PCM buffer"])
123 | return
124 | }
125 |
126 | pcmBuffer.frameLength = AVAudioFrameCount(buffer.count)
127 | if let channelData = pcmBuffer.floatChannelData {
128 | buffer.withUnsafeBufferPointer { bufferPointer in
129 | guard let sourceAddress = bufferPointer.baseAddress else { return }
130 |
131 | let destination = channelData[0]
132 |                   let byteCount = buffer.count * MemoryLayout<Float32>.size
133 |
134 | memcpy(destination, sourceAddress, byteCount)
135 | }
136 | }
137 |
138 | // Append buffer to recognition request
139 | bufferRecognitionRequest?.append(pcmBuffer)
140 | }
141 |
142 | private func stopBufferTranscription() {
143 | bufferRecognitionRequest?.endAudio()
144 | bufferRecognitionRequest = nil
145 |
146 | bufferRecognitionTask?.cancel()
147 | bufferRecognitionTask = nil
148 | }
149 |
150 | // startRecordingAndTranscription using SFSpeechRecognizer
151 | private func recordRealTimeAndTranscribe() async -> Void {
152 | let speechRecognizer = SFSpeechRecognizer()!
153 | recognitionRequest = SFSpeechAudioBufferRecognitionRequest()
154 | guard let recognitionRequest = recognitionRequest else {
155 | self.sendEvent("onTranscriptionError", ["message": "Unable to create recognition request"])
156 | return
157 | }
158 | recognitionRequest.shouldReportPartialResults = true
159 |
160 | let inputNode = audioEngine.inputNode
161 | let recordingFormat = inputNode.outputFormat(forBus: 0)
162 | inputNode.removeTap(onBus: 0)
163 |
164 | inputNode.installTap(onBus: 0, bufferSize: 1024, format: recordingFormat) { buffer, when in
165 | recognitionRequest.append(buffer)
166 | }
167 |
168 | audioEngine.prepare()
169 | do {
170 | try audioEngine.start()
171 | startedListening = true
172 | } catch {
173 | self.sendEvent("onTranscriptionError", ["message": error.localizedDescription])
174 | return
175 | }
176 |
177 | recognitionTask = speechRecognizer.recognitionTask(with: recognitionRequest) { result, error in
178 | if let error = error {
179 | self.stopListening()
180 | self.sendEvent("onTranscriptionError", ["message": error.localizedDescription])
181 | return
182 | }
183 |
184 | guard let result = result else {
185 | return
186 | }
187 |
188 | let recognizedText = result.bestTranscription.formattedString
189 | self.sendEvent(
190 | "onTranscriptionProgress",
191 | ["text": recognizedText, "isFinal": result.isFinal]
192 | )
193 |
194 | if result.isFinal {
195 | self.stopListening()
196 | }
197 | }
198 | }
199 |
200 | private func stopListening() {
201 | audioEngine.stop()
202 | audioEngine.inputNode.removeTap(onBus: 0)
203 | //recognitionRequest?.endAudio()
204 | recognitionRequest = nil
205 | recognitionTask?.cancel()
206 | recognitionTask = nil
207 | }
208 |
209 |
210 | private func isRecording() -> Bool {
211 | return audioEngine.isRunning
212 | }
213 |
214 |
215 |
216 |   // Implementation for URL transcription with SFSpeechRecognizer
217 | private func transcribeAudio(url: URL) async -> String {
218 |
219 | guard FileManager.default.fileExists(atPath: url.path) else {
220 | let err = "Error: Audio file not found at \(url.path)"
221 | return err
222 | }
223 |
224 | return await withCheckedContinuation { continuation in
225 | guard let recognizer = SFSpeechRecognizer() else {
226 | let err = "Error: Speech recognizer not available for current locale"
227 | continuation.resume(returning: err)
228 | return
229 | }
230 |
231 | guard recognizer.isAvailable else {
232 | let err = "Error: Speech recognizer not available at this time"
233 | continuation.resume(returning: err)
234 | return
235 | }
236 |
237 | let request = SFSpeechURLRecognitionRequest(url: url)
238 | request.shouldReportPartialResults = false
239 | recognizer.recognitionTask(with: request) { (result, error) in
240 | if let error = error {
241 | let errorMsg = "Error: \(error.localizedDescription)"
242 | continuation.resume(returning: errorMsg)
243 | return
244 | }
245 |
246 | guard let result = result else {
247 | let errorMsg = "Error: No transcription available"
248 | continuation.resume(returning: errorMsg)
249 | return
250 | }
251 |
252 | if result.isFinal {
253 | let text = result.bestTranscription.formattedString
254 | let finalResult = text.isEmpty ? "No speech detected" : text
255 | continuation.resume(returning: finalResult)
256 | }
257 | }
258 | }
259 | }
260 |
261 | // Implementation for URL transcription with SpeechAnalyzer (iOS 26+)
262 | @available(iOS 26.0, *)
263 | private func transcribeAudioWithAnalyzer(url: URL) async throws -> String {
264 |
265 | guard FileManager.default.fileExists(atPath: url.path) else {
266 | throw NSError(domain: "ExpoSpeechTranscriber", code: 404,
267 | userInfo: [NSLocalizedDescriptionKey: "Audio file not found at \(url.path)"])
268 | }
269 |
270 | let locale = Locale(identifier: "en_US")
271 |
272 | guard await isLocaleSupported(locale: locale) else {
273 | throw NSError(domain: "ExpoSpeechTranscriber", code: 400,
274 | userInfo: [NSLocalizedDescriptionKey: "English locale not supported"])
275 | }
276 |
277 | let transcriber = SpeechTranscriber(
278 | locale: locale,
279 | transcriptionOptions: [],
280 | reportingOptions: [.volatileResults],
281 | attributeOptions: [.audioTimeRange]
282 | )
283 |
284 | try await ensureModel(transcriber: transcriber, locale: locale)
285 |
286 | let analyzer = SpeechAnalyzer(modules: [transcriber])
287 |
288 | let audioFile = try AVAudioFile(forReading: url)
289 | if let lastSample = try await analyzer.analyzeSequence(from: audioFile) {
290 | try await analyzer.finalizeAndFinish(through: lastSample)
291 | } else {
292 | await analyzer.cancelAndFinishNow()
293 | }
294 |
295 | var finalText = ""
296 | for try await recResponse in transcriber.results {
297 | if recResponse.isFinal {
298 | finalText += String(recResponse.text.characters)
299 | }
300 | }
301 |
302 | let result = finalText.isEmpty ? "No speech detected" : finalText
303 | return result
304 | }
305 |
306 | @available(iOS 26.0, *)
307 | private func isLocaleSupported(locale: Locale) async -> Bool {
308 | guard SpeechTranscriber.isAvailable else { return false }
309 | let supported = await DictationTranscriber.supportedLocales
310 | return supported.map { $0.identifier(.bcp47) }.contains(locale.identifier(.bcp47))
311 | }
312 |
313 | @available(iOS 26.0, *)
314 | private func isLocaleInstalled(locale: Locale) async -> Bool {
315 | let installed = await Set(SpeechTranscriber.installedLocales)
316 | return installed.map { $0.identifier(.bcp47) }.contains(locale.identifier(.bcp47))
317 | }
318 |
319 | @available(iOS 26.0, *)
320 | private func ensureModel(transcriber: SpeechTranscriber, locale: Locale) async throws {
321 | guard await isLocaleSupported(locale: locale) else {
322 | throw NSError(domain: "ExpoSpeechTranscriber", code: 400,
323 | userInfo: [NSLocalizedDescriptionKey: "Locale not supported"])
324 | }
325 |
326 | if await isLocaleInstalled(locale: locale) {
327 | return
328 | } else {
329 | try await downloadModelIfNeeded(for: transcriber)
330 | }
331 | }
332 |
333 | @available(iOS 26.0, *)
334 | private func downloadModelIfNeeded(for module: SpeechTranscriber) async throws {
335 | if let downloader = try await AssetInventory.assetInstallationRequest(supporting: [module]) {
336 | try await downloader.downloadAndInstall()
337 | }
338 | }
339 |
340 | private func requestTranscribePermissions() async -> String {
341 | return await withCheckedContinuation { continuation in
342 | SFSpeechRecognizer.requestAuthorization { authStatus in
343 | let result: String
344 | switch authStatus {
345 | case .authorized:
346 | result = "authorized"
347 | case .denied:
348 | result = "denied"
349 | case .restricted:
350 | result = "restricted"
351 | case .notDetermined:
352 | result = "notDetermined"
353 | @unknown default:
354 | result = "unknown"
355 | }
356 | continuation.resume(returning: result)
357 | }
358 | }
359 | }
360 |
361 | private func requestMicrophonePermissions() async -> String {
362 | return await withCheckedContinuation { continuation in
363 | AVAudioSession.sharedInstance().requestRecordPermission { granted in
364 | let result = granted ? "granted" : "denied"
365 | continuation.resume(returning: result)
366 | }
367 | }
368 | }
369 | }
370 |
371 |
--------------------------------------------------------------------------------