├── .eslintrc.cjs ├── .gitignore ├── Dockerfile ├── README.md ├── bun.lockb ├── package.json ├── src ├── controllers │ ├── ping.ts │ └── transcribe.ts └── server │ ├── handler │ ├── commands.ts │ ├── getCommand.ts │ └── handle.ts │ ├── net │ └── setCors.ts │ ├── readme.md │ └── server.ts └── tsconfig.json /.eslintrc.cjs: -------------------------------------------------------------------------------- 1 | /* eslint-env node */ 2 | module.exports = { 3 | root: true, 4 | parser: '@typescript-eslint/parser', 5 | "parserOptions": { 6 | "ecmaVersion": 2021 7 | }, 8 | env: { 9 | "es2021": true, 10 | "node": true, 11 | }, 12 | extends: ['eslint:recommended', 'plugin:@typescript-eslint/recommended'], 13 | plugins: ['@typescript-eslint'], 14 | rules: { 15 | '@typescript-eslint/no-explicit-any': 'off', 16 | "max-lines": ["error", { "max": 250, "skipComments": true, "skipBlankLines": true }], 17 | }, 18 | overrides: [ 19 | { 20 | files: ['*.test.ts'], 21 | env: { 22 | jest: true, 23 | }, 24 | }, 25 | ], 26 | 27 | }; -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .env 2 | gha-creds-*.json 3 | dist 4 | node_modules 5 | tmp -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | FROM oven/bun 2 | 3 | RUN mkdir -p /usr/src/app 4 | WORKDIR /usr/src/app 5 | 6 | COPY package*.json bun.lockb /usr/src/app/ 7 | RUN bun install --omit=dev 8 | 9 | COPY . /usr/src/app/ 10 | 11 | ENV PORT 8080 12 | ENV NODE_ENV production 13 | 14 | EXPOSE ${PORT} 15 | CMD [ "bun", "src/server/server.ts" ] 16 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 |
2 | 3 |
4 | 5 | # OpenAI Whisper API 6 | 7 | **An Open Source Solution for Speech-to-Text and More** 8 | 9 | Welcome to the OpenAI Whisper API, an open-source AI model microservice that leverages the power of OpenAI's whisper api, a state-of-the-art automatic speech recognition (ASR) system as a large language model. This service, built with Node.js, Bun.sh, and Typescript, is designed to run on Docker with zero dependencies, making it a versatile tool for developers across various speech and language-related applications. 10 | 11 | The Whisper API is a speech-to-text model trained on a vast amount of multilingual and multitask training data, including a wide range of audio files and audio recordings. It's a single model that can handle tasks such as language identification, speech translation, and of course, transforming spoken word into written text. 12 | 13 | The model is capable of handling a sequence of tokens and can work with natural language, making it a powerful tool for machine learning applications. It's designed to handle multilingual speech recognition, and it can even manage background noise, making it useful for transcribing a video call, zoom calls, a YouTube video or non-chat use cases in English language and more with full control. 14 | 15 | The API is simple and is designed to be easy to use for developers of all skill levels with simple developer access. It's an open-source project, and it's licensed under the MIT license, meaning you can use it in your own projects with few restrictions. Whether you're looking to transcribe voice messages, improve system performance through a series of system-wide optimizations, or explore the capabilities of the OpenAI Whisper API, this is the place to start. Dive into the following code to learn more about how to use this powerful tool as a first step and get your OpenAI Account with a new api key. 16 | 17 | ## Usage 18 | 19 | This is is a OpenAI Whisper API microservice using Node.js / Bun.sh / Typescript that can run on Docker. With zero dependencies. 20 | It listens to the `/transcribe` route for MP3 files and returns the text transcription. 21 | 22 | ## Running locally 23 | 24 | Install [bun.sh](https://bun.sh/) first, clone this directory and run these commands: 25 | 26 | ```bash 27 | bun install 28 | bun run dev 29 | ``` 30 | 31 | You can now navigate to http://localhost:3000 or the PORT provided, see the Usage section below. 32 | 33 | ## Docker 34 | 35 | - See: https://hub.docker.com/r/illyism/openai-whisper-api 36 | 37 | ## Google Cloud Run Deployment 38 | 39 | Clone this directory and run these commands: 40 | 41 | (Replace `PROJECT_ID` with your own Google Cloud project ID) 42 | 43 | ```bash 44 | docker build --platform linux/amd64 -t gcr.io/PROJECT_ID/whisper-docker . 45 | docker push gcr.io/PROJECT_ID/whisper-docker 46 | 47 | gcloud run deploy whisper-docker \ 48 | --image gcr.io/PROJECT_ID/whisper-docker \ 49 | --region us-central1 \ 50 | --allow-unauthenticated \ 51 | --project PROJECT_ID 52 | ``` 53 | 54 | You should receive a Service URL, see the Usage section below. 55 | 56 | ## Usage 57 | 58 | You can test normal HTTP by opening the /ping endpoint on the URL. 59 | 60 | Connect to the /transcribe and send a POST request with the following body: 61 | 62 | ```json 63 | { 64 | "audio": "BASE64_ENCODED_AUDIO" 65 | } 66 | ``` 67 | 68 | ### API Key 69 | 70 | You need to pass the OpenAI API Key as a HEADER: 71 | 72 | ``` 73 | Authorization: Bearer OPENAI_KEY 74 | ``` 75 | 76 | Or you can launch the docker image or server with `OPENAI_KEY` in the env: 77 | 78 | ```bash 79 | OPENAI_KEY=YOUR_KEY_HERE bun run dev 80 | 81 | # or 82 | 83 | docker run -p 3000:3000 -e OPENAI_KEY=YOUR_KEY_HERE gcr.io/magicbuddy-chat/whisper-docker 84 | 85 | # or set it as env in Cloud Run with the below command or in the Cloud Console UI 86 | 87 | gcloud run deploy whisper-docker \ 88 | --image gcr.io/PROJECT_ID/whisper-docker \ 89 | --set-env-vars OPENAI_KEY=YOUR_KEY_HERE \ 90 | --region us-central1 \ 91 | --allow-unauthenticated \ 92 | --project PROJECT_ID 93 | ``` 94 | 95 | # Live example 96 | 97 | We are using this Whisper API with [MagicBuddy, a Telegram ChatGPT bot](https://magicbuddy.chat/). 98 | 99 | You can use the [OpenAI Whisper Docker](https://magicbuddy.chat/openai-whisper) as a live example here: 100 | 101 | - https://magicbuddy.chat/openai-whisper 102 | -------------------------------------------------------------------------------- /bun.lockb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Illyism/openai-whisper-api/b2bfd6abcddc7552666f22df44d1a2300f910540/bun.lockb -------------------------------------------------------------------------------- /package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "openai-whisper-api", 3 | "main": "src/server/server.ts", 4 | "devDependencies": { 5 | "bun-types": "^0.6.12" 6 | }, 7 | "description": "This is is a Node.js OpenAI microservice using Node.js / Bun.sh with Docker", 8 | "engines": { 9 | "node": "19" 10 | }, 11 | "private": true, 12 | "scripts": { 13 | "dev": "bun src/server/server.ts", 14 | "build": "bun build ./src/**/*.ts --outdir ./dist --target bun --sourcemap=external --minify", 15 | "start": "bun run build && bun run dist/server/server.js", 16 | "lint": "eslint ./src --ext .js,ts" 17 | } 18 | } -------------------------------------------------------------------------------- /src/controllers/ping.ts: -------------------------------------------------------------------------------- 1 | export default async function ping() { 2 | return Date.now() 3 | } 4 | -------------------------------------------------------------------------------- /src/controllers/transcribe.ts: -------------------------------------------------------------------------------- 1 | import { rm, mkdir } from 'fs/promises' 2 | import { Req } from '../server/handler/handle' 3 | 4 | mkdir(`${import.meta.dir}/tmp`, { recursive: true }) 5 | async function getFilePath(audio: string) { 6 | const randomId = Math.random().toString(36).substring(7) 7 | const mp3Path = `${import.meta.dir}/tmp/${randomId}.mp3` 8 | await Bun.write(mp3Path, Buffer.from(audio, 'base64')) 9 | return mp3Path 10 | } 11 | 12 | async function speechToText(base64Audio: string, apiKey: string) { 13 | const formData = new FormData() 14 | formData.append('model', 'whisper-1') 15 | const fp = await getFilePath(base64Audio) 16 | formData.append('file', Bun.file(fp), 'audio.mp3') 17 | 18 | const resp = await fetch('https://api.openai.com/v1/audio/transcriptions', { 19 | method: 'POST', 20 | headers: { 21 | Authorization: `Bearer ${apiKey}`, 22 | }, 23 | body: formData, 24 | }) 25 | 26 | if (!resp.ok) { 27 | const error = (await resp.json()) as { error: { message: string } } 28 | throw new Error(error.error.message) 29 | } 30 | 31 | await rm(fp) 32 | const body = (await resp.json()) as { text: string } 33 | return body.text 34 | } 35 | 36 | function getApiKey(req: Req) { 37 | if (process.env.OPENAI_KEY) { 38 | return process.env.OPENAI_KEY 39 | } 40 | 41 | const authHeader = req.headers.get('Authorization') 42 | if (!authHeader) throw new Error('Missing API key') 43 | const apiKey = authHeader.split(' ')[1] 44 | if (!apiKey) throw new Error('Wrong API key') 45 | return apiKey 46 | } 47 | 48 | export default async function (req: Req) { 49 | // get API Key from Authorization header 50 | const apiKey = getApiKey(req) 51 | 52 | // parse body 53 | if (!req.request.body) throw new Error('Missing body') 54 | const { audio } = await Bun.readableStreamToJSON(req.request.body) 55 | if (!audio) throw new Error('Missing audio file') 56 | 57 | return await speechToText(audio, apiKey) 58 | } 59 | -------------------------------------------------------------------------------- /src/server/handler/commands.ts: -------------------------------------------------------------------------------- 1 | import { getCommand } from './getCommand.js' 2 | import { Req } from './handle.js' 3 | 4 | /** 5 | * Executes a controller from src/controllers 6 | */ 7 | export const startCommand = async (req: Req) => { 8 | const commandName = req.url.pathname.substring(1) 9 | const command = await getCommand(commandName) 10 | 11 | if (!command) { 12 | return new Response('Command not found', { status: 404 }) 13 | } 14 | 15 | try { 16 | return await command(req) 17 | } catch (e: unknown) { 18 | return handleError(req, e) 19 | } 20 | } 21 | 22 | const handleError = (req: Req, error: any) => { 23 | let message = error.message 24 | 25 | if (error.response) { 26 | message = error.response.data?.error?.message ?? error.response.data?.error 27 | } 28 | 29 | console.error('handleError', error) 30 | 31 | return Response.json( 32 | { 33 | status: error?.response?.status ?? 400, 34 | code: error.code, 35 | message, 36 | }, 37 | { 38 | status: error?.response?.status ?? 400, 39 | } 40 | ) 41 | } 42 | -------------------------------------------------------------------------------- /src/server/handler/getCommand.ts: -------------------------------------------------------------------------------- 1 | import path from 'path' 2 | import { Req } from './handle.js' 3 | 4 | const commandMap = new Map< 5 | string, 6 | null | ((req: Req) => Promise) 7 | >() 8 | 9 | /** 10 | * Reads the src/controllers directory, and loads a module with 11 | * `commandName` to memory. We avoid repeat `require` calls to improve 12 | * performance. 13 | * 14 | * This is just a simple cache + filter. 15 | */ 16 | const warmupCommand = async (commandName: string) => { 17 | if (commandMap.has(commandName)) { 18 | return 19 | } 20 | 21 | if (!commandName.match(/^[a-zA-Z0-9/]+$/)) { 22 | if (commandName !== '/favicon.ico') { 23 | console.error('Invalid command name', commandName) 24 | } 25 | commandMap.set(commandName, null) 26 | return 27 | } 28 | 29 | const commandPath = path.join( 30 | __dirname, 31 | '../../controllers', 32 | commandName + '.js' 33 | ) 34 | try { 35 | // eslint-disable-next-line @typescript-eslint/no-var-requires 36 | const command = require(commandPath).default 37 | if (typeof command !== 'function') { 38 | console.error('Invalid command', commandPath) 39 | commandMap.set(commandName, null) 40 | } 41 | commandMap.set(commandName, command) 42 | return 43 | } catch (e) { 44 | console.error('Error loading command', commandPath) 45 | console.error(e) 46 | console.trace() 47 | commandMap.set(commandName, null) 48 | } 49 | } 50 | 51 | /** 52 | * Loads a module from src/controllers 53 | */ 54 | export const getCommand = async ( 55 | commandName: string 56 | ): Promise Promise)> => { 57 | await warmupCommand(commandName) 58 | return commandMap.get(commandName) ?? null 59 | } 60 | -------------------------------------------------------------------------------- /src/server/handler/handle.ts: -------------------------------------------------------------------------------- 1 | import setCors from '../net/setCors.js' 2 | import { startCommand } from './commands.js' 3 | 4 | export interface Req { 5 | request: Request 6 | headers: Headers 7 | url: URL 8 | method: string 9 | query: URLSearchParams 10 | } 11 | 12 | export interface Post extends Req { 13 | body: T 14 | } 15 | 16 | async function createRequest(request: Request): Promise { 17 | return { 18 | request, 19 | headers: request.headers, 20 | url: new URL(request.url), 21 | method: request.method, 22 | query: request.url.includes('?') 23 | ? new URLSearchParams(request.url.split('?')[1]) 24 | : new URLSearchParams(), 25 | } 26 | } 27 | 28 | function handleResponse(req: Req, response: Response | any): Response { 29 | if (!(response instanceof Response)) { 30 | if (typeof response === 'string') { 31 | response = new Response(response, { status: 200 }) 32 | } else if (typeof response === 'object') { 33 | response = new Response(JSON.stringify(response), { status: 200 }) 34 | } else if (typeof response === 'number') { 35 | response = new Response(response.toString(), { status: 200 }) 36 | } else { 37 | response = new Response(null, { status: 204 }) 38 | } 39 | } 40 | 41 | const origin = req.headers.get('Origin') 42 | response.headers.set('Access-Control-Allow-Origin', origin || '*') 43 | response.headers.set('Access-Control-Allow-Credentials', 'true') 44 | 45 | return response 46 | } 47 | 48 | /** 49 | * Handles incoming requests. 50 | * 1. Check CORS 51 | * 2. Load command 52 | * 3. Execute command 53 | * 4. Handles errors 54 | */ 55 | export async function handleRequest(request: Request) { 56 | try { 57 | const req = await createRequest(request) 58 | 59 | const corsResponse = setCors(req) 60 | if (corsResponse) return corsResponse 61 | 62 | const response = await startCommand(req) 63 | return handleResponse(req, response) 64 | } catch (e: unknown) { 65 | console.error('handleRequest', e) 66 | return new Response('Internal Server Error', { status: 500 }) 67 | } 68 | } 69 | -------------------------------------------------------------------------------- /src/server/net/setCors.ts: -------------------------------------------------------------------------------- 1 | import { Req } from '../handler/handle' 2 | 3 | export default function setCors(req: Req) { 4 | if (req.method === 'OPTIONS') { 5 | const origin = req.headers.get('Origin') 6 | const responseHeaders: Record = { 7 | 'Access-Control-Allow-Origin': origin || '*', 8 | 'Access-Control-Allow-Credentials': 'true', 9 | } 10 | responseHeaders['Access-Control-Allow-Methods'] = 'GET, POST, OPTIONS' 11 | responseHeaders['Access-Control-Allow-Headers'] = 12 | 'Content-Type, Authorization, X-Requested-With, X-TLC, X-PROJECT' 13 | responseHeaders['Access-Control-Max-Age'] = '3600' 14 | return new Response(null, { 15 | status: 204, 16 | headers: responseHeaders, 17 | }) 18 | } 19 | 20 | return false 21 | } 22 | -------------------------------------------------------------------------------- /src/server/readme.md: -------------------------------------------------------------------------------- 1 | # OpenaI Whisper Docker Server 2 | 3 | This is a simple Bun.sh based HTTP server. It's made in multiple files to be easy to understand. 4 | 5 | This is a lot more performant than Express, Koa, NestJs, tRPC, Fastify or other frameworks just running it in Node.js, as it's a plain barebones HTTP server with 0 dependencies. 6 | 7 | But running it in Bun.sh makes it even more performant. Easily being able to handle 10x more requests per second than the above frameworks. 8 | 9 | I've avoided routing here by simply reading the filesystem in src/controllers, any files you put in there will be automatically routed to and will become an endpoint. There are no "params" in the path like `/users/123`, however, but you can simply use query strings /users?id=123, HEADERs or JSON bodies. -------------------------------------------------------------------------------- /src/server/server.ts: -------------------------------------------------------------------------------- 1 | import { serve } from 'bun' 2 | import { handleRequest } from './handler/handle.js' 3 | 4 | const server = serve({ fetch: handleRequest }) 5 | console.log('🔥', server.port) 6 | -------------------------------------------------------------------------------- /tsconfig.json: -------------------------------------------------------------------------------- 1 | { 2 | "compilerOptions": { 3 | // add Bun type definitions 4 | "types": ["bun-types"], 5 | 6 | // enable latest features 7 | "lib": ["esnext"], 8 | "module": "esnext", 9 | "target": "esnext", 10 | 11 | // if TS 5.x+ 12 | "moduleResolution": "bundler", 13 | "moduleDetection": "force", 14 | 15 | "jsx": "react-jsx", // support JSX 16 | "allowJs": true, // allow importing `.js` from `.ts` 17 | "esModuleInterop": true, // allow default imports for CommonJS modules 18 | 19 | // best practices 20 | "strict": true, 21 | "forceConsistentCasingInFileNames": true, 22 | "skipLibCheck": true 23 | }, 24 | "include": ["src/**/*"] 25 | } 26 | --------------------------------------------------------------------------------