├── .eslintrc.cjs
├── .gitignore
├── Dockerfile
├── README.md
├── bun.lockb
├── package.json
├── src
│   ├── controllers
│   │   ├── ping.ts
│   │   └── transcribe.ts
│   └── server
│       ├── handler
│       │   ├── commands.ts
│       │   ├── getCommand.ts
│       │   └── handle.ts
│       ├── net
│       │   └── setCors.ts
│       ├── readme.md
│       └── server.ts
└── tsconfig.json
/.eslintrc.cjs:
--------------------------------------------------------------------------------
1 | /* eslint-env node */
2 | module.exports = {
3 |   root: true,
4 |   parser: '@typescript-eslint/parser',
5 |   parserOptions: {
6 |     ecmaVersion: 2021,
7 |   },
8 |   env: {
9 |     es2021: true,
10 |     node: true,
11 |   },
12 |   extends: ['eslint:recommended', 'plugin:@typescript-eslint/recommended'],
13 |   plugins: ['@typescript-eslint'],
14 |   rules: {
15 |     '@typescript-eslint/no-explicit-any': 'off',
16 |     'max-lines': ['error', { max: 250, skipComments: true, skipBlankLines: true }],
17 |   },
18 |   overrides: [
19 |     {
20 |       files: ['*.test.ts'],
21 |       env: {
22 |         jest: true,
23 |       },
24 |     },
25 |   ],
26 | };
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | .env
2 | gha-creds-*.json
3 | dist
4 | node_modules
5 | tmp
--------------------------------------------------------------------------------
/Dockerfile:
--------------------------------------------------------------------------------
1 | FROM oven/bun
2 |
3 | RUN mkdir -p /usr/src/app
4 | WORKDIR /usr/src/app
5 |
6 | COPY package*.json bun.lockb /usr/src/app/
7 | RUN bun install --production
8 |
9 | COPY . /usr/src/app/
10 |
11 | ENV PORT=8080
12 | ENV NODE_ENV=production
13 |
14 | EXPOSE ${PORT}
15 | CMD [ "bun", "src/server/server.ts" ]
16 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
5 | # OpenAI Whisper API
6 |
7 | **An Open Source Solution for Speech-to-Text and More**
8 |
9 | Welcome to the OpenAI Whisper API, an open-source microservice that wraps OpenAI's Whisper API, a state-of-the-art automatic speech recognition (ASR) system. The service is built with Bun.sh and TypeScript and is designed to run on Docker with zero dependencies, making it a versatile building block for speech- and language-related applications.
10 |
11 | Whisper is a speech-to-text model trained on a large volume of multilingual and multitask data. A single model can handle language identification, speech translation, and, of course, turning spoken words into written text.
12 |
13 | It handles multilingual speech recognition and copes well with background noise, which makes it useful for transcribing video calls, Zoom meetings, YouTube videos, voice messages, and more.
14 |
15 | The API is designed to be easy to use for developers of all skill levels. It's an open-source project licensed under the MIT license, so you can use it in your own projects with few restrictions. To get started, you'll need an OpenAI account and an API key; the sections below show you how to run and call the service.
16 |
17 | ## Overview
18 |
19 | This is an OpenAI Whisper API microservice written in TypeScript for Bun.sh that can run on Docker, with zero dependencies.
20 | It listens on the `/transcribe` route for MP3 files and returns the text transcription.
21 |
22 | ## Running locally
23 |
24 | Install [bun.sh](https://bun.sh/) first, then clone this repository and run these commands:
25 |
26 | ```bash
27 | bun install
28 | bun run dev
29 | ```
30 |
31 | You can now navigate to http://localhost:3000 (or the `PORT` you provided); see the Usage section below.
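
To verify it's running, you can hit the `/ping` controller, which returns the current server timestamp. A minimal check, assuming the default port 3000:

```ts
// quick health check against a locally running server
const res = await fetch('http://localhost:3000/ping')
console.log(await res.text()) // e.g. "1712345678901" (Date.now() on the server)
```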
32 |
33 | ## Docker
34 |
35 | - See: https://hub.docker.com/r/illyism/openai-whisper-api
36 |
37 | ## Google Cloud Run Deployment
38 |
39 | Clone this repository and run these commands:
40 |
41 | (Replace `PROJECT_ID` with your own Google Cloud project ID)
42 |
43 | ```bash
44 | docker build --platform linux/amd64 -t gcr.io/PROJECT_ID/whisper-docker .
45 | docker push gcr.io/PROJECT_ID/whisper-docker
46 |
47 | gcloud run deploy whisper-docker \
48 |   --image gcr.io/PROJECT_ID/whisper-docker \
49 |   --region us-central1 \
50 |   --allow-unauthenticated \
51 |   --project PROJECT_ID
52 | ```
53 |
54 | You should receive a Service URL; see the Usage section below.
55 |
56 | ## Usage
57 |
58 | You can test plain HTTP by opening the `/ping` endpoint on the URL.
59 |
60 | Send a POST request to the `/transcribe` endpoint with the following body:
61 |
62 | ```json
63 | {
64 | "audio": "BASE64_ENCODED_AUDIO"
65 | }
66 | ```
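
As a sketch, a client call might look like this in Bun (the file name `audio.mp3`, the local URL, and the `OPENAI_KEY` env var are assumptions for illustration; the `Authorization` header is explained in the next section):

```ts
// example client for /transcribe — assumes a local server and an MP3 file on disk
const bytes = await Bun.file('audio.mp3').arrayBuffer()

const res = await fetch('http://localhost:3000/transcribe', {
  method: 'POST',
  headers: {
    'Content-Type': 'application/json',
    Authorization: `Bearer ${process.env.OPENAI_KEY}`, // see the API Key section below
  },
  body: JSON.stringify({ audio: Buffer.from(bytes).toString('base64') }),
})

console.log(await res.text()) // the transcription
```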
67 |
68 | ### API Key
69 |
70 | You need to pass your OpenAI API key as a header:
71 |
72 | ```
73 | Authorization: Bearer OPENAI_KEY
74 | ```
75 |
76 | Alternatively, you can launch the Docker image or the server with `OPENAI_KEY` set in the environment:
77 |
78 | ```bash
79 | OPENAI_KEY=YOUR_KEY_HERE bun run dev
80 |
81 | # or
82 |
83 | docker run -p 3000:3000 -e OPENAI_KEY=YOUR_KEY_HERE gcr.io/magicbuddy-chat/whisper-docker
84 |
85 | # or set it as an env var in Cloud Run with the command below or in the Cloud Console UI
86 |
87 | gcloud run deploy whisper-docker \
88 |   --image gcr.io/PROJECT_ID/whisper-docker \
89 |   --set-env-vars OPENAI_KEY=YOUR_KEY_HERE \
90 |   --region us-central1 \
91 |   --allow-unauthenticated \
92 |   --project PROJECT_ID
93 | ```
94 |
95 | ## Live example
96 |
97 | We are using this Whisper API with [MagicBuddy, a Telegram ChatGPT bot](https://magicbuddy.chat/).
98 |
99 | You can try the [OpenAI Whisper Docker](https://magicbuddy.chat/openai-whisper) live here:
100 |
101 | - https://magicbuddy.chat/openai-whisper
102 |
--------------------------------------------------------------------------------
/bun.lockb:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Illyism/openai-whisper-api/b2bfd6abcddc7552666f22df44d1a2300f910540/bun.lockb
--------------------------------------------------------------------------------
/package.json:
--------------------------------------------------------------------------------
1 | {
2 |   "name": "openai-whisper-api",
3 |   "main": "src/server/server.ts",
4 |   "devDependencies": {
5 |     "bun-types": "^0.6.12"
6 |   },
7 |   "description": "An OpenAI Whisper API microservice using Bun.sh with Docker",
8 |   "engines": {
9 |     "node": "19"
10 |   },
11 |   "private": true,
12 |   "scripts": {
13 |     "dev": "bun src/server/server.ts",
14 |     "build": "bun build ./src/**/*.ts --outdir ./dist --target bun --sourcemap=external --minify",
15 |     "start": "bun run build && bun run dist/server/server.js",
16 |     "lint": "eslint ./src --ext .js,ts"
17 |   }
18 | }
--------------------------------------------------------------------------------
/src/controllers/ping.ts:
--------------------------------------------------------------------------------
1 | export default async function ping() {
2 |   return Date.now()
3 | }
4 |
--------------------------------------------------------------------------------
/src/controllers/transcribe.ts:
--------------------------------------------------------------------------------
1 | import { rm, mkdir } from 'fs/promises'
2 | import { Req } from '../server/handler/handle'
3 |
4 | // Ensure the tmp directory for audio uploads exists (kicked off at module load)
5 | mkdir(`${import.meta.dir}/tmp`, { recursive: true })
6 |
7 | async function getFilePath(audio: string) {
8 |   const randomId = Math.random().toString(36).substring(7)
9 |   const mp3Path = `${import.meta.dir}/tmp/${randomId}.mp3`
10 |   await Bun.write(mp3Path, Buffer.from(audio, 'base64'))
11 |   return mp3Path
12 | }
13 |
14 | async function speechToText(base64Audio: string, apiKey: string) {
15 |   const formData = new FormData()
16 |   formData.append('model', 'whisper-1')
17 |   const fp = await getFilePath(base64Audio)
18 |   formData.append('file', Bun.file(fp), 'audio.mp3')
19 |
20 |   const resp = await fetch('https://api.openai.com/v1/audio/transcriptions', {
21 |     method: 'POST',
22 |     headers: {
23 |       Authorization: `Bearer ${apiKey}`,
24 |     },
25 |     body: formData,
26 |   })
27 |
28 |   // Remove the temp file before checking the response so a failed request doesn't leak files
29 |   await rm(fp)
30 |
31 |   if (!resp.ok) {
32 |     const error = (await resp.json()) as { error: { message: string } }
33 |     throw new Error(error.error.message)
34 |   }
35 |
36 |   const body = (await resp.json()) as { text: string }
37 |   return body.text
38 | }
39 |
40 | function getApiKey(req: Req) {
41 |   // a server-side key, if set, takes precedence over the request header
42 |   if (process.env.OPENAI_KEY) {
43 |     return process.env.OPENAI_KEY
44 |   }
45 |
46 |   const authHeader = req.headers.get('Authorization')
47 |   if (!authHeader) throw new Error('Missing API key')
48 |   const apiKey = authHeader.split(' ')[1]
49 |   if (!apiKey) throw new Error('Wrong API key')
50 |   return apiKey
51 | }
52 |
53 | export default async function (req: Req) {
54 |   // get the API key from the env or the Authorization header
55 |   const apiKey = getApiKey(req)
56 |
57 |   // parse body
58 |   if (!req.request.body) throw new Error('Missing body')
59 |   const { audio } = await Bun.readableStreamToJSON(req.request.body)
60 |   if (!audio) throw new Error('Missing audio file')
61 |
62 |   return await speechToText(audio, apiKey)
63 | }
64 |
--------------------------------------------------------------------------------
/src/server/handler/commands.ts:
--------------------------------------------------------------------------------
1 | import { getCommand } from './getCommand.js'
2 | import { Req } from './handle.js'
3 |
4 | /**
5 |  * Executes a controller from src/controllers
6 |  */
7 | export const startCommand = async (req: Req) => {
8 |   const commandName = req.url.pathname.substring(1)
9 |   const command = await getCommand(commandName)
10 |
11 |   if (!command) {
12 |     return new Response('Command not found', { status: 404 })
13 |   }
14 |
15 |   try {
16 |     return await command(req)
17 |   } catch (e: unknown) {
18 |     return handleError(req, e)
19 |   }
20 | }
21 |
22 | const handleError = (req: Req, error: any) => {
23 |   let message = error.message
24 |
25 |   // errors from HTTP clients may carry a response payload with more detail
26 |   if (error.response) {
27 |     message = error.response.data?.error?.message ?? error.response.data?.error
28 |   }
29 |
30 |   console.error('handleError', error)
31 |
32 |   return Response.json(
33 |     {
34 |       status: error?.response?.status ?? 400,
35 |       code: error.code,
36 |       message,
37 |     },
38 |     {
39 |       status: error?.response?.status ?? 400,
40 |     }
41 |   )
42 | }
43 |
--------------------------------------------------------------------------------
/src/server/handler/getCommand.ts:
--------------------------------------------------------------------------------
1 | import path from 'path'
2 | import { Req } from './handle.js'
3 |
4 | const commandMap = new Map<
5 |   string,
6 |   null | ((req: Req) => Promise<any>)
7 | >()
8 |
9 | /**
10 |  * Reads the src/controllers directory, and loads a module with
11 |  * `commandName` to memory. We avoid repeat `require` calls to improve
12 |  * performance.
13 |  *
14 |  * This is just a simple cache + filter.
15 |  */
16 | const warmupCommand = async (commandName: string) => {
17 |   if (commandMap.has(commandName)) {
18 |     return
19 |   }
20 |
21 |   if (!commandName.match(/^[a-zA-Z0-9/]+$/)) {
22 |     // the leading slash is already stripped from the pathname
23 |     if (commandName !== 'favicon.ico') {
24 |       console.error('Invalid command name', commandName)
25 |     }
26 |     commandMap.set(commandName, null)
27 |     return
28 |   }
29 |
30 |   const commandPath = path.join(
31 |     __dirname,
32 |     '../../controllers',
33 |     commandName + '.js'
34 |   )
35 |   try {
36 |     // eslint-disable-next-line @typescript-eslint/no-var-requires
37 |     const command = require(commandPath).default
38 |     if (typeof command !== 'function') {
39 |       console.error('Invalid command', commandPath)
40 |       commandMap.set(commandName, null)
41 |       return
42 |     }
43 |     commandMap.set(commandName, command)
44 |     return
45 |   } catch (e) {
46 |     console.error('Error loading command', commandPath)
47 |     console.error(e)
48 |     console.trace()
49 |     commandMap.set(commandName, null)
50 |   }
51 | }
52 |
53 | /**
54 |  * Loads a module from src/controllers
55 |  */
56 | export const getCommand = async (
57 |   commandName: string
58 | ): Promise<null | ((req: Req) => Promise<any>)> => {
59 |   await warmupCommand(commandName)
60 |   return commandMap.get(commandName) ?? null
61 | }
62 |
--------------------------------------------------------------------------------
/src/server/handler/handle.ts:
--------------------------------------------------------------------------------
1 | import setCors from '../net/setCors.js'
2 | import { startCommand } from './commands.js'
3 |
4 | export interface Req {
5 |   request: Request
6 |   headers: Headers
7 |   url: URL
8 |   method: string
9 |   query: URLSearchParams
10 | }
11 |
12 | export interface Post<T> extends Req {
13 |   body: T
14 | }
15 |
16 | async function createRequest(request: Request): Promise<Req> {
17 |   return {
18 |     request,
19 |     headers: request.headers,
20 |     url: new URL(request.url),
21 |     method: request.method,
22 |     query: request.url.includes('?')
23 |       ? new URLSearchParams(request.url.split('?')[1])
24 |       : new URLSearchParams(),
25 |   }
26 | }
27 |
28 | // Normalize whatever a controller returns (Response, string, object, or number) into a Response
29 | function handleResponse(req: Req, response: Response | any): Response {
30 |   if (!(response instanceof Response)) {
31 |     if (typeof response === 'string') {
32 |       response = new Response(response, { status: 200 })
33 |     } else if (typeof response === 'object') {
34 |       response = new Response(JSON.stringify(response), { status: 200 })
35 |     } else if (typeof response === 'number') {
36 |       response = new Response(response.toString(), { status: 200 })
37 |     } else {
38 |       response = new Response(null, { status: 204 })
39 |     }
40 |   }
41 |
42 |   // reflect the caller's origin for CORS on every response
43 |   const origin = req.headers.get('Origin')
44 |   response.headers.set('Access-Control-Allow-Origin', origin || '*')
45 |   response.headers.set('Access-Control-Allow-Credentials', 'true')
46 |
47 |   return response
48 | }
49 |
50 | /**
51 |  * Handles incoming requests.
52 |  * 1. Check CORS
53 |  * 2. Load command
54 |  * 3. Execute command
55 |  * 4. Handle errors
56 |  */
57 | export async function handleRequest(request: Request) {
58 |   try {
59 |     const req = await createRequest(request)
60 |
61 |     const corsResponse = setCors(req)
62 |     if (corsResponse) return corsResponse
63 |
64 |     const response = await startCommand(req)
65 |     return handleResponse(req, response)
66 |   } catch (e: unknown) {
67 |     console.error('handleRequest', e)
68 |     return new Response('Internal Server Error', { status: 500 })
69 |   }
70 | }
71 |
--------------------------------------------------------------------------------
/src/server/net/setCors.ts:
--------------------------------------------------------------------------------
1 | import { Req } from '../handler/handle'
2 |
3 | // Answers CORS preflight requests; returns false so non-OPTIONS requests fall through
4 | export default function setCors(req: Req) {
5 |   if (req.method === 'OPTIONS') {
6 |     const origin = req.headers.get('Origin')
7 |     const responseHeaders: Record<string, string> = {
8 |       'Access-Control-Allow-Origin': origin || '*',
9 |       'Access-Control-Allow-Credentials': 'true',
10 |       'Access-Control-Allow-Methods': 'GET, POST, OPTIONS',
11 |       'Access-Control-Allow-Headers':
12 |         'Content-Type, Authorization, X-Requested-With, X-TLC, X-PROJECT',
13 |       'Access-Control-Max-Age': '3600',
14 |     }
15 |     return new Response(null, {
16 |       status: 204,
17 |       headers: responseHeaders,
18 |     })
19 |   }
20 |
21 |   return false
22 | }
23 |
--------------------------------------------------------------------------------
/src/server/readme.md:
--------------------------------------------------------------------------------
1 | # OpenAI Whisper Docker Server
2 |
3 | This is a simple Bun.sh-based HTTP server. It's split into multiple files to be easy to understand.
4 |
5 | Because it's a plain, barebones HTTP server with zero dependencies, it's a lot more performant than Express, Koa, NestJS, tRPC, Fastify, or other frameworks running on Node.js.
6 |
7 | Running it in Bun.sh makes it even more performant, easily able to handle 10x more requests per second than the frameworks above.
8 |
9 | I've avoided a routing library by simply reading the filesystem in src/controllers: any file you put in there is automatically routed and becomes an endpoint. There are no path params like `/users/123`, but you can use query strings (`/users?id=123`), headers, or JSON bodies instead.
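
For example, a hypothetical `src/controllers/echo.ts` (not part of this repo) would automatically become an `/echo` endpoint, with its return value converted to a `Response` by `handleResponse`:

```ts
// src/controllers/echo.ts — hypothetical controller to illustrate filesystem routing
import { Req } from '../server/handler/handle'

// GET /echo?msg=hello -> responds "hello"
export default async function (req: Req) {
  return req.query.get('msg') ?? 'nothing to echo'
}
```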
--------------------------------------------------------------------------------
/src/server/server.ts:
--------------------------------------------------------------------------------
1 | import { serve } from 'bun'
2 | import { handleRequest } from './handler/handle.js'
3 |
4 | const server = serve({ fetch: handleRequest })
5 | console.log('🔥', server.port)
6 |
--------------------------------------------------------------------------------
/tsconfig.json:
--------------------------------------------------------------------------------
1 | {
2 |   "compilerOptions": {
3 |     // add Bun type definitions
4 |     "types": ["bun-types"],
5 |
6 |     // enable latest features
7 |     "lib": ["esnext"],
8 |     "module": "esnext",
9 |     "target": "esnext",
10 |
11 |     // if TS 5.x+
12 |     "moduleResolution": "bundler",
13 |     "moduleDetection": "force",
14 |
15 |     "jsx": "react-jsx", // support JSX
16 |     "allowJs": true, // allow importing `.js` from `.ts`
17 |     "esModuleInterop": true, // allow default imports for CommonJS modules
18 |
19 |     // best practices
20 |     "strict": true,
21 |     "forceConsistentCasingInFileNames": true,
22 |     "skipLibCheck": true
23 |   },
24 |   "include": ["src/**/*"]
25 | }
26 |
--------------------------------------------------------------------------------