├── .npmignore ├── .eslintignore ├── .gitignore ├── index.ts ├── .editorConfig ├── tsconfig.json ├── .eslintrc ├── CHANGELOG.md ├── package.json ├── models └── types.ts ├── README.md ├── utils └── replay-readable.utils.ts └── src ├── replay-readable.ts └── voice-recorder.ts /.npmignore: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /.eslintignore: -------------------------------------------------------------------------------- 1 | .idea 2 | lib 3 | node_modules -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # IDE 2 | .vscode 3 | .idea 4 | 5 | /node_modules 6 | /yarn-error.log 7 | /dist/ 8 | /lib/ 9 | -------------------------------------------------------------------------------- /index.ts: -------------------------------------------------------------------------------- 1 | export * from './src/voice-recorder'; 2 | export * from './src/replay-readable'; 3 | export * from './models/types'; 4 | -------------------------------------------------------------------------------- /.editorConfig: -------------------------------------------------------------------------------- 1 | [*] 2 | indent_size = 2 3 | 4 | [*.ts] 5 | end_of_line = lf 6 | indent_style = space 7 | indent_size = 4 8 | insert_final_newline = true -------------------------------------------------------------------------------- /tsconfig.json: -------------------------------------------------------------------------------- 1 | { 2 | "compilerOptions": { 3 | "target": "es2016", 4 | "module": "commonjs", 5 | "declaration": true, 6 | "outDir": "./lib", 7 | "esModuleInterop": true, 8 | "forceConsistentCasingInFileNames": true, 9 | "strict": true, 10 | "skipLibCheck": true 11 | } 12 | } 13 | 
-------------------------------------------------------------------------------- /.eslintrc: -------------------------------------------------------------------------------- 1 | { 2 | "root": true, 3 | "parser": "@typescript-eslint/parser", 4 | "plugins": [ 5 | "@typescript-eslint" 6 | ], 7 | "parserOptions": { 8 | "project": "./tsconfig.json" 9 | }, 10 | "extends": [ 11 | "eslint:recommended", 12 | "plugin:@typescript-eslint/eslint-recommended", 13 | "plugin:@typescript-eslint/recommended", 14 | "prettier" 15 | ], 16 | "rules": { 17 | "quotes": ["error", "single"], 18 | "indent": ["error", 4], 19 | "@typescript-eslint/no-unnecessary-condition": "error" 20 | } 21 | } -------------------------------------------------------------------------------- /CHANGELOG.md: -------------------------------------------------------------------------------- 1 | # 1.1.1 2 | - Chore: Allow more `@discordjs/voice` versions. 3 | 4 | # 1.1.0 5 | - Feat: Introduced the `voiceRecorder.isRecording()` function to check if the voice recording is currently running or running on a specific guild. 6 | 7 | # 1.0.6 8 | - Fix: Renamed `maxRecordTimeMs` to `maxRecordTimeMinutes` and accept minutes instead of ms. 9 | 10 | # 1.0.5 11 | - Fix: `maxRecordTimeMs` was actually taken as minutes instead of ms. 12 | - Doc: The description for the record time and length were switched. 13 | 14 | # 1.0.4 15 | - Fix: Changed how user streams are served/merged during the ffmpeg process in Windows. 16 | 17 | # 1.0.3 18 | - Fix: There was a Windows permission error because an invalid temp path was taken. 19 | 20 | # 1.0.2 21 | - I'm just testing something here. 22 | 23 | # 1.0.1 24 | - Feature: You can now export the recording as stream (`getRecordedVoiceAsReadable`). 25 | - Feature: You can now export the recording as buffer (`getRecordedVoiceAsBuffer`). 26 | - Minor: Export some types. 27 | - Minor: User volumes can now set on save rather than on init. 
-------------------------------------------------------------------------------- /package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "@kirdock/discordjs-voice-recorder", 3 | "version": "1.1.1", 4 | "license": "MIT", 5 | "main": "lib/index.js", 6 | "types": "lib/index.d.ts", 7 | "author": "Klaus Striessnig", 8 | "repository": { 9 | "type": "git", 10 | "url": "https://github.com/Kirdock/discordjs-voice-recorder.git" 11 | }, 12 | "files": [ 13 | "/lib" 14 | ], 15 | "scripts": { 16 | "typecheck": "tsc --noEmit", 17 | "build": "tsc", 18 | "prepublishOnly": "npm run build", 19 | "lint:check": "eslint ./", 20 | "lint:fix": "eslint --fix ./" 21 | }, 22 | "dependencies": { 23 | "@discordjs/opus": "^0.9.0", 24 | "@discordjs/voice": ">=0.16.0", 25 | "archiver": "^5.3.1", 26 | "fluent-ffmpeg": "^2.1.2" 27 | }, 28 | "devDependencies": { 29 | "@types/archiver": "^5.3.1", 30 | "@types/fluent-ffmpeg": "^2.1.20", 31 | "@types/node": "18.15.11", 32 | "@typescript-eslint/eslint-plugin": "^5.54.1", 33 | "@typescript-eslint/parser": "^5.54.1", 34 | "eslint": "^8.35.0", 35 | "eslint-config-prettier": "^8.7.0", 36 | "typescript": "^4.5.4" 37 | } 38 | } 39 |
-------------------------------------------------------------------------------- /models/types.ts: --------------------------------------------------------------------------------
import type { AudioReceiveStream } from '@discordjs/voice';
import { Server } from 'net';
import { ReplayReadable } from '../src/replay-readable';
import { WritableOptions } from 'stream';

/** Writable options plus an optional `length` limit. */
export type ReadWriteOptions = { length?: number } & WritableOptions;

/** 'single' => one merged .mp3, 'separate' => a .zip with one .mp3 per user. */
export type AudioExportType = 'single' | 'separate';

/**
 * Volume per user id in percent (e.g. `{'1234567': 80}` for 80%).
 * NOTE(review): the type arguments were lost in this copy of the file; `number | undefined` values are
 * assumed because the other dictionaries in this file mark missing entries with `| undefined` - confirm.
 */
export type UserVolumesDict = Record<string, number | undefined>;

export type RecordOptions = {
    /**
     * Maximum size in MB a user stream can have. Default 100.
     */
    maxUserRecordingLength: number;
    /**
     * Keep last x minutes for recording. Older voice chunks will be deleted. Default 10.
     */
    maxRecordTimeMinutes: number;
    /**
     * Target sample rate of the recorded stream. Default 16,000.
     */
    sampleRate: number;
    /**
     * Target channel count of the recorded stream. Default 2.
     */
    channelCount: number;
}

/** A chunk together with the encoding it was written with. */
export interface ChunkArrayItem {
    chunk: Buffer;
    encoding: BufferEncoding
}

/** A buffered chunk together with the time span (in ms) it covers. */
export interface BufferArrayElement {
    chunk: Buffer;
    encoding: BufferEncoding;
    startTime: number;
    stopTime: number
}

/** Parameters needed to convert between durations and byte counts. */
export interface EncodingOptions {
    chunkSize: number;
    sampleRate: number;
    numChannels: number;
    bytesPerElement: number;
}

/** A socket server together with the url it can be reached at. */
export interface SocketServerConfig {
    url: string;
    server: Server;
}

/** Source/recording stream pair per user id. */
export interface UserStreams {
    [userId: string]: {
        source: AudioReceiveStream,
        out: ReplayReadable,
    } | undefined;
}

/** Minimal client shape needed to resolve a user id into a username. */
export interface DiscordClientInterface {
    users: {
        fetch: (userId: string) => Promise<{username: string}>
    }
}
-------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Voice recorder for discord.js 2 | Voice recorder or more like a replay buffer for discord.js. Base functionality is "save last x minutes". 3 | The output format can be determined to just be a single `.mp3` file or a `.zip` file that contains one audio track per user. 4 | 5 | [![npm version](https://img.shields.io/npm/v/%40kirdock%2Fdiscordjs-voice-recorder)](https://www.npmjs.com/package/@kirdock/discordjs-voice-recorder) 6 | 7 | ## Difference to other voice recording solutions 8 | You get the track as you would hear it in the voice channel. It's not just the chunks when someone is talking. => The time when someone is not speaking is added.
9 | 10 | ## Requirements 11 | - `ffmpeg` has to be installed 12 | 13 | **Disclaimer** 14 | 15 | I suggest not using Windows (or just use WSL). Reason: `sodium` is cumbersome to install. 16 | 17 | ## How to install 18 | Simply run `npm install @kirdock/discordjs-voice-recorder` or `yarn add @kirdock/discordjs-voice-recorder` 19 | 20 | ## How to use 21 | 22 | ```ts 23 | import { VoiceRecorder } from '@kirdock/discordjs-voice-recorder'; 24 | 25 | const voiceRecorder = new VoiceRecorder(); 26 | // optionally provide your Discord client as second parameter in order to have ${username}.mp3 for .zip export rather than ${userId}.mp3 27 | 28 | 29 | // start recording on a specific connection 30 | voiceRecorder.startRecording(myVoiceConnection); 31 | 32 | // save last 5 minutes as .mp3 33 | await voiceRecorder.getRecordedVoice(yourWriteStream, guildId, 'single', 5); 34 | // {yourWriteStream} can be any writeStream. E.g. response object of express or just fs.createWriteStream('myFile.mp3') 35 | 36 | // save last 5 minutes as .zip 37 | await voiceRecorder.getRecordedVoice(yourWriteStream, guildId, 'separate', 5); 38 | // {yourWriteStream} can be any writeStream. E.g. response object of express or just fs.createWriteStream('myFile.zip') 39 | 40 | // optionally you can provide a dict {[userId]: volume} to adjust the user volume of specific users 41 | await voiceRecorder.getRecordedVoice(yourWriteStream, guildId, 'single', 5, {['1234567']: 80}); // 80% 42 | 43 | // stop recording on a specific connection 44 | voiceRecorder.stopRecording(myVoiceConnection); 45 | ``` 46 | 47 | # Implementation example 48 | https://github.com/Kirdock/recordy 49 | 50 | ## Why is voice recording with discord.js such a big pain? 51 | Because Discord just provides audio chunks (20ms per chunk I guess) when a user is speaking. 52 | Problems are 53 | 1. We don't have a single track for a voice channel. Each user has its own stream. 54 | 2. 
We don't have the delay when a user stops and starts speaking again. 55 | 56 | => We have to manually sync the user streams and manually add the delays when a user is speaking. 57 |
-------------------------------------------------------------------------------- /utils/replay-readable.utils.ts: --------------------------------------------------------------------------------
import { BufferArrayElement, EncodingOptions } from '../models/types';

/**
 * Appends silent (zero-filled) chunks that cover `timeMs` milliseconds to the end of `bufArr`.
 * Nothing is appended if the duration is not positive, if there is no previous stop time to continue
 * from, or if the duration does not fill a single chunk.
 * @param bufArr buffer array that gets extended in place
 * @param timeMs duration of the silence in milliseconds
 * @param encoding encoding stored on the generated elements
 * @param options encoding options used to convert the duration into chunks
 */
export function addSilentTime(bufArr: BufferArrayElement[], timeMs: number, encoding: BufferEncoding, options: EncodingOptions): void {
    const lastStopTime = getLastStopTime(bufArr);
    if (timeMs <= 0 || !lastStopTime) {
        return;
    }
    const silence = secondsToBuffer(timeMs / 1_000, options);
    if (!silence.length) {
        return;
    }
    // spread the whole duration over the chunks instead of assuming a fixed chunk time, just to be sure
    const stepMs = timeMs / silence.length;
    let cursor = lastStopTime;
    silence.forEach((chunk) => {
        bufArr.push({chunk, encoding, startTime: cursor, stopTime: cursor + stepMs});
        cursor += stepMs;
    });
}

/**
 * Creates the zero-filled chunks that represent `seconds` of audio.
 * @param seconds duration in seconds
 * @param options encoding options (sample rate, channels, bytes per element, chunk size)
 */
export function secondsToBuffer(seconds: number, options: EncodingOptions): Buffer[] {
    const {sampleRate, numChannels, bytesPerElement, chunkSize} = options;
    return bytesToBuffer(secondsToBytes(seconds, sampleRate, numChannels, bytesPerElement), chunkSize);
}

/**
 * Silent padding will be added if the stream is missing time (if asynchronous or when the user didn't speak for a while). Then it will be synchronous again
 * @param bufArr
 * @param chunkStartTimeBefore
 * @param chunkStartTimeNew
 * @param encodingOptions
 */
export function syncStream(bufArr: BufferArrayElement[], chunkStartTimeBefore: number, chunkStartTimeNew: number, encodingOptions: EncodingOptions): void {
    const {sampleRate, numChannels, bytesPerElement} = encodingOptions;
    const recordedMs = getRecordTimeTillEnd(bufArr, chunkStartTimeBefore, sampleRate, numChannels, bytesPerElement);
    const elapsedMs = chunkStartTimeNew - chunkStartTimeBefore;
    addSilentTime(bufArr, elapsedMs - recordedMs, 'buffer' as BufferEncoding, encodingOptions);
}

/**
 * Returns the stop time of the last element, or undefined if the array is empty.
 */
export function getLastStopTime(bufArr: BufferArrayElement[]): number | undefined {
    const last = bufArr[bufArr.length - 1];
    return last?.stopTime;
}

/**
 * Creates as many zero-filled buffers of `chunkSize` bytes as fit completely into `bytes`.
 */
function bytesToBuffer(bytes: number, chunkSize: number): Buffer[] {
    const chunkCount = Math.floor(bytes / chunkSize);
    const result: Buffer[] = [];
    while (result.length < chunkCount) {
        result.push(Buffer.alloc(chunkSize));
    }

    return result;
}

/**
 * Converts a duration in seconds into the number of bytes it occupies.
 */
function secondsToBytes(silenceTimeSec: number, sampleRate: number, numChannels: number, bytesPerElement: number): number {
    return silenceTimeSec * sampleRate * numChannels * bytesPerElement;
}

/**
 * Returns the duration in milliseconds that `chunk` covers.
 */
export function getChunkTimeMs(chunk: Buffer, sampleRate: number, numChannels: number, bytesPerElement: number): number {
    const samplesPerChannel = chunk.byteLength / bytesPerElement / numChannels;
    return (samplesPerChannel / sampleRate) * 1_000;
}

/**
 * Sums up the duration (ms) of all elements that do not start before `startTime`.
 */
function getRecordTimeTillEnd(bufArr: BufferArrayElement[], startTime: number, sampleRate: number, numChannels: number, bytesPerElement: number): number {
    let totalMs = 0;
    for (const element of bufArr) {
        const startsBefore = element.startTime < startTime;
        totalMs += startsBefore ? 0 : getChunkTimeMs(element.chunk, sampleRate, numChannels, bytesPerElement);
    }
    return totalMs;
}
-------------------------------------------------------------------------------- /src/replay-readable.ts: --------------------------------------------------------------------------------
import { OpusEncoder } from '@discordjs/opus';
import { Readable, Writable, WritableOptions } from 'stream';
import { getChunkTimeMs, getLastStopTime, secondsToBuffer, syncStream } from '../utils/replay-readable.utils';
import { BufferArrayElement, ChunkArrayItem, EncodingOptions, ReadWriteOptions } from '../models/types';
import Timeout = NodeJS.Timeout;

export class ReplayReadable extends Writable {
    private readonly _highWaterMark: number;
    private readonly _bufArr: BufferArrayElement[];
    private readonly _bufArrLength: number; // max _bufArr length
    private readonly _readableOptions: ReadWriteOptions;
    private _waiting: ((error?: Error | null) => void) | null;
    private readonly fadeOutInterval: Timeout;
    private readonly _encoder: OpusEncoder;
    private readonly encodingOptions: EncodingOptions;
    private _startTimeOfNextChunk?: number;
    private _startTimeOfChunkBefore?: number;

    /**
     *
     * @param lifeTimeMs max record time in milliseconds.
Older chunks get deleted 22 | * @param sampleRate 23 | * @param numChannels 24 | * @param getUserSpeakingTime 25 | * @param options 26 | */ 27 | // eslint-disable-next-line @typescript-eslint/ban-ts-comment 28 | // @ts-ignore ignore that super() has to be called at the very top 29 | constructor(lifeTimeMs: number, sampleRate: number, numChannels: number, private getUserSpeakingTime: () => number | undefined, options?: ReadWriteOptions) { 30 | const adjustedOptions = Object.assign({ 31 | length: 1048576, // 2^20 = 1 MB 32 | highWaterMark: 32, 33 | dropInterval: 1e3 34 | }, options) as WritableOptions & { length: number, highWaterMark: number, dropInterval: number }; 35 | super(adjustedOptions); 36 | 37 | const chunkTimeMs = 20; 38 | const bytesPerElement = 2; // buffer is Uint8Array but the data inside is PCM 16-bit 39 | this._readableOptions = adjustedOptions; 40 | this._encoder = new OpusEncoder(sampleRate, numChannels); 41 | this.encodingOptions = { 42 | numChannels, 43 | sampleRate, 44 | chunkSize: (chunkTimeMs / 1000) * sampleRate * numChannels * Uint8Array.BYTES_PER_ELEMENT * bytesPerElement, 45 | bytesPerElement, 46 | } 47 | this._highWaterMark = adjustedOptions.highWaterMark; 48 | this._bufArrLength = adjustedOptions.length; 49 | this._bufArr = []; 50 | this._waiting = null; 51 | this.fadeOutInterval = setInterval(() => { 52 | this.fadeOutCheck(lifeTimeMs); 53 | }, 5_000); // check every 5 seconds if some chunks timed out 54 | } 55 | 56 | private get startTimeOfNextChunk(): undefined | number { 57 | return this._startTimeOfNextChunk; 58 | } 59 | 60 | private set startTimeOfNextChunk(time: number | undefined) { 61 | if (this._startTimeOfChunkBefore && time) { 62 | syncStream(this._bufArr, this._startTimeOfChunkBefore, time, this.encodingOptions) 63 | } 64 | this._startTimeOfNextChunk = this._startTimeOfChunkBefore = time; 65 | } 66 | 67 | public get startTimeMs(): number { 68 | return this._bufArr[0]?.startTime ?? 
Date.now(); 69 | } 70 | 71 | public _write(chunk: Buffer, encoding: BufferEncoding, callback: (error?: Error | null) => void) { 72 | // encoding is 'buffer'... whatever... 73 | 74 | const userStartedSpeaking = this.getUserSpeakingTime(); 75 | const userJustBeganSpeaking = userStartedSpeaking !== this._startTimeOfChunkBefore; 76 | if(userJustBeganSpeaking) { 77 | this.startTimeOfNextChunk = userStartedSpeaking; 78 | } 79 | 80 | // start time of the user in the speaking map is probably the real start time and not the time the chunk is received. So it's probably not startTime - chunkTime 81 | const addTime = this.getStartTimeOfNextChunk(); 82 | 83 | chunk = this.decodeChunk(chunk); // always 1280 bytes; 20 ms 84 | const startTimeOfNewChunk = userJustBeganSpeaking ? addTime : getLastStopTime(this._bufArr) as number; // there must be an element because isCorrectStartTime is true before it starts recording 85 | 86 | this._bufArr.push({ 87 | chunk, 88 | encoding, 89 | startTime: startTimeOfNewChunk, 90 | stopTime: startTimeOfNewChunk + getChunkTimeMs(chunk, this.encodingOptions.sampleRate, this.encodingOptions.numChannels, this.encodingOptions.bytesPerElement) 91 | }); 92 | this.checkAndDrop(callback); 93 | this.emit('wrote'); 94 | } 95 | 96 | // eslint-disable-next-line @typescript-eslint/no-unused-vars 97 | public _writev(chunks: Array, callback: (error?: Error | null) => void) { 98 | this.emit('wrote'); 99 | } 100 | 101 | public _destroy(error: Error | null, callback: (error?: (Error | null)) => void) { 102 | clearInterval(this.fadeOutInterval); 103 | super._destroy(error, callback); 104 | } 105 | 106 | private drop(): void { 107 | if (this._bufArr.length > this._bufArrLength) { 108 | this.emit('drop', this._bufArr.splice(0, this._bufArr.length - this._bufArrLength).length); 109 | } 110 | } 111 | 112 | public rewind(startTime: number, stopTime: number): Readable { 113 | const ret: Readable = new Readable({ 114 | highWaterMark: this._readableOptions.highWaterMark, 115 | 
read: () => { 116 | // continue to write the user stream within the time frame 117 | for (let i = this.writeSkipAndDelay(ret, startTime); i < this._bufArr.length && this._bufArr[i].startTime < stopTime; ++i) { 118 | const element = this._bufArr[i]; 119 | const resp = ret.push(element.chunk, element.encoding); 120 | if (!resp) { // until there's not willing to read 121 | break; 122 | } 123 | } 124 | 125 | ret.push(null); // null = end of stream 126 | } 127 | }); 128 | 129 | return ret; 130 | } 131 | 132 | /** 133 | * Skips the user stream up to the start of the record time or adds a delay until the start time 134 | * @param ret 135 | * @param startTime 136 | * @private 137 | * @return index of the next buffer element that can be processed 138 | */ 139 | private writeSkipAndDelay(ret: Readable, startTime: number): number { 140 | for (let i = 0; i < this._bufArr.length; ++i) { 141 | const element = this._bufArr[i]; 142 | 143 | if (element.startTime >= startTime) { 144 | // add delay time till start time of user 145 | const delayTimeSec = (element.startTime - startTime) / 1_000; 146 | if (delayTimeSec > 0) { 147 | const buffers = secondsToBuffer(delayTimeSec, this.encodingOptions); 148 | for (const buffer of buffers) { 149 | ret.push(buffer, this._bufArr[0].encoding); 150 | } 151 | } 152 | return i; 153 | } // else skipTime 154 | } 155 | return this._bufArr.length; 156 | } 157 | 158 | private checkAndDrop(callback: (error?: Error | null) => void): void { 159 | if (this._bufArr.length > this._bufArrLength) { 160 | this._waiting = callback; 161 | this.drop(); 162 | } else { 163 | callback(); 164 | } 165 | } 166 | 167 | private getStartTimeOfNextChunk(): number { 168 | const time = this.startTimeOfNextChunk || getLastStopTime(this._bufArr) || Date.now(); 169 | this._startTimeOfNextChunk = undefined; 170 | return time; 171 | } 172 | 173 | private decodeChunk(chunk: Buffer): Buffer { 174 | return this._encoder.decode(chunk); 175 | } 176 | 177 | private 
fadeOutCheck(lifeTime: number): void { 178 | const newDate = Date.now(); 179 | let dropped = 0; 180 | while (dropped < this._bufArr.length && (newDate - this._bufArr[dropped].startTime) > lifeTime) { 181 | ++dropped 182 | } 183 | if (dropped) { 184 | this._bufArr.splice(0, dropped); 185 | this.emit('drop', dropped); 186 | } 187 | } 188 | } 189 | -------------------------------------------------------------------------------- /src/voice-recorder.ts: -------------------------------------------------------------------------------- 1 | import type { VoiceConnection } from '@discordjs/voice'; 2 | import { AudioReceiveStream, EndBehaviorType } from '@discordjs/voice'; 3 | import ffmpeg, { FfmpegCommand, FilterSpecification } from 'fluent-ffmpeg'; 4 | import { resolve } from 'path'; 5 | import { ReplayReadable } from './replay-readable'; 6 | import { AudioExportType, DiscordClientInterface, RecordOptions, SocketServerConfig, UserStreams, UserVolumesDict } from '../models/types'; 7 | import { PassThrough, Readable, Writable } from 'stream'; 8 | import * as net from 'net'; 9 | import { Server } from 'net'; 10 | import { randomUUID } from 'crypto'; 11 | import archiver from 'archiver'; 12 | import { platform, tmpdir } from 'os'; 13 | 14 | 15 | export class VoiceRecorder { 16 | private readonly options: RecordOptions; 17 | private static readonly PCM_FORMAT = 's16le'; 18 | private static readonly tempPath = tmpdir(); 19 | private writeStreams: Record void; 22 | } | undefined> = {}; 23 | 24 | /** 25 | * 26 | * @param options Record options 27 | * @param discordClient The client is used to translate the userId into the username. This is just important for .zip export. The filename contains the username, else it contains the userId 28 | */ 29 | constructor(options: Partial = {}, private discordClient?: DiscordClientInterface) { 30 | this.options = { 31 | maxUserRecordingLength: (options.maxUserRecordingLength ?? 
100) * 1_024 * 1_024, 32 | maxRecordTimeMinutes: (options.maxRecordTimeMinutes ?? 10) * 60 * 1_000, 33 | sampleRate: (options.sampleRate ?? 16_000), 34 | channelCount: (options.channelCount ?? 2) 35 | }; 36 | } 37 | 38 | 39 | /** 40 | * Checks if the recording is currently in progress. Optionally a guild id can be provided to check if a recording is running on a certain guild. 41 | * @param guildId 42 | */ 43 | public isRecording(guildId?: string): boolean { 44 | if(guildId) { 45 | return !!this.writeStreams[guildId]; 46 | } 47 | return !!Object.keys(this.writeStreams).length; 48 | } 49 | 50 | public startRecording(connection: VoiceConnection): void { 51 | const guildId = connection.joinConfig.guildId; 52 | if (this.writeStreams[guildId]) { 53 | return; 54 | } 55 | const listener = (userId: string) => { 56 | const streams: {source: AudioReceiveStream, out: ReplayReadable} | undefined = this.writeStreams[guildId]?.userStreams[userId]; 57 | if(streams) { 58 | // already listening 59 | return; 60 | } 61 | this.startRecordStreamOfUser(guildId, userId, connection); 62 | } 63 | this.writeStreams[guildId] = { 64 | userStreams: {}, 65 | listener, 66 | }; 67 | connection.receiver.speaking.on('start', listener); 68 | } 69 | 70 | private startRecordStreamOfUser(guildId: string, userId: string, connection: VoiceConnection): void { 71 | const serverStream = this.writeStreams[guildId]; 72 | if(!serverStream) { 73 | return; 74 | } 75 | 76 | const recordStream = new ReplayReadable(this.options.maxRecordTimeMinutes, this.options.sampleRate, this.options.channelCount, ()=> connection.receiver.speaking.users.get(userId), { 77 | highWaterMark: this.options.maxUserRecordingLength, 78 | length: this.options.maxUserRecordingLength 79 | }); 80 | const opusStream = connection.receiver.subscribe(userId, { 81 | end: { 82 | behavior: EndBehaviorType.AfterSilence, 83 | duration: this.options.maxRecordTimeMinutes, 84 | }, 85 | }); 86 | 87 | opusStream.on('error', (error: Error) => { 88 | 
console.error(error, `Error while recording voice for user ${userId} in server: ${guildId}`); 89 | }); 90 | 91 | opusStream.on('end', () => { 92 | this.stopUserRecording(guildId, userId); 93 | }); 94 | 95 | opusStream.pipe(recordStream, {end: false}); 96 | 97 | serverStream.userStreams[userId] = { out: recordStream, source: opusStream }; 98 | } 99 | 100 | /** 101 | * Stops the voice recording for the specified voice connection 102 | * @param connection 103 | */ 104 | public stopRecording(connection: VoiceConnection): void { 105 | const guildId = connection.joinConfig.guildId; 106 | const serverStreams = this.writeStreams[guildId]; 107 | if(!serverStreams) { 108 | return; 109 | } 110 | connection.receiver.speaking.removeListener('start', serverStreams.listener); 111 | 112 | for (const userId in serverStreams.userStreams) { 113 | this.stopUserRecording(guildId, userId); 114 | } 115 | delete this.writeStreams[guildId]; 116 | } 117 | 118 | private stopUserRecording(guildId: string, userId: string): void { 119 | const serverStreams = this.writeStreams[guildId]; 120 | if(!serverStreams) { 121 | return; 122 | } 123 | const userStream = serverStreams.userStreams[userId]; 124 | if(!userStream) { 125 | return; 126 | } 127 | userStream.source.destroy(); 128 | userStream.out.destroy(); 129 | delete serverStreams.userStreams[userId]; 130 | } 131 | 132 | /** 133 | * 134 | * @param writeStream The write stream in that the mp3 or zip file has to be saved. e.g. the response object of express or simply fs.createWriteStream('myFile.mp3') 135 | * @param guildId Guild id of the server. Determines on which server the recording should be saved 136 | * @param exportType Export type of the recording. Can either be 'single' => .mp3 or 'separate' => .zip 137 | * @param minutes Determines how many minutes (max is options.maxRecordTimeMs/1_000/60) 138 | * @param userVolumes User dict {[userId]: number} that determines the volume for a user. 
Default 100 per user (100%) 139 | */ 140 | public async getRecordedVoice(writeStream: T, guildId: string, exportType: AudioExportType = 'single', minutes = 10, userVolumes: UserVolumesDict = {}): Promise { 141 | const serverStream = this.writeStreams[guildId]; 142 | if (!serverStream) { 143 | console.warn(`server with id ${guildId} does not have any streams`, 'Record voice'); 144 | return false; 145 | } 146 | const minStartTimeMs = this.getMinStartTime(guildId); 147 | 148 | if (!minStartTimeMs) { 149 | return false; 150 | } 151 | 152 | const recordDurationMs = Math.min(Math.abs(minutes) * 60 * 1_000, this.options.maxRecordTimeMinutes); 153 | const endTimeMs = Date.now(); 154 | const maxRecordTime = endTimeMs - recordDurationMs; 155 | const startRecordTime = Math.max(minStartTimeMs, maxRecordTime); 156 | const recordMethod = (exportType === 'single' ? this.generateMergedRecording : this.generateSplitRecording).bind(this); 157 | 158 | return recordMethod(serverStream.userStreams, startRecordTime, endTimeMs, writeStream, userVolumes); 159 | } 160 | 161 | /** 162 | * 163 | * @param guildId Guild id of the server. Determines on which server the recording should be saved 164 | * @param exportType Export type of the recording. Can either be 'single' => .mp3 or 'separate' => .zip 165 | * @param minutes Determines how many minutes (max is options.maxRecordTimeMs/1_000/60) 166 | * @param userVolumes User dict {[userId]: number} that determines the volume for a user. 
Default 100 per user (100%) 167 | */ 168 | public async getRecordedVoiceAsBuffer(guildId: string, exportType: AudioExportType = 'single', minutes = 10, userVolumes: UserVolumesDict = {}): Promise { 169 | const bufferStream = new PassThrough(); 170 | const buffers: Buffer[] = []; 171 | const bufferPromise = new Promise((resolve) => { 172 | bufferStream.on('finish', resolve); 173 | bufferStream.on('error', resolve); 174 | }); 175 | 176 | bufferStream.on('data', (data) => { 177 | buffers.push(data); 178 | }); 179 | 180 | const result = await this.getRecordedVoice(bufferStream, guildId, exportType, minutes, userVolumes); 181 | if(!result) { 182 | return Buffer.from([]); 183 | } 184 | await bufferPromise; 185 | return Buffer.concat(buffers); 186 | } 187 | 188 | /** 189 | * 190 | * @param guildId Guild id of the server. Determines on which server the recording should be saved 191 | * @param exportType Export type of the recording. Can either be 'single' => .mp3 or 'separate' => .zip 192 | * @param minutes Determines how many minutes (max is options.maxRecordTimeMs/1_000/60) 193 | * @param userVolumes User dict {[userId]: number} that determines the volume for a user. 
Default 100 per user (100%) 194 | */ 195 | public getRecordedVoiceAsReadable(guildId: string, exportType: AudioExportType = 'single', minutes = 10, userVolumes: UserVolumesDict = {}): Readable { 196 | const passThrough = new PassThrough({allowHalfOpen: true}); 197 | void this.getRecordedVoice(passThrough, guildId, exportType, minutes, userVolumes); 198 | return passThrough; 199 | } 200 | 201 | private generateMergedRecording(userStreams: UserStreams, startRecordTime: number, endTime: number, writeStream: Writable, userVolumes?: UserVolumesDict): Promise { 202 | return new Promise((resolve, reject) => { 203 | const {command, openServers} = this.getFfmpegSpecs(userStreams, startRecordTime, endTime, userVolumes); 204 | if (!openServers.length) { 205 | return resolve(false); 206 | } 207 | command 208 | .on('end', () => { 209 | openServers.forEach(server => server.close()); 210 | resolve(true); 211 | }) 212 | .on('error', (error) => { 213 | openServers.forEach(server => server.close()); 214 | reject(error); 215 | }) 216 | .outputFormat('mp3') 217 | .writeToStream(writeStream, {end: true}); 218 | }); 219 | } 220 | 221 | private async generateSplitRecording(userStreams: UserStreams, startRecordTime: number, endTime: number, writeStream: Writable, userVolumes?: UserVolumesDict): Promise { 222 | const archive = archiver('zip'); 223 | const userIds = Object.keys(userStreams); 224 | if (!userIds.length) { 225 | return false; 226 | } 227 | for (const userId of userIds) { 228 | //eslint-disable-next-line @typescript-eslint/no-non-null-assertion 229 | const passThroughStream = this.getUserRecordingStream(userStreams[userId]!.out.rewind(startRecordTime, endTime), userId, userVolumes); 230 | const username = await this.getUsername(userId); 231 | archive.append(passThroughStream, { 232 | name: `${username}.mp3` 233 | }); 234 | } 235 | 236 | return new Promise((resolve, reject) => { 237 | archive 238 | .on('end', () => resolve(true)) 239 | .on('error', reject) 240 | 
.pipe(writeStream, {end: true}); 241 | archive.finalize(); 242 | }); 243 | } 244 | 245 | private async getUsername(userId: string): Promise { 246 | if (this.discordClient) { 247 | try { 248 | const { username } = await this.discordClient.users.fetch(userId); 249 | return username; 250 | } catch (error) { 251 | console.error(`Username of userId: ${userId} can't be fetched!`, error); 252 | } 253 | } 254 | return userId; 255 | } 256 | 257 | private getUserRecordingStream(stream: Readable, userId: string, userVolumes?: UserVolumesDict): PassThrough { 258 | const passThroughStream = new PassThrough({allowHalfOpen: false}); 259 | 260 | ffmpeg(stream) 261 | .inputOptions(this.getRecordInputOptions()) 262 | .audioFilters([ 263 | { 264 | filter: 'volume', 265 | options: ((this.getUserVolume(userId, userVolumes)) / 100).toString(), 266 | } 267 | ] 268 | ) 269 | .outputFormat('mp3') 270 | .output(passThroughStream, {end: true}) 271 | .run(); 272 | return passThroughStream; 273 | } 274 | 275 | private getUserVolume(userId: string, userVolumes?: UserVolumesDict): number { 276 | return userVolumes?.[userId] ?? 100; 277 | } 278 | 279 | private getMinStartTime(guildId: string): number | undefined { 280 | let minStartTime: number | undefined; 281 | const userStreams: UserStreams = this.writeStreams[guildId]?.userStreams ?? 
{}; 282 | 283 | for (const userId in userStreams) { 284 | //eslint-disable-next-line @typescript-eslint/no-non-null-assertion 285 | const startTime = userStreams[userId]!.out.startTimeMs; 286 | 287 | if (!minStartTime || (startTime < minStartTime)) { 288 | minStartTime = startTime; 289 | } 290 | } 291 | return minStartTime; 292 | } 293 | 294 | private getFfmpegSpecs(streams: UserStreams, startRecordTime: number, endTimeMs: number, userVolumesDict?: UserVolumesDict): { command: FfmpegCommand, openServers: Server[] } { 295 | let ffmpegOptions = ffmpeg(); 296 | const amixStrings: string[] = []; 297 | const volumeFilter: FilterSpecification[] = []; 298 | const openServers: Server[] = []; 299 | 300 | for (const userId in streams) { 301 | //eslint-disable-next-line @typescript-eslint/no-non-null-assertion 302 | const stream = streams[userId]!.out; 303 | try { 304 | const output = `[s${volumeFilter.length}]`; 305 | const {server, url} = this.serveStream(stream, startRecordTime, endTimeMs); 306 | 307 | ffmpegOptions = ffmpegOptions 308 | .addInput(url) 309 | .inputOptions(this.getRecordInputOptions()); 310 | 311 | volumeFilter.push({ 312 | filter: 'volume', 313 | options: [(this.getUserVolume(userId, userVolumesDict) / 100).toString()], 314 | inputs: `${volumeFilter.length}:0`, 315 | outputs: output, 316 | }); 317 | openServers.push(server); 318 | amixStrings.push(output); 319 | } catch (e) { 320 | console.error(e as Error, 'Error while saving user recording'); 321 | } 322 | } 323 | 324 | return { 325 | command: ffmpegOptions.complexFilter([ 326 | ...volumeFilter, 327 | { 328 | filter: `amix=inputs=${volumeFilter.length}`, 329 | inputs: amixStrings.join(''), 330 | } 331 | ]), 332 | openServers, 333 | } 334 | } 335 | 336 | private getRecordInputOptions(): string[] { 337 | return [`-f ${VoiceRecorder.PCM_FORMAT}`, `-ar ${this.options.sampleRate}`, `-ac ${this.options.channelCount}`]; 338 | } 339 | 340 | private serveStream(stream: ReplayReadable, startRecordTime: number, 
endTimeMs: number): SocketServerConfig { 341 | let socketPath: string, url: string; 342 | 343 | if(platform() === 'win32') { 344 | socketPath = url = `\\\\.\\pipe\\${randomUUID()}`; 345 | } else { 346 | socketPath = resolve(VoiceRecorder.tempPath, `${randomUUID()}.sock`); 347 | url = `unix:${socketPath}`; 348 | } 349 | const server = net.createServer((socket) => stream.rewind(startRecordTime, endTimeMs).pipe(socket)); 350 | server.listen(socketPath); 351 | // complex filters are probably reading the files several times. Therefore, the server can't be closed after the stream is read. 352 | return { 353 | url, 354 | server 355 | }; 356 | } 357 | } 358 | --------------------------------------------------------------------------------