├── .gitignore ├── LICENSE ├── README.md ├── bot_conversation.py ├── bot_repeat.py ├── bot_say.py ├── bot_transcribe.py ├── create_conversation_log.py ├── main.js ├── package.json ├── recordingbot └── __init__.py ├── requirements.txt └── run.bat /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | env/ 12 | build/ 13 | develop-eggs/ 14 | dist/ 15 | downloads/ 16 | eggs/ 17 | .eggs/ 18 | lib/ 19 | lib64/ 20 | parts/ 21 | sdist/ 22 | var/ 23 | *.egg-info/ 24 | .installed.cfg 25 | *.egg 26 | 27 | # PyInstaller 28 | # Usually these files are written by a python script from a template 29 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 30 | *.manifest 31 | *.spec 32 | 33 | # Installer logs 34 | pip-log.txt 35 | pip-delete-this-directory.txt 36 | 37 | # Unit test / coverage reports 38 | htmlcov/ 39 | .tox/ 40 | .coverage 41 | .coverage.* 42 | .cache 43 | nosetests.xml 44 | coverage.xml 45 | *,cover 46 | .hypothesis/ 47 | 48 | # Translations 49 | *.mo 50 | *.pot 51 | 52 | # Django stuff: 53 | *.log 54 | local_settings.py 55 | 56 | # Flask instance folder 57 | instance/ 58 | 59 | # Scrapy stuff: 60 | .scrapy 61 | 62 | # Sphinx documentation 63 | docs/_build/ 64 | 65 | # PyBuilder 66 | target/ 67 | 68 | # IPython Notebook 69 | .ipynb_checkpoints 70 | 71 | # pyenv 72 | .python-version 73 | 74 | # celery beat schedule file 75 | celerybeat-schedule 76 | 77 | # dotenv 78 | .env 79 | 80 | # virtualenv 81 | venv/ 82 | ENV/ 83 | 84 | # Spyder project settings 85 | .spyderproject 86 | 87 | # Rope project settings 88 | .ropeproject 89 | 90 | # ========================= 91 | # Operating System Files 92 | # ========================= 93 | 94 | # OSX 95 | # ========================= 96 | 97 | .DS_Store 98 | .AppleDouble 99 | .LSOverride 100 | 101 | # Thumbnails 102 | ._* 103 | 104 | # Files that might appear in the root of a volume 105 | .DocumentRevisions-V100 106 | .fseventsd 107 | .Spotlight-V100 108 | .TemporaryItems 109 | .Trashes 110 | .VolumeIcon.icns 111 | 112 | # Directories potentially created on remote AFP share 113 | .AppleDB 114 | .AppleDesktop 115 | Network Trash Folder 116 | Temporary Items 117 | .apdisk 118 | 119 | # Windows 120 | # ========================= 121 | 122 | # Windows image file caches 123 | Thumbs.db 124 | ehthumbs.db 125 | 126 | # Folder config file 127 | Desktop.ini 128 | 129 | # Recycle Bin used on file shares 130 | $RECYCLE.BIN/ 131 | 132 | # Windows Installer files 133 | *.cab 134 | *.msi 135 | *.msm 136 | *.msp 137 | 138 | # Windows shortcuts 139 | *.lnk 140 | 141 | # Other 142 | node_modules/ 143 | *.opus_hex 144 | *.pcm_raw 145 | *.wav 146 | voiceapi.json 147 | conv.txt -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2017 Shrivu Shankar 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Recording Bot 2 | A bot built to record and transcribe audio fragments from Discord. 3 | 4 | Some functions taken from [podbot](https://github.com/Fiddlekins/podbot). 5 | 6 | ## Dependencies 7 | + [NodeJS](https://nodejs.org/) 8 | + [Python3](https://www.python.org/) 9 | + [SpeechRecognition](https://pypi.python.org/pypi/SpeechRecognition/) 10 | + [Discord.js](https://discord.js.org) 11 | + [node-opus](https://www.npmjs.com/package/node-opus) 12 | 13 | ## Usage 14 | 15 | #### Install 16 | + Set up the [Cloud Speech Api](https://cloud.google.com/speech/) and download credentials json. 17 | + Create a [Discord Bot](https://discordapp.com/developers/). 18 | + ```git clone https://github.com/sshh12/Recording-Bot.git``` 19 | + ```npm install``` 20 | + ```pip install -r requirements.txt``` 21 | 22 | #### Run 23 | + In ```main.js```, change ```pythonapp``` to the name of one of the bot_\*.py scripts 24 | + Change ```token``` to the Discord bot token 25 | + ```node main.js``` 26 | 27 | #### Scripts 28 | Depending on what you want to do with the recordings, you can select or write a python 29 | app and point ```main.js``` to it. 30 | + ```bot_transcribe.py``` transcribes the speech of a user to their text channel. 31 | + ```bot_repeat.py``` plays back each audio file. 32 | + ```bot_conversation.py``` records conversations (prompt -> response) then replays response the next time 33 | it hears the same prompt. This requires ```create_conversation_log.py``` to preprocess audio files. 34 | + ```bot_say.py``` plays lastest audio file with transcription of X when user says "say X". -------------------------------------------------------------------------------- /bot_conversation.py: -------------------------------------------------------------------------------- 1 | from recordingbot import Bot 2 | import os 3 | 4 | 5 | conversationfn = 'conv.txt' # create this file on first run 6 | converations = {} 7 | 8 | 9 | with open(conversationfn, 'r') as conv: # run create_conversation_log.py to generate this 10 | 11 | for line in conv.read().split("\n"): 12 | 13 | if "#" in line: 14 | 15 | text, fn = line.split("#") 16 | converations.update({text : fn}) 17 | 18 | 19 | class ConversationBot(Bot): 20 | 21 | def process(self, text, memberid, timestamp, wavefn): 22 | 23 | newfn = "{}!{}!{}".format(memberid, timestamp, text.replace(" ", "_") + ".wav") 24 | 25 | os.rename(wavefn, os.path.join(self.datapath, newfn)) 26 | 27 | if text in converations: 28 | self.play(converations[text]) 29 | 30 | if __name__ == "__main__": 31 | 32 | bot = ConversationBot('voicedata', 'voiceapi.json') 33 | bot.run() 34 | -------------------------------------------------------------------------------- /bot_repeat.py: -------------------------------------------------------------------------------- 1 | from recordingbot import Bot 2 | 3 | class RepeatBot(Bot): 4 | 5 | def process(self, text, memberid, timestamp, wavefn): 6 | 7 | self.play(wavefn) 8 | 9 | if __name__ == "__main__": 10 | 11 | bot = RepeatBot('voicedata', 'voiceapi.json') 12 | bot.run() 13 | -------------------------------------------------------------------------------- /bot_say.py: -------------------------------------------------------------------------------- 1 | from recordingbot import Bot 2 | 3 | class SayBot(Bot): 4 | 5 | def process(self, text, memberid, timestamp, wavefn): 6 | 7 | newfn = "{}!{}!{}".format(memberid, timestamp, text.replace(" ", "_") + ".wav") 8 | 9 | os.rename(wavefn, os.path.join(self.datapath, newfn)) 10 | 11 | if text.startswith("say"): 12 | 13 | for fn in os.listdir('voicedata'): 14 | 15 | if "!" in fn: 16 | 17 | file_text = fn.split("!")[-1][:-4].replace("_", " ") 18 | 19 | if file_text in text: 20 | 21 | self.play(fn) 22 | 23 | if __name__ == "__main__": 24 | 25 | bot = SayBot('voicedata', 'voiceapi.json') 26 | bot.run() 27 | -------------------------------------------------------------------------------- /bot_transcribe.py: -------------------------------------------------------------------------------- 1 | from recordingbot import Bot 2 | 3 | class TranscribeBot(Bot): 4 | 5 | def process(self, text, memberid, timestamp, wavefn): 6 | 7 | self.message(text) 8 | 9 | if __name__ == "__main__": 10 | 11 | bot = TranscribeBot('voicedata', 'voiceapi.json') 12 | bot.run() 13 | -------------------------------------------------------------------------------- /create_conversation_log.py: -------------------------------------------------------------------------------- 1 | from itertools import combinations 2 | import wave 3 | import os 4 | 5 | datapath = 'voicedata' 6 | conversationfn = 'conv.txt' 7 | response_delay = 2000 # Max time between people to be considered a prompt and response 8 | 9 | class Recording(object): 10 | 11 | def __init__(self, fn): 12 | """ 13 | An object that represents an audio file that passed through the api 14 | 15 | Parameters 16 | ---------- 17 | fn : str 18 | Path to the audio file 19 | """ 20 | self.fn = fn 21 | 22 | self.userid, self.timestamp, self.text = self.fn[:-4].split("!") 23 | 24 | self.timestamp = int(self.timestamp) 25 | self.text = self.text.replace("_", " ") 26 | 27 | with wave.open(os.path.join(datapath, self.fn), 'rb') as wav: 28 | self.frames = wav.getnframes() 29 | self.framerate = wav.getframerate() 30 | 31 | self.length = 1000 * wav.getnframes() / wav.getframerate() # Time in millis 32 | 33 | self.stop = self.timestamp # Timestamps are based on end of recording 34 | self.start = self.timestamp - self.length 35 | 36 | def main(): 37 | 38 | recordings = [Recording(fn) for fn in os.listdir(datapath) if "!" in fn] 39 | 40 | with open(conversationfn, 'w') as conv: 41 | 42 | for a, b in combinations(recordings, 2): 43 | 44 | if a == b or a.userid == b.userid or a.stop > b.start: 45 | continue 46 | 47 | delay = b.start - a.stop # Time between prompt end and response start 48 | 49 | if delay <= response_delay: 50 | conv.write("#".join([a.text, b.fn]) + "\n") # Writes prompt and response filename for every valid conversation 51 | 52 | 53 | if __name__ == "__main__": 54 | main() 55 | -------------------------------------------------------------------------------- /main.js: -------------------------------------------------------------------------------- 1 | 'use strict'; 2 | 3 | const fs = require('fs'); 4 | const path = require('path'); 5 | const Discord = require('discord.js'); 6 | const opus = require('node-opus'); 7 | 8 | const datapath = 'voicedata'; 9 | const rate = 48000; 10 | const frame_size = 1920; 11 | const channels = 2; 12 | const pythonapp = 'bot_conversation.py' 13 | const token = ''; 14 | 15 | const child = require('child_process'); 16 | 17 | let voiceConnections = new Map(); 18 | let voiceReceivers = new Map(); 19 | let writeStreams = new Map(); 20 | 21 | let client = new Discord.Client() 22 | 23 | let textChannel; 24 | 25 | client.on('ready', () => { 26 | console.log("Started!"); 27 | }); 28 | 29 | client.on('message', (msg) => { 30 | if (msg.content.charAt(0) === '!') { 31 | switch (msg.content.slice(1)) { 32 | case 'on': 33 | textChannel = msg.channel; 34 | start(msg.member); 35 | break; 36 | case 'off': 37 | stop(msg.member); 38 | break; 39 | } 40 | } 41 | }); 42 | 43 | client.on('guildMemberSpeaking', (member, speaking) => { 44 | 45 | if (!speaking && member.voiceChannel) { 46 | let receiver = voiceReceivers.get(member.voiceChannelID); 47 | if (receiver) { 48 | let writeStream = writeStreams.get(member.id); 49 | if (writeStream) { 50 | writeStreams.delete(member.id); 51 | writeStream.end((err) => { 52 | if (err) { 53 | console.error(err); 54 | } else { 55 | let pcmpath = writeStream.path.replace(".opus_hex", ".pcm_raw"); 56 | save(writeStream.path, pcmpath, member); 57 | } 58 | }); 59 | } 60 | } 61 | } 62 | 63 | }); 64 | 65 | let start = (member) => { 66 | 67 | if (!member || !member.voiceChannel) { 68 | return; 69 | } 70 | 71 | member.voiceChannel.join().then((voiceConnection) => { 72 | 73 | console.log("Recording..."); 74 | 75 | voiceConnections.set(member.voiceChannelID, voiceConnection); 76 | let voiceReceiver = voiceConnection.createReceiver(); 77 | voiceReceiver.on('opus', (user, data) => { 78 | let hexString = data.toString('hex'); 79 | let writeStream = writeStreams.get(user.id); 80 | if (!writeStream) { 81 | if (hexString === 'f8fffe') { 82 | return; 83 | } 84 | let outputPath = path.join(datapath, `${Date.now()}.opus_hex`); 85 | writeStream = fs.createWriteStream(outputPath); 86 | writeStreams.set(user.id, writeStream); 87 | } 88 | writeStream.write(`,${hexString}`); 89 | }); 90 | voiceReceivers.set(member.voiceChannelID, voiceReceiver); 91 | }).catch(console.error); 92 | 93 | } 94 | 95 | let stop = (member) => { 96 | 97 | if (!member || !member.voiceChannel) { 98 | return; 99 | } 100 | 101 | console.log("Stopping..."); 102 | 103 | if (voiceReceivers.get(member.voiceChannelID)) { 104 | voiceReceivers.get(member.voiceChannelID).destroy(); 105 | voiceReceivers.delete(member.voiceChannelID); 106 | voiceConnections.get(member.voiceChannelID).disconnect(); 107 | voiceConnections.delete(member.voiceChannelID); 108 | } 109 | 110 | } 111 | 112 | let save = (inputPath, filename, member) => { 113 | 114 | let encoder = new opus.OpusEncoder(rate, channels); 115 | const inputStream = fs.createReadStream(inputPath); 116 | const outputStream = fs.createWriteStream(filename); 117 | let data = ''; 118 | 119 | inputStream.on('data', chunk => { 120 | data += chunk.toString(); 121 | const frames = data.split(','); 122 | if (frames.length) { 123 | data = frames.pop(); 124 | } 125 | for (let frame of frames) { 126 | if (frame !== '') { 127 | const decodedBuffer = getDecodedFrame(frame, encoder, filename); 128 | if (decodedBuffer) { 129 | outputStream.write(decodedBuffer); 130 | } 131 | } 132 | } 133 | }); 134 | 135 | inputStream.on('end', () => { 136 | outputStream.end((err) => { 137 | if (err) { 138 | console.error(err); 139 | } else { 140 | let py = child.spawn('python3', [pythonapp, 'open', filename, ""+member.id, ""+Date.now()]); 141 | py.stdout.on('data', function(data) { 142 | let content = data.toString().trim(); 143 | console.log(content); 144 | if (content.startsWith("msg")) { 145 | console.log(">> sending message"); 146 | textChannel.send(content.slice(4)); 147 | } else if (content.startsWith("play")) { 148 | console.log(">> playing audio"); 149 | playFile(member, "./" + datapath + "/" + content.slice(5)); 150 | } 151 | }); 152 | } 153 | }); 154 | }); 155 | 156 | }; 157 | 158 | let getDecodedFrame = (frameString, encoder, filename) => { 159 | let buffer = Buffer.from(frameString, 'hex'); 160 | try { 161 | buffer = encoder.decode(buffer, frame_size); 162 | } catch (err) { 163 | try { 164 | buffer = encoder.decode(buffer.slice(8), frame_size); 165 | } catch (err) { 166 | console.log(`${filename} was unable to be decoded`); 167 | return null; 168 | } 169 | } 170 | return buffer; 171 | }; 172 | 173 | let playFile = (member, filename) => { 174 | let voiceConnection = voiceConnections.get(member.voiceChannelID) 175 | let disp = voiceConnection.playFile(filename); 176 | disp.on("end", (end) => {}); 177 | } 178 | 179 | client.login(token).catch(console.error); 180 | -------------------------------------------------------------------------------- /package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "Recording Bot", 3 | "version": "1.0.0", 4 | "description": "A bot built to record and transcribe audio fragments from Discord.", 5 | "main": "main.js", 6 | "scripts": { 7 | "test": "echo \"Error: no test specified\" && exit 1" 8 | }, 9 | "repository": { 10 | "type": "git", 11 | "url": "git+https://github.com/sshh12/Recording-Bot/" 12 | }, 13 | "author": "shrivu1122", 14 | "license": "MIT", 15 | "bugs": { 16 | "url": "https://github.com/sshh12/Recording-Bot/issues" 17 | }, 18 | "homepage": "https://github.com/sshh12/Recording-Bot/tree/master#recording-bot", 19 | "dependencies": { 20 | "discord.js": "github:hydrabolt/discord.js", 21 | "node-opus": "^0.2.4" 22 | } 23 | } 24 | -------------------------------------------------------------------------------- /recordingbot/__init__.py: -------------------------------------------------------------------------------- 1 | import speech_recognition as sr 2 | import wave 3 | import sys 4 | import os 5 | 6 | class Bot(object): 7 | 8 | def __init__(self, datapath, voiceapi_cred_file, frame_threshold=60000, clearfiles=True): 9 | """ 10 | Initialize a bot. 11 | 12 | Parameters 13 | ---------- 14 | datapath : str 15 | Path for voice files 16 | voiceapi_cred_file : str 17 | Path for cred json 18 | frame_threshold : int 19 | Number a frames and audio file must be to be considered a voice 20 | clearfiles : bool 21 | True will clear files after each run 22 | """ 23 | self.datapath = datapath 24 | self.clearfiles = clearfiles 25 | 26 | with open(voiceapi_cred_file, 'r') as cred: 27 | self.API_JSON = cred.read() 28 | 29 | self.frame_threshold = frame_threshold 30 | 31 | def message(self, msg): 32 | """Sends a message in the Discord text channel.""" 33 | print("msg:" + msg) 34 | 35 | def play(self, fn): 36 | """Plays an audio file into the Discord voice channel.""" 37 | print("play:" + fn) 38 | 39 | def run(self): 40 | """Converts input to .wav and runs the bot's process.""" 41 | if len(sys.argv) == 5: 42 | 43 | pcmfn = sys.argv[2] 44 | opusfn = pcmfn.replace(".pcm_raw", ".opus_hex") 45 | wavefn = os.path.join(self.datapath, sys.argv[4] + '.wav') 46 | 47 | memberid = sys.argv[3] 48 | timestamp = sys.argv[4] 49 | 50 | with open(pcmfn, 'rb') as pcm: 51 | pcmdata = pcm.read() 52 | 53 | with wave.open(wavefn, 'wb') as wavfile: # Converts pcm to wave 54 | wavfile.setparams((2, 2, 48000, 0, 'NONE', 'NONE')) 55 | wavfile.writeframes(pcmdata) 56 | frames = wavfile.getnframes() 57 | 58 | if frames > self.frame_threshold: # Checks for minimum time requirement 59 | 60 | r = sr.Recognizer() 61 | with sr.AudioFile(wavefn) as source: 62 | audio = r.record(source) 63 | result = r.recognize_google_cloud(audio, credentials_json=self.API_JSON).strip() 64 | 65 | try: 66 | self.process(result, memberid, timestamp, wavefn) 67 | except Exception as e: 68 | print(e) 69 | 70 | if self.clearfiles: 71 | os.remove(pcmfn) 72 | os.remove(wavefn) 73 | 74 | else: 75 | raise Exception("Bot must be run with commands passed from main.js") 76 | 77 | def process(self, text, memberid, timestamp, wavefn): # Override 78 | """Does something once the file has been converted.""" 79 | pass 80 | 81 | 82 | 83 | 84 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | SpeechRecognition==3.6.5 2 | -------------------------------------------------------------------------------- /run.bat: -------------------------------------------------------------------------------- 1 | node main.js 2 | pause --------------------------------------------------------------------------------