├── annotationbot.py └── readme.md /annotationbot.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # pylint: disable=C0116 3 | # This program is dedicated to the public domain under the CC0 license. 4 | # author: M. yusuf Sarıgöz - github.com/monatis 5 | 6 | """ 7 | This simple Telegram bot is intended to varify ASR dataset annotations on Telegram. 8 | You need to obtain your own API token from Bot Father on Telegram and make a few adjustments in the capitalized variables below. 9 | """ 10 | 11 | import logging 12 | import os 13 | from typing import Any, Dict, List 14 | 15 | from telegram import ReplyKeyboardMarkup, ReplyKeyboardRemove, Update 16 | from telegram.ext import ( 17 | Updater, 18 | CommandHandler, 19 | MessageHandler, 20 | Filters, 21 | ConversationHandler, 22 | CallbackContext, 23 | ) 24 | 25 | TOKEN = "" 26 | BASE_DIR = '/path/to/main/directory/holding/your/dataset' # change if necessary 27 | OPUS_DIR = os.path.join(BASE_DIR, 'opus') # Telegram expects voice files in Opus format 28 | METADATA_FILE = os.path.join(BASE_DIR, 'metadata.csv') # file that contains annotations in ljspeech 1.1 format. 29 | CORRECT_METADATA_FILE = os.path.join(BASE_DIR, 'correct_metadata.csv') # file to be created to write varified annotations. 30 | START_BTN_TEXT = "Let's get started! 🚀" # change if necessary 31 | CORRECT_BTN_TEXT = "Correct! 👍" # change if necessary 32 | SKIP_BTN_TEXT = "Skip! ⏩" # change if necessary 33 | HELP_TEXT = """Hello! I'm here to help you varify some voice annotations for automatic speech recognition (ASR) training). 34 | I'll send you voice files with their transcripts. You're supposed to listen to it and varify if it is correctly annotated with a single tap of a button. 35 | If it's not correct, then you can type the correct annotation. 36 | If you cannot hear what is said in the voice, you may use the skip button to go to the next one. 37 | """ 38 | 39 | 40 | # Enable logging 41 | logging.basicConfig( 42 | format='%(asctime)s - %(name)s - %(levelname)s - %(message)s', level=logging.INFO 43 | ) 44 | 45 | 46 | logger = logging.getLogger(__name__) 47 | 48 | SHOW_HELP, ASK_TRANSCRIPT = range(2) 49 | 50 | def start(update: Update, context: CallbackContext) -> int: 51 | reply_keyboard = [[START_BTN_TEXT]] 52 | 53 | update.message.reply_text( 54 | HELP_TEXT, 55 | reply_markup=ReplyKeyboardMarkup(reply_keyboard, one_time_keyboard=True), 56 | ) 57 | 58 | return SHOW_HELP 59 | 60 | 61 | def ask_transcript(update: Update, context: CallbackContext) -> int: 62 | user = update.message.from_user 63 | msg = update.message.text 64 | logger.info("%s: '%s'", user.first_name, msg) 65 | if msg != START_BTN_TEXT: 66 | # parse user message and correct annotation accordingly 67 | id = context.bot_data['cur_id'] 68 | annotation = context.bot_data['annotations'][id] 69 | out_file = open(CORRECT_METADATA_FILE, 'a+', encoding='utf8') 70 | if msg == CORRECT_BTN_TEXT: 71 | out_file.write("{}|{}\n".format(annotation['file'], annotation['text'])) 72 | elif msg == SKIP_BTN_TEXT: 73 | logger.debug("{} skipped by {}".format(annotation['file'], user)) 74 | else: 75 | out_file.write("{}|{}\n".format(annotation['file'], msg)) 76 | 77 | id += 1 78 | context.bot_data['cur_id'] = id 79 | out_file.close() 80 | 81 | send_annotation(update, context.bot_data['annotations'][context.bot_data['cur_id']]) 82 | 83 | return ASK_TRANSCRIPT 84 | 85 | def send_annotation(update: Update, annotation: Dict[str, Any]) -> None: 86 | reply_keyboard = [[CORRECT_BTN_TEXT, SKIP_BTN_TEXT]] 87 | 88 | with open(os.path.join(OPUS_DIR, annotation['file'] + ".opus"), 'rb') as opus_file: 89 | update.message.reply_voice( 90 | opus_file, 91 | filename=annotation['file'], 92 | caption=annotation['text'], 93 | reply_markup=ReplyKeyboardMarkup(reply_keyboard, one_time_keyboard=True) 94 | ) 95 | 96 | 97 | def cancel(update: Update, _: CallbackContext) -> int: 98 | user = update.message.from_user 99 | logger.info("User %s canceled the conversation.", user.first_name) 100 | update.message.reply_text( 101 | 'Bye! I hope we can talk again some day.', reply_markup=ReplyKeyboardRemove() 102 | ) 103 | 104 | return ConversationHandler.END 105 | 106 | 107 | def main(annotations: List[Dict[str, Any]]) -> None: 108 | # Create the Updater and pass it your bot's token. 109 | updater = Updater(TOKEN) 110 | 111 | # Get the dispatcher to register handlers 112 | dispatcher = updater.dispatcher 113 | 114 | # Add conversation handler with the states GENDER, PHOTO, LOCATION and BIO 115 | conv_handler = ConversationHandler( 116 | entry_points=[CommandHandler('start', start)], 117 | states={ 118 | SHOW_HELP: [MessageHandler(Filters.regex('^Hadi başlayalım!$'), ask_transcript)], 119 | ASK_TRANSCRIPT: [MessageHandler(Filters.regex('.*'), ask_transcript)], 120 | }, 121 | fallbacks=[CommandHandler('cancel', cancel)], 122 | ) 123 | 124 | dispatcher.add_handler(conv_handler) 125 | dispatcher.bot_data['annotations'] = annotations 126 | dispatcher.bot_data['cur_id'] = 0 127 | 128 | # Start the Bot 129 | updater.start_polling() 130 | 131 | # Run the bot until you press Ctrl-C or the process receives SIGINT, 132 | # SIGTERM or SIGABRT. This should be used most of the time, since 133 | # start_polling() is non-blocking and will stop the bot gracefully. 134 | updater.idle() 135 | 136 | 137 | if __name__ == '__main__': 138 | 139 | 140 | try: 141 | with open(METADATA_FILE, 'r', encoding='utf8') as csv_file: 142 | annotations = csv_file.readlines() 143 | annotations = [{"file": annotation.split('|')[0], "text": annotation.split('|')[1]} for annotation in annotations] 144 | main(annotations) 145 | 146 | except OSError as err: 147 | logger.error(f"Unable to open metadata file. Searched in {METADATA_FILE}.\n\n" + str(err)) 148 | -------------------------------------------------------------------------------- /readme.md: -------------------------------------------------------------------------------- 1 | # asr-annotation-bot 2 | 3 | Simple Telegram bot to varify Automatic Speech Recognition (ASR) dataset annotations 4 | 5 | ## Motivation 6 | 7 | I know that there exist a great number of annotation tools including the ones written by me, but I feel tired from configuring them and telling annotators how to use them. So I wanted to use an interface that is familiar and accessible to anyone and anytime. And, Telegram did a great job. It didn't take even 20 mins to write the whole code (in fact, it was shorter than the time spent to publish this repo). I'm sharing this simple code hoping that it may be an inspiration for others to develop Telegram bots to annotate data for other machine learning tasks. 8 | ## How to use 9 | The only dependency is `python-telegram-bot` which is pip-installable: 10 | ```shell 11 | pip install python-telegram-bot 12 | ``` 13 | 14 | the code is simple and self-explanatory with short and useful comments. Basically you need to obtain an API token from Bot Father on Telegram and update `TOKEN` variable with that one in `annotationbot.py`. Second, prepare your dataset in a LJSpeech-like format with a few changes: 15 | - Audio files should be kept in Opus format as Telegram excepts voice files in this format. 16 | - `metadata.csv` file should contain one sample on a line with file name and unvarified annotation separated with a single pipe character (|). 17 | 18 | After everythin is ready, you can simply run: 19 | ```shell 20 | python annotationbot.py 21 | ``` 22 | 23 | Go to Telegram and send `/start` to start talking to your bot. 24 | 25 | ## See also 26 | You may also want to take a look at my other audio annotation tool called [label-snd](https://github.com/monatis/label-snd). --------------------------------------------------------------------------------