├── annotationbot.py
└── readme.md


/annotationbot.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python
  2 | # pylint: disable=C0116
  3 | # This program is dedicated to the public domain under the CC0 license.
  4 | # author: M. yusuf Sarıgöz - github.com/monatis
  5 | 
  6 | """
  7 | This simple Telegram bot is intended to verify ASR dataset annotations on Telegram.
  8 | You need to obtain your own API token from Bot Father on Telegram and make a few adjustments in the capitalized variables below.
  9 | """
 10 | 
 11 | import logging
 12 | import os
 13 | from typing import Any, Dict, List
 14 | 
 15 | from telegram import ReplyKeyboardMarkup, ReplyKeyboardRemove, Update
 16 | from telegram.ext import (
 17 |     Updater,
 18 |     CommandHandler,
 19 |     MessageHandler,
 20 |     Filters,
 21 |     ConversationHandler,
 22 |     CallbackContext,
 23 | )
 24 | 
 25 | TOKEN = "<change_this_with_yours>"
 26 | BASE_DIR = '/path/to/main/directory/holding/your/dataset' # change if necessary
 27 | OPUS_DIR = os.path.join(BASE_DIR, 'opus') # Telegram expects voice files in Opus format
 28 | METADATA_FILE = os.path.join(BASE_DIR, 'metadata.csv') # file that contains annotations in ljspeech 1.1 format.
 29 | CORRECT_METADATA_FILE = os.path.join(BASE_DIR, 'correct_metadata.csv') # file to be created to write varified annotations.
 30 | START_BTN_TEXT = "Let's get started! 🚀" # change if necessary
 31 | CORRECT_BTN_TEXT = "Correct! 👍" # change if necessary
 32 | SKIP_BTN_TEXT = "Skip! ⏩" # change if necessary
 33 | HELP_TEXT = """Hello! I'm here to help you varify some voice annotations for automatic speech recognition (ASR) training).
 34 | I'll send you voice files with their transcripts. You're supposed to listen to it and varify if it is correctly annotated with a single tap of a button.
 35 | If it's not correct, then you can type the correct annotation.
 36 | If you cannot hear what is said in the voice, you may use the skip button to go to the next one.
 37 | """
 38 | 
 39 | 
 40 | # Enable logging
 41 | logging.basicConfig(
 42 |     format='%(asctime)s - %(name)s - %(levelname)s - %(message)s', level=logging.INFO
 43 | )
 44 | 
 45 | 
 46 | logger = logging.getLogger(__name__)
 47 | 
 48 | SHOW_HELP, ASK_TRANSCRIPT = range(2)
 49 | 
 50 | def start(update: Update, context: CallbackContext) -> int:
 51 |     reply_keyboard = [[START_BTN_TEXT]]
 52 | 
 53 |     update.message.reply_text(
 54 |         HELP_TEXT,
 55 |         reply_markup=ReplyKeyboardMarkup(reply_keyboard, one_time_keyboard=True),
 56 |     )
 57 | 
 58 |     return SHOW_HELP
 59 | 
 60 | 
 61 | def ask_transcript(update: Update, context: CallbackContext) -> int:
 62 |     user = update.message.from_user
 63 |     msg = update.message.text
 64 |     logger.info("%s: '%s'", user.first_name, msg)
 65 |     if msg != START_BTN_TEXT:
 66 |         # parse user message and correct annotation accordingly
 67 |         id = context.bot_data['cur_id']
 68 |         annotation = context.bot_data['annotations'][id]
 69 |         out_file = open(CORRECT_METADATA_FILE, 'a+', encoding='utf8')
 70 |         if msg == CORRECT_BTN_TEXT:
 71 |             out_file.write("{}|{}\n".format(annotation['file'], annotation['text']))
 72 |         elif msg == SKIP_BTN_TEXT:
 73 |             logger.debug("{} skipped by {}".format(annotation['file'], user))
 74 |         else:
 75 |             out_file.write("{}|{}\n".format(annotation['file'], msg))
 76 | 
 77 |         id += 1
 78 |         context.bot_data['cur_id'] = id
 79 |         out_file.close()
 80 | 
 81 |     send_annotation(update, context.bot_data['annotations'][context.bot_data['cur_id']])
 82 |     
 83 |     return ASK_TRANSCRIPT
 84 | 
 85 | def send_annotation(update: Update, annotation: Dict[str, Any]) -> None:
 86 |     reply_keyboard = [[CORRECT_BTN_TEXT, SKIP_BTN_TEXT]]
 87 |     
 88 |     with open(os.path.join(OPUS_DIR, annotation['file'] + ".opus"), 'rb') as opus_file:
 89 |         update.message.reply_voice(
 90 |             opus_file,
 91 |             filename=annotation['file'],
 92 |             caption=annotation['text'],
 93 |             reply_markup=ReplyKeyboardMarkup(reply_keyboard, one_time_keyboard=True)
 94 |         )
 95 |     
 96 | 
 97 | def cancel(update: Update, _: CallbackContext) -> int:
 98 |     user = update.message.from_user
 99 |     logger.info("User %s canceled the conversation.", user.first_name)
100 |     update.message.reply_text(
101 |         'Bye! I hope we can talk again some day.', reply_markup=ReplyKeyboardRemove()
102 |     )
103 | 
104 |     return ConversationHandler.END
105 | 
106 | 
def main(annotations: List[Dict[str, Any]]) -> None:
    """Set up the bot, register the conversation and poll until interrupted.

    Args:
        annotations: Samples to verify, each a mapping with ``'file'`` and
            ``'text'`` keys. Stored in ``bot_data['annotations']``; the
            current position starts at 0 in ``bot_data['cur_id']``.
    """
    import re  # local import: only needed to build the start-button pattern

    # Create the Updater and pass it your bot's token.
    updater = Updater(TOKEN)

    # Get the dispatcher to register handlers
    dispatcher = updater.dispatcher

    # Derive the pattern from START_BTN_TEXT so that changing the button label
    # cannot leave the conversation stuck in the SHOW_HELP state.
    start_pattern = '^' + re.escape(START_BTN_TEXT) + '$'

    # Add conversation handler with the states SHOW_HELP and ASK_TRANSCRIPT
    conv_handler = ConversationHandler(
        entry_points=[CommandHandler('start', start)],
        states={
            SHOW_HELP: [MessageHandler(Filters.regex(start_pattern), ask_transcript)],
            # Exclude commands so that /cancel reaches the fallback instead of
            # being stored as a corrected transcript.
            ASK_TRANSCRIPT: [MessageHandler(Filters.text & ~Filters.command, ask_transcript)],
        },
        fallbacks=[CommandHandler('cancel', cancel)],
    )

    dispatcher.add_handler(conv_handler)
    dispatcher.bot_data['annotations'] = annotations
    dispatcher.bot_data['cur_id'] = 0

    # Start the Bot
    updater.start_polling()

    # Run the bot until you press Ctrl-C or the process receives SIGINT,
    # SIGTERM or SIGABRT. This should be used most of the time, since
    # start_polling() is non-blocking and will stop the bot gracefully.
    updater.idle()
135 | 
136 | 
def load_annotations(path: str) -> List[Dict[str, Any]]:
    """Parse a pipe-separated metadata file into annotation dicts.

    Each non-blank line must hold at least two ``|``-separated fields; the
    first becomes ``'file'`` and the second ``'text'``. Line endings are
    removed so they do not leak into the transcript.

    Args:
        path: Path of the metadata file, read as UTF-8.

    Returns:
        One ``{"file": ..., "text": ...}`` dict per non-blank line, in file order.

    Raises:
        OSError: If the file cannot be opened or read.
        ValueError: If a non-blank line contains no ``|`` separator.
    """
    annotations = []
    with open(path, 'r', encoding='utf8') as csv_file:
        for line_no, raw_line in enumerate(csv_file, start=1):
            line = raw_line.rstrip('\r\n')
            if not line.strip():
                # Tolerate blank lines, e.g. a trailing newline at end of file.
                continue
            fields = line.split('|')
            if len(fields) < 2:
                raise ValueError(
                    "{}:{}: expected 'file|text', got {!r}".format(path, line_no, line)
                )
            annotations.append({"file": fields[0], "text": fields[1]})
    return annotations


if __name__ == '__main__':
    # Only loading is guarded: errors raised while the bot is running must not
    # be reported as a metadata-file problem.
    try:
        annotations = load_annotations(METADATA_FILE)
    except OSError as err:
        logger.error("Unable to open metadata file. Searched in %s.\n\n%s", METADATA_FILE, err)
    except ValueError as err:
        logger.error("Malformed metadata file: %s", err)
    else:
        main(annotations)
148 | 


--------------------------------------------------------------------------------
/readme.md:
--------------------------------------------------------------------------------
 1 | # asr-annotation-bot
 2 | 
 3 | Simple Telegram bot to verify Automatic Speech Recognition (ASR) dataset annotations
 4 | 
 5 | ## Motivation
 6 | 
 7 | I know that there exist a great number of annotation tools, including the ones written by me, but I am tired of configuring them and telling annotators how to use them. So I wanted to use an interface that is familiar and accessible to anyone at any time. And Telegram did a great job. It didn't take even 20 mins to write the whole code (in fact, it was shorter than the time spent to publish this repo). I'm sharing this simple code hoping that it may be an inspiration for others to develop Telegram bots to annotate data for other machine learning tasks.
 8 | ## How to use
 9 | The only dependency is `python-telegram-bot` which is pip-installable:
10 | ```shell
11 | pip install python-telegram-bot
12 | ```
13 | 
14 | The code is simple and self-explanatory with short and useful comments. Basically, you need to obtain an API token from Bot Father on Telegram and update the `TOKEN` variable in `annotationbot.py` with it. Second, prepare your dataset in an LJSpeech-like format with a few changes:
15 | - Audio files should be kept in Opus format as Telegram expects voice files in this format.
16 | - `metadata.csv` file should contain one sample per line, with the file name and the unverified annotation separated by a single pipe character (|).
17 | 
18 | After everything is ready, you can simply run:
19 | ```shell
20 | python annotationbot.py
21 | ```
22 | 
23 | Go to Telegram and send `/start` to start talking to your bot.
24 | 
25 | ## See also
26 | You may also want to take a look at my other audio annotation tool called [label-snd](https://github.com/monatis/label-snd).


--------------------------------------------------------------------------------