├── .gitignore ├── Dockerfile ├── README.md ├── bot ├── __init__.py ├── __main__.py ├── clip.py ├── ffmpeg_runner.py ├── status.py └── utils.py └── requirements.txt /.gitignore: -------------------------------------------------------------------------------- 1 | .idea/ 2 | **/__pycache__/ 3 | __pycache__/ 4 | *.exe 5 | outputs/ -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | FROM python:3.10-alpine 2 | 3 | WORKDIR /usr/src/app 4 | COPY requirements.txt . 5 | RUN apk update && \ 6 | apk add ffmpeg && \ 7 | pip install --no-cache-dir -r requirements.txt 8 | COPY . . 9 | 10 | CMD ["python","-m", "bot"] -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # youtube-clipper-bot 2 | Telegram bot to clip youtube videos 3 | # How to deploy? 4 | Create a file called **config.env** 5 | **BOT_TOKEN**: Provide your bot token generated by talking to @Botfather 6 | To host, run: 7 | ```shell 8 | docker build . -t bot 9 | docker run bot 10 | ``` 11 | -------------------------------------------------------------------------------- /bot/__init__.py: -------------------------------------------------------------------------------- 1 | from telegram.ext import Updater, Dispatcher 2 | import logging 3 | from dotenv import load_dotenv 4 | import os 5 | 6 | 7 | logging.basicConfig(format='%(asctime)s - %(name)s - %(levelname)s - %(message)s', 8 | level=logging.INFO) 9 | load_dotenv('config.env') 10 | 11 | try: 12 | BOT_TOKEN = os.environ["BOT_TOKEN"] 13 | except KeyError: 14 | logging.error("Bot token not provided!") 15 | BOT_TOKEN = None 16 | quit() 17 | 18 | updater = Updater(token=BOT_TOKEN) 19 | dispatcher: Dispatcher = updater.dispatcher 20 | -------------------------------------------------------------------------------- /bot/__main__.py: -------------------------------------------------------------------------------- 1 | from bot import updater, dispatcher 2 | from telegram.ext import CommandHandler 3 | from telegram.update import Update 4 | from telegram.ext import CallbackContext 5 | import bot.clip as clip 6 | 7 | 8 | def start(update: Update, context: CallbackContext): 9 | context.bot.send_message(update.message.chat_id, text="send /clip youtube-link 3:20 4:25 to clip a youtube video " 10 | "where 3:20 is the start time of the clip and 4:25 is the " 11 | "end time") 12 | 13 | 14 | start_handler = CommandHandler('start', start, run_async=True) 15 | dispatcher.add_handler(start_handler) 16 | clip.add_handler() 17 | updater.start_polling() 18 | -------------------------------------------------------------------------------- /bot/clip.py: -------------------------------------------------------------------------------- 1 | from telegram.update import Update 2 | from telegram import Message 3 | from telegram.error import BadRequest 4 | from telegram.ext import CallbackContext, CommandHandler 5 | import subprocess 6 | from bot.ffmpeg_runner import FfmpegRunner 7 | from bot.status import ClipStatus 8 | import bot.utils as utils 9 | import youtube_dl 10 | from typing import Optional 11 | import os 12 | from bot import dispatcher, updater 13 | import logging 14 | 15 | 16 | def clip(update: Update, context: CallbackContext): 17 | splits = update.message.text.split(' ') 18 | try: 19 | youtube_link = splits[1] 20 | except KeyError: 21 | context.bot.send_message(update.message.chat_id, text="Youtube link not provided") 22 | return 23 | try: 24 | start_time = splits[2] 25 | duration = splits[3] 26 | except KeyError: 27 | return context.bot.send_message(update.message.chat_id, text="Start or end time not provided", 28 | reply_to_message_id=update.message.message_id) 29 | uid = f'{update.message.chat_id}-{update.message.message_id}' 30 | 31 | status = ClipStatus(uid, "Extracting metadata", 0, '') 32 | message = context.bot.send_message(chat_id=update.message.chat_id, reply_to_message_id=update.message.message_id, 33 | text=utils.get_readable_message(status), parse_mode='html') 34 | r = subprocess.Popen(['youtube-dl', '--get-url', '--youtube-skip-dash-manifest', youtube_link], 35 | stdout=subprocess.PIPE) 36 | out, err = r.communicate() 37 | if r.returncode == 0: 38 | outs = out.split(str.encode('\n')) 39 | print(outs) 40 | youtube_video = outs[0].decode("utf-8") 41 | youtube_audio = outs[1].decode("utf-8") 42 | else: 43 | context.bot.delete_message(chat_id=update.message.chat_id, message_id=update.message.message_id) 44 | context.bot.send_message(chat_id=update.message.chat_id, text="Cannot extract url from youtube-dl! " 45 | "Try again later", 46 | reply_to_message_id=update.message.message_id) 47 | return 48 | with youtube_dl.YoutubeDL() as ydl: 49 | info = ydl.extract_info(youtube_link, download=False) 50 | if 'entries' in info: 51 | return context.bot.send_message(update.message.chat_id, text="Playlists cannot be clipped") 52 | status.name = os.path.basename(ydl.prepare_filename(info)) 53 | status.status = "Clipping video" 54 | update_progress(status, context, message) 55 | video_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), os.pardir, 'outputs', 56 | f'{update.message.chat_id}-{update.message.message_id}.mp4') 57 | print(video_path) 58 | ffmpeg_cmd = ['ffmpeg', '-ss', f'{start_time}', '-i', f'{youtube_video}', '-ss', f'{start_time}', '-i', 59 | youtube_audio, '-t', f'{duration}', '-map', '0:v', '-map', '1:a', 60 | '-c:v', 'libx264', '-c:a', 'aac', 61 | video_path] 62 | runner = FfmpegRunner(cmd=ffmpeg_cmd) 63 | 64 | def on_clip_complete(): 65 | status.status = utils.Status.STATUS_UPLOADING 66 | update_progress(status=status, context=context, message=message) 67 | with open(video_path, 'rb') as file: 68 | context.bot.send_video(chat_id=update.message.chat_id, video=file, filename=status.name + '.mp4', 69 | reply_to_message_id=update.message.message_id, supports_streaming=True) 70 | context.bot.delete_message(message.chat_id, message.message_id) 71 | os.remove(video_path) 72 | 73 | def on_error(_, __): 74 | context.bot.delete_message(message.chat_id, message.message_id) 75 | context.bot.send_message(update.message.chat_id, text="An error occurred while clipping video!", 76 | reply_to_message_id=update.message.message_id) 77 | 78 | runner.run_command(on_clip_complete, on_error) 79 | # progress wont work as ffmpeg does not provides progress for http downloads 80 | # update_task = utils.SetInterval(4, update_progress, args=(status, context, message, runner)) 81 | # status.task = update_task 82 | 83 | 84 | def update_progress(status: ClipStatus, context: CallbackContext, message: Message): 85 | try: 86 | msg = utils.get_readable_message(status) 87 | context.bot.edit_message_text(text=msg, chat_id=message.chat_id, message_id=message.message_id, 88 | parse_mode='html') 89 | 90 | except BadRequest as e: 91 | logging.info(e) 92 | 93 | 94 | def add_handler(): 95 | clip_handler = CommandHandler('clip', clip, run_async=True) 96 | dispatcher.add_handler(clip_handler) 97 | -------------------------------------------------------------------------------- /bot/ffmpeg_runner.py: -------------------------------------------------------------------------------- 1 | import subprocess 2 | import re 3 | from typing import Generator, List, Callable, Optional 4 | import threading 5 | 6 | def to_ms(string: str = None, precision: int = None, **kwargs) -> float: 7 | """ 8 | Convert a string to milliseconds. 9 | You can either pass a string, or a set of keyword args ("hour", "min", "sec", "ms") to convert. 10 | If "precision" is set, the result is rounded to the number of decimals given. 11 | From: https://gist.github.com/Hellowlol/5f8545e999259b4371c91ac223409209 12 | """ 13 | if string: 14 | hour = int(string[0:2]) 15 | minute = int(string[3:5]) 16 | sec = int(string[6:8]) 17 | ms = int(string[10:11]) 18 | else: 19 | hour = int(kwargs.get("hour", 0)) 20 | minute = int(kwargs.get("min", 0)) 21 | sec = int(kwargs.get("sec", 0)) 22 | ms = int(kwargs.get("ms", 0)) 23 | 24 | result = (hour * 60 * 60 * 1000) + (minute * 60 * 1000) + (sec * 1000) + ms 25 | if precision and isinstance(precision, int): 26 | return round(result, precision) 27 | return result 28 | 29 | 30 | class FfmpegRunner: 31 | class CommandNotRunException(Exception): 32 | """ 33 | Raised when get_progress method is called before calling run_command (You cannot get progress until you 34 | run the command). Call the run_command method first 35 | """ 36 | 37 | DUR_REGEX = re.compile( 38 | r"Duration: (?P\d{2}):(?P\d{2}):(?P\d{2})\.(?P\d{2})" 39 | ) 40 | TIME_REGEX = re.compile( 41 | r"out_time=(?P\d{2}):(?P\d{2}):(?P\d{2})\.(?P\d{2})" 42 | ) 43 | 44 | def __init__(self, cmd: List[str], dry_run=False) -> None: 45 | """Initialize the FfmpegProgress class. 46 | 47 | Args: 48 | cmd (List[str]): A list of command line elements, e.g. ["ffmpeg", "-i", ...] 49 | dry_run (bool, optional): Only show what would be done. Defaults to False. 50 | """ 51 | self.cmd = ( 52 | [cmd[0]] 53 | + ["-progress", "-", "-nostats"] 54 | + cmd[1:] 55 | ) 56 | self.dry_run = dry_run 57 | self.stderr = None 58 | self.p: Optional[subprocess.Popen] = None 59 | self.total_dur = None 60 | 61 | def run_command(self, on_done: Callable, on_error: Callable[[str, int], None], popen_kwargs: dict = None) -> None: 62 | """ 63 | Run the ffmpeg command, subsequent progress can be captured by calling get_progress method Args: :param 64 | popen_kwargs: A dict to specify extra arguments to the popen call, e.g. { creationflags: CREATE_NO_WINDOW } 65 | :param on_done: A function which will be called when the task is completed 66 | :param on_error: A function which will be called when the task results in an error. 67 | A error string and return code is passed while calling 68 | """ 69 | if popen_kwargs is None: 70 | popen_kwargs = {} 71 | self.p = subprocess.Popen( 72 | self.cmd, 73 | stdin=subprocess.PIPE, # Apply stdin isolation by creating separate pipe. 74 | stdout=subprocess.PIPE, 75 | stderr=subprocess.STDOUT, 76 | universal_newlines=False, 77 | **popen_kwargs, 78 | ) 79 | def check_for_exit(): 80 | print('kkek') 81 | nonlocal on_done, on_error 82 | while self.p.poll() is None: 83 | continue 84 | if self.p.returncode != 0: 85 | return on_error(f"Error running command {self.cmd}", self.p.returncode) 86 | on_done() 87 | threading.Thread(target=check_for_exit).start() 88 | 89 | def get_progress(self) -> float: 90 | stderr = [] 91 | if self.p is None: 92 | raise FfmpegRunner.CommandNotRunException() 93 | if self.p.stdout is None: 94 | return 0 95 | stderr_line = self.p.stdout.readline().decode("utf-8", errors="replace").strip() 96 | if stderr_line == "" and self.p.poll() is not None and self.p.returncode == 0: 97 | return 100 98 | 99 | self.stderr = "\n".join(stderr) 100 | 101 | total_dur_match = self.DUR_REGEX.search(stderr_line) 102 | if total_dur_match: 103 | total_dur = total_dur_match.groupdict() 104 | else: 105 | return 0 106 | total_dur = to_ms(**total_dur) 107 | progress_time = self.TIME_REGEX.search(stderr_line) 108 | if progress_time: 109 | elapsed_time = to_ms(**progress_time.groupdict()) 110 | else: 111 | return 0 112 | return round((elapsed_time / total_dur) * 100, 2) 113 | 114 | def run_command_with_progress(self, popen_kwargs: dict = None) -> Generator[int, None, None]: 115 | """ 116 | Run an ffmpeg command, trying to capture the process output and calculate 117 | the duration / progress. 118 | Yields the progress in percent. 119 | 120 | Args: 121 | popen_kwargs (dict): A dict to specify extra arguments to the popen call, e.g. { creationflags: CREATE_NO_WINDOW } 122 | """ 123 | if popen_kwargs is None: 124 | popen_kwargs = {} 125 | if self.dry_run: 126 | return 127 | 128 | total_dur = None 129 | 130 | stderr = [] 131 | 132 | self.p = subprocess.Popen( 133 | self.cmd, 134 | stdin=subprocess.PIPE, # Apply stdin isolation by creating separate pipe. 135 | stdout=subprocess.PIPE, 136 | stderr=subprocess.STDOUT, 137 | universal_newlines=False, 138 | **popen_kwargs 139 | ) 140 | 141 | yield 0 142 | 143 | while True: 144 | if self.p is None or self.p.stdout is None: 145 | continue 146 | 147 | stderr_line = self.p.stdout.readline().decode("utf-8", errors="replace").strip() 148 | 149 | if stderr_line == "" and self.p.poll() is not None: 150 | break 151 | 152 | stderr.append(stderr_line.strip()) 153 | 154 | self.stderr = "\n".join(stderr) 155 | 156 | total_dur_match = self.DUR_REGEX.search(stderr_line) 157 | if total_dur is None and total_dur_match: 158 | total_dur = total_dur_match.groupdict() 159 | total_dur = to_ms(**total_dur) 160 | continue 161 | if total_dur: 162 | progress_time = self.TIME_REGEX.search(stderr_line) 163 | if progress_time: 164 | elapsed_time = to_ms(**progress_time.groupdict()) 165 | yield int(elapsed_time / total_dur * 100) 166 | 167 | if self.p.returncode != 0: 168 | raise RuntimeError( 169 | "Error running command {}: {}".format(self.cmd, str("\n".join(stderr))) 170 | ) 171 | 172 | yield 100 173 | -------------------------------------------------------------------------------- /bot/status.py: -------------------------------------------------------------------------------- 1 | from typing import Optional 2 | 3 | 4 | class ClipStatus: 5 | uuid: str 6 | status: str 7 | progress: float 8 | name: str 9 | 10 | def __init__(self, uuid: str, status: str, progress: float, name: str, task=None): 11 | self.uuid = uuid 12 | self.status = status 13 | self.progress = progress 14 | self.name = name 15 | self.task = task 16 | -------------------------------------------------------------------------------- /bot/utils.py: -------------------------------------------------------------------------------- 1 | import threading 2 | import time 3 | from bot.status import ClipStatus 4 | 5 | class Status: 6 | STATUS_UPLOADING = "Uploading" 7 | STATUS_CLIPPING = "Clipping" 8 | STATUS_METADATA = "Extracting metadata" 9 | 10 | 11 | def get_readable_file_size(size_in_bytes) -> str: 12 | size_units = ['B', 'KB', 'MB', 'GB', 'TB', 'PB'] 13 | if size_in_bytes is None: 14 | return '0B' 15 | index = 0 16 | while size_in_bytes >= 1024: 17 | size_in_bytes /= 1024 18 | index += 1 19 | try: 20 | return f'{round(size_in_bytes, 2)}{size_units[index]}' 21 | except IndexError: 22 | return 'File too large' 23 | 24 | 25 | def get_progress_bar_string(progress: float) -> str: 26 | PROGRESS_MAX_SIZE = 100 // 8 27 | PROGRESS_INCOMPLETE = ['▏', '▎', '▍', '▌', '▋', '▊', '▉'] 28 | p = round(progress) 29 | cFull = p // 8 30 | cPart = p % 8 - 1 31 | p_str = '█' * cFull 32 | if cPart >= 0: 33 | p_str += PROGRESS_INCOMPLETE[cPart] 34 | p_str += ' ' * (PROGRESS_MAX_SIZE - cFull) 35 | p_str = f"[{p_str}]" 36 | return p_str 37 | 38 | 39 | class SetInterval: 40 | def __init__(self, interval, action, args=()): 41 | self.interval = interval 42 | self.action = action 43 | self.stopEvent = threading.Event() 44 | self.args = args 45 | thread = threading.Thread(target=self.__setInterval) 46 | thread.start() 47 | 48 | def __setInterval(self): 49 | nextTime = time.time() + self.interval 50 | while not self.stopEvent.wait(nextTime - time.time()): 51 | nextTime += self.interval 52 | self.action(*self.args) 53 | 54 | def cancel(self): 55 | self.stopEvent.set() 56 | 57 | 58 | def get_readable_message(status: ClipStatus) -> str: 59 | msg = "" 60 | msg += f"{status.name} \n" 61 | msg += f"{status.status}\n" 62 | return msg 63 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | python-telegram-bot>=13.7,<14.0 2 | python-dotenv>=0.19.1,<1.0 3 | ffmpeg-python>=0.2.0,<1.0.0 4 | youtube-dl 5 | ffmpeg-progress-yield>=0.1.2,<1.0.0 --------------------------------------------------------------------------------