├── .gitignore
├── requirements.txt
├── Dockerfile
├── README.md
└── bot
├── status.py
├── __init__.py
├── __main__.py
├── utils.py
├── clip.py
└── ffmpeg_runner.py
/.gitignore:
--------------------------------------------------------------------------------
1 | .idea/
2 | **/__pycache__/
3 | __pycache__/
4 | *.exe
5 | outputs/
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | python-telegram-bot>=13.7,<14.0
2 | python-dotenv>=0.19.1,<1.0
3 | ffmpeg-python>=0.2.0,<1.0.0
4 | youtube-dl
5 | ffmpeg-progress-yield>=0.1.2,<1.0.0
--------------------------------------------------------------------------------
/Dockerfile:
--------------------------------------------------------------------------------
FROM python:3.10-alpine

WORKDIR /usr/src/app

# Copy the dependency list on its own first so the install layer below is
# reused from cache until requirements.txt changes.
COPY requirements.txt .

# --no-cache fetches the package index on the fly and does not keep it in the
# image layer, so no separate "apk update" step is needed.
RUN apk add --no-cache ffmpeg && \
    pip install --no-cache-dir -r requirements.txt

COPY . .

CMD ["python", "-m", "bot"]
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # youtube-clipper-bot
2 | Telegram bot to clip youtube videos
3 | # How to deploy?
4 | Create a file called **config.env** in the project root (next to the Dockerfile) containing the following variable:
5 | **BOT_TOKEN**: your bot token, generated by talking to @BotFather
6 | To host, run:
7 | ```shell
8 | docker build . -t bot
9 | docker run bot
10 | ```
11 |
--------------------------------------------------------------------------------
/bot/status.py:
--------------------------------------------------------------------------------
1 | from typing import Optional
2 |
3 |
class ClipStatus:
    """Mutable record describing one clip request.

    Attributes:
        uuid: identifier supplied by the creator of the record.
        status: human-readable name of the current stage.
        progress: progress value supplied by the creator of the record.
        name: display name of the video being processed.
        task: optional handle attached to the request; ``None`` when unset.
    """

    uuid: str
    status: str
    progress: float
    name: str

    def __init__(self, uuid: str, status: str, progress: float, name: str, task=None):
        # Plain attribute storage; no validation or conversion is applied.
        self.uuid, self.status = uuid, status
        self.progress, self.name = progress, name
        self.task = task
16 |
--------------------------------------------------------------------------------
/bot/__init__.py:
--------------------------------------------------------------------------------
1 | from telegram.ext import Updater, Dispatcher
2 | import logging
3 | from dotenv import load_dotenv
4 | import os
5 |
6 |
logging.basicConfig(format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
                    level=logging.INFO)
# Settings live in config.env (see README); load them into the environment.
load_dotenv('config.env')

BOT_TOKEN = os.environ.get("BOT_TOKEN")
if not BOT_TOKEN:
    # A missing or empty token makes the bot unusable: stop with a non-zero
    # exit status. SystemExit is raised directly because quit() is a helper
    # injected by the `site` module for interactive sessions.
    logging.error("Bot token not provided!")
    raise SystemExit(1)

# Shared by the handler modules, which import these two names from `bot`.
updater = Updater(token=BOT_TOKEN)
dispatcher: Dispatcher = updater.dispatcher
20 |
--------------------------------------------------------------------------------
/bot/__main__.py:
--------------------------------------------------------------------------------
1 | from bot import updater, dispatcher
2 | from telegram.ext import CommandHandler
3 | from telegram.update import Update
4 | from telegram.ext import CallbackContext
5 | import bot.clip as clip
6 |
7 |
def start(update: Update, context: CallbackContext):
    """Reply to /start with a one-message usage hint for the /clip command.

    Args:
        update: the incoming Telegram update that triggered the command.
        context: handler context; its ``bot`` is used to send the reply.
    """
    # effective_chat is populated for edited messages too, where
    # update.message is None and update.message.chat_id would raise.
    context.bot.send_message(update.effective_chat.id,
                             text="send /clip youtube-link 3:20 4:25 to clip a youtube video "
                                  "where 3:20 is the start time of the clip and 4:25 is the "
                                  "end time")
12 |
13 |
# Register the command handlers, then start long polling.
start_handler = CommandHandler('start', start, run_async=True)
dispatcher.add_handler(start_handler)
clip.add_handler()
updater.start_polling()
# Keep the main thread parked until a stop signal arrives so the updater is
# shut down cleanly (e.g. on Ctrl+C or `docker stop`).
updater.idle()
18 |
--------------------------------------------------------------------------------
/bot/utils.py:
--------------------------------------------------------------------------------
1 | import threading
2 | import time
3 | from bot.status import ClipStatus
4 |
class Status:
    """Display labels for the stages a clip request goes through."""

    # Listed in the order the stages occur.
    STATUS_METADATA = "Extracting metadata"
    STATUS_CLIPPING = "Clipping"
    STATUS_UPLOADING = "Uploading"
9 |
10 |
def get_readable_file_size(size_in_bytes) -> str:
    """Format a byte count with the largest fitting binary unit.

    Args:
        size_in_bytes: size in bytes, or ``None``.

    Returns:
        The size rounded to two decimals followed by its unit (e.g.
        ``'1.5KB'``), ``'0B'`` for ``None``, or ``'File too large'`` when the
        value does not fit in the largest known unit (PB).
    """
    if size_in_bytes is None:
        return '0B'
    size = size_in_bytes
    # Walking the unit list bounds the loop, so an infinite size ends up as
    # "too large" instead of being divided forever.
    for unit in ('B', 'KB', 'MB', 'GB', 'TB', 'PB'):
        if size < 1024:
            return f'{round(size, 2)}{unit}'
        size /= 1024
    return 'File too large'
23 |
24 |
def get_progress_bar_string(progress: float) -> str:
    """Render a percentage as a fixed-width text progress bar.

    Each cell stands for 8 percent; the remainder (1-7 percent) is drawn with
    a partial-block glyph. The bar is always padded to the same width so the
    message does not jitter between updates.

    Args:
        progress: completion percentage; values outside 0-100 are clamped.

    Returns:
        The bar wrapped in square brackets.
    """
    CELL_PERCENT = 8
    # Cells needed to draw 100 percent: 12 full cells plus one partial cell.
    BAR_WIDTH = -(-100 // CELL_PERCENT)
    PROGRESS_INCOMPLETE = ['▏', '▎', '▍', '▌', '▋', '▊', '▉']

    percent = min(max(round(progress), 0), 100)
    full_cells, remainder = divmod(percent, CELL_PERCENT)
    bar = '█' * full_cells
    if remainder:
        bar += PROGRESS_INCOMPLETE[remainder - 1]
    return f"[{bar.ljust(BAR_WIDTH)}]"
37 |
38 |
class SetInterval:
    """Call ``action(*args)`` every ``interval`` seconds on a background thread.

    The thread is started by the constructor and keeps running until
    ``cancel()`` is called.
    """

    def __init__(self, interval, action, args=()):
        self.interval = interval
        self.action = action
        self.stopEvent = threading.Event()
        self.args = args
        threading.Thread(target=self.__run).start()

    def __run(self):
        # Deadlines advance by a fixed step, so time spent inside `action`
        # does not push later calls back.
        deadline = time.time() + self.interval
        while True:
            cancelled = self.stopEvent.wait(deadline - time.time())
            if cancelled:
                break
            deadline += self.interval
            self.action(*self.args)

    def cancel(self):
        """Stop scheduling further calls."""
        self.stopEvent.set()
56 |
57 |
def get_readable_message(status: ClipStatus) -> str:
    """Build the text of the status message for one clip request.

    The callers in bot.clip send this text with ``parse_mode='html'``, so
    the name and stage are HTML-escaped; otherwise a title containing ``&``
    or ``<`` is read as markup.

    Args:
        status: the request whose ``name`` and ``status`` are shown.

    Returns:
        The name and the current stage, one per line.
    """
    # Local import keeps this function self-contained.
    from html import escape

    # quote=False: only &, < and > need escaping outside of attribute values.
    name = escape(status.name, quote=False)
    stage = escape(status.status, quote=False)
    return f"{name} \n{stage}\n"
63 |
--------------------------------------------------------------------------------
/bot/clip.py:
--------------------------------------------------------------------------------
1 | from telegram.update import Update
2 | from telegram import Message
3 | from telegram.error import BadRequest
4 | from telegram.ext import CallbackContext, CommandHandler
5 | import subprocess
6 | from bot.ffmpeg_runner import FfmpegRunner
7 | from bot.status import ClipStatus
8 | import bot.utils as utils
9 | import youtube_dl
10 | from typing import Optional
11 | import os
12 | from bot import dispatcher, updater
13 | import logging
14 |
15 |
def _timestamp_to_seconds(timestamp: str) -> float:
    """Convert ``SS``, ``MM:SS`` or ``HH:MM:SS`` (fractions allowed) to seconds.

    Raises:
        ValueError: if the text is not a non-negative timestamp in one of
            those forms.
    """
    parts = timestamp.strip().split(':')
    if not 1 <= len(parts) <= 3:
        raise ValueError(f"Invalid timestamp: {timestamp!r}")
    seconds = 0.0
    for part in parts:
        # Plain decimal digits only: rejects signs, exponents, "nan", "inf".
        if not part.replace('.', '', 1).isdigit():
            raise ValueError(f"Invalid timestamp: {timestamp!r}")
        seconds = seconds * 60 + float(part)
    return seconds


def _remove_if_exists(path: str) -> None:
    """Delete ``path``; a file that is already gone is not an error."""
    try:
        os.remove(path)
    except FileNotFoundError:
        pass


def clip(update: Update, context: CallbackContext):
    """Handle ``/clip <youtube-link> <start> <end>``.

    Resolves the stream URLs with youtube-dl, cuts the requested range with
    ffmpeg into ``outputs/<chat_id>-<message_id>.mp4`` and replies with the
    resulting video. A status message is posted while the job runs and is
    removed when the job finishes or fails.

    Args:
        update: the incoming Telegram update carrying the command text.
        context: handler context; its ``bot`` is used for all replies.
    """
    # effective_message also covers edited messages, where update.message
    # is None.
    request = update.effective_message
    chat_id = request.chat_id
    reply_to = request.message_id

    # split() without a separator tolerates repeated spaces between arguments.
    splits = request.text.split()
    if len(splits) < 2:
        context.bot.send_message(chat_id, text="Youtube link not provided")
        return
    if len(splits) < 4:
        context.bot.send_message(chat_id, text="Start or end time not provided",
                                 reply_to_message_id=reply_to)
        return
    youtube_link = splits[1]

    try:
        start_seconds = _timestamp_to_seconds(splits[2])
        end_seconds = _timestamp_to_seconds(splits[3])
    except ValueError:
        context.bot.send_message(chat_id, text="Start and end time must look like 3:20 or 1:03:20",
                                 reply_to_message_id=reply_to)
        return
    if end_seconds <= start_seconds:
        context.bot.send_message(chat_id, text="End time must be after the start time",
                                 reply_to_message_id=reply_to)
        return
    # The user supplies an END time, while ffmpeg's -t option expects a
    # DURATION, so the difference is what gets passed on.
    start_time = f'{start_seconds:.3f}'
    duration = f'{end_seconds - start_seconds:.3f}'

    uid = f'{chat_id}-{reply_to}'
    status = ClipStatus(uid, utils.Status.STATUS_METADATA, 0, '')
    message = context.bot.send_message(chat_id=chat_id, reply_to_message_id=reply_to,
                                       text=utils.get_readable_message(status), parse_mode='html')

    def fail(text):
        # Drop the status message (not the user's request) and report why.
        context.bot.delete_message(chat_id=message.chat_id, message_id=message.message_id)
        context.bot.send_message(chat_id=chat_id, text=text, reply_to_message_id=reply_to)

    # "--" ends option parsing so a link starting with "-" cannot be read as a
    # youtube-dl option.
    r = subprocess.Popen(['youtube-dl', '--get-url', '--youtube-skip-dash-manifest', '--', youtube_link],
                         stdout=subprocess.PIPE)
    out, _ = r.communicate()
    urls = [line for line in out.decode("utf-8").splitlines() if line.strip()]
    logging.info("youtube-dl returned %d url(s)", len(urls))
    if r.returncode != 0 or not urls:
        fail("Cannot extract url from youtube-dl! Try again later")
        return
    youtube_video = urls[0]
    # When only one URL is printed (presumably a stream carrying both video
    # and audio) it is used for both ffmpeg inputs.
    youtube_audio = urls[1] if len(urls) > 1 else urls[0]

    with youtube_dl.YoutubeDL() as ydl:
        info = ydl.extract_info(youtube_link, download=False)
        if 'entries' in info:
            fail("Playlists cannot be clipped")
            return
        status.name = os.path.basename(ydl.prepare_filename(info))
    status.status = "Clipping video"
    update_progress(status, context, message)

    output_dir = os.path.join(os.path.dirname(os.path.abspath(__file__)), os.pardir, 'outputs')
    # The directory is git-ignored, so it may not exist on a fresh checkout.
    os.makedirs(output_dir, exist_ok=True)
    video_path = os.path.join(output_dir, f'{uid}.mp4')
    logging.info("Writing clip to %s", video_path)
    # -y: never stop to ask about overwriting; nothing answers on stdin.
    ffmpeg_cmd = ['ffmpeg', '-y', '-ss', start_time, '-i', youtube_video, '-ss', start_time, '-i',
                  youtube_audio, '-t', duration, '-map', '0:v', '-map', '1:a',
                  '-c:v', 'libx264', '-c:a', 'aac',
                  video_path]
    runner = FfmpegRunner(cmd=ffmpeg_cmd)

    def on_clip_complete():
        status.status = utils.Status.STATUS_UPLOADING
        update_progress(status=status, context=context, message=message)
        try:
            with open(video_path, 'rb') as file:
                context.bot.send_video(chat_id=chat_id, video=file, filename=status.name + '.mp4',
                                       reply_to_message_id=reply_to, supports_streaming=True)
            context.bot.delete_message(message.chat_id, message.message_id)
        finally:
            # Remove the clip even when the upload fails.
            _remove_if_exists(video_path)

    def on_error(_, __):
        try:
            fail("An error occurred while clipping video!")
        finally:
            # ffmpeg may have left a partial file behind.
            _remove_if_exists(video_path)

    runner.run_command(on_clip_complete, on_error)
    # Periodic progress updates are not scheduled: ffmpeg does not report
    # progress for http downloads.
82 |
83 |
def update_progress(status: ClipStatus, context: CallbackContext, message: Message):
    """Rewrite the status message so it shows the current state of ``status``.

    Args:
        status: the request whose readable form becomes the new text.
        context: handler context; its ``bot`` performs the edit.
        message: the previously sent status message to edit in place.
    """
    try:
        context.bot.edit_message_text(
            text=utils.get_readable_message(status),
            chat_id=message.chat_id,
            message_id=message.message_id,
            parse_mode='html',
        )
    except BadRequest as error:
        # A rejected edit is logged and otherwise ignored.
        logging.info(error)
92 |
93 |
def add_handler():
    """Register the asynchronous /clip command handler on the dispatcher."""
    dispatcher.add_handler(CommandHandler('clip', clip, run_async=True))
97 |
--------------------------------------------------------------------------------
/bot/ffmpeg_runner.py:
--------------------------------------------------------------------------------
1 | import subprocess
2 | import re
3 | from typing import Generator, List, Callable, Optional
4 | import threading
5 |
# HH:MM:SS with an optional fractional part after "." or ",".
_TIMESTAMP_RE = re.compile(r"(\d+):(\d{1,2}):(\d{1,2})(?:[.,](\d+))?")


def to_ms(string: str = None, precision: int = None, **kwargs) -> float:
    """
    Convert a timestamp to milliseconds.

    You can either pass a string of the form ``HH:MM:SS[.fraction]``, or a set
    of keyword args ("hour", "min", "sec", "ms") to convert.
    If "precision" is set, the result is rounded to the number of decimals given.
    Based on: https://gist.github.com/Hellowlol/5f8545e999259b4371c91ac223409209

    Raises:
        ValueError: if ``string`` is given but is not a valid timestamp.
    """
    if string:
        match = _TIMESTAMP_RE.fullmatch(string.strip())
        if match is None:
            raise ValueError(f"Not a HH:MM:SS[.fraction] timestamp: {string!r}")
        hour, minute, sec = (int(part) for part in match.group(1, 2, 3))
        # Scale the fraction by its own length: ".5" is 500 ms, ".25" is 250 ms.
        fraction = match.group(4) or "0"
        ms = round(int(fraction) * 1000 / 10 ** len(fraction))
    else:
        hour = int(kwargs.get("hour", 0))
        minute = int(kwargs.get("min", 0))
        sec = int(kwargs.get("sec", 0))
        ms = int(kwargs.get("ms", 0))

    result = (hour * 60 * 60 * 1000) + (minute * 60 * 1000) + (sec * 1000) + ms
    if precision and isinstance(precision, int):
        return round(result, precision)
    return result


class FfmpegRunner:
    """Run an ffmpeg command and track its progress from ffmpeg's own output.

    ``-progress - -nostats`` is inserted right after the executable name, and
    stderr is merged into stdout so the "Duration:" banner and the
    ``out_time=`` progress lines arrive on one stream.
    """

    class CommandNotRunException(Exception):
        """
        Raised when get_progress method is called before calling run_command (You cannot get progress until you
        run the command). Call the run_command method first
        """

    DUR_REGEX = re.compile(
        r"Duration: (?P<hour>\d{2}):(?P<min>\d{2}):(?P<sec>\d{2})\.(?P<ms>\d{2})"
    )
    TIME_REGEX = re.compile(
        r"out_time=(?P<hour>\d{2}):(?P<min>\d{2}):(?P<sec>\d{2})\.(?P<ms>\d{2})"
    )

    def __init__(self, cmd: List[str], dry_run=False) -> None:
        """Initialize the runner.

        Args:
            cmd (List[str]): A list of command line elements, e.g. ["ffmpeg", "-i", ...]
            dry_run (bool, optional): Only show what would be done. Defaults to False.
        """
        self.cmd = [cmd[0], "-progress", "-", "-nostats", *cmd[1:]]
        self.dry_run = dry_run
        self.stderr = None
        self.p: Optional[subprocess.Popen] = None
        self.total_dur = None
        # Latest progress percentage seen in the process output.
        self._progress = 0.0

    @staticmethod
    def _match_to_ms(match) -> float:
        """Convert a DUR_REGEX/TIME_REGEX match to milliseconds."""
        fields = match.groupdict()
        # The patterns capture two fractional digits, i.e. hundredths of a
        # second, hence the factor of ten.
        return to_ms(hour=fields["hour"], min=fields["min"], sec=fields["sec"],
                     ms=int(fields["ms"]) * 10)

    def _track_line(self, line: str) -> Optional[float]:
        """Feed one output line; return the progress percentage it reports, if any.

        The first "Duration:" line fixes the total duration; later
        ``out_time=`` lines are converted to a percentage of it.
        """
        if self.total_dur is None:
            duration = self.DUR_REGEX.search(line)
            if duration:
                self.total_dur = self._match_to_ms(duration)
                return None
        if self.total_dur:
            elapsed = self.TIME_REGEX.search(line)
            if elapsed:
                return self._match_to_ms(elapsed) / self.total_dur * 100
        return None

    def _start(self, popen_kwargs: Optional[dict]) -> subprocess.Popen:
        """Reset the progress state and spawn the process."""
        self.total_dur = None
        self._progress = 0.0
        self.p = subprocess.Popen(
            self.cmd,
            stdin=subprocess.PIPE,  # Apply stdin isolation by creating separate pipe.
            stdout=subprocess.PIPE,
            stderr=subprocess.STDOUT,
            universal_newlines=False,
            **(popen_kwargs or {}),
        )
        return self.p

    def run_command(self, on_done: Callable, on_error: Callable[[str, int], None], popen_kwargs: dict = None) -> None:
        """
        Run the ffmpeg command in the background; progress can be polled with get_progress.

        :param on_done: A function which will be called when the task is completed
        :param on_error: A function which will be called when the task results in an error.
         A error string and return code is passed while calling
        :param popen_kwargs: A dict to specify extra arguments to the popen call,
         e.g. { creationflags: CREATE_NO_WINDOW }
        """
        process = self._start(popen_kwargs)

        def watch():
            lines = []
            # Reading the pipe until EOF keeps it from filling up (which would
            # stall the process) and waits without spinning on poll().
            for raw in process.stdout:
                line = raw.decode("utf-8", errors="replace").strip()
                lines.append(line)
                percent = self._track_line(line)
                if percent is not None:
                    self._progress = round(percent, 2)
            returncode = process.wait()
            self.stderr = "\n".join(lines)
            if returncode != 0:
                on_error(f"Error running command {self.cmd}", returncode)
                return
            self._progress = 100.0
            on_done()

        threading.Thread(target=watch).start()

    def get_progress(self) -> float:
        """Return the latest known progress in percent without blocking.

        The value is 0 until the first progress line has been seen and 100
        once the command has finished successfully.

        Raises:
            FfmpegRunner.CommandNotRunException: if no command was started yet.
        """
        if self.p is None:
            raise FfmpegRunner.CommandNotRunException()
        return self._progress

    def run_command_with_progress(self, popen_kwargs: dict = None) -> Generator[int, None, None]:
        """
        Run an ffmpeg command, trying to capture the process output and calculate
        the duration / progress.
        Yields the progress in percent.

        Args:
            popen_kwargs (dict): A dict to specify extra arguments to the popen call, e.g. { creationflags: CREATE_NO_WINDOW }

        Raises:
            RuntimeError: if the command exits with a non-zero return code.
        """
        if self.dry_run:
            return

        process = self._start(popen_kwargs)
        lines = []

        yield 0

        for raw in process.stdout:
            line = raw.decode("utf-8", errors="replace").strip()
            lines.append(line)
            percent = self._track_line(line)
            if percent is not None:
                self._progress = round(percent, 2)
                # Refresh the captured output before control returns to the
                # consumer.
                self.stderr = "\n".join(lines)
                yield int(percent)

        process.wait()
        self.stderr = "\n".join(lines)
        if process.returncode != 0:
            raise RuntimeError(
                "Error running command {}: {}".format(self.cmd, self.stderr)
            )

        self._progress = 100.0
        yield 100
173 |
--------------------------------------------------------------------------------