├── .gitignore
├── .vscode
    ├── launch.json
    └── settings.json
├── index.py
├── readme.md
└── requirements.txt


/.gitignore:
--------------------------------------------------------------------------------
1 | videos


--------------------------------------------------------------------------------
/.vscode/launch.json:
--------------------------------------------------------------------------------
 1 | {
 2 |     // Use IntelliSense to learn about possible attributes.
 3 |     // Hover to view descriptions of existing attributes.
 4 |     // For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387
 5 |     "version": "0.2.0",
 6 |     "configurations": [
 7 |         {
 8 |             "name": "Python: Current File",
 9 |             "type": "python",
10 |             "request": "launch",
11 |             "program": "${file}",
12 |             "console": "integratedTerminal",
13 |             "justMyCode": true
14 |         }
15 |     ]
16 | }


--------------------------------------------------------------------------------
/.vscode/settings.json:
--------------------------------------------------------------------------------
1 | {
2 |     "[python]": {
3 |         "editor.defaultFormatter": "ms-python.black-formatter"
4 |     },
5 |     "python.formatting.provider": "none"
6 | }


--------------------------------------------------------------------------------
/index.py:
--------------------------------------------------------------------------------
  1 | from typing import List
  2 | import os
  3 | from pydub import AudioSegment, effects
  4 | from pydub.silence import detect_nonsilent
  5 | import itertools
  6 | from moviepy.editor import VideoFileClip, concatenate_videoclips, AudioFileClip
  7 | import logging
  8 | import argparse
  9 | 
 10 | # Configure the logging format
 11 | logging.basicConfig(
 12 |     format="%(asctime)s - %(levelname)s - %(message)s",
 13 |     datefmt="%Y-%m-%d %H:%M:%S",
 14 |     level=logging.INFO,  # You can set the desired log level (DEBUG, INFO, WARNING, ERROR, CRITICAL)
 15 | )
 16 | 
 17 | 
 18 | def detect_non_silent(audio_file, min_silence_len=200, silence_thresh=-50):
 19 |     audio = AudioSegment.from_file(audio_file)
 20 |     non_silent_segments = split_on_silence(
 21 |         audio, min_silence_len=min_silence_len, silence_thresh=silence_thresh
 22 |     )
 23 |     return non_silent_segments
 24 | 
 25 | 
 26 | def split_on_silence(
 27 |     audio_segment,
 28 |     min_silence_len=1000,
 29 |     silence_thresh=-16,
 30 |     keep_silence=100,
 31 |     seek_step=1,
 32 | ):
 33 |     def pairwise(iterable):
 34 |         a, b = itertools.tee(iterable)
 35 |         next(b, None)
 36 |         return zip(a, b)
 37 | 
 38 |     if isinstance(keep_silence, bool):
 39 |         keep_silence = len(audio_segment) if keep_silence else 0
 40 | 
 41 |     output_ranges = [
 42 |         [start - keep_silence, end + keep_silence]
 43 |         for (start, end) in detect_nonsilent(
 44 |             audio_segment, min_silence_len, silence_thresh, seek_step
 45 |         )
 46 |     ]
 47 | 
 48 |     for range_i, range_ii in pairwise(output_ranges):
 49 |         last_end = range_i[1]
 50 |         next_start = range_ii[0]
 51 |         if next_start < last_end:
 52 |             range_i[1] = (last_end + next_start) // 2
 53 |             range_ii[0] = range_i[1]
 54 | 
 55 |     results = []
 56 | 
 57 |     for start, end in output_ranges:
 58 |         the_start = max(start, 0)
 59 |         the_end = min(end, len(audio_segment))
 60 |         results.append(
 61 |             {
 62 |                 "segment": audio_segment[the_start:the_end],
 63 |                 "start": the_start,
 64 |                 "end": the_end,
 65 |             }
 66 |         )
 67 |     return results
 68 | 
 69 | 
 70 | def remove_silence_from_video(
 71 |     input_video_path, output_video_path, min_silence_len=200, silence_thresh=-50
 72 | ):
 73 |     try:
 74 |         audio_file = "temp_audio.wav"
 75 | 
 76 |         video_clip = VideoFileClip(input_video_path)
 77 |         video_clip.audio.write_audiofile(audio_file)
 78 | 
 79 |         logging.info("detectings non silent parts")
 80 |         non_silent_segments = detect_non_silent(
 81 |             audio_file, min_silence_len, silence_thresh
 82 |         )
 83 |         logging.info(
 84 |             "non-silent parts detected, length:" + str(len(non_silent_segments))
 85 |         )
 86 | 
 87 |         video_clips = []
 88 |         count = 1
 89 |         for segment in non_silent_segments:
 90 |             print(
 91 |                 "working on count "
 92 |                 + str(count)
 93 |                 + " of "
 94 |                 + str(len(non_silent_segments))
 95 |             )
 96 |             video_clips.append(
 97 |                 video_clip.subclip(
 98 |                     segment.get("start") / 1000, segment.get("end") / 1000
 99 |                 )
100 |             )
101 |             count += 1
102 | 
103 |         logging.info("concatenating clips")
104 |         final_clip = concatenate_videoclips(video_clips)
105 | 
106 |         logging.info("writing new video file")
107 |         final_clip.write_videofile(output_video_path)
108 | 
109 |         # Clean up temporary audio file
110 |         logging.info("cleaning up")
111 |         video_clip.audio.reader.close_proc()
112 |         video_clip.reader.close()
113 |         final_clip.close()
114 |         video_clip.close()
115 |     except Exception as error:
116 |         print(error)
117 |     os.remove(audio_file)
118 | 
119 | 
def extract_audio_from_video(video_path, audio_path):
    """Write the audio track of *video_path* to *audio_path*.

    Args:
        video_path: Path of the source video.
        audio_path: Destination audio file.

    Raises:
        ValueError: If the video has no audio track.
    """
    video = VideoFileClip(video_path)
    try:
        if video.audio is None:
            raise ValueError(f"no audio track found in {video_path}")
        video.audio.write_audiofile(audio_path)
    finally:
        # Releases the video reader and the clip's audio reader even when
        # the export fails.
        video.close()
129 | 
def normalize_audio(audio_path, target_dBFS=-20.0):
    """Apply a uniform gain so the WAV at *audio_path* sits at *target_dBFS*.

    The file is rewritten in place. A completely silent file (``dBFS`` of
    negative infinity) is left untouched, because the required gain would be
    infinite.

    Args:
        audio_path: Path of the WAV file to normalise in place.
        target_dBFS: Desired level as reported by ``AudioSegment.dBFS``.
    """
    audio: AudioSegment = AudioSegment.from_file(audio_path, format="wav")

    current_dBFS = audio.dBFS
    if current_dBFS == float("-inf"):
        logging.warning("audio is silent, skipping normalization: %s", audio_path)
        return

    normalized_audio = audio.apply_gain(target_dBFS - current_dBFS)
    normalized_audio.export(audio_path, format="wav")
135 | 
def compress_audio(audio_path, target_bitrate="64k"):
    """Run dynamic-range compression over the WAV at *audio_path*, in place.

    Args:
        audio_path: Path of the WAV file to compress and overwrite.
        target_bitrate: Passed to ``export`` as ``bitrate``.
            NOTE(review): the export format is WAV, so this setting
            presumably has no audible effect -- confirm against pydub.
    """
    source: AudioSegment = AudioSegment.from_file(audio_path, format="wav")
    squashed = effects.compress_dynamic_range(
        source,
        ratio=2,
        threshold=-12,
        attack=200,
        release=1000,
    )
    squashed.export(audio_path, format="wav", bitrate=target_bitrate)
140 | 
def quick_eq(audio_path):
    """Apply a minimal EQ pass to the WAV at *audio_path*, in place.

    Only the first step of the intended chain exists so far: a high-pass
    filter at 120.
    """
    # 1. high pass filter at 120 (adjust the frequency as needed)
    filtered: AudioSegment = AudioSegment.from_file(
        audio_path, format="wav"
    ).high_pass_filter(120)

    # Still to do:
    #   2. boost presence   -> gentle boost, 2k to 5k hz
    #   3. reduce harshness -> gentle cut, 6k to 10k
    #   4. reduce boxiness  -> gentle cut, 300-500 hz

    filtered.export(audio_path, format="wav")
153 |     
154 | 
def replace_audio_in_video(video_path, new_audio_path, output_video_path):
    """Write *video_path* to *output_video_path* with a different audio track.

    Args:
        video_path: Path of the source video.
        new_audio_path: Path of the audio file to use as the new track.
        output_video_path: Destination video file (encoded with libx264).
    """
    video = VideoFileClip(video_path)
    new_audio = None
    try:
        new_audio = AudioFileClip(new_audio_path)
        # set_audio() returns a new clip; keep `video` bound to the original
        # so its own readers (including its original audio) can be closed.
        video.set_audio(new_audio).write_videofile(
            output_video_path, codec="libx264"
        )
    finally:
        video.close()
        if new_audio is not None:
            new_audio.close()
164 | 
165 | 
def eq_audio(audio_path):
    """Apply a simple voice EQ to the file at *audio_path* and save it as WAV.

    Chain: high-pass at 120, a +2 dB lift above 10 kHz, high-pass at 200.

    pydub's ``AudioSegment`` has no ``high_shelf`` method, so the lift is
    approximated by mixing a gain-adjusted high-passed copy back onto the
    signal. This is a rough shelf (the filter's phase shift makes the sum
    inexact), not a precise one.

    Earlier experiments with parametric notches/boosts around 250 Hz, 400 Hz,
    1 kHz, 5 kHz and 6 kHz (via ``scipy_effects.eq``) and a low shelf at
    100 Hz are not part of the chain.
    """
    import math

    # Load the audio file
    audio = AudioSegment.from_file(audio_path)

    # High-pass filter (remove low-frequency noise)
    audio = audio.high_pass_filter(120)  # Adjust the frequency as needed

    # Approximate high shelf: boost the high end from around 10 kHz upwards.
    shelf_gain_db = 2.0
    shelf_frequency = 10000
    # Adding `extra` times the highs on top of the signal raises them by
    # roughly shelf_gain_db in total: 1 + extra == 10 ** (gain / 20).
    extra = 10 ** (shelf_gain_db / 20) - 1
    highs = audio.high_pass_filter(shelf_frequency).apply_gain(
        20 * math.log10(extra)
    )
    audio = audio.overlay(highs)

    # High-pass filter (remove low-end muddiness)
    audio = audio.high_pass_filter(200)  # Adjust the frequency as needed

    # Export the enhanced audio back to the same path
    audio.export(audio_path, format="wav")
199 | 
200 | 
def band_reject_filter(audio: AudioSegment, reject_start, reject_end):
    """Return *audio* with the band between the two frequencies attenuated.

    A band-reject response is the sum of two parallel branches: everything
    below *reject_start* plus everything above *reject_end*. Running the two
    filters in series instead would leave almost nothing when
    ``reject_start < reject_end``.

    Args:
        audio: Source audio.
        reject_start: Lower edge of the rejected band (low-pass cutoff).
        reject_end: Upper edge of the rejected band (high-pass cutoff).

    Returns:
        A new ``AudioSegment``; *audio* itself is not modified.
    """
    # Branch 1: keep what lies below the rejected band.
    below_band = audio.low_pass_filter(reject_start)

    # Branch 2: keep what lies above the rejected band.
    above_band = audio.high_pass_filter(reject_end)

    # Mix the two branches back together.
    return below_band.overlay(above_band)


def de_ess_audio(audio_path):
    """Attenuate the sibilant band of the file at *audio_path*, saved as WAV.

    This is a static band-reject between 6 kHz and 8 kHz, not a dynamic
    de-esser. The filtered signal replaces the original; overlaying it on top
    of the untouched audio would put the sibilance straight back.
    """
    # Load the audio file
    audio = AudioSegment.from_file(audio_path)

    # Frequency range for de-essing (typically 6kHz to 8kHz)
    de_ess_start = 6000
    de_ess_end = 8000

    # Reduce the sibilant frequencies
    de_essed_audio = band_reject_filter(audio, de_ess_start, de_ess_end)

    # Export the de-essed audio back to the same path
    de_essed_audio.export(audio_path, format="wav")
228 | 
229 | 
def remove_plosives(audio_path):
    """Reduce low-frequency plosive thumps in the file at *audio_path*.

    Applies a high-pass filter at 100 and rewrites the file as WAV. No
    low-pass is applied: filtering both ways at the same cutoff would strip
    nearly all of the signal rather than just the low-end energy.
    """
    # Load the audio file
    audio = AudioSegment.from_file(audio_path)

    # Apply a high-pass filter (cut off frequencies below cutoff_freq)
    cutoff_freq = 100
    audio = audio.high_pass_filter(cutoff_freq)

    # Export the plosive-reduced audio back to the same path
    audio.export(audio_path, format="wav")
243 | 
244 | 
def normalize_loudness(audio_path, target_lufs=-16.0):
    """Apply a uniform gain so the file's level matches *target_lufs*.

    NOTE: the level is measured with ``AudioSegment.dBFS``, not a true LUFS
    measurement, so *target_lufs* is effectively interpreted as a dBFS
    target. The parameter name is kept for backward compatibility.

    A completely silent file (``dBFS`` of negative infinity) is left
    untouched, because the required gain would be infinite.

    Args:
        audio_path: File to normalise; rewritten in place as WAV.
        target_lufs: Desired level (see note above).
    """
    # Load the audio file
    audio = AudioSegment.from_file(audio_path)

    # Current level (dBFS, used as a stand-in for loudness)
    current_level = audio.dBFS
    if current_level == float("-inf"):
        logging.warning("audio is silent, skipping normalization: %s", audio_path)
        return

    # Gain needed to reach the target
    gain_adjustment = target_lufs - current_level

    # Apply the gain and write the result back to the same path
    normalized_audio = audio.apply_gain(gain_adjustment)
    normalized_audio.export(audio_path, format="wav")
260 | 
261 | 
def improve_audio(video_path, output_video_path):
    """Enhance a video's audio track and write the result to a new video.

    Pipeline: extract the audio to a temporary WAV, normalise it, compress
    it, apply the quick EQ, then mux it back onto the video. The temporary
    WAV is removed even when a stage fails.

    ``remove_plosives``, ``de_ess_audio``, ``eq_audio`` and
    ``normalize_loudness`` are currently not part of the chain.

    Args:
        video_path: Path of the source video.
        output_video_path: Path the enhanced video is written to.
    """
    audio_path = "tmp_extracted_audio.wav"

    try:
        logging.info("extracting audio from video")
        extract_audio_from_video(video_path, audio_path)

        logging.info("normalizing audio")
        normalize_audio(audio_path)

        logging.info("compressing audio")
        compress_audio(audio_path)

        logging.info("applying quick eq")
        quick_eq(audio_path)

        logging.info("replacing audio in video")
        replace_audio_in_video(video_path, audio_path, output_video_path)
    finally:
        # Clean up the intermediate file; it may not exist if extraction failed.
        if os.path.exists(audio_path):
            os.remove(audio_path)
294 | 
295 | 
def remove_silence_and_normalize(input_video_file):
    """Run the full edit: strip silence, then enhance the audio.

    For an input ``/path/name.<ext>`` the result is written to
    ``/path/name-edited.mp4``; ``/path/name-tmp.mp4`` is used as an
    intermediate and removed afterwards.

    The output names are derived from the file's stem, so an input that does
    not end in ``.mp4`` can no longer make the temp/output paths equal the
    input path (which would overwrite the source), and ``.mp4`` appearing
    elsewhere in the path is left alone.

    Args:
        input_video_file: Path of the video to edit.
    """
    stem, _extension = os.path.splitext(str(input_video_file))
    tmp_output_video_file = stem + "-tmp.mp4"
    output_video_file = stem + "-edited.mp4"

    try:
        remove_silence_from_video(input_video_file, tmp_output_video_file)

        improve_audio(tmp_output_video_file, output_video_file)
    finally:
        # Best-effort removal: the file may be missing (the first step logs
        # its errors instead of raising) or still locked by a reader.
        if os.path.exists(tmp_output_video_file):
            try:
                os.remove(tmp_output_video_file)
            except OSError:
                logging.warning(
                    "could not remove temporary file %s", tmp_output_video_file
                )
305 | 
306 | 
if __name__ == "__main__":
    # CLI entry point: takes the video path via -p/--path and runs the
    # full edit on it.
    parser = argparse.ArgumentParser(
        description="A CLI to remove silence and enhance audio in a video"
    )

    parser.add_argument("-p", "--path", help="Full path to video file")

    args = parser.parse_args()

    if not args.path or not str(args.path).strip():
        logging.error('filepath not provided!\nRun script with -p argument e.g. "python index.py -p /full/path/to/file.mp4"')
        # SystemExit works without the `site`-provided exit() helper.
        raise SystemExit(1)

    if not os.path.isfile(args.path):
        logging.error("file not found: %s", args.path)
        raise SystemExit(1)

    remove_silence_and_normalize(args.path)
319 | 


--------------------------------------------------------------------------------
/readme.md:
--------------------------------------------------------------------------------
 1 | # Video Editing Automation
 2 | 
 3 | This script automates some basic video/audio editing that I usually do for my YouTube videos.
 4 | 
 5 | ## Usage
 6 | 
 7 | Run the script, passing the full path of the video you want to edit:
 8 | 
 9 | ```
10 | python index.py -p /full/path/to/file.mp4
11 | ```
12 | 
13 | This will create a new video file with an `-edited` suffix in the same location as the input video file, e.g. `/full/path/to/file-edited.mp4`.
14 | 
15 | The new file will have the following edits:
16 | 1. Remove all 'silent' parts of the video
17 | 2. Normalize the audio volume
18 | 3. Add compression to the audio
19 | 4. Basic EQ - high pass filter
20 | 
21 | ## Todo:
22 | 1. denoise
23 | 2. deesser
24 | 3. better EQ for podcast voice


--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
 1 | appdirs==1.4.4
 2 | audioread==3.0.0
 3 | certifi==2023.7.22
 4 | cffi==1.15.1
 5 | charset-normalizer==3.2.0
 6 | colorama==0.4.6
 7 | decorator==4.4.2
 8 | ffmpeg==1.4
 9 | idna==3.4
10 | imageio==2.31.1
11 | imageio-ffmpeg==0.4.8
12 | joblib==1.3.1
13 | lazy_loader==0.3
14 | librosa==0.10.0.post2
15 | llvmlite==0.40.1
16 | moviepy==1.0.3
17 | msgpack==1.0.5
18 | numba==0.57.1
19 | numpy==1.24.4
20 | packaging==23.1
21 | Pillow==10.0.0
22 | pooch==1.6.0
23 | proglog==0.1.10
24 | pycparser==2.21
25 | pydub==0.25.1
26 | requests==2.31.0
27 | scikit-learn==1.3.0
28 | scipy==1.11.1
29 | soundfile==0.12.1
30 | soxr==0.3.5
31 | threadpoolctl==3.2.0
32 | tqdm==4.65.0
33 | typing_extensions==4.7.1
34 | urllib3==2.0.4
35 | argparse==1.4.0


--------------------------------------------------------------------------------