# Repository layout
# -----------------
# ├── .gitignore
# ├── .vscode
# │   ├── launch.json
# │   └── settings.json
# ├── index.py
# ├── readme.md
# └── requirements.txt
#
# /.gitignore
# -----------
# videos
#
# /.vscode/launch.json
# --------------------
# {
#     // Use IntelliSense to learn about possible attributes.
#     // Hover to view descriptions of existing attributes.
#     // For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387
#     "version": "0.2.0",
#     "configurations": [
#         {
#             "name": "Python: Current File",
#             "type": "python",
#             "request": "launch",
#             "program": "${file}",
#             "console": "integratedTerminal",
#             "justMyCode": true
#         }
#     ]
# }
#
# /.vscode/settings.json
# ----------------------
# {
#     "[python]": {
#         "editor.defaultFormatter": "ms-python.black-formatter"
#     },
#     "python.formatting.provider": "none"
# }
#
# /index.py
# ---------
"""Remove silent stretches from a video and apply basic audio clean-up.

The pipeline is:

1. Detect the non-silent ranges of the audio track and keep only the matching
   parts of the video.
2. Extract the audio, normalise its level, compress its dynamic range and
   apply a high-pass filter.
3. Write a new video that uses the processed audio.

Run as a script with ``python index.py -p /full/path/to/file.mp4``.
"""

import argparse
import itertools
import logging
import math
import os
import tempfile
from contextlib import closing, suppress
from pathlib import Path
from typing import List

from moviepy.editor import AudioFileClip, VideoFileClip, concatenate_videoclips
from pydub import AudioSegment, effects
from pydub.silence import detect_nonsilent

# Configure the logging format
logging.basicConfig(
    format="%(asctime)s - %(levelname)s - %(message)s",
    datefmt="%Y-%m-%d %H:%M:%S",
    level=logging.INFO,  # DEBUG, INFO, WARNING, ERROR or CRITICAL
)


def _pairwise(iterable):
    """Yield consecutive overlapping pairs: (s0, s1), (s1, s2), ..."""
    first, second = itertools.tee(iterable)
    next(second, None)
    return zip(first, second)


def detect_non_silent(audio_file, min_silence_len=200, silence_thresh=-50) -> List[dict]:
    """Load *audio_file* and return its non-silent segments.

    Args:
        audio_file: Path of an audio file readable by pydub.
        min_silence_len: Passed through to :func:`split_on_silence`.
        silence_thresh: Passed through to :func:`split_on_silence`.

    Returns:
        The list of segment dictionaries produced by :func:`split_on_silence`.
    """
    audio = AudioSegment.from_file(audio_file)
    return split_on_silence(
        audio, min_silence_len=min_silence_len, silence_thresh=silence_thresh
    )


def split_on_silence(
    audio_segment,
    min_silence_len=1000,
    silence_thresh=-16,
    keep_silence=100,
    seek_step=1,
) -> List[dict]:
    """Split *audio_segment* into its non-silent parts, keeping their positions.

    The non-silent ranges come from ``pydub.silence.detect_nonsilent``; each
    one is then widened by *keep_silence* on both sides so cuts are not abrupt.

    Args:
        audio_segment: The ``AudioSegment`` to analyse.
        min_silence_len: Forwarded to ``detect_nonsilent``.
        silence_thresh: Forwarded to ``detect_nonsilent``.
        keep_silence: Amount of padding added around every range, in the same
            unit as the detected ranges. ``True`` keeps all of the silence,
            ``False`` keeps none.
        seek_step: Forwarded to ``detect_nonsilent``.

    Returns:
        One dictionary per range with the keys ``"segment"`` (the sliced
        audio), ``"start"`` and ``"end"`` (the slice bounds, clamped to the
        audio length).
    """
    if isinstance(keep_silence, bool):
        keep_silence = len(audio_segment) if keep_silence else 0

    # Lists (not tuples) because neighbouring ranges are adjusted in place.
    padded_ranges = [
        [start - keep_silence, end + keep_silence]
        for start, end in detect_nonsilent(
            audio_segment, min_silence_len, silence_thresh, seek_step
        )
    ]

    # Padding can make neighbours overlap; split the overlap at its midpoint
    # so no part of the audio ends up in two segments.
    for current, following in _pairwise(padded_ranges):
        if following[0] < current[1]:
            midpoint = (current[1] + following[0]) // 2
            current[1] = midpoint
            following[0] = midpoint

    total_length = len(audio_segment)
    results = []
    for start, end in padded_ranges:
        the_start = max(start, 0)
        the_end = min(end, total_length)
        results.append(
            {
                "segment": audio_segment[the_start:the_end],
                "start": the_start,
                "end": the_end,
            }
        )
    return results


def remove_silence_from_video(
    input_video_path, output_video_path, min_silence_len=200, silence_thresh=-50
):
    """Write a copy of the input video that keeps only its non-silent parts.

    Args:
        input_video_path: Video file to read.
        output_video_path: Where the shortened video is written.
        min_silence_len: Forwarded to :func:`detect_non_silent`.
        silence_thresh: Forwarded to :func:`detect_non_silent`.

    Raises:
        ValueError: If the video has no audio track or no non-silent part.
        Exception: Errors from moviepy / pydub are no longer swallowed; they
            propagate so that callers do not continue with a missing output.
    """
    with closing(VideoFileClip(input_video_path)) as video_clip:
        if video_clip.audio is None:
            raise ValueError(f"{input_video_path} has no audio track")

        # The extracted audio is only needed for the analysis, so it lives in
        # a private temporary directory that is removed even on failure.
        with tempfile.TemporaryDirectory() as tmp_dir:
            audio_file = os.path.join(tmp_dir, "temp_audio.wav")
            video_clip.audio.write_audiofile(audio_file)

            logging.info("detectings non silent parts")
            non_silent_segments = detect_non_silent(
                audio_file, min_silence_len, silence_thresh
            )
        logging.info(
            "non-silent parts detected, length:%s", len(non_silent_segments)
        )

        total = len(non_silent_segments)
        duration = video_clip.duration
        video_clips = []
        for count, segment in enumerate(non_silent_segments, start=1):
            print(f"working on count {count} of {total}")
            # Segment bounds are divided by 1000 before being used as
            # sub-clip times.
            start_s = segment["start"] / 1000
            end_s = segment["end"] / 1000
            if duration is not None:
                # The audio can be marginally longer than the video stream.
                end_s = min(end_s, duration)
            if start_s >= end_s:
                continue
            video_clips.append(video_clip.subclip(start_s, end_s))

        if not video_clips:
            raise ValueError(
                f"no non-silent audio detected in {input_video_path}"
            )

        logging.info("concatenating clips")
        final_clip = concatenate_videoclips(video_clips)
        try:
            logging.info("writing new video file")
            final_clip.write_videofile(output_video_path)
        finally:
            logging.info("cleaning up")
            final_clip.close()


def extract_audio_from_video(video_path, audio_path):
    """Write the audio track of *video_path* to *audio_path*.

    Raises:
        ValueError: If the video has no audio track.
    """
    # Closing the video clip also closes its audio reader.
    with closing(VideoFileClip(video_path)) as video:
        if video.audio is None:
            raise ValueError(f"{video_path} has no audio track")
        video.audio.write_audiofile(audio_path)


def _match_level(audio: AudioSegment, target_dbfs: float) -> AudioSegment:
    """Return *audio* with gain applied so that its dBFS equals *target_dbfs*.

    Silent (or empty) audio reports a level of ``-inf``; no finite gain can
    fix that, so such audio is returned unchanged.
    """
    current_dbfs = audio.dBFS
    if math.isinf(current_dbfs):
        logging.warning("audio is silent, skipping level adjustment")
        return audio
    return audio.apply_gain(target_dbfs - current_dbfs)


def normalize_audio(audio_path, target_dBFS=-20.0):
    """Adjust the WAV file at *audio_path* in place to *target_dBFS*."""
    audio: AudioSegment = AudioSegment.from_file(audio_path, format="wav")
    _match_level(audio, target_dBFS).export(audio_path, format="wav")


def compress_audio(audio_path, target_bitrate="64k"):
    """Apply dynamic range compression to the WAV file at *audio_path* in place.

    Args:
        audio_path: WAV file to rewrite.
        target_bitrate: Forwarded to ``export``.
            NOTE(review): pydub appears to ignore the bitrate when exporting
            plain WAV, so this probably has no effect - confirm.
    """
    audio: AudioSegment = AudioSegment.from_file(audio_path, format="wav")
    compressed_audio = effects.compress_dynamic_range(
        audio, threshold=-12, attack=200, release=1000, ratio=2
    )
    compressed_audio.export(audio_path, format="wav", bitrate=target_bitrate)


def quick_eq(audio_path):
    """Apply a 120 Hz high-pass filter to the WAV file at *audio_path* in place.

    Only the first step of the intended EQ chain is implemented.
    """
    audio: AudioSegment = AudioSegment.from_file(audio_path, format="wav")

    # 1. high pass filter 120
    audio = audio.high_pass_filter(120)  # Adjust the frequency as needed

    # TODO: 2. boost presence -> gentle boost, 2k to 5k hz
    # TODO: 3. reduce harshness -> gentle cut for 6k to 10k
    # TODO: 4. reduce boxiness -> gentle cut 300-500 hz

    audio.export(audio_path, format="wav")


def replace_audio_in_video(video_path, new_audio_path, output_video_path):
    """Write *video_path* to *output_video_path* with *new_audio_path* as audio."""
    with closing(VideoFileClip(video_path)) as video, closing(
        AudioFileClip(new_audio_path)
    ) as new_audio:
        # set_audio returns a new clip, so closing `video` afterwards also
        # closes the original clip's own audio.
        video.set_audio(new_audio).write_videofile(
            output_video_path, codec="libx264"
        )


def _high_shelf_boost(audio: AudioSegment, gain_db: float, frequency: float) -> AudioSegment:
    """Approximate a high-shelf boost of *gain_db* above *frequency*.

    A scaled, high-passed copy is mixed onto the original. This is only an
    approximation: the filter's phase shift means the summed level near the
    cutoff is not exactly *gain_db*.

    Raises:
        ValueError: If *gain_db* is negative (cuts are not supported).
    """
    if gain_db < 0:
        raise ValueError("only boosts are supported")
    extra_amplitude = 10 ** (gain_db / 20) - 1
    if extra_amplitude <= 0:
        return audio
    highs = audio.high_pass_filter(frequency).apply_gain(
        20 * math.log10(extra_amplitude)
    )
    return audio.overlay(highs)


def eq_audio(audio_path):
    """Apply a simple voice EQ to the audio file at *audio_path* in place.

    Currently not part of :func:`improve_audio`.
    """
    # Load the audio file
    audio = AudioSegment.from_file(audio_path)

    # High-Pass Filter (remove low-frequency noise)
    audio = audio.high_pass_filter(120)  # Adjust the frequency as needed

    # TODO: low shelf for warmth (about +3 dB at 100 Hz).
    # TODO: parametric EQ - cut around 250 Hz and 400 Hz, +2 dB around 1 kHz,
    #       -1 dB around 5 kHz, -3 dB in the sibilant range (6 kHz to 8 kHz).
    #       Earlier experiments used scipy_effects.eq and audio.filter.

    # Boost high-end around 10kHz to 15kHz.
    # NOTE(review): the original called audio.high_shelf(), which pydub
    # 0.25.1 does not appear to provide - confirm before relying on this.
    audio = _high_shelf_boost(audio, gain_db=2.0, frequency=10000)

    # High-Pass Filter (remove low-end muddiness)
    audio = audio.high_pass_filter(200)  # Adjust the frequency as needed

    # Export the enhanced audio to a new file
    audio.export(audio_path, format="wav")


def band_reject_filter(audio: AudioSegment, reject_start, reject_end):
    """Attenuate the band between *reject_start* and *reject_end* (in Hz).

    The low-passed and the high-passed copies are produced independently and
    then mixed. Chaining the two filters instead (as the previous version did)
    removes almost the whole signal, because nothing passes both a low-pass at
    *reject_start* and a high-pass at the higher *reject_end*.

    Returns:
        A new ``AudioSegment``; *audio* itself is not modified.
    """
    # Everything below the rejected band ...
    below_band = audio.low_pass_filter(reject_start)
    # ... plus everything above it.
    above_band = audio.high_pass_filter(reject_end)
    return below_band.overlay(above_band)


def de_ess_audio(audio_path, de_ess_start=6000, de_ess_end=8000):
    """Reduce sibilance in the audio file at *audio_path* in place.

    Args:
        audio_path: Audio file to rewrite (exported as WAV).
        de_ess_start: Lower edge of the attenuated band in Hz.
        de_ess_end: Upper edge of the attenuated band in Hz.
    """
    # Load the audio file
    audio = AudioSegment.from_file(audio_path)

    # The band-rejected signal already is the de-essed result; overlaying it
    # on the original would add the two signals and raise the overall level
    # instead of cutting the band.
    de_essed_audio = band_reject_filter(audio, de_ess_start, de_ess_end)

    # Export the de-essed audio to a new file
    de_essed_audio.export(audio_path, format="wav")


def remove_plosives(audio_path, cutoff_freq=100):
    """High-pass the audio file at *audio_path* in place to soften plosives.

    Args:
        audio_path: Audio file to rewrite (exported as WAV).
        cutoff_freq: High-pass cutoff in Hz.
    """
    # Load the audio file
    audio = AudioSegment.from_file(audio_path)

    # Only the content below the cutoff is removed. The previous version
    # also low-passed at the same frequency, which attenuated everything
    # above the cutoff as well - i.e. the voice itself.
    audio = audio.high_pass_filter(cutoff_freq)

    # Export the plosive-removed audio to a new file
    audio.export(audio_path, format="wav")


def normalize_loudness(audio_path, target_lufs=-16.0):
    """Adjust the audio file at *audio_path* in place to *target_lufs*.

    NOTE: the level is measured with ``AudioSegment.dBFS``, not with a real
    LUFS meter, so *target_lufs* is effectively a dBFS target.
    """
    # Load the audio file
    audio = AudioSegment.from_file(audio_path)

    # Export the normalized audio to a new file
    _match_level(audio, target_lufs).export(audio_path, format="wav")


def improve_audio(video_path, output_video_path):
    """Write *video_path* to *output_video_path* with a cleaned-up audio track.

    Active steps: level normalisation, compression, high-pass EQ.
    Disabled steps (kept for experiments): remove_plosives, de_ess_audio,
    eq_audio and normalize_loudness.
    """
    # The intermediate WAV lives in a private temporary directory so parallel
    # runs cannot collide and nothing is left behind on failure.
    with tempfile.TemporaryDirectory() as tmp_dir:
        audio_path = os.path.join(tmp_dir, "tmp_extracted_audio.wav")

        logging.info("extracting audio from video")
        extract_audio_from_video(video_path, audio_path)

        logging.info("normalizing audio")
        normalize_audio(audio_path)

        logging.info("compressing audio")
        compress_audio(audio_path)

        logging.info("applying quick eq")
        quick_eq(audio_path)

        # Step 6: Replace audio in the video
        logging.info("replacing audio in video")
        replace_audio_in_video(video_path, audio_path, output_video_path)


def _derive_output_paths(input_video_file):
    """Return ``(tmp_path, edited_path)`` placed next to *input_video_file*.

    ``/a/clip.mp4`` gives ``/a/clip-tmp.mp4`` and ``/a/clip-edited.mp4``. The
    file's own extension is kept (``.mp4`` is used when there is none), so the
    results never equal the input path.
    """
    source = Path(input_video_file)
    suffix = source.suffix or ".mp4"
    return (
        str(source.with_name(f"{source.stem}-tmp{suffix}")),
        str(source.with_name(f"{source.stem}-edited{suffix}")),
    )


def remove_silence_and_normalize(input_video_file):
    """Run the whole pipeline and write ``<name>-edited<ext>`` next to the input.

    The intermediate ``<name>-tmp<ext>`` file is removed afterwards, also when
    a step fails.
    """
    tmp_output_video_file, output_video_file = _derive_output_paths(
        input_video_file
    )
    try:
        remove_silence_from_video(input_video_file, tmp_output_video_file)
        improve_audio(tmp_output_video_file, output_video_file)
    finally:
        # The file does not exist when the first step failed early.
        with suppress(FileNotFoundError):
            os.remove(tmp_output_video_file)


def main():
    """Parse the command line and process the given video."""
    parser = argparse.ArgumentParser(
        description="A CLI to remove silence and enhance audio in a video"
    )
    parser.add_argument("-p", "--path", help="Full path to video file")
    args = parser.parse_args()

    if not args.path or not str(args.path).strip():
        logging.error(
            'filepath not provided!\nRun script with -p argument e.g. "python index.py -p /full/path/to/file.mp4"'
        )
        raise SystemExit(1)

    try:
        remove_silence_and_normalize(args.path)
    except Exception:
        # Top-level boundary: report with traceback and signal failure.
        logging.exception("failed to process %s", args.path)
        raise SystemExit(1)


if __name__ == "__main__":
    main()


# /readme.md
# ----------
# # Video Editing Automation
#
# This script automates some basic video/audio editing that I usually do for
# my YouTube videos.
#
# ## Usage
#
# Run the script by passing a full filepath of the video you want to edit
#
# ```
# python index.py -p /full/path/to/file.mp4
# ```
#
# This will create a new video file with `-edited` suffix in the same location
# as the input video file. E.g. `/full/path/to/file-edited.mp4`.
#
# The new file will have the following edits:
# 1. Remove all 'silent' parts of the video
# 2. Normalize the audio volume
# 3. Add compression to the audio
# 4. Basic EQ - high pass filter
#
# ## Todo:
# 1. denoise
# 2. deesser
# 3. better EQ for podcast voice
#
# /requirements.txt
# -----------------
# appdirs==1.4.4
# audioread==3.0.0
# certifi==2023.7.22
# cffi==1.15.1
# charset-normalizer==3.2.0
# colorama==0.4.6
# decorator==4.4.2
# ffmpeg==1.4
# idna==3.4
# imageio==2.31.1
# imageio-ffmpeg==0.4.8
# joblib==1.3.1
# lazy_loader==0.3
# librosa==0.10.0.post2
# llvmlite==0.40.1
# moviepy==1.0.3
# msgpack==1.0.5
# numba==0.57.1
# numpy==1.24.4
# packaging==23.1
# Pillow==10.0.0
# pooch==1.6.0
# proglog==0.1.10
# pycparser==2.21
# pydub==0.25.1
# requests==2.31.0
# scikit-learn==1.3.0
# scipy==1.11.1
# soundfile==0.12.1
# soxr==0.3.5
# threadpoolctl==3.2.0
# tqdm==4.65.0
# typing_extensions==4.7.1
# urllib3==2.0.4
# argparse==1.4.0