├── .dockerignore ├── .gitignore ├── LICENSE ├── README.md ├── cog.yaml ├── predict.py └── samples.py /.dockerignore: -------------------------------------------------------------------------------- 1 | tmp/ 2 | sample* 3 | output.* 4 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | __pycache__ 2 | output.* 3 | sample_* 4 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2023 Paul 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # cog-cpu-toolkit 2 | 3 | https://replicate.com/fofr/toolkit 4 | 5 | Common video tasks in a single model: 6 | 7 | - convert any video or GIF to an MP4 8 | - convert any video to a GIF 9 | - extract audio from a video as MP3 10 | - convert a zipped folder of frames to a video or GIF 11 | - extract video or GIF frames (all frames or using fps) 12 | 13 | Helpful CPU based model that wraps common ffmpeg tasks. 14 | -------------------------------------------------------------------------------- /cog.yaml: -------------------------------------------------------------------------------- 1 | build: 2 | gpu: false 3 | system_packages: 4 | - "ffmpeg" 5 | python_version: "3.11" 6 | predict: "predict.py:Predictor" 7 | -------------------------------------------------------------------------------- /predict.py: -------------------------------------------------------------------------------- 1 | from cog import BasePredictor, Input, Path 2 | from typing import List 3 | import subprocess 4 | import os 5 | import shutil 6 | import zipfile 7 | 8 | VIDEO_FILE_EXTENSIONS = [ 9 | ".3g2", 10 | ".3gp", 11 | ".a64", 12 | ".avi", 13 | ".flv", 14 | ".gif", 15 | ".gifv", 16 | ".m2v", 17 | ".m4v", 18 | ".mkv", 19 | ".mov", 20 | ".mp4", 21 | ".mpeg", 22 | ".mpg", 23 | ".mv", 24 | ".mxf", 25 | ".nsv", 26 | ".ogg", 27 | ".ogv", 28 | ".rm", 29 | ".rmvb", 30 | ".roq", 31 | ".svi", 32 | ".vob", 33 | ".webm", 34 | ".wmv", 35 | ".yuv", 36 | ] 37 | 38 | VIDEO_TASKS = [ 39 | "convert_input_to_mp4", 40 | "convert_input_to_gif", 41 | "extract_video_audio_as_mp3", 42 | "extract_frames_from_input", 43 | "reverse_video", 44 | "bounce_video", 45 | ] 46 | ZIP_TASKS = ["zipped_frames_to_mp4", "zipped_frames_to_gif"] 47 | 48 | 49 | class Predictor(BasePredictor): 50 | def validate_inputs(self, task: str, input_file: Path): 51 | """Validate inputs""" 52 | if task in ZIP_TASKS: 53 | if input_file.suffix.lower() != ".zip": 54 | raise ValueError("Input file must be a zip file") 55 | 56 | elif task in VIDEO_TASKS: 57 | if input_file.suffix.lower() not in VIDEO_FILE_EXTENSIONS: 58 | raise ValueError( 59 | "Input file must be a video file with one of the following extensions: " 60 | + ", ".join(VIDEO_FILE_EXTENSIONS) 61 | ) 62 | 63 | def predict( 64 | self, 65 | task: str = Input( 66 | description="Task to perform", 67 | choices=[ 68 | "convert_input_to_mp4", 69 | "convert_input_to_gif", 70 | "extract_video_audio_as_mp3", 71 | "zipped_frames_to_mp4", 72 | "zipped_frames_to_gif", 73 | "extract_frames_from_input", 74 | "reverse_video", 75 | "bounce_video", 76 | ], 77 | ), 78 | input_file: Path = Input(description="File – zip, image or video to process"), 79 | fps: int = Input( 80 | description="frames per second, if relevant. Use 0 to keep original fps (or use default). Converting to GIF defaults to 12fps", 81 | default=0, 82 | ), 83 | ) -> List[Path]: 84 | """Run prediction""" 85 | if os.path.exists("/tmp/outputs"): 86 | shutil.rmtree("/tmp/outputs") 87 | os.makedirs("/tmp/outputs") 88 | 89 | self.validate_inputs(task, input_file) 90 | self.fps = fps 91 | 92 | if task == "convert_input_to_mp4": 93 | return self.convert_video_to(input_file, "mp4") 94 | elif task == "convert_input_to_gif": 95 | return self.convert_video_to(input_file, "gif") 96 | elif task == "extract_video_audio_as_mp3": 97 | return self.extract_video_audio_as_mp3(input_file) 98 | elif task == "zipped_frames_to_mp4": 99 | return self.zipped_frames_to(input_file, "mp4") 100 | elif task == "zipped_frames_to_gif": 101 | return self.zipped_frames_to(input_file, "gif") 102 | elif task == "extract_frames_from_input": 103 | return self.extract_frames_from_input(input_file) 104 | elif task == "reverse_video": 105 | return self.reverse_video(input_file) 106 | elif task == "bounce_video": 107 | return self.bounce_video(input_file) 108 | 109 | return [] 110 | 111 | def unzip(self, input_path: Path) -> List[Path]: 112 | """Unzip file""" 113 | print("Unzipping file") 114 | with zipfile.ZipFile(input_path, "r") as zip_ref: 115 | zip_ref.extractall("/tmp/outputs/zip") 116 | 117 | for filename in os.listdir("/tmp/outputs/zip"): 118 | os.rename( 119 | "/tmp/outputs/zip/" + filename, 120 | "/tmp/outputs/zip/" + filename.lower(), 121 | ) 122 | 123 | print("Files in zip:") 124 | for filename in sorted(os.listdir("/tmp/outputs/zip")): 125 | print(filename) 126 | 127 | def run_ffmpeg(self, input, output_path: str, command: List[str]): 128 | """Run ffmpeg command""" 129 | 130 | prepend = ["ffmpeg"] 131 | if input: 132 | prepend.extend(["-i", str(input)]) 133 | 134 | append = [output_path] 135 | command = prepend + command + append 136 | print("Running ffmpeg command: " + " ".join(command)) 137 | try: 138 | subprocess.run(command, check=True) 139 | except subprocess.CalledProcessError as e: 140 | raise RuntimeError( 141 | "Command '{}' returned with error (code {}): {}".format( 142 | e.cmd, e.returncode, e.output 143 | ) 144 | ) 145 | return [Path(output_path)] 146 | 147 | def convert_video_to(self, video_path: Path, type: str = "mp4") -> List[Path]: 148 | """Convert video to format using ffmpeg""" 149 | command = [ 150 | "-pix_fmt", 151 | "yuv420p", # Pixel format: YUV with 4:2:0 chroma subsampling 152 | ] 153 | 154 | if type == "gif": 155 | command.extend( 156 | [ 157 | "-vf", 158 | f"fps={self.fps or 12},scale=512:-1:flags=lanczos", # Set frame rate and scale 159 | "-c:v", 160 | "gif", # Video codec: GIF 161 | ] 162 | ) 163 | else: 164 | command.extend( 165 | [ 166 | "-c:v", 167 | "libx264", # Video codec: H.264 168 | "-c:a", 169 | "aac", # Audio codec: AAC 170 | "-q:a", 171 | "0", # Specify audio quality (0 is the highest) 172 | ] 173 | ) 174 | 175 | if self.fps != 0: 176 | command.extend(["-r", str(self.fps)]) 177 | 178 | return self.run_ffmpeg(video_path, f"/tmp/outputs/video.{type}", command) 179 | 180 | def extract_video_audio_as_mp3(self, video_path: Path) -> List[Path]: 181 | """Extract audio from video using ffmpeg""" 182 | command = [ 183 | "-q:a", 184 | "0", # Specify audio quality (0 is the highest) 185 | "-map", 186 | "a", # Map audio tracks (ignore video) 187 | ] 188 | 189 | return self.run_ffmpeg(video_path, "/tmp/outputs/audio.mp3", command) 190 | 191 | def extract_frames_from_input(self, video_path: Path) -> List[Path]: 192 | """Extract frames from video using ffmpeg""" 193 | command = ["-vf", f"fps={self.fps}"] if self.fps != 0 else [] 194 | self.run_ffmpeg(video_path, "/tmp/outputs/out%03d.png", command) 195 | 196 | output_files = [] 197 | for filename in os.listdir("/tmp/outputs"): 198 | if filename.endswith(".png") and filename.startswith("out"): 199 | output_files.append(filename) 200 | 201 | with zipfile.ZipFile("/tmp/outputs/frames.zip", "w") as zip_ref: 202 | for filename in output_files: 203 | zip_ref.write(f"/tmp/outputs/{filename}", filename) 204 | 205 | return [Path("/tmp/outputs/frames.zip")] 206 | 207 | def zipped_frames_to(self, input_file: Path, type: str = "mp4") -> List[Path]: 208 | """Convert frames to video using ffmpeg""" 209 | self.unzip(input_file) 210 | frames_directory = "/tmp/outputs/zip" 211 | image_filetypes = ["jpg", "jpeg", "png"] 212 | frame_filetype = None 213 | for file in os.listdir(frames_directory): 214 | potential_filetype = file.split(".")[-1] 215 | if potential_filetype in image_filetypes: 216 | frame_filetype = potential_filetype 217 | break 218 | if frame_filetype is None: 219 | raise ValueError("No image files found in the zip file.") 220 | 221 | command = [ 222 | "-framerate", 223 | str(12 if self.fps == 0 else self.fps), # Set the frame rate 224 | "-pattern_type", 225 | "glob", # Use glob pattern matching 226 | "-i", 227 | f"{frames_directory}/*.{frame_filetype}", 228 | "-pix_fmt", 229 | "yuv420p", # Pixel format: YUV with 4:2:0 chroma subsampling 230 | ] 231 | 232 | if type == "gif": 233 | command.extend( 234 | [ 235 | "-vf", 236 | "scale=512:-1:flags=lanczos", 237 | "-c:v", 238 | "gif", # Video codec: GIF 239 | ] 240 | ) 241 | else: 242 | command.extend( 243 | [ 244 | "-c:v", 245 | "libx264", # Video codec: H.264 246 | ] 247 | ) 248 | 249 | return self.run_ffmpeg(False, f"/tmp/outputs/video.{type}", command) 250 | 251 | def reverse_video(self, video_path: Path) -> List[Path]: 252 | """Reverse video using ffmpeg""" 253 | output_file = "/tmp/outputs/reversed" + video_path.suffix 254 | command = [ 255 | "-vf", 256 | "reverse", 257 | "-af", 258 | "areverse", 259 | ] 260 | 261 | return self.run_ffmpeg(video_path, output_file, command) 262 | 263 | def bounce_video(self, video_path: Path) -> List[Path]: 264 | """Bounce video or gif using ffmpeg""" 265 | reversed_video_path = "/tmp/outputs/reversed" + video_path.suffix 266 | self.reverse_video(video_path) 267 | 268 | with open("/tmp/outputs/concat_list.txt", "w") as f: 269 | f.write(f"file '{video_path}'\nfile '{reversed_video_path}'\n") 270 | 271 | command = [ 272 | "-f", 273 | "concat", 274 | "-safe", 275 | "0", 276 | "-i", 277 | "/tmp/outputs/concat_list.txt", # Use the temporary file as input 278 | ] 279 | 280 | if video_path.suffix == ".gif": 281 | command.extend( 282 | [ 283 | "-vf", 284 | "scale=512:-1:flags=lanczos", 285 | "-c:v", 286 | "gif", # Video codec: GIF 287 | ] 288 | ) 289 | else: 290 | command.extend( 291 | [ 292 | "-c", 293 | "copy", 294 | ] 295 | ) 296 | 297 | return self.run_ffmpeg( 298 | None, f"/tmp/outputs/bounced{video_path.suffix}", command 299 | ) 300 | -------------------------------------------------------------------------------- /samples.py: -------------------------------------------------------------------------------- 1 | """ 2 | To set up, first run a local cog server using: 3 | cog run -p 5000 python -m cog.server.http 4 | Then, in a separate terminal, generate samples 5 | python samples.py 6 | """ 7 | 8 | import base64 9 | import os 10 | import sys 11 | import requests 12 | import glob 13 | import time 14 | 15 | 16 | def run(output_fn, **kwargs): 17 | if glob.glob(f"{output_fn.rsplit('.', 1)[0]}*"): 18 | print("Already ran", output_fn) 19 | return 20 | 21 | prediction_start = time.time() 22 | print("Running prediction", output_fn) 23 | url = "http://localhost:5000/predictions" 24 | response = requests.post(url, json={"input": kwargs}) 25 | print(f"Prediction took: {time.time() - prediction_start:.2f}s") 26 | data = response.json() 27 | try: 28 | for i, datauri in enumerate(data["output"]): 29 | base64_encoded_data = datauri.split(",")[1] 30 | decoded_data = base64.b64decode(base64_encoded_data) 31 | with open( 32 | f"{output_fn.rsplit('.', 1)[0]}_{i}.{output_fn.rsplit('.', 1)[1]}", "wb" 33 | ) as f: 34 | f.write(decoded_data) 35 | print("Wrote", output_fn) 36 | except Exception as e: 37 | print("Error!", str(e)) 38 | print("input:", kwargs) 39 | print(data["logs"]) 40 | sys.exit(1) 41 | 42 | 43 | def main(): 44 | run( 45 | "sample_reverse_video.mp4", 46 | task="reverse_video", 47 | input_file="https://replicate.delivery/pbxt/0hNQY7Gy2eSiG6ghDRkabuJeV4oDNETFB6cWi2NdfB2TdMvhA/out.mp4", 48 | ) 49 | 50 | run( 51 | "sample_bounce_video.mp4", 52 | task="bounce_video", 53 | input_file="https://replicate.delivery/pbxt/CmppJesjwO3jPSmdd1fflCjGeODlOpVy5I0PyXlgLeMmanVRC/video.mp4", 54 | ) 55 | 56 | run( 57 | "sample_bounce_gif.gif", 58 | task="bounce_video", 59 | input_file="https://replicate.delivery/pbxt/KCBdyVkWcgjiCM3nzdg9JpqX8xzTGUimj4mWdpWgCQOm6umr/replicate-prediction-3rcqh5dbvob5u7gsd5vammyfwy.gif", 60 | ) 61 | 62 | run( 63 | "sample_extract_video_audio_as_mp3.mp3", 64 | task="extract_video_audio_as_mp3", 65 | input_file="https://replicate.delivery/pbxt/0hNQY7Gy2eSiG6ghDRkabuJeV4oDNETFB6cWi2NdfB2TdMvhA/out.mp4", 66 | ) 67 | 68 | run( 69 | "sample_convert_to_mp4.mp4", 70 | task="convert_input_to_mp4", 71 | input_file="https://replicate.delivery/pbxt/RU9CI33SMCKMFBFQplELLexGPsOGNIU42VpauosBZZLkhW2IA/tmp.gif", 72 | ) 73 | 74 | run( 75 | "sample_convert_to_mp4_with_fps.mp4", 76 | task="convert_input_to_mp4", 77 | fps=1, 78 | input_file="https://replicate.delivery/pbxt/RU9CI33SMCKMFBFQplELLexGPsOGNIU42VpauosBZZLkhW2IA/tmp.gif", 79 | ) 80 | 81 | run( 82 | "sample_convert_to_gif.gif", 83 | task="convert_input_to_gif", 84 | input_file="https://replicate.delivery/pbxt/0hNQY7Gy2eSiG6ghDRkabuJeV4oDNETFB6cWi2NdfB2TdMvhA/out.mp4", 85 | ) 86 | 87 | run( 88 | "sample_frames_to_mp4.mp4", 89 | task="zipped_frames_to_mp4", 90 | input_file="https://replicate.delivery/pbxt/IyPciuTwd9miRkQm3AVd4ZZrNta1i1M8rKs7vJtpy83uAIIi/frames.zip", 91 | ) 92 | 93 | run( 94 | "sample_frames_to_mp4_with_fps.mp4", 95 | task="zipped_frames_to_mp4", 96 | fps=1, 97 | input_file="https://replicate.delivery/pbxt/IyPciuTwd9miRkQm3AVd4ZZrNta1i1M8rKs7vJtpy83uAIIi/frames.zip", 98 | ) 99 | 100 | run( 101 | "sample_frames_to_gif.gif", 102 | task="zipped_frames_to_gif", 103 | input_file="https://replicate.delivery/pbxt/IyPciuTwd9miRkQm3AVd4ZZrNta1i1M8rKs7vJtpy83uAIIi/frames.zip", 104 | ) 105 | 106 | run( 107 | "sample_frames_to_gif_with_fps.gif", 108 | task="zipped_frames_to_gif", 109 | fps=1, 110 | input_file="https://replicate.delivery/pbxt/IyPciuTwd9miRkQm3AVd4ZZrNta1i1M8rKs7vJtpy83uAIIi/frames.zip", 111 | ) 112 | 113 | run( 114 | "sample_extract_frames_from_input.zip", 115 | task="extract_frames_from_input", 116 | fps=12, 117 | input_file="https://replicate.delivery/pbxt/0hNQY7Gy2eSiG6ghDRkabuJeV4oDNETFB6cWi2NdfB2TdMvhA/out.mp4", 118 | ) 119 | 120 | 121 | if __name__ == "__main__": 122 | main() 123 | --------------------------------------------------------------------------------