├── LICENSE
├── README.md
├── app.py
├── edited_output.mp4
└── requirements.txt

/LICENSE:
--------------------------------------------------------------------------------
MIT License

Copyright (c) 2024 SamurAIGPT

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
# Clip Anything

Clip any moment from any video with prompts.

Multimodal AI clipping lets you clip any moment from any video using visual, audio, and sentiment cues. Just type your prompt, and the AI will clip the right moments for you.

If you wish to add Clip Anything to your application, here is an API to create clips from long-form videos: https://docs.vadoo.tv/docs/guide/create-ai-clips

### YouTube Tutorial -> https://youtu.be/R_3kexWz4TU

### Medium Article -> https://medium.com/@anilmatcha/clipanything-free-ai-video-editor-in-python-tutorial-526f7a972829

![hqdefault](https://github.com/user-attachments/assets/9689a74c-598a-4aab-b02e-54673941c2b9)

### Features

##### Advanced Video Analysis

Every frame of your video is analyzed, combining visual, audio, and sentiment cues to identify objects, scenes, actions, sounds, emotions, text, and more. Each scene is rated for its viral potential, giving you insight into what makes your content compelling.

##### Customizable Video Clipping

Tailor clips to your exact needs. Whether you are compiling highlights from a sports game or showcasing the best moments of a travel vlog, simply enter a prompt and the key moments that match it are captured automatically.
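##### Quickstart

Under the hood, `app.py` runs three steps: transcribe with Whisper, pick relevant conversations with an LLM, and cut the clips with MoviePy. A minimal sketch of driving those steps directly (it assumes a local `input_video.mp4` and a `GROQ_API_KEY` environment variable; the function names mirror `app.py`):

```python
from app import transcribe_video, get_relevant_segments, edit_video

# 1. Speech-to-text: timestamped transcript segments
transcription = transcribe_video("input_video.mp4", model_name="base")

# 2. LLM selection: conversations that match the prompt
segments = get_relevant_segments(transcription, "Find all clips where there is discussion around GPT-4 Turbo")

# 3. Cut and concatenate the selected moments
edit_video("input_video.mp4", segments, "edited_output.mp4")
```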
##### Demo Input -> https://www.youtube.com/watch?v=U9mJuUkhUzk

##### Output Video -> https://github.com/SamurAIGPT/ClipAnything/blob/main/edited_output.mp4
--------------------------------------------------------------------------------
/app.py:
--------------------------------------------------------------------------------
import os
import json
import requests
import whisper
from moviepy.editor import VideoFileClip, concatenate_videoclips


# Step 1: Transcribe the video with Whisper.
def transcribe_video(video_path, model_name="base"):
    model = whisper.load_model(model_name)
    audio_path = "temp_audio.wav"
    # Extract mono 16 kHz WAV audio for Whisper; -y overwrites a stale temp file.
    os.system(f'ffmpeg -y -i "{video_path}" -ar 16000 -ac 1 "{audio_path}"')
    result = model.transcribe(audio_path)
    transcription = []
    for segment in result['segments']:
        transcription.append({
            'start': segment['start'],
            'end': segment['end'],
            'text': segment['text'].strip()
        })
    os.remove(audio_path)  # clean up the temporary audio file
    return transcription

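# The transcript passed to get_relevant_segments below is the list built above;
# a sample of its shape (timestamps and text are illustrative, not from a real run):
# [{'start': 0.0, 'end': 4.2, 'text': 'Welcome back to the channel.'},
#  {'start': 4.2, 'end': 9.8, 'text': "Let's talk about GPT-4 Turbo."}]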

# Step 2: Ask an LLM (via the Groq API) which conversations match the user query.
def get_relevant_segments(transcript, user_query):
    prompt = f"""You are an expert video editor who can read video transcripts and perform video editing. Given a transcript with segments, your task is to identify all the conversations related to a user query. A group of continuous segments in the transcript is a conversation. Follow these guidelines when choosing conversations:

Guidelines:
1. The conversation should be relevant to the user query and should include more than one segment, to provide context and continuity.
2. Include all the segments needed before and after a conversation to make it complete.
3. The conversation should not cut off in the middle of a sentence or idea.
4. Choose multiple conversations from the transcript that are relevant to the user query.
5. Match the start and end times of the conversations using the segment timestamps from the transcript.
6. The conversations should be a direct part of the video and should not be out of context.
7. Respond with only the JSON object in the output format below.

Output format: {{ "conversations": [{{"start": "s1", "end": "e1"}}, {{"start": "s2", "end": "e2"}}] }}

Transcript:
{transcript}

User query:
{user_query}"""
    url = "https://api.groq.com/openai/v1/chat/completions"
    headers = {
        "Content-Type": "application/json",
        # Set the GROQ_API_KEY environment variable with your Groq API key.
        "Authorization": f"Bearer {os.environ['GROQ_API_KEY']}"
    }

    data = {
        "messages": [
            {
                "role": "system",
                "content": prompt
            }
        ],
        "model": "llama-3.1-70b-versatile",
        "temperature": 1,
        "max_tokens": 1024,
        "top_p": 1,
        "stream": False,
        "stop": None
    }
    response = requests.post(url, headers=headers, json=data)
    content = response.json()["choices"][0]["message"]["content"]
    # The model is instructed to return only a JSON object, so parse it directly.
    conversations = json.loads(content)["conversations"]
    return conversations


# Step 3: Cut the selected segments out of the original video and join them.
def edit_video(original_video_path, segments, output_video_path, fade_duration=0.5):
    video = VideoFileClip(original_video_path)
    clips = []
    for seg in segments:
        # Coerce to float in case the model returned timestamps as strings.
        start = float(seg['start'])
        end = float(seg['end'])
        clip = video.subclip(start, end).fadein(fade_duration).fadeout(fade_duration)
        clips.append(clip)
    if clips:
        final_clip = concatenate_videoclips(clips, method="compose")
        final_clip.write_videofile(output_video_path, codec="libx264", audio_codec="aac")
    else:
        print("No segments to include in the edited video.")


# Main function: run the full pipeline.
def main():
    # Paths
    input_video = "input_video.mp4"
    output_video = "edited_output.mp4"

    # User query
    user_query = "Find all clips where there is discussion around GPT-4 Turbo"

    # Step 1: Transcribe
    print("Transcribing video...")
    transcription = transcribe_video(input_video, model_name="base")

    # Step 2: Find the relevant segments
    print("Finding relevant segments...")
    relevant_segments = get_relevant_segments(transcription, user_query)

    # Step 3: Edit the video
    print("Editing video...")
    edit_video(input_video, relevant_segments, output_video)
    print(f"Edited video saved to {output_video}")


if __name__ == "__main__":
    main()
--------------------------------------------------------------------------------
/edited_output.mp4:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/SamurAIGPT/ClipAnything/ef7a034680662dac220da8b17c6e73ec1040b4a7/edited_output.mp4
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
certifi==2024.8.30
charset-normalizer==3.3.2
decorator==4.4.2
filelock==3.16.1
fsspec==2024.9.0
idna==3.10
imageio==2.35.1
imageio-ffmpeg==0.5.1
Jinja2==3.1.4
llvmlite==0.43.0
MarkupSafe==2.1.5
more-itertools==10.5.0
moviepy==1.0.3
mpmath==1.3.0
networkx==3.3
numba==0.60.0
numpy==2.0.2
openai-whisper==20231117
pillow==10.4.0
proglog==0.1.10
regex==2024.9.11
requests==2.32.3
sympy==1.13.2
tiktoken==0.7.0
torch==2.4.1
tqdm==4.66.5
typing_extensions==4.12.2
urllib3==2.2.3
--------------------------------------------------------------------------------