├── .gitignore ├── .style.yapf ├── fast_scene_detection ├── __init__.py ├── exceptions.py ├── scene.py ├── video_library.py ├── video_file_handler.py ├── video.py └── scene_extractor.py ├── requirements.txt ├── main.py └── README.md /.gitignore: -------------------------------------------------------------------------------- 1 | venv -------------------------------------------------------------------------------- /.style.yapf: -------------------------------------------------------------------------------- 1 | [style] 2 | based_on_style = pep8 3 | column_limit=120 -------------------------------------------------------------------------------- /fast_scene_detection/__init__.py: -------------------------------------------------------------------------------- 1 | from .exceptions import InvalidPathException 2 | from .scene import Scene 3 | from .scene_extractor import SceneExtractor 4 | from .video import Video 5 | from .video_file_handler import VideoFileHandler 6 | from .video_library import VideoLibrary 7 | -------------------------------------------------------------------------------- /fast_scene_detection/exceptions.py: -------------------------------------------------------------------------------- 1 | class InvalidPathException(Exception): 2 | def __init__(self): 3 | Exception.__init__( 4 | self, "The path you've provided must be a supported video file path or a directory " 5 | "containing supported video files") 6 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | cycler==0.10.0 2 | ImageHash==4.0 3 | kiwisolver==1.0.1 4 | matplotlib==3.0.2 5 | numpy==1.16.0 6 | opencv-python==4.0.0.21 7 | Pillow==6.2.0 8 | pyparsing==2.3.1 9 | python-dateutil==2.7.5 10 | PyWavelets==1.0.1 11 | scipy==1.2.0 12 | six==1.12.0 13 | tqdm==4.29.1 14 | -------------------------------------------------------------------------------- 
# /fast_scene_detection/scene.py: ---------------------------------------------
from datetime import timedelta

import numpy as np


class Scene:
    """Stores the attributes of a single detected scene for later processing."""

    def __init__(self, frame, hash_delta, frame_number, frames_per_second):
        """
        :param frame: image data for the scene's representative frame (stored as a numpy array)
        :param hash_delta: the perceptual-hash Hamming distance that flagged this scene
        :param frame_number: the index of the frame within the source video
        :param frames_per_second: the source video's frame rate, used to derive the timestamp
        """
        self.frame = np.array(frame)
        self.hash_delta = hash_delta
        self.frame_number = frame_number
        # Position of the scene within the video, derived from frame index and FPS
        self.time_stamp = timedelta(seconds=frame_number / frames_per_second)


# /fast_scene_detection/video_library.py: -------------------------------------
# Fixed: previously imported CPython's private accelerator module (`import _pickle as pickle`);
# the public `pickle` module is the supported API and uses the C accelerator automatically.
import os
import pickle
from pathlib import Path


class VideoLibrary:
    """Handles storing previously segmented scenes for later use."""

    def __init__(self, video_list):
        """
        :param video_list: a list of Video objects
        """
        self.video_list = video_list

    def append(self, video):
        """Appends a video to the library."""
        self.video_list.append(video)

    def save(self, save_directory, file_name="video_collection"):
        """Saves the library's video list to disk as a pickle file.

        :param save_directory: a string representing a destination directory (created if missing)
        :param file_name: a string representing the name of the saved file
        :return None:
        """
        Path(save_directory).mkdir(parents=True, exist_ok=True)

        with open(os.path.join(save_directory, file_name), "wb") as fp:
            pickle.dump(self.video_list, fp)

    def open(self, library_path):
        """Loads a video list from a source path.

        SECURITY: pickle.load can execute arbitrary code; only open library files you created yourself.

        :param library_path: a string representing a path containing a video library pickle object"""
        with open(library_path, "rb") as fp:
            self.video_list = pickle.load(fp)
/fast_scene_detection/video_file_handler.py: -------------------------------------------------------------------------------- 1 | import glob 2 | import os 3 | 4 | from .exceptions import InvalidPathException 5 | 6 | 7 | class VideoFileHandler: 8 | """Class for parsing and handling video input sources""" 9 | def __init__(self, video_source): 10 | self.video_source = video_source 11 | 12 | # Define valid file extensions and initialize paths list store 13 | self.valid_file_extensions = [".mp4", ".wmv", ".avi", ".mpeg", ".mkv"] 14 | self.video_paths_list = [] 15 | 16 | self.parse_video_source() 17 | 18 | def parse_video_source(self): 19 | """Creates a list of videos from a video or directory path""" 20 | # If the video source given is a directory, get all the files in that directory and extract valid video paths 21 | if os.path.isdir(self.video_source): 22 | # Compose a list of paths for use in the glob module 23 | glob_paths = [os.path.join(self.video_source, f"*{extension}") for extension in self.valid_file_extensions] 24 | 25 | # Gather the video file paths 26 | for glob_path in glob_paths: 27 | full_video_path = glob.glob(glob_path) 28 | # Ensure added globs are non-empty 29 | if len(full_video_path) > 0: 30 | self.video_paths_list.extend(full_video_path) 31 | 32 | if len(self.video_paths_list) == 0: 33 | raise InvalidPathException 34 | # If the given path is a file and ends with a valid extension 35 | elif os.path.isfile(self.video_source) and self.video_source.endswith(tuple(self.valid_file_extensions)): 36 | self.video_paths_list.append(self.video_source) 37 | else: 38 | raise InvalidPathException 39 | -------------------------------------------------------------------------------- /main.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | 3 | from fast_scene_detection.scene_extractor import SceneExtractor 4 | from fast_scene_detection.video_library import VideoLibrary 5 | 6 | 7 | def arg_parser(): 8 | 
"""Parses user's command line input""" 9 | parser = argparse.ArgumentParser() 10 | parser.add_argument('--source_path', 11 | action='store', 12 | help='A video source file or directory containing videos', 13 | dest='source_path') 14 | parser.add_argument('--save_path', 15 | action='store', 16 | help='A path to which you would like to save the segmented ' 17 | 'videos', 18 | dest='save_path') 19 | parser.add_argument('--library_path', 20 | action='store', 21 | help='A path to a pickled video video library object', 22 | dest='library_path') 23 | parser.add_argument('--visualize_scenes', 24 | action='store_true', 25 | help='Whether to visualize scenes or not', 26 | dest='visualize_scenes') 27 | cmdline_args = parser.parse_args() 28 | 29 | return cmdline_args 30 | 31 | 32 | def main(): 33 | # Parse user command line arguments and initialize the video library object 34 | cmdline_args = arg_parser() 35 | video_library = VideoLibrary(video_list=[]) 36 | 37 | # Check if we're going to open previously extracted scenes or if we'll need to extract them 38 | if cmdline_args.library_path: 39 | print(f"Opening a video library from path: {cmdline_args.library_path}") 40 | video_library.open(library_path=cmdline_args.library_path) 41 | elif cmdline_args.source_path: 42 | print(f"Extracting scenes from path: {cmdline_args.source_path}") 43 | extractor = SceneExtractor(video_source=cmdline_args.source_path, 44 | step_size_constant=0.00429584, 45 | video_library=video_library) 46 | extractor.process_scenes() 47 | 48 | # Print the scene timestamp and, optionally, visualize the scenes 49 | for segmented_video in video_library.video_list: 50 | segmented_video.get_scenes(n=40) 51 | 52 | if cmdline_args.visualize_scenes: 53 | segmented_video.visualize_scenes() 54 | 55 | # If user has specified path, save the video 56 | if cmdline_args.save_path: 57 | print(f"Saving segmented videos to library at: {cmdline_args.save_path}") 58 | video_library.save(save_directory=cmdline_args.save_path) 59 
| 60 | # Ask for user input so the visualization doesn't close immediately 61 | input("Press ENTER to exit.") 62 | 63 | 64 | if __name__ == '__main__': 65 | main() 66 | -------------------------------------------------------------------------------- /fast_scene_detection/video.py: -------------------------------------------------------------------------------- 1 | import cv2 2 | import matplotlib.pyplot as plt 3 | 4 | 5 | class Video: 6 | """Class that handles storing and visualizing scenes of a particular video""" 7 | def __init__(self, scenes): 8 | """ 9 | :param scenes: A list of sorted Scene object 10 | """ 11 | self.scenes = scenes 12 | self.sorted_scenes = None 13 | 14 | def get_scenes(self, n=40): 15 | """Extracts the top n scenes from a video, ranked by the largest changes in consecutive perceptual hashes 16 | 17 | :param n: an integer to determine the number of scenes to extract from a video 18 | :return None: 19 | """ 20 | # Sort the scenes list according to each scene's hash change, from largest to smallest 21 | self.sorted_scenes = sorted(self.scenes, key=lambda scene_key: scene_key.hash_delta, reverse=True) 22 | 23 | # Truncate the list to preserve only the largest hash change 24 | self.sorted_scenes = self.sorted_scenes[:n] 25 | 26 | # Resort the scenes according to the time they occurred 27 | self.sorted_scenes = sorted(self.sorted_scenes, key=lambda scene_key: scene_key.frame_number) 28 | 29 | print(f"These are the top {n} scene timestamps:") 30 | for scene in self.sorted_scenes: 31 | print(f"- {scene.time_stamp}") 32 | 33 | def visualize_scenes(self): 34 | """Once we've found top N scene changes, use this function to visualize each scene according to time stamp""" 35 | ''' 36 | Suppose we want to have twice as many columns as there are rows in visualization 37 | number_of_scenes = number_of_rows * number_of_columns 38 | number_of_columns = 2 * number_of_rows 39 | --> number_of_scenes = number_of_rows * (2 * number_of_rows) 40 | --> 
number_of_columns = 2 * (sqrt(number_of_scenes / 2)) 41 | ''' 42 | number_of_scenes = len(self.sorted_scenes) 43 | number_of_columns = int(2 * ((number_of_scenes / 2)**0.5)) 44 | number_of_rows = int((number_of_scenes / 2)**0.5) 45 | 46 | just_incremented_rows = False 47 | 48 | # Enable interactivity mode to allow us to continuously display plots 49 | plt.ion() 50 | 51 | # Since we're calling int() on column/row values, number_of_columns * number_of_rows may be < number_of_scenes 52 | # This happens since int() may round a decimal down 53 | # Increment either row or column number until their product is > number_of_scenes 54 | while number_of_columns * number_of_rows < number_of_scenes: 55 | if not just_incremented_rows: 56 | number_of_rows += 1 57 | just_incremented_rows = True 58 | else: 59 | number_of_columns += 1 60 | just_incremented_rows = False 61 | 62 | fig = plt.figure() 63 | fig.canvas.manager.full_screen_toggle() 64 | 65 | # Display each scene as a subplot in matplotlib 66 | for counter, scene in enumerate(self.sorted_scenes): 67 | 68 | y = fig.add_subplot(number_of_rows, number_of_columns, counter + 1) 69 | 70 | plt.subplots_adjust(left=0, bottom=0, right=1.0, top=0.99, wspace=0.0, hspace=0.07) 71 | y.imshow(cv2.cvtColor(scene.frame, cv2.COLOR_BGR2RGB)) 72 | 73 | # If want to continuously display plots for multiple videos in directory, we need to insert this pause 74 | # https://stackoverflow.com/questions/11874767/how-do-i-plot-in-real-time-in-a-while-loop-using-matplotlib 75 | plt.pause(.0000001) 76 | 77 | # Label each scene with its corresponding time 78 | plt.title(scene.time_stamp) 79 | 80 | y.axes.get_xaxis().set_visible(False) 81 | y.axes.get_yaxis().set_visible(False) 82 | 83 | plt.show() 84 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | Fast Scene Detection 2 | -------------------- 3 | 4 | HD videos are large files 
that contain a huge amount of information. However, a video can be summarized by extracting 5 | the scenes that comprise it. This tool attempts to extract scenes quickly from an input video. 6 | 7 | 8 | Installation 9 | ------------ 10 | 11 | Make a virtualenv and install the dependencies by running the following 3 commands: 12 | 13 | ``` 14 | virtualenv venv 15 | source venv/bin/activate 16 | python3 -m pip install -r requirements.txt 17 | ``` 18 | 19 | 20 | Usage 21 | ----- 22 | 23 | General: 24 | 25 | ``` 26 | usage: main.py [-h] [--source_path SOURCE_PATH] [--save_path SAVE_PATH] 27 | [--library_path LIBRARY_PATH] [--visualize_scenes] 28 | ``` 29 | 30 | Available options: 31 | 32 | | Argument | Values | Purpose | 33 | |---------------------|----------------------------------------------|----------------------------------------------------| 34 | |`--source_path` | "path/to/video.mp4", "path/to/video_folder" | A video source file or directory containing videos | 35 | |`--save_path` | "path/to/save/to" | A path to save a pickled segmented video object | 36 | |`--library_path` | "path/to/pickled/video_collection" | A path to a saved segmented video pickle object | 37 | |`--visualize_scenes` | None | Whether or not to visualize scenes | 38 | 39 | 40 | FAQs 41 | --- 42 | 43 | 1. #### What is this? 44 | 45 | It's a small package that attempts to divide a video into scenes based on its content. You give it a video and it 46 | gives you a list of timestamps where each scene starts. It also lets you visualize and save extracted scenes. 47 | 48 | 2. #### How does it work? 49 | 50 | It works by taking the perceptual hashes (pHash) of input video frames. 51 | If the perceptual hash difference of consecutive frames is large (i.e. their Hamming distance), then this is considered 52 | a scene change. The top N biggest hash differences are returned and are considered to be the "scenes" of the video. 53 | 54 | 3. #### What makes it fast? 
55 | 56 | Higher speed is achieved by reducing scene search granularity: i.e. not all frames are hashed. For a video with 100,000 57 | frames, this tool would hash 1 out of every ~430 frames. If the video is 60 FPS, then that means a scene change is 58 | checked for roughly every ~7 seconds. The trade-off between search granularity and speed is controllable via command 59 | line parameters. 60 | 61 | 4. #### Aren't there tools that already do this kind of thing? 62 | 63 | There is a great library called PySceneDetect that is commonly used for scene detection. It works great but it seemed a 64 | little slow for longer videos so I threw this script together quickly. I highly recommend PySceneDetect, though! 65 | 66 | 5. #### Is this method even a good way to extract scenes from a video? 67 | 68 | Truthfully, I can't give you an objective answer, but I think it's okay based on my subjective experience. 69 | However, for all I know, it could be horrible compared to other methods! If someone runs the results through an 70 | objective metric of scene detection, tell me how this script does compared to other things. This is purely experimental, 71 | so use at your own discretion. 72 | 73 | 6. #### What are the limitations? 74 | 75 | Within the results, there are times when 2 or more consecutive scenes look too similar. I suspect this happens 76 | because the perceptual hashes of the frames are sufficiently different to appear in the results, but don't necessarily 77 | qualify for what a human might call two different scenes. 78 | 79 | Also, fade transitions, where two or more frames slowly blend into one another, could give this algorithm a very hard 80 | time. Transition effects are problematic because the Hamming distance of perceptual hashes of blended frames can be 81 | low, and therefore may not register as a scene change. 
82 | 83 | 84 | Sample Visualization 85 | -------------------- 86 | 87 | ![](https://i.imgur.com/R1ZNw1p.jpg) 88 | -------------------------------------------------------------------------------- /fast_scene_detection/scene_extractor.py: -------------------------------------------------------------------------------- 1 | import cv2 2 | import imagehash 3 | from PIL import Image 4 | from tqdm import tqdm 5 | 6 | from .scene import Scene 7 | from .video import Video 8 | from .video_file_handler import VideoFileHandler 9 | 10 | 11 | class SceneExtractor: 12 | """Class for handling videos and segmenting them into scenes""" 13 | def __init__(self, video_source, step_size_constant=0.00429584, video_library=None): 14 | self.hash_size = 128 15 | self.step_size_constant = step_size_constant 16 | self.video_library = video_library 17 | 18 | self.file_handler = VideoFileHandler(video_source) 19 | 20 | @staticmethod 21 | def get_video_details(video_capture): 22 | """Extracts the frames per second and number of frames in an given video input 23 | 24 | :param video_capture: a cv2 video capture object 25 | :return frames_per_second: a float value storing the video frames per second 26 | :return number_of_frames: an integer storing the number of frames in an input video 27 | """ 28 | frames_per_second = video_capture.get(cv2.CAP_PROP_FPS) 29 | number_of_frames = int(video_capture.get(cv2.CAP_PROP_FRAME_COUNT)) 30 | 31 | return frames_per_second, number_of_frames 32 | 33 | def process_scenes(self): 34 | """Iterates through a list of video paths and performs processing on each video""" 35 | for video_path in self.file_handler.video_paths_list: 36 | segmented_video = self.segment_video(video_path) 37 | 38 | if self.video_library is not None: 39 | self.video_library.append(segmented_video) 40 | 41 | def process_frames(self, video_capture, number_of_frames, frames_per_second, step_size): 42 | """Iterates through a stream of video frames and extracts hashes to determine video scenes 43 | 
44 | :param video_capture: a cv2 video capture object 45 | :param number_of_frames: an integer storing the number of frames in an input video 46 | :param frames_per_second: a float value storing the video frames per second 47 | :param step_size: an integer that determines rate at which to skip frames. i.e. if step_size = 3, the 1st frame 48 | is read, 3 frames are skipped, the 4th frame is read, 3 are skipped, the 7th read and so on 49 | :return scenes_list: a list of Scene objects 50 | """ 51 | previous_frame_hash, current_frame_hash, hash_delta, current_frame_number = None, None, None, 0 52 | scenes_list = [] 53 | 54 | # Iterate through all the video frames while the capture is open 55 | progress_bar = tqdm(total=number_of_frames) 56 | while video_capture.isOpened(): 57 | current_frame_number += 1 58 | progress_bar.update(1) 59 | 60 | # Allows us to skip some number of frames defined by stepsize 61 | # Note: Since setting CAP_PROP_POS_FRAMES is slow, we'll need a sufficiently large step size for us to 62 | # take advantage of the speed boost that it offers in terms of skipping frames 63 | if current_frame_number % step_size != 0: 64 | continue 65 | 66 | # Tell OpenCV to start reading the video from the current frame number 67 | video_capture.set(cv2.CAP_PROP_POS_FRAMES, current_frame_number) 68 | ret, frame = video_capture.read() 69 | 70 | if not ret: 71 | video_capture.release() 72 | break 73 | 74 | frame = Image.fromarray(frame) 75 | previous_frame_hash, hash_delta = self.calculate_frame_hashes(frame, previous_frame_hash, hash_delta) 76 | 77 | if hash_delta is not None: 78 | scenes_list.append(Scene(frame, hash_delta, current_frame_number, frames_per_second)) 79 | 80 | return scenes_list 81 | 82 | def calculate_frame_hashes(self, frame, previous_frame_hash, hash_delta): 83 | """Uses the imagehash library to calculate the perceptual hash differences between frames 84 | 85 | :param frame: a pillow image object 86 | :param previous_frame_hash: the perceptual hash 
of the previously read frame 87 | :param hash_delta: the change in the perceptual hash, i.e. the Hamming distance 88 | :return previous_frame_hash: the perceptual hash of the previously read frame 89 | :return hash_delta: the change in the perceptual hash, i.e. the Hamming distance 90 | """ 91 | # Handle special case where we're just reading the first frame, and there's nothing else to compare it to 92 | if previous_frame_hash is None: 93 | previous_frame_hash = imagehash.phash(frame, hash_size=self.hash_size) 94 | else: 95 | # Calculate the current frame's hash and calculate the Hamming distance so we can 96 | # compare it to previous frame 97 | current_frame_hash = imagehash.phash(frame, hash_size=self.hash_size) 98 | hash_delta = previous_frame_hash - current_frame_hash 99 | previous_frame_hash = current_frame_hash 100 | 101 | return previous_frame_hash, hash_delta 102 | 103 | def segment_video(self, video_path): 104 | """Reads and segments a given video into scenes 105 | 106 | :param video_path: a string representing a path to a video 107 | :return video: a Video object containing scenes 108 | """ 109 | # Create a video capture and get the video details 110 | video_capture = cv2.VideoCapture(video_path) 111 | frames_per_second, number_of_frames = self.get_video_details(video_capture) 112 | 113 | # The stepsize defines how many frames we skip when we compare hashes to see if the scene has changed 114 | # We define a stepsize constant so we can adapt the stepsize based on the length of the video we're breaking 115 | # into scenes 116 | step_size = int(number_of_frames * self.step_size_constant) 117 | if step_size < 1: 118 | step_size = 1 119 | 120 | # Get the scenes and create the video object from them 121 | scenes_list = self.process_frames(video_capture, number_of_frames, frames_per_second, step_size) 122 | video = Video(scenes_list) 123 | 124 | return video 125 | --------------------------------------------------------------------------------