├── .gitignore ├── .style.yapf ├── fast_scene_detection ├── __init__.py ├── exceptions.py ├── scene.py ├── video_library.py ├── video_file_handler.py ├── video.py └── scene_extractor.py ├── requirements.txt ├── main.py └── README.md /.gitignore: -------------------------------------------------------------------------------- 1 | venv -------------------------------------------------------------------------------- /.style.yapf: -------------------------------------------------------------------------------- 1 | [style] 2 | based_on_style = pep8 3 | column_limit=120 -------------------------------------------------------------------------------- /fast_scene_detection/__init__.py: -------------------------------------------------------------------------------- 1 | from .exceptions import InvalidPathException 2 | from .scene import Scene 3 | from .scene_extractor import SceneExtractor 4 | from .video import Video 5 | from .video_file_handler import VideoFileHandler 6 | from .video_library import VideoLibrary 7 | -------------------------------------------------------------------------------- /fast_scene_detection/exceptions.py: -------------------------------------------------------------------------------- 1 | class InvalidPathException(Exception): 2 | def __init__(self): 3 | Exception.__init__( 4 | self, "The path you've provided must be a supported video file path or a directory " 5 | "containing supported video files") 6 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | cycler==0.10.0 2 | ImageHash==4.0 3 | kiwisolver==1.0.1 4 | matplotlib==3.0.2 5 | numpy==1.16.0 6 | opencv-python==4.0.0.21 7 | Pillow==6.2.0 8 | pyparsing==2.3.1 9 | python-dateutil==2.7.5 10 | PyWavelets==1.0.1 11 | scipy==1.2.0 12 | six==1.12.0 13 | tqdm==4.29.1 14 | -------------------------------------------------------------------------------- 
# /fast_scene_detection/scene.py: ---------------------------------------------
from datetime import timedelta

import numpy as np


class Scene:
    """Stores the attributes of a single detected scene for later processing."""

    def __init__(self, frame, hash_delta, frame_number, frames_per_second):
        """
        :param frame: image data for the scene's representative frame (stored as a numpy array)
        :param hash_delta: the perceptual-hash Hamming distance that flagged this scene
        :param frame_number: the index of the frame within the source video
        :param frames_per_second: the source video's frame rate, used to derive the timestamp
        """
        self.frame = np.array(frame)
        self.hash_delta = hash_delta
        self.frame_number = frame_number
        # Position of the scene within the video, derived from frame index and FPS
        self.time_stamp = timedelta(seconds=frame_number / frames_per_second)


# /fast_scene_detection/video_library.py: -------------------------------------
# Fixed: previously imported CPython's private accelerator module (`import _pickle as pickle`);
# the public `pickle` module is the supported API and uses the C accelerator automatically.
import os
import pickle
from pathlib import Path


class VideoLibrary:
    """Handles storing previously segmented scenes for later use."""

    def __init__(self, video_list):
        """
        :param video_list: a list of Video objects
        """
        self.video_list = video_list

    def append(self, video):
        """Appends a video to the library."""
        self.video_list.append(video)

    def save(self, save_directory, file_name="video_collection"):
        """Saves the library's video list to disk as a pickle file.

        :param save_directory: a string representing a destination directory (created if missing)
        :param file_name: a string representing the name of the saved file
        :return None:
        """
        Path(save_directory).mkdir(parents=True, exist_ok=True)

        with open(os.path.join(save_directory, file_name), "wb") as fp:
            pickle.dump(self.video_list, fp)

    def open(self, library_path):
        """Loads a video list from a source path.

        SECURITY: pickle.load can execute arbitrary code; only open library files you created yourself.

        :param library_path: a string representing a path containing a video library pickle object"""
        with open(library_path, "rb") as fp:
            self.video_list = pickle.load(fp)
/fast_scene_detection/video_file_handler.py: -------------------------------------------------------------------------------- 1 | import glob 2 | import os 3 | 4 | from .exceptions import InvalidPathException 5 | 6 | 7 | class VideoFileHandler: 8 | """Class for parsing and handling video input sources""" 9 | def __init__(self, video_source): 10 | self.video_source = video_source 11 | 12 | # Define valid file extensions and initialize paths list store 13 | self.valid_file_extensions = [".mp4", ".wmv", ".avi", ".mpeg", ".mkv"] 14 | self.video_paths_list = [] 15 | 16 | self.parse_video_source() 17 | 18 | def parse_video_source(self): 19 | """Creates a list of videos from a video or directory path""" 20 | # If the video source given is a directory, get all the files in that directory and extract valid video paths 21 | if os.path.isdir(self.video_source): 22 | # Compose a list of paths for use in the glob module 23 | glob_paths = [os.path.join(self.video_source, f"*{extension}") for extension in self.valid_file_extensions] 24 | 25 | # Gather the video file paths 26 | for glob_path in glob_paths: 27 | full_video_path = glob.glob(glob_path) 28 | # Ensure added globs are non-empty 29 | if len(full_video_path) > 0: 30 | self.video_paths_list.extend(full_video_path) 31 | 32 | if len(self.video_paths_list) == 0: 33 | raise InvalidPathException 34 | # If the given path is a file and ends with a valid extension 35 | elif os.path.isfile(self.video_source) and self.video_source.endswith(tuple(self.valid_file_extensions)): 36 | self.video_paths_list.append(self.video_source) 37 | else: 38 | raise InvalidPathException 39 | -------------------------------------------------------------------------------- /main.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | 3 | from fast_scene_detection.scene_extractor import SceneExtractor 4 | from fast_scene_detection.video_library import VideoLibrary 5 | 6 | 7 | def arg_parser(): 8 | 
"""Parses user's command line input""" 9 | parser = argparse.ArgumentParser() 10 | parser.add_argument('--source_path', 11 | action='store', 12 | help='A video source file or directory containing videos', 13 | dest='source_path') 14 | parser.add_argument('--save_path', 15 | action='store', 16 | help='A path to which you would like to save the segmented ' 17 | 'videos', 18 | dest='save_path') 19 | parser.add_argument('--library_path', 20 | action='store', 21 | help='A path to a pickled video video library object', 22 | dest='library_path') 23 | parser.add_argument('--visualize_scenes', 24 | action='store_true', 25 | help='Whether to visualize scenes or not', 26 | dest='visualize_scenes') 27 | cmdline_args = parser.parse_args() 28 | 29 | return cmdline_args 30 | 31 | 32 | def main(): 33 | # Parse user command line arguments and initialize the video library object 34 | cmdline_args = arg_parser() 35 | video_library = VideoLibrary(video_list=[]) 36 | 37 | # Check if we're going to open previously extracted scenes or if we'll need to extract them 38 | if cmdline_args.library_path: 39 | print(f"Opening a video library from path: {cmdline_args.library_path}") 40 | video_library.open(library_path=cmdline_args.library_path) 41 | elif cmdline_args.source_path: 42 | print(f"Extracting scenes from path: {cmdline_args.source_path}") 43 | extractor = SceneExtractor(video_source=cmdline_args.source_path, 44 | step_size_constant=0.00429584, 45 | video_library=video_library) 46 | extractor.process_scenes() 47 | 48 | # Print the scene timestamp and, optionally, visualize the scenes 49 | for segmented_video in video_library.video_list: 50 | segmented_video.get_scenes(n=40) 51 | 52 | if cmdline_args.visualize_scenes: 53 | segmented_video.visualize_scenes() 54 | 55 | # If user has specified path, save the video 56 | if cmdline_args.save_path: 57 | print(f"Saving segmented videos to library at: {cmdline_args.save_path}") 58 | video_library.save(save_directory=cmdline_args.save_path) 59 
| 60 | # Ask for user input so the visualization doesn't close immediately 61 | input("Press ENTER to exit.") 62 | 63 | 64 | if __name__ == '__main__': 65 | main() 66 | -------------------------------------------------------------------------------- /fast_scene_detection/video.py: -------------------------------------------------------------------------------- 1 | import cv2 2 | import matplotlib.pyplot as plt 3 | 4 | 5 | class Video: 6 | """Class that handles storing and visualizing scenes of a particular video""" 7 | def __init__(self, scenes): 8 | """ 9 | :param scenes: A list of sorted Scene object 10 | """ 11 | self.scenes = scenes 12 | self.sorted_scenes = None 13 | 14 | def get_scenes(self, n=40): 15 | """Extracts the top n scenes from a video, ranked by the largest changes in consecutive perceptual hashes 16 | 17 | :param n: an integer to determine the number of scenes to extract from a video 18 | :return None: 19 | """ 20 | # Sort the scenes list according to each scene's hash change, from largest to smallest 21 | self.sorted_scenes = sorted(self.scenes, key=lambda scene_key: scene_key.hash_delta, reverse=True) 22 | 23 | # Truncate the list to preserve only the largest hash change 24 | self.sorted_scenes = self.sorted_scenes[:n] 25 | 26 | # Resort the scenes according to the time they occurred 27 | self.sorted_scenes = sorted(self.sorted_scenes, key=lambda scene_key: scene_key.frame_number) 28 | 29 | print(f"These are the top {n} scene timestamps:") 30 | for scene in self.sorted_scenes: 31 | print(f"- {scene.time_stamp}") 32 | 33 | def visualize_scenes(self): 34 | """Once we've found top N scene changes, use this function to visualize each scene according to time stamp""" 35 | ''' 36 | Suppose we want to have twice as many columns as there are rows in visualization 37 | number_of_scenes = number_of_rows * number_of_columns 38 | number_of_columns = 2 * number_of_rows 39 | --> number_of_scenes = number_of_rows * (2 * number_of_rows) 40 | --> 
number_of_columns = 2 * (sqrt(number_of_scenes / 2)) 41 | ''' 42 | number_of_scenes = len(self.sorted_scenes) 43 | number_of_columns = int(2 * ((number_of_scenes / 2)**0.5)) 44 | number_of_rows = int((number_of_scenes / 2)**0.5) 45 | 46 | just_incremented_rows = False 47 | 48 | # Enable interactivity mode to allow us to continuously display plots 49 | plt.ion() 50 | 51 | # Since we're calling int() on column/row values, number_of_columns * number_of_rows may be < number_of_scenes 52 | # This happens since int() may round a decimal down 53 | # Increment either row or column number until their product is > number_of_scenes 54 | while number_of_columns * number_of_rows < number_of_scenes: 55 | if not just_incremented_rows: 56 | number_of_rows += 1 57 | just_incremented_rows = True 58 | else: 59 | number_of_columns += 1 60 | just_incremented_rows = False 61 | 62 | fig = plt.figure() 63 | fig.canvas.manager.full_screen_toggle() 64 | 65 | # Display each scene as a subplot in matplotlib 66 | for counter, scene in enumerate(self.sorted_scenes): 67 | 68 | y = fig.add_subplot(number_of_rows, number_of_columns, counter + 1) 69 | 70 | plt.subplots_adjust(left=0, bottom=0, right=1.0, top=0.99, wspace=0.0, hspace=0.07) 71 | y.imshow(cv2.cvtColor(scene.frame, cv2.COLOR_BGR2RGB)) 72 | 73 | # If want to continuously display plots for multiple videos in directory, we need to insert this pause 74 | # https://stackoverflow.com/questions/11874767/how-do-i-plot-in-real-time-in-a-while-loop-using-matplotlib 75 | plt.pause(.0000001) 76 | 77 | # Label each scene with its corresponding time 78 | plt.title(scene.time_stamp) 79 | 80 | y.axes.get_xaxis().set_visible(False) 81 | y.axes.get_yaxis().set_visible(False) 82 | 83 | plt.show() 84 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | Fast Scene Detection 2 | -------------------- 3 | 4 | HD videos are large files 
that contain a huge amount of information. However, a video can be summarized by extracting 5 | the scenes that comprise it. This tool attempts to extract scenes quickly from an input video. 6 | 7 | 8 | Installation 9 | ------------ 10 | 11 | Make a virtualenv and install the dependencies by running the following 3 commands: 12 | 13 | ``` 14 | virtualenv venv 15 | source venv/bin/activate 16 | python3 -m pip install -r requirements.txt 17 | ``` 18 | 19 | 20 | Usage 21 | ----- 22 | 23 | General: 24 | 25 | ``` 26 | usage: main.py [-h] [--source_path SOURCE_PATH] [--save_path SAVE_PATH] 27 | [--library_path LIBRARY_PATH] [--visualize_scenes] 28 | ``` 29 | 30 | Available options: 31 | 32 | | Argument | Values | Purpose | 33 | |---------------------|----------------------------------------------|----------------------------------------------------| 34 | |`--source_path` | "path/to/video.mp4", "path/to/video_folder" | A video source file or directory containing videos | 35 | |`--save_path` | "path/to/save/to" | A path to save a pickled segmented video object | 36 | |`--library_path` | "path/to/pickled/video_collection" | A path to a saved segmented video pickle object | 37 | |`--visualize_scenes` | None | Whether or not to visualize scenes | 38 | 39 | 40 | FAQs 41 | --- 42 | 43 | 1. #### What is this? 44 | 45 | It's a small package that attempts to divide a video into scenes based on its content. You give it a video and it 46 | gives you a list of timestamps where each scene starts. It also lets you visualize and save extracted scenes. 47 | 48 | 2. #### How does it work? 49 | 50 | It works by taking the perceptual hashes (pHash) of input video frames. 51 | If the perceptual hash difference of consecutive frames is large (i.e. their Hamming distance), then this is considered 52 | a scene change. The top N biggest hash differences are returned and are considered to be the "scenes" of the video. 53 | 54 | 3. #### What makes it fast? 
55 | 56 | Higher speed is achieved by reducing scene search granularity: i.e. not all frames are hashed. For a video with 100,000 57 | frames, this tool would hash 1 out of every ~430 frames. If the video is 60 FPS, then that means a scene change is 58 | checked for roughly every ~7 seconds. The trade-off between search granularity and speed is controllable via command 59 | line parameters. 60 | 61 | 4. #### Aren't there tools that already do this kind of thing? 62 | 63 | There is a great library called PySceneDetect that is commonly used for scene detection. It works great but it seemed a 64 | little slow for longer videos so I threw this script together quickly. I highly recommend PySceneDetect, though! 65 | 66 | 5. #### Is this method even a good way to extract scenes from a video? 67 | 68 | Truthfully, I can't give you an objective answer, but I think it's okay based on my subjective experience. 69 | However, for all I know, it could be horrible compared to other methods! If someone runs the results through an 70 | objective metric of scene detection, tell me how this script does compared to other things. This is purely experimental, 71 | so use at your own discretion. 72 | 73 | 6. #### What are the limitations? 74 | 75 | Within the results, there are times when 2 or more consecutive scenes look too similar. I suspect this happens 76 | because the perceptual hashes of the frames are sufficiently different to appear in the results, but don't necessarily 77 | qualify for what a human might call two different scenes. 78 | 79 | Also, fade transitions, where two or more frames slowly blend into one another, could give this algorithm a very hard 80 | time. Transition effects are problematic because the Hamming distance of perceptual hashes of blended frames can be 81 | low, and therefore may not register as a scene change. 
82 | 83 | 84 | Sample Visualization 85 | -------------------- 86 | 87 | ![](https://i.imgur.com/R1ZNw1p.jpg) 88 | -------------------------------------------------------------------------------- /fast_scene_detection/scene_extractor.py: -------------------------------------------------------------------------------- 1 | import cv2 2 | import imagehash 3 | from PIL import Image 4 | from tqdm import tqdm 5 | 6 | from .scene import Scene 7 | from .video import Video 8 | from .video_file_handler import VideoFileHandler 9 | 10 | 11 | class SceneExtractor: 12 | """Class for handling videos and segmenting them into scenes""" 13 | def __init__(self, video_source, step_size_constant=0.00429584, video_library=None): 14 | self.hash_size = 128 15 | self.step_size_constant = step_size_constant 16 | self.video_library = video_library 17 | 18 | self.file_handler = VideoFileHandler(video_source) 19 | 20 | @staticmethod 21 | def get_video_details(video_capture): 22 | """Extracts the frames per second and number of frames in an given video input 23 | 24 | :param video_capture: a cv2 video capture object 25 | :return frames_per_second: a float value storing the video frames per second 26 | :return number_of_frames: an integer storing the number of frames in an input video 27 | """ 28 | frames_per_second = video_capture.get(cv2.CAP_PROP_FPS) 29 | number_of_frames = int(video_capture.get(cv2.CAP_PROP_FRAME_COUNT)) 30 | 31 | return frames_per_second, number_of_frames 32 | 33 | def process_scenes(self): 34 | """Iterates through a list of video paths and performs processing on each video""" 35 | for video_path in self.file_handler.video_paths_list: 36 | segmented_video = self.segment_video(video_path) 37 | 38 | if self.video_library is not None: 39 | self.video_library.append(segmented_video) 40 | 41 | def process_frames(self, video_capture, number_of_frames, frames_per_second, step_size): 42 | """Iterates through a stream of video frames and extracts hashes to determine video scenes 43 | 
44 | :param video_capture: a cv2 video capture object 45 | :param number_of_frames: an integer storing the number of frames in an input video 46 | :param frames_per_second: a float value storing the video frames per second 47 | :param step_size: an integer that determines rate at which to skip frames. i.e. if step_size = 3, the 1st frame 48 | is read, 3 frames are skipped, the 4th frame is read, 3 are skipped, the 7th read and so on 49 | :return scenes_list: a list of Scene objects 50 | """ 51 | previous_frame_hash, current_frame_hash, hash_delta, current_frame_number = None, None, None, 0 52 | scenes_list = [] 53 | 54 | # Iterate through all the video frames while the capture is open 55 | progress_bar = tqdm(total=number_of_frames) 56 | while video_capture.isOpened(): 57 | current_frame_number += 1 58 | progress_bar.update(1) 59 | 60 | # Allows us to skip some number of frames defined by stepsize 61 | # Note: Since setting CAP_PROP_POS_FRAMES is slow, we'll need a sufficiently large step size for us to 62 | # take advantage of the speed boost that it offers in terms of skipping frames 63 | if current_frame_number % step_size != 0: 64 | continue 65 | 66 | # Tell OpenCV to start reading the video from the current frame number 67 | video_capture.set(cv2.CAP_PROP_POS_FRAMES, current_frame_number) 68 | ret, frame = video_capture.read() 69 | 70 | if not ret: 71 | video_capture.release() 72 | break 73 | 74 | frame = Image.fromarray(frame) 75 | previous_frame_hash, hash_delta = self.calculate_frame_hashes(frame, previous_frame_hash, hash_delta) 76 | 77 | if hash_delta is not None: 78 | scenes_list.append(Scene(frame, hash_delta, current_frame_number, frames_per_second)) 79 | 80 | return scenes_list 81 | 82 | def calculate_frame_hashes(self, frame, previous_frame_hash, hash_delta): 83 | """Uses the imagehash library to calculate the perceptual hash differences between frames 84 | 85 | :param frame: a pillow image object 86 | :param previous_frame_hash: the perceptual hash 
of the previously read frame 87 | :param hash_delta: the change in the perceptual hash, i.e. the Hamming distance 88 | :return previous_frame_hash: the perceptual hash of the previously read frame 89 | :return hash_delta: the change in the perceptual hash, i.e. the Hamming distance 90 | """ 91 | # Handle special case where we're just reading the first frame, and there's nothing else to compare it to 92 | if previous_frame_hash is None: 93 | previous_frame_hash = imagehash.phash(frame, hash_size=self.hash_size) 94 | else: 95 | # Calculate the current frame's hash and calculate the Hamming distance so we can 96 | # compare it to previous frame 97 | current_frame_hash = imagehash.phash(frame, hash_size=self.hash_size) 98 | hash_delta = previous_frame_hash - current_frame_hash 99 | previous_frame_hash = current_frame_hash 100 | 101 | return previous_frame_hash, hash_delta 102 | 103 | def segment_video(self, video_path): 104 | """Reads and segments a given video into scenes 105 | 106 | :param video_path: a string representing a path to a video 107 | :return video: a Video object containing scenes 108 | """ 109 | # Create a video capture and get the video details 110 | video_capture = cv2.VideoCapture(video_path) 111 | frames_per_second, number_of_frames = self.get_video_details(video_capture) 112 | 113 | # The stepsize defines how many frames we skip when we compare hashes to see if the scene has changed 114 | # We define a stepsize constant so we can adapt the stepsize based on the length of the video we're breaking 115 | # into scenes 116 | step_size = int(number_of_frames * self.step_size_constant) 117 | if step_size < 1: 118 | step_size = 1 119 | 120 | # Get the scenes and create the video object from them 121 | scenes_list = self.process_frames(video_capture, number_of_frames, frames_per_second, step_size) 122 | video = Video(scenes_list) 123 | 124 | return video 125 | --------------------------------------------------------------------------------