├── requirements.txt
├── README.md
├── .gitignore
├── candidate_frames_folder.py
├── extracting_candidate_frames.py
└── clustering_with_hdbscan.py


/requirements.txt:
--------------------------------------------------------------------------------
1 | hdbscan==0.8.24
2 | matplotlib==3.1.1
3 | numpy==1.17.1
4 | scikit-image==0.16.2
5 | scikit-learn==0.21.3
6 | scipy==1.3.2
7 | seaborn==0.9.0
8 | sklearn==0.0
9 | opencv-contrib-python==4.1.1.26


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # Project:
 2 | 
 3 | This project extracts all the unique, high-quality key frames (images) from a video.
 4 | 
 5 | # Requirements:
 6 | 
 7 | All the required libraries are listed in the requirements.txt file.
 8 | 
 9 | Run `pip install -r requirements.txt` to install them.
10 | 
11 | # How to run the code:
12 | 
13 | To run the code, execute below command with all the required parameters. 
14 | 
15 | python candidate_frames_folder.py --input_videos "sample_video.mp4" --output_folder_video_image candidate_frames_and_their_cluster_folder --output_folder_video_final_image final_images
16 | 
17 | This command creates a new folder named after the input video (without its extension). Inside that folder, the candidate frames and their similarity-based clusters are written to "candidate_frames_and_their_cluster_folder", and the final key frames are written to "final_images".
18 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
  1 | # Byte-compiled / optimized / DLL files
  2 | __pycache__/
  3 | *.py[cod]
  4 | *$py.class
  5 | 
  6 | # C extensions
  7 | *.so
  8 | 
  9 | # Distribution / packaging
 10 | .Python
 11 | build/
 12 | develop-eggs/
 13 | dist/
 14 | downloads/
 15 | eggs/
 16 | .eggs/
 17 | lib/
 18 | lib64/
 19 | parts/
 20 | sdist/
 21 | var/
 22 | wheels/
 23 | pip-wheel-metadata/
 24 | share/python-wheels/
 25 | *.egg-info/
 26 | .installed.cfg
 27 | *.egg
 28 | MANIFEST
 29 | 
 30 | # PyInstaller
 31 | #  Usually these files are written by a python script from a template
 32 | #  before PyInstaller builds the exe, so as to inject date/other infos into it.
 33 | *.manifest
 34 | *.spec
 35 | 
 36 | # Installer logs
 37 | pip-log.txt
 38 | pip-delete-this-directory.txt
 39 | 
 40 | # Unit test / coverage reports
 41 | htmlcov/
 42 | .tox/
 43 | .nox/
 44 | .coverage
 45 | .coverage.*
 46 | .cache
 47 | nosetests.xml
 48 | coverage.xml
 49 | *.cover
 50 | *.py,cover
 51 | .hypothesis/
 52 | .pytest_cache/
 53 | 
 54 | # Translations
 55 | *.mo
 56 | *.pot
 57 | 
 58 | # Django stuff:
 59 | *.log
 60 | local_settings.py
 61 | db.sqlite3
 62 | db.sqlite3-journal
 63 | 
 64 | # Flask stuff:
 65 | instance/
 66 | .webassets-cache
 67 | 
 68 | # Scrapy stuff:
 69 | .scrapy
 70 | 
 71 | # Sphinx documentation
 72 | docs/_build/
 73 | 
 74 | # PyBuilder
 75 | target/
 76 | 
 77 | # Jupyter Notebook
 78 | .ipynb_checkpoints
 79 | 
 80 | # IPython
 81 | profile_default/
 82 | ipython_config.py
 83 | 
 84 | # pyenv
 85 | .python-version
 86 | 
 87 | # pipenv
 88 | #   According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
 89 | #   However, in case of collaboration, if having platform-specific dependencies or dependencies
 90 | #   having no cross-platform support, pipenv may install dependencies that don't work, or not
 91 | #   install all needed dependencies.
 92 | #Pipfile.lock
 93 | 
 94 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow
 95 | __pypackages__/
 96 | 
 97 | # Celery stuff
 98 | celerybeat-schedule
 99 | celerybeat.pid
100 | 
101 | # SageMath parsed files
102 | *.sage.py
103 | 
104 | # Environments
105 | .env
106 | .venv
107 | env/
108 | venv/
109 | ENV/
110 | env.bak/
111 | venv.bak/
112 | 
113 | # Spyder project settings
114 | .spyderproject
115 | .spyproject
116 | 
117 | # Rope project settings
118 | .ropeproject
119 | 
120 | # mkdocs documentation
121 | /site
122 | 
123 | # mypy
124 | .mypy_cache/
125 | .dmypy.json
126 | dmypy.json
127 | 
128 | # Pyre type checker
129 | .pyre/
130 | 


--------------------------------------------------------------------------------
/candidate_frames_folder.py:
--------------------------------------------------------------------------------
 1 | import time
 2 | start_time = time.time()
 3 | import argparse
 4 | import os
 5 | import sys
 6 | import os.path
 7 | import cv2
 8 | import extracting_candidate_frames
 9 | import clustering_with_hdbscan
10 | # from multiprocessing import Pool, Process, cpu_count
11 | import logging
12 | 
13 | 
# logging.basicConfig(filename=...) opens the log file right away and fails
# when the parent directory is missing, so make sure ./logs exists first.
os.makedirs('./logs', exist_ok=True)
logging.basicConfig(
    filename='./logs/key_frames.log',
    format='%(asctime)s  %(levelname)s:%(message)s',
    level=logging.DEBUG,
)
# Separator line marking the start of a new run in the log file.
logging.info('---------------------------------------------------------------------------------------------------------')
16 | 
17 | """# Running the code 
18 |  python candidate_frames_folder.py --input_videos sample_video.mp4 --output_folder_video_image candidate_frames_and_their_cluster_folder /
19 |  --output_folder_video_final_image final_images"""
20 | 
21 | 
def main(argv):
    """Extract candidate frames from a video, cluster them and save the key frames.

    All output goes under a folder named after the input video with its
    extension removed: candidate frames are written to the folder given by
    ``--output_folder_video_image`` and the selected key frames to the folder
    given by ``--output_folder_video_final_image``.

    :param argv: command line in ``sys.argv`` form (program name first); when
        empty or None, argparse falls back to ``sys.argv``
    :type argv: list of str
    :return: None
    """
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--input_videos",
        required=True,
        help="Path to the input video file"
    )

    # Required argument: folder for the candidate images of the video file.
    parser.add_argument(
        "--output_folder_video_image",
        required=True,
        help="folder for candidates frames"
    )

    # Required argument: folder for the final key frames of the video file.
    parser.add_argument(
        "--output_folder_video_final_image",
        required=True,
        help="FOlder for key frames to be saved."
    )

    args = parser.parse_args(argv[1:] if argv else None)
    logging.info('file execution started for input video {}'.format(args.input_videos))

    # Output root is the video path without its extension; splitext only
    # strips a real file extension, so dots in directory names are left alone.
    video_root = os.path.splitext(args.input_videos)[0]
    candidates_dir = os.path.join(video_root, args.output_folder_video_image)
    final_dir = os.path.join(video_root, args.output_folder_video_final_image)
    # Create both folders unconditionally: the video folder may already exist
    # from an earlier run that used different output folder names.
    for folder in (candidates_dir, final_dir):
        os.makedirs(folder, exist_ok=True)

    vd = extracting_candidate_frames.FrameExtractor()
    imgs = vd.extract_candidate_frames(args.input_videos)
    for counter, img in enumerate(imgs):
        vd.save_frame_to_disk(
            img,
            file_path=candidates_dir,
            file_name="test_" + str(counter),
            file_ext=".jpeg",
        )

    final_images = clustering_with_hdbscan.ImageSelector()
    imgs_final = final_images.select_best_frames(imgs, candidates_dir)
    for counter, img in enumerate(imgs_final):
        vd.save_frame_to_disk(
            img,
            file_path=final_dir,
            file_name="test_" + str(counter),
            file_ext=".jpeg",
        )
    # start_time is set at module import, so this covers the whole run.
    logging.info("--- {a} seconds to extract key frames from {b}---".format(a= (time.time() - start_time),b = args.input_videos))
65 | 
66 | if __name__ == "__main__":
67 |     main(sys.argv)


--------------------------------------------------------------------------------
/extracting_candidate_frames.py:
--------------------------------------------------------------------------------
  1 | import cv2
  2 | import os
  3 | import operator
  4 | import numpy as np
  5 | from scipy.signal import argrelextrema
  6 | 
  7 | 
  8 | import tempfile
  9 | # import Katna.config as config
 10 | 
 11 | # Class to hold information about each frame
 12 | class Frame:
 13 |     """Class for storing frame ref
 14 |     """
 15 | 
 16 |     def __init__(self, frame, sum_abs_diff):
 17 |         self.frame = frame
 18 |         self.sum_abs_diff = sum_abs_diff
 19 | 
 20 | class Configs:
 21 |     # Setting local maxima criteria
 22 |     USE_LOCAL_MAXIMA = True
 23 |     # Lenght of sliding window taking difference
 24 |     len_window = 10
 25 |     # Chunk size of Images to be processed at a time in memory
 26 |     max_frames_in_chunk = 2500
 27 |     # Type of smoothening window from 'flat', 'hanning', 'hamming', 'bartlett', 'blackman' flat window will produce a moving average smoothing.
 28 |     window_type = "hanning"
 29 |     # Setting for optimum Brightness values
 30 |     # min_brightness_value = 1.0
 31 |     # max_brightness_value = 100.0
 32 |     # # Setting for optimum Contrast/Entropy values
 33 |     # min_entropy_value = 1.0
 34 |     # max_entropy_value = 15.0
 35 | 
 36 | class FrameExtractor(object):
 37 |     """Class for extraction of key frames from video : based on sum of absolute differences in LUV colorspace from given video
 38 |     """
 39 | 
 40 |     def __init__(self):
 41 |         # self.FrameExtractor()
 42 |         # Setting local maxima criteria
 43 |         self.USE_LOCAL_MAXIMA = Configs.USE_LOCAL_MAXIMA
 44 |         # Lenght of sliding window taking difference
 45 |         self.len_window = Configs.len_window
 46 |         # Chunk size of Images to be processed at a time in memory
 47 |         self.max_frames_in_chunk = Configs.max_frames_in_chunk
 48 | 
 49 |         
 50 | 
 51 |     
 52 | 
 53 |     def __calculate_frame_difference(self, frame, curr_frame, prev_frame):
 54 |         """Function to calculate the difference between current frame and previous frame
 55 |         :param frame: frame from the video
 56 |         :type frame: numpy array
 57 |         :param curr_frame: current frame from the video in LUV format
 58 |         :type curr_frame: numpy array
 59 |         :param prev_frame: previous frame from the video in LUV format
 60 |         :type prev_frame: numpy array
 61 |         :return: difference count and frame if None is empty or undefined else None
 62 |         :rtype: tuple
 63 |         """
 64 | 
 65 |         if curr_frame is not None and prev_frame is not None:
 66 |             # Calculating difference between current and previous frame
 67 |             diff = cv2.absdiff(curr_frame, prev_frame)
 68 |             count = np.sum(diff)
 69 |             frame = Frame(frame, count)
 70 | 
 71 |             return count, frame
 72 |         return None
 73 | 
 74 |     def __process_frame(self, frame, prev_frame, frame_diffs, frames):
 75 |         """Function to calculate the difference between current frame and previous frame
 76 |         :param frame: frame from the video
 77 |         :type frame: numpy array
 78 |         :param prev_frame: previous frame from the video in LUV format
 79 |         :type prev_frame: numpy array
 80 |         :param frame_diffs: list of frame differences
 81 |         :type frame_diffs: list of int
 82 |         :param frames: list of frames
 83 |         :type frames: list of numpy array
 84 |         :return: previous frame and current frame
 85 |         :rtype: tuple
 86 |         """
 87 |         # For LUV images
 88 |         # luv = cv2.cvtColor(frame, cv2.COLOR_BGR2LUV)
 89 |         # curr_frame = luv
 90 | 
 91 |         # For GrayScale images
 92 |         grey = cv2.cvtColor(frame, cv2.COLOR_BGR2HSV)
 93 |         curr_frame = grey
 94 | 
 95 |         # Calculating the frame difference for previous and current frame
 96 |         frame_diff = self.__calculate_frame_difference(frame, curr_frame, prev_frame)
 97 |         
 98 |         if frame_diff is not None:
 99 |             count, frame = frame_diff
100 |             frame_diffs.append(count)
101 |             frames.append(frame)
102 |         prev_frame = curr_frame
103 | 
104 |         return prev_frame, curr_frame
105 | 
106 |     def __extract_all_frames_from_video__(self, videopath):
107 |         """Generator function for extracting frames from a input video which are sufficiently different from each other,
108 |         and return result back as list of opencv images in memory
109 |         :param videopath: inputvideo path
110 |         :type videopath: `str`
111 |         :return: Generator with extracted frames in max_process_frames chunks and difference between frames
112 |         :rtype: generator object with content of type [numpy.ndarray, numpy.ndarray]
113 |         """
114 |         cap = cv2.VideoCapture(str(videopath))
115 | 
116 |         ret, frame = cap.read()
117 |         i = 1
118 |         chunk_no = 0
119 |         while ret:
120 |             curr_frame = None
121 |             prev_frame = None
122 | 
123 |             frame_diffs = []
124 |             frames = []
125 |             for _ in range(0, self.max_frames_in_chunk):
126 |                 if ret:
127 |                     # Calling process frame function to calculate the frame difference and adding the difference
128 |                     # in **frame_diffs** list and frame to **frames** list
129 |                     prev_frame, curr_frame = self.__process_frame(frame, prev_frame, frame_diffs, frames)
130 |                     i = i + 1
131 |                     ret, frame = cap.read()
132 |                     # print(frame_count)
133 |                 else:
134 |                     cap.release()
135 |                     break
136 |             chunk_no = chunk_no + 1
137 |             # print(frames)
138 |             yield frames, frame_diffs
139 |         cap.release()
140 | 
141 |     def __get_frames_in_local_maxima__(self, frames, frame_diffs):
142 |         """ Internal function for getting local maxima of key frames
143 |         This functions Returns one single image with strongest change from its vicinity of frames
144 |         ( vicinity defined using window length )
145 |         :param object: base class inheritance
146 |         :type object: class:`Object`
147 |         :param frames: list of frames to do local maxima on
148 |         :type frames: `list of images`
149 |         :param frame_diffs: list of frame difference values
150 |         :type frame_diffs: `list of images`
151 |         """
152 |         extracted_key_frames = []
153 |         diff_array = np.array(frame_diffs)
154 |         # Normalizing the frame differences based on windows parameters
155 |         sm_diff_array = self.__smooth__(diff_array, self.len_window)
156 | 
157 |         # sm_diff_array = diff_array
158 |         # Get the indexes of those frames which have maximum differences
159 |         frame_indexes = np.asarray(argrelextrema(sm_diff_array, np.greater))[0]
160 | 
161 |         for frame_index in frame_indexes:
162 |             extracted_key_frames.append(frames[frame_index - 1].frame)
163 |         return extracted_key_frames
164 | 
165 |     def __smooth__(self, x, window_len, window=Configs.window_type):
166 |         """smooth the data using a window with requested size.
167 |         This method is based on the convolution of a scaled window with the signal.
168 |         The signal is prepared by introducing reflected copies of the signal
169 |         (with the window size) in both ends so that transient parts are minimized
170 |         in the begining and end part of the output signal.
171 |         example:
172 |         import numpy as np
173 |         t = np.linspace(-2,2,0.1)
174 |         x = np.sin(t)+np.random.randn(len(t))*0.1
175 |         y = smooth(x)
176 |         see also:
177 |         numpy.hanning, numpy.hamming, numpy.bartlett, numpy.blackman, numpy.convolve
178 |         scipy.signal.lfilter
179 |         
180 |         :param x: the frame difference list
181 |         :type x: numpy.ndarray
182 |         :param window_len: the dimension of the smoothing window
183 |         :type window_len: slidding window length
184 |         :param window: the type of window from 'flat', 'hanning', 'hamming', 'bartlett', 'blackman' flat window will produce a moving average smoothing.
185 |         :type window: str
186 |         :return: the smoothed signal
187 |         :rtype: ndarray
188 |         """
189 |         # This function takes
190 |         if x.ndim != 1:
191 |             raise (ValueError, "smooth only accepts 1 dimension arrays.")
192 | 
193 |         if x.size < window_len:
194 |             raise (ValueError, "Input vector needs to be bigger than window size.")
195 | 
196 |         if window_len < 3:
197 |             return x
198 | 
199 |         if not window in ["flat", "hanning", "hamming", "bartlett", "blackman"]:
200 |             raise (
201 |                 ValueError,
202 |                 "Smoothing Window is on of 'flat', 'hanning', 'hamming', 'bartlett', 'blackman'",
203 |             )
204 | 
205 |         # Doing row-wise merging of frame differences wrt window length. frame difference
206 |         # by factor of two and subtracting the frame differences from index == window length in reverse direction
207 |         s = np.r_[2 * x[0] - x[window_len:1:-1], x, 2 * x[-1] - x[-1:-window_len:-1]]
208 | 
209 |         if window == "flat":  # moving average
210 |             w = np.ones(window_len, "d")
211 |         else:
212 |             w = getattr(np, window)(window_len)
213 |         y = np.convolve(w / w.sum(), s, mode="same")
214 |         return y[window_len - 1 : -window_len + 1]
215 | 
216 |     def extract_candidate_frames(self, videopath):
217 |         """ Pubic function for this module , Given and input video path
218 |         This functions Returns one list of all candidate key-frames
219 |         :param object: base class inheritance
220 |         :type object: class:`Object`
221 |         :param videopath: inputvideo path
222 |         :type videopath: `str`
223 |         :return: opencv.Image.Image objects
224 |         :rtype: list
225 |         """
226 | 
227 |         extracted_candidate_key_frames = []
228 | 
229 |         # Get all frames from video in chunks using python Generators
230 |         frame_extractor_from_video_generator = self.__extract_all_frames_from_video__(
231 |             videopath
232 |         )
233 | 
234 |         
235 |         # Loop over every frame in the frame extractor generator object and calculate the
236 |         # local maxima of frames
237 |         for frames, frame_diffs in frame_extractor_from_video_generator:
238 |             extracted_candidate_key_frames_chunk = []
239 |             if self.USE_LOCAL_MAXIMA:
240 | 
241 |                 # Getting the frame with maximum frame difference
242 |                 extracted_candidate_key_frames_chunk = self.__get_frames_in_local_maxima__(
243 |                     frames, frame_diffs
244 |                 )
245 |                 extracted_candidate_key_frames.extend(
246 |                     extracted_candidate_key_frames_chunk
247 |                 )
248 | 
249 |         return extracted_candidate_key_frames
250 | 
251 |     def save_frame_to_disk(self, frame, file_path, file_name, file_ext):
252 |         """saves an in-memory numpy image array on drive.
253 |         
254 |         :param frame: In-memory image. This would have been generated by extract_frames_as_images method
255 |         :type frame: numpy.ndarray, required
256 |         :param file_name: name of the image.
257 |         :type file_name: str, required
258 |         :param file_path: Folder location where files needs to be saved
259 |         :type file_path: str, required
260 |         :param file_ext: File extension indicating the file type for example - '.jpg'
261 |         :type file_ext: str, required         
262 |         :return: None
263 |         """
264 | 
265 |         file_full_path = os.path.join(file_path, file_name + file_ext)
266 |         # print(file_full_path)
267 |         cv2.imwrite(file_full_path, frame)


--------------------------------------------------------------------------------
/clustering_with_hdbscan.py:
--------------------------------------------------------------------------------
  1 | from __future__ import print_function
  2 | 
  3 | import os
  4 | from glob import glob
  5 | import tempfile
  6 | import cv2
  7 | import numpy as np
  8 | from sklearn.cluster import KMeans
  9 | from skimage.filters.rank import entropy
 10 | from skimage.morphology import disk
 11 | from skimage import img_as_float
 12 | import hdbscan
 13 | # from multiprocessing import Pool, Process, cpu_count
 14 | import matplotlib.pyplot as plt
 15 | import seaborn as sns
 16 | 
 17 | import time
 18 | 
 19 | from  extracting_candidate_frames import Configs as config
 20 | 
 21 | 
 22 | class ImageSelector(object):
 23 |     """Class for selection of best top N images from input list of images, Currently following selection method are implemented:
 24 |     brightness filtering, contrast/entropy filtering, clustering of frames and variance of laplacian for non blurred images 
 25 |     selection
 26 |     :param object: base class inheritance
 27 |     :type object: class:`Object`
 28 |     """
 29 | 
 30 |     # def __init__(self): #, pool_obj):
 31 |         # Setting for Multiprocessing Pool Object
 32 |         #self.pool_obj = pool_obj
 33 | 
 34 |         # Setting for optimum Brightness values
 35 |         # self.min_brightness_value = config.min_brightness_value
 36 |         # self.max_brightness_value = config.max_brightness_value
 37 | 
 38 |         # # Setting for optimum Contrast/Entropy values
 39 |         # self.min_entropy_value = config.min_entropy_value
 40 |         # self.max_entropy_value = config.max_entropy_value
 41 | 
 42 |     def __get_brighness_score__(self, image):
 43 |         """Internal function to compute the brightness of input image , returns brightness score between 0 to 100.0 , 
 44 |         :param object: base class inheritance
 45 |         :type object: class:`Object`
 46 |         :param image: input image
 47 |         :type image: Opencv Numpy Image   
 48 |         :return: result of Brighness measurment 
 49 |         :rtype: float value between 0.0 to 100.0    
 50 |         """
 51 |         hsv = cv2.cvtColor(image, cv2.COLOR_BGR2HSV)
 52 |         _, _, v = cv2.split(hsv)
 53 |         sum = np.sum(v, dtype=np.float32)
 54 |         num_of_pixels = v.shape[0] * v.shape[1]
 55 |         brightness_score = (sum * 100.0) / (num_of_pixels * 255.0)
 56 |         return brightness_score
 57 | 
 58 |     def __get_entropy_score__(self, image):
 59 |         """Internal function to compute the entropy/contrast of input image , returns entropy score between 0 to 10 , 
 60 |  
 61 |         :param object: base class inheritance
 62 |         :type object: class:`Object`
 63 |         :param image: input image
 64 |         :type image: Opencv Numpy Image
 65 |         :return: result of Entropy measurment
 66 |         :rtype: float value between 0.0 to 10.0
 67 |         """
 68 |         gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
 69 |         entr_img = entropy(gray, disk(5))
 70 |         all_sum = np.sum(entr_img)
 71 |         num_of_pixels = entr_img.shape[0] * entr_img.shape[1]
 72 |         entropy_score = (all_sum) / (num_of_pixels)
 73 | 
 74 |         return entropy_score
 75 | 
 76 |     def __variance_of_laplacian__(self, image):
 77 |         """Internal function to compute the laplacian of the image and then return the focus
 78 |         measure, which is simply the variance of the laplacian,
 79 |  
 80 |         :param object: base class inheritance
 81 |         :type object: class:`Object`
 82 |         :param image: input image
 83 |         :type image: Opencv Numpy Image   
 84 |         :return: result of cv2.Laplacian
 85 |         :rtype: opencv image of type CV_64F    
 86 |         """
 87 | 
 88 |         return cv2.Laplacian(image, cv2.CV_64F).var()
 89 | 
 90 |     def __filter_optimum_brightness_and_contrast_images__(self, input_img_files):
 91 |         """ Internal function for selection of given input images with following parameters :optimum brightness and contrast range ,
 92 |         returns array of image files which are in optimum brigtness and contrast/entropy range.
 93 |  
 94 |         :param object: base class inheritance
 95 |         :type object: class:`Object`
 96 |         :param files: list of input image files 
 97 |         :type files: python list of images
 98 |         :return: Returns list of filtered images  
 99 |         :rtype: python list of images 
100 |         """
101 | 
102 |         n_files = len(input_img_files)
103 |         # -------- calculating the brightness and entropy score by multiprocessing ------
104 |         brightness_score = np.asarray(list(map(self.__get_brighness_score__, input_img_files)))
105 |             # self.pool_obj.
106 |         #     map(self.__get_brighness_score__, input_img_files)
107 |         # )
108 | 
109 |         entropy_score = np.asarray(list(map(self.__get_entropy_score__, input_img_files)))
110 |             # self.pool_obj.
111 |         #     map(self.__get_entropy_score__, input_img_files)
112 |         # )
113 |         # print(list(entropy_score))
114 |         # -------- Check if brightness and contrast scores are in the min and max defined range ------
115 |         brightness_ok = np.where(
116 |             np.logical_and(
117 |                 brightness_score > self.min_brightness_value,
118 |                 brightness_score < self.max_brightness_value,
119 |             ),
120 |             True,
121 |             False,
122 |         )
123 |         contrast_ok = np.where(
124 |             np.logical_and(
125 |                 entropy_score > self.min_entropy_value,
126 |                 entropy_score < self.max_entropy_value,
127 |             ),
128 |             True,
129 |             False,
130 |         )
131 | 
132 |         # Returning only thos images which are have good brightness and contrast
133 | 
134 |         return [
135 |             input_img_files[i]
136 |             for i in range(n_files)
137 |             if brightness_ok[i] and contrast_ok[i]
138 |         ]
139 | 
140 | 
141 |     def __prepare_cluster_sets__hdbscan(self, files):
142 |         """ Internal function for clustering input image files, returns array of indexs of each input file
143 |         (which determines which cluster a given file belongs)
144 |  
145 |         :param object: base class inheritance
146 |         :type object: class:`Object`
147 |         :param files: list of input image files 
148 |         :type files: python list of opencv numpy images
149 |         :return: Returns array containing index for each file for cluster belongingness 
150 |         :rtype: np.array   
151 |         """
152 | 
153 |         # all_hists = []
154 |         all_dst = []
155 |         # Calculating the histograms for each image and adding them into **all_hists** list or all_dst** list
156 |         for img_file in files:
157 |             # img1 = cv2.cvtColor(img_file, cv2.COLOR_BGR2GRAY)
158 |             # # (thresh, img) = cv2.threshold(img1, 150, 255, cv2.THRESH_BINARY)
159 |             # hist = cv2.calcHist([img1], [0], None, [256], [0, 256])
160 |             # hist = hist.reshape((256))
161 |             # all_hists.append(hist)
162 |             img = cv2.cvtColor(img_file, cv2.COLOR_BGR2GRAY)
163 |             img = cv2.resize(img, (256, 256), img)
164 |             imf = np.float32(img) / 255.0  # float conversion/scale
165 |             dst = cv2.dct(imf)  # the dct
166 |             dst = dst[:16, :16]
167 |             dst = dst.reshape((256))
168 |             all_dst.append(dst)
169 | 
170 | 
171 |         # HDBSCAN(algorithm='best', alpha=1.0, approx_min_span_tree=True,
172 |         #         gen_min_span_tree=True, leaf_size=40, memory=Memory(cachedir=None),
173 |         #         metric='euclidean', min_cluster_size=5, min_samples=None, p=None)
174 |         # {'braycurtis': hdbscan.dist_metrics.BrayCurtisDistance,
175 |         # 'canberra': hdbscan.dist_metrics.CanberraDistance,
176 |         # 'chebyshev': hdbscan.dist_metrics.ChebyshevDistance,
177 |         # 'cityblock': hdbscan.dist_metrics.ManhattanDistance,
178 |         # 'dice': hdbscan.dist_metrics.DiceDistance,
179 |         # 'euclidean': hdbscan.dist_metrics.EuclideanDistance,
180 |         # 'hamming': hdbscan.dist_metrics.HammingDistance,
181 |         # 'haversine': hdbscan.dist_metrics.HaversineDistance,
182 |         # 'infinity': hdbscan.dist_metrics.ChebyshevDistance,
183 |         # 'jaccard': hdbscan.dist_metrics.JaccardDistance,
184 |         # 'kulsinski': hdbscan.dist_metrics.KulsinskiDistance,
185 |         # 'l1': hdbscan.dist_metrics.ManhattanDistance,
186 |         # 'l2': hdbscan.dist_metrics.EuclideanDistance,
187 |         # 'mahalanobis': hdbscan.dist_metrics.MahalanobisDistance,
188 |         # 'manhattan': hdbscan.dist_metrics.ManhattanDistance,
189 |         # 'matching': hdbscan.dist_metrics.MatchingDistance,
190 |         # 'minkowski': hdbscan.dist_metrics.MinkowskiDistance,
191 |         # 'p': hdbscan.dist_metrics.MinkowskiDistance,
192 |         # 'pyfunc': hdbscan.dist_metrics.PyFuncDistance,
193 |         # 'rogerstanimoto': hdbscan.dist_metrics.RogersTanimotoDistance,
194 |         # 'russellrao': hdbscan.dist_metrics.RussellRaoDistance,
195 |         # 'seuclidean': hdbscan.dist_metrics.SEuclideanDistance,
196 |         # 'sokalmichener': hdbscan.dist_metrics.SokalMichenerDistance,
197 |         # 'sokalsneath': hdbscan.dist_metrics.SokalSneathDistance,
198 |         # 'wminkowski': hdbscan.dist_metrics.WMinkowskiDistance}
199 |         # Hdbascan = hdbscan.HDBSCAN(min_cluster_size=2,metric='manhattan').fit(all_hists)
200 |         Hdbascan = hdbscan.HDBSCAN(min_cluster_size=2,metric='manhattan').fit(all_dst)
201 |         labels = np.add(Hdbascan.labels_,1)
202 |         nb_clusters = len(np.unique(Hdbascan.labels_))
203 |         # x=self.__plots_for_clustering(Hdbascan,all_dst)
204 |         # del x
205 | 
206 |         files_clusters_index_array = []
207 |         files_clusters_index_array_of_only_one_image = []
208 |         for i in np.arange(nb_clusters):
209 |             # print(i)
210 |             if i==0:
211 |                 index_array = np.where(labels == i)
212 |                 files_clusters_index_array_of_only_one_image.append(index_array)
213 |             else:
214 |                 index_array = np.where(labels == i)
215 |                 files_clusters_index_array.append(index_array)
216 | 
217 |         files_clusters_index_array = np.array(files_clusters_index_array)
218 |         return files_clusters_index_array,files_clusters_index_array_of_only_one_image
219 | 
220 |     def __plots_for_clustering(self,Hdbascan,all_dst):
221 |         # cluster_spanning_tree = Hdbascan.minimum_spanning_tree_.plot(edge_cmap='viridis',
222 |         #                                                             edge_alpha=0.6,
223 |         #                                                             node_size=80,
224 |         #                                                             # edge_linewidth=2)
225 |         single_linkage_tree_dst = Hdbascan.single_linkage_tree_.plot(cmap='viridis', colorbar=True)
226 |         # cluster_spanning_tree.figure.savefig('cluster_spanning_tree_dst.jpeg')
227 |         single_linkage_tree_dst.figure.savefig('cluster_hierarchy_plot .jpeg')
228 | 
229 | 
230 |     def __get_laplacian_scores(self, files, n_images):
231 |         """Function to iteratre over each image in the cluster and calculates the laplacian/blurryness 
232 |            score and adds the score to a list
233 |         :param files: list of input filenames 
234 |         :type files: python list of string
235 |         :param n_images: number of images in the given cluster
236 |         :type n_images: int
237 |         :return: Returns list of laplacian scores for each image in the given cluster
238 |         :rtype: python list 
239 |         """
240 | 
241 |         variance_laplacians = []
242 |         # Iterate over all images in image list
243 |         for image_i in n_images:
244 |             img_file = files[n_images[image_i]]
245 |             img = cv2.cvtColor(img_file, cv2.COLOR_BGR2GRAY)
246 | 
247 |             # Calculating the blurryness of image
248 |             variance_laplacian = self.__variance_of_laplacian__(img)
249 |             variance_laplacians.append(variance_laplacian)
250 | 
251 |         return variance_laplacians
252 | 
253 |     def __get_best_images_index_from_each_cluster__(
254 |         self, files, files_clusters_index_array
255 |     ):
256 |         """ Internal function returns index of one best image from each cluster
257 |         :param object: base class inheritance
258 |         :type object: class:`Object`
259 |         :param files: list of input filenames 
260 |         :type files: python list of string
261 |         :param files_clusters_index_array: Input is array containing index for each file for cluster belongingness 
262 |         :type: np.array   
263 |         :return: Returns list of filtered files which are best candidate from each cluster
264 |         :rtype: python list 
265 |         """
266 | 
267 |         filtered_items = []
268 | 
269 |         # Iterating over every image in each cluster to find the best images from every cluster
270 |         clusters = np.arange(len(files_clusters_index_array))
271 |         for cluster_i in clusters:
272 |             curr_row = files_clusters_index_array[cluster_i][0] 
273 |             # kp_lengths = []
274 |             n_images = np.arange(len(curr_row))
275 |             variance_laplacians = self.__get_laplacian_scores(files, n_images)
276 | 
277 |             # Selecting image with low burr(high laplacian) score
278 |             try:
279 |                 selected_frame_of_current_cluster = curr_row[np.argmax(variance_laplacians)]
280 |                 filtered_items.append(selected_frame_of_current_cluster)
281 |             except:
282 |                 break 
283 | 
284 |         return filtered_items
285 | 
286 |     def __getstate__(self):
287 |         """Function to get the state of initialized class object and remove the pool object from it
288 |         """
289 |         self_dict = self.__dict__.copy()
290 |         # del self_dict["pool_obj"]
291 |         return self_dict
292 | 
293 |     def __setstate__(self, state):
294 |         """Function to update the state of initialized class object woth the pool object
295 |         """
296 |         self.__dict__.update(state)
297 | 
298 |     def select_best_frames(self, input_key_frames,output_folder):
299 |     # def select_best_frames(self, input_key_frames, number_of_frames):
300 |         """[summary] Public function for Image selector class: takes list of key-frames images and number of required
301 |         frames as input, returns list of filtered keyframes
302 |         :param object: base class inheritance
303 |         :type object: class:`Object`
304 |         :param input_key_frames: list of input keyframes in list of opencv image format 
305 |         :type input_key_frames: python list opencv images
306 |         :param number_of_frames: Required number of images 
307 |         :type: int   
308 |         :return: Returns list of filtered image files 
309 |         :rtype: python list of images
310 |         """
311 | 
312 |         filtered_images_list = []
313 | 
314 |         # Selecting only those images which have good brishtness and contrast
315 |         # input_key_frames = self.__filter_optimum_brightness_and_contrast_images__(
316 |         #     input_key_frames
317 |         # )
318 |         
319 |         # Selecting the best images from each cluster by first preparing the clusters on basis of histograms 
320 |         # and then selecting the best images from every cluster
321 |         # if len(input_key_frames) >= self.nb_clusters:
322 |         if len(input_key_frames) >= 1:
323 |             files_clusters_index_array,files_clusters_index_array_of_only_one_image = self.__prepare_cluster_sets__hdbscan(input_key_frames)
324 |             selected_images_index = self.__get_best_images_index_from_each_cluster__(
325 |                 input_key_frames, files_clusters_index_array
326 |             )
327 |             files_clusters_index_array_of_only_one_image = [item for t in files_clusters_index_array_of_only_one_image for item in t]
328 |             files_clusters_index_array_of_only_one_image = files_clusters_index_array_of_only_one_image[0].tolist()
329 |             selected_images_index.extend(files_clusters_index_array_of_only_one_image)
330 |             for index in selected_images_index:
331 |                 img = input_key_frames[index]
332 |                 filtered_images_list.append(img)
333 |             # saving images of same clusters 
334 |             i=0
335 |             for images in files_clusters_index_array:
336 |                 # try:
337 |                 path = output_folder+'/'+str(i)
338 |                 try:
339 |                     if not os.path.isdir(output_folder):
340 |                         os.mkdir(output_folder)
341 |                         print(output_folder)
342 |                 except OSError:
343 |                     print ("Creation of the directory %s failed" % output_folder)
344 |                 try:
345 |                     os.makedirs(path)
346 |                 except:
347 |                     pass
348 |                 for image in images[0]:
349 |                     cv2.imwrite(os.path.join(path, str(image)+'.jpeg'),input_key_frames[image])
350 |                 i=i+1
351 |         else:
352 |             # if the imput candidate frames are less than a single cluster.
353 |             for img in input_key_frames:
354 |                 filtered_images_list.append(img)
355 | 
356 |         # saving clusters of single image cluster
357 |         for images in files_clusters_index_array_of_only_one_image:
358 |             print(files_clusters_index_array_of_only_one_image)
359 |             path = output_folder+'/'+str(i)
360 |             try:
361 |                 if not os.path.isdir(output_folder):
362 |                     os.mkdir(output_folder)
363 |                     print(output_folder)
364 |             except OSError:
365 |                 print ("Creation of the directory %s failed" % output_folder)
366 |             try:
367 |                 os.makedirs(path)
368 |             except:
369 |                 pass
370 |             cv2.imwrite(os.path.join(path, str(image)+'.jpeg'),input_key_frames[image])
371 |             i=i+1
372 | 
373 |           
374 |         return filtered_images_list


--------------------------------------------------------------------------------