├── requirements.txt ├── .gitignore ├── videoio ├── __init__.py ├── info.py ├── video_uint16.py └── video_rgb.py ├── setup.py ├── README.md └── LICENSE /requirements.txt: -------------------------------------------------------------------------------- 1 | numpy 2 | ffmpeg -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .idea/ 2 | *.pyc 3 | build/ 4 | dist/ 5 | videoio.egg-info/ 6 | .DS_Store 7 | -------------------------------------------------------------------------------- /videoio/__init__.py: -------------------------------------------------------------------------------- 1 | __version__ = '0.3.0' 2 | from .video_rgb import videoread, videosave, VideoReader, VideoWriter 3 | from .video_uint16 import uint16read, uint16save, Uint16Reader, Uint16Writer 4 | from .info import read_video_params 5 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import setup 2 | 3 | version = '0.3.0' 4 | 5 | with open("README.md", "r") as fi: 6 | long_description = fi.read() 7 | 8 | keywords = ["mp4", "png", "h264", "video", "image", "depth", "ffmpeg"] 9 | 10 | classifiers = [ 11 | 'Intended Audience :: Developers', 12 | 'License :: OSI Approved :: Apache Software License', 13 | 'Natural Language :: English', 14 | 'Operating System :: OS Independent', 15 | 'Programming Language :: Python', 16 | 'Programming Language :: Python :: 3' 17 | ] 18 | 19 | setup( 20 | name="videoio", 21 | packages=["videoio"], 22 | version=version, 23 | description="Module for saving and loading images and depth as H.264 video", 24 | author="Vladimir Guzov", 25 | author_email="guzov.mail@gmail.com", 26 | url="https://github.com/vguzov/videoio", 27 | keywords=keywords, 28 | long_description=long_description, 29 | long_description_content_type='text/markdown', 30 | install_requires=["numpy", "ffmpeg-python"], 31 | classifiers=classifiers 32 | ) 33 | -------------------------------------------------------------------------------- /videoio/info.py: -------------------------------------------------------------------------------- 1 | import os 2 | import ffmpeg 3 | import subprocess 4 | from typing import Dict, Union 5 | from pathlib import Path 6 | 7 | H264_PRESETS = ['ultrafast', 'superfast', 'veryfast', 'faster', 'fast', 'medium', 'slow', 'veryslow'] 8 | 9 | 10 | def read_video_params(path: Union[str, Path], stream_number: int = 0) -> Dict: 11 | """ 12 | Read _resolution and frame rate of the video 13 | Args: 14 | path (str, Path): Path to input file 15 | stream_number (int): Stream number to extract video parameters from 16 | Returns: 17 | dict: Dictionary with height, width and FPS of the video 18 | """ 19 | path = str(path) 20 | if not os.path.isfile(path): 21 | raise FileNotFoundError("{} does not exist".format(path)) 22 | try: 23 | probe = ffmpeg.probe(path) 24 | except FileNotFoundError: 25 | raise FileNotFoundError("ffprobe not found, please reinstall ffmpeg") 26 | video_streams = [s for s in probe['streams'] if s['codec_type'] == 'video'] 27 | stream_params = video_streams[stream_number] 28 | fps_splitted = [int(x) for x in stream_params['avg_frame_rate'].split('/')] 29 | fps = fps_splitted[0] if fps_splitted[1] == 1 else fps_splitted[0] / float(fps_splitted[1]) 30 | width = stream_params['width'] 31 | height = stream_params['height'] 32 | if 'nb_frames' in stream_params: 33 | try: 34 | length = int(stream_params['nb_frames']) 35 | except ValueError: 36 | length = None 37 | else: 38 | length = None 39 | if ('tags' in stream_params) and ('rotate' in stream_params['tags']): 40 | rotation = int(stream_params['tags']['rotate']) 41 | if rotation % 90 == 0 and rotation % 180 != 0: 42 | width = stream_params['height'] 43 | height = stream_params['width'] 44 | params = {'width': width, 'height': height, 'fps': fps} 45 | if length is not None: 46 | params['length'] = length 47 | return params 48 | 49 | 50 | def ensure_encoder_presence(codec="libx264"): 51 | try: 52 | p = subprocess.Popen(["ffprobe", "-encoders"], stdout=subprocess.PIPE, stderr=subprocess.PIPE) 53 | encoders_list, err_stream = p.communicate() 54 | except FileNotFoundError: 55 | raise FileNotFoundError("ffprobe not found, please reinstall ffmpeg") 56 | 57 | if codec not in encoders_list.decode("utf-8"): 58 | err_message = f"Codec {codec} is not available in the installed ffmpeg version." 59 | if codec in ["libx264", "libx265"]: 60 | err_message += (f"Make sure ffmpeg is installed with --enable-{codec} \n" 61 | f"HINT: For conda users, run `conda remove ffmpeg` and `conda install ffmpeg {codec[-4:]} -c conda-forge`") 62 | raise ValueError(err_message) 63 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # videoio: save/load image sequence as H.264 video 2 | 3 | A small Python module for saving and loading RGB and uint16 (depth) frames as H.264 encoded video 4 | 5 | ## Prerequisites 6 | 7 | - `ffmpeg>=2.1` **with libx264 enabled** 8 | - If you are using conda, you can install the correct version with `conda remove ffmpeg` and `conda install ffmpeg x264 -c conda-forge` 9 | - `ffprobe` (usually comes with ffmpeg) 10 | 11 | ## Quickstart 12 | 13 | ##### Save/load RGB frames: 14 | 15 | ```python 16 | import numpy as np 17 | from videoio import videosave, videoread 18 | 19 | frames = np.random.random((20, 200, 400, 3)) # [framesNr, height, width, RGB] 20 | # Save to video 21 | videosave("out.mp4", frames) 22 | # Load from video 23 | frames = videoread("out.mp4") 24 | ``` 25 | 26 | ##### Read frames sequentially: 27 | 28 | ```python 29 | from videoio import VideoReader 30 | 31 | for frame in VideoReader("in.mp4"): 32 | do_something_with(frame) 33 | ``` 34 | 35 | ##### Write frames sequentially: 36 | 37 | ```python 38 | from videoio import VideoWriter 39 | 40 | writer = VideoWriter("out.mp4", resolution=(400, 200)) # [width, height] 41 | for i in range(100): 42 | frame = get_frame() 43 | writer.write(frame) 44 | writer.close() 45 | ``` 46 | 47 | or 48 | 49 | ```python 50 | with VideoWriter("out.mp4", resolution=(400, 200)) as writer: 51 | for i in range(100): 52 | frame = get_frame() 53 | writer.write(frame) 54 | ``` 55 | 56 | ##### Lossless write/read of uint16 3D arrays (useful for saving depth frames stored in mm, for example Kinect data): 57 | 58 | ```python 59 | import numpy as np 60 | from videoio import uint16save, uint16read 61 | 62 | # Generate 20 random depth frames 63 | depth_frames = (np.random.random((20, 200, 400)) * 65535).astype(np.uint16) 64 | # Save 65 | uint16save("out_depth.mp4", depth_frames) 66 | # Load 67 | depth_frames = uint16read("out_depth.mp4") 68 | ``` 69 | 70 | ##### Save RGB frames in lossless mode with different compression preset and different FPS: 71 | 72 | ```python 73 | videosave("out.mp4", frames, lossless=True, preset="veryfast", fps=10.5) 74 | ``` 75 | 76 | ##### Read RGB frames and scale them to target resolution simultaneously: 77 | 78 | ```python 79 | frames = videoread("in.mp4", output_resolution=(100, 250)) 80 | ``` 81 | 82 | ##### Read video/uint16-array starting from certain frame: 83 | 84 | (Works if the input video was created by videoio, other cases are not guaranteed) 85 | 86 | ```python 87 | frames = videoread("in.mp4", start_frame=100) 88 | 89 | for frame in VideoReader("in.mp4", start_frame=100): 90 | do_something_with(frame) 91 | ``` 92 | 93 | ## Installation 94 | 95 | From pip: 96 | 97 | ``` 98 | pip install videoio 99 | ``` 100 | 101 | From source: 102 | 103 | ``` 104 | git clone https://github.com/vguzov/videoio.git 105 | python setup.py install 106 | ``` 107 | -------------------------------------------------------------------------------- /videoio/video_uint16.py: -------------------------------------------------------------------------------- 1 | import os 2 | import numpy as np 3 | import ffmpeg 4 | from pathlib import Path 5 | from typing import Tuple, Union 6 | from .info import read_video_params, H264_PRESETS, ensure_encoder_presence 7 | 8 | 9 | def uint16read(path: Union[str, Path], output_resolution: Tuple[int, int] = None, start_frame: int = 0) -> np.ndarray: 10 | """ 11 | Read 16-bit unsigned integer array encoded with uint16save function 12 | Args: 13 | path (str, Path): Path to input file 14 | output_resolution (Tuple[int, int]): Sets the _resolution of the result (width, height). 15 | If None, _resolution will be the same as _resolution of original video. 16 | Warning: changing this parameter may lead to undesirable data corruption. 17 | start_frame (int): frame to start reading from. 18 | Correct behaviour is guaranteed only if input array was produced by videoio. 19 | Returns: 20 | np.ndarray: 3-dimensional array of uint16 datatype 21 | """ 22 | path = str(path) 23 | assert start_frame >= 0, "Starting frame should be positive" 24 | if not os.path.isfile(path): 25 | raise FileNotFoundError("{} does not exist".format(path)) 26 | 27 | video_params = read_video_params(path, stream_number=0) 28 | resolution = (video_params['width'], video_params['height']) 29 | if start_frame != 0: 30 | start_frame_time = (start_frame - 0.5) / video_params['fps'] 31 | ffmpeg_input = ffmpeg.input(path, loglevel='quiet', ss=start_frame_time) 32 | else: 33 | ffmpeg_input = ffmpeg.input(path, loglevel='quiet') 34 | if output_resolution is not None: 35 | resolution = output_resolution 36 | ffmpeg_input = ffmpeg_input.filter("scale", *resolution) 37 | frames = [] 38 | ffmpeg_process = ( 39 | ffmpeg_input 40 | .output('pipe:', format='rawvideo', pix_fmt='yuv444p') 41 | .global_args('-nostdin') 42 | .run_async(pipe_stdout=True) 43 | ) 44 | try: 45 | while True: 46 | in_bytes = ffmpeg_process.stdout.read(np.prod(resolution) * 3) 47 | if not in_bytes: 48 | break 49 | in_frame = ( 50 | np 51 | .frombuffer(in_bytes, np.uint8) 52 | .reshape(3, *resolution[::-1]) 53 | ) 54 | upper_part = in_frame[2, :, :] 55 | lower_coding = in_frame[0, :, :] 56 | upper_isodd = (upper_part & 1) == 1 57 | lower_part = lower_coding.copy() 58 | lower_part[upper_isodd] = 255 - lower_part[upper_isodd] 59 | frame = lower_part.astype(np.uint16) + (upper_part.astype(np.uint16) << 8) 60 | frames.append(frame) 61 | finally: 62 | ffmpeg_process.stdout.close() 63 | ffmpeg_process.wait() 64 | return np.stack(frames, axis=0) 65 | 66 | 67 | def uint16save(path: Union[str, Path], data: np.ndarray, preset: str = 'slow', fps: float = None): 68 | """ 69 | Store 3-dimensional uint16 array in H.264 encoded video 70 | Args: 71 | path (str, Path): Path to output video 72 | data (np.ndarray): 3-dimentional uint16 NumPy array 73 | preset (str): H.264 compression preset 74 | fps (float): Target FPS. If None, will be set to ffmpeg's default 75 | """ 76 | ensure_encoder_presence() 77 | path = str(path) 78 | data = np.array(data) 79 | assert len(data[0].shape) == 2, "Multiple dimentions is not supported" 80 | assert data.dtype == np.uint16 or data.dtype == np.uint8, "Dtype {} is not supported".format(data.dtype) 81 | assert preset in H264_PRESETS, "Preset '{}' is not supported by libx264, supported presets are {}". \ 82 | format(preset, H264_PRESETS) 83 | resolution = data[0].shape[::-1] 84 | input_params = dict(format='rawvideo', pix_fmt='yuv444p', s='{}x{}'.format(*resolution), loglevel='quiet') 85 | if fps is not None: 86 | input_params['framerate'] = fps 87 | ffmpeg_input = ffmpeg.input('pipe:', **input_params) 88 | encoding_params = {'c:v': 'libx264', 'preset': preset, 'profile:v': 'high444', 'crf': 0} 89 | zeros = np.zeros(data.shape, dtype=np.uint8) 90 | if data.dtype == np.uint16: 91 | upper_part = (data >> 8).astype(np.uint8) 92 | lower_part = (data & 255).astype(np.uint8) 93 | upper_isodd = (upper_part & 1) == 1 94 | lower_coding = lower_part.copy() 95 | lower_coding[upper_isodd] = 255 - lower_coding[upper_isodd] 96 | data = np.stack([lower_coding, zeros, upper_part], axis=1) 97 | else: 98 | data = np.stack([data, zeros, zeros], axis=1) 99 | ffmpeg_process = ( 100 | ffmpeg_input 101 | .output(path, pix_fmt='yuv444p', **encoding_params) 102 | .overwrite_output() 103 | .run_async(pipe_stdin=True) 104 | ) 105 | try: 106 | for frame in data: 107 | ffmpeg_process.stdin.write(frame.tobytes()) 108 | finally: 109 | ffmpeg_process.stdin.close() 110 | ffmpeg_process.wait() 111 | 112 | 113 | class Uint16Reader: 114 | def __init__(self, path: Union[str, Path], output_resolution: Tuple[int, int] = None, start_frame: int = 0): 115 | """ 116 | Iterable class for reading uint16 data sequentially 117 | Args: 118 | path (str, Path): Path to input file 119 | output_resolution (Tuple[int, int]): Sets the _resolution of the result (width, height). 120 | If None, _resolution will be the same as _resolution of original video. 121 | Warning: changing this parameter may lead to undesirable data corruption. 122 | start_frame (int): frame to start reading from. 123 | Correct behaviour is guaranteed only if input array was produced by videoio. 124 | """ 125 | path = str(path) 126 | assert start_frame >= 0, "Starting frame should be positive" 127 | self.path = path 128 | self.start_frame = start_frame 129 | if not os.path.isfile(path): 130 | raise FileNotFoundError("{} does not exist".format(path)) 131 | 132 | self.video_params = read_video_params(path, stream_number=0) 133 | self.resolution = np.array((self.video_params['width'], self.video_params['height'])) 134 | if output_resolution is not None: 135 | self.resolution = output_resolution 136 | self.apply_scale = True 137 | else: 138 | self.apply_scale = False 139 | self.ffmpeg_process = None 140 | 141 | def __iter__(self): 142 | if self.start_frame != 0: 143 | start_frame_time = (self.start_frame - 0.5) / self.video_params['fps'] 144 | ffmpeg_input = ffmpeg.input(self.path, loglevel='quiet', ss=start_frame_time) 145 | else: 146 | ffmpeg_input = ffmpeg.input(self.path, loglevel='quiet') 147 | if self.apply_scale: 148 | ffmpeg_input = ffmpeg_input.filter("scale", *self.resolution) 149 | self.ffmpeg_process = ( 150 | ffmpeg_input 151 | .output('pipe:', format='rawvideo', pix_fmt='yuv444p') 152 | .global_args('-nostdin') 153 | .run_async(pipe_stdout=True) 154 | ) 155 | return self 156 | 157 | def __len__(self) -> int: 158 | if 'length' in self.video_params: 159 | return max(self.video_params['length'] - self.start_frame, 0) 160 | else: 161 | return 0 162 | 163 | def close(self): 164 | """ 165 | Close reader thread 166 | """ 167 | if hasattr(self, "ffmpeg_process") and self.ffmpeg_process is not None: 168 | self.ffmpeg_process.stdout.close() 169 | self.ffmpeg_process.wait() 170 | 171 | def __next__(self) -> np.ndarray: 172 | in_bytes = self.ffmpeg_process.stdout.read(np.prod(self.resolution) * 3) 173 | if not in_bytes: 174 | raise StopIteration 175 | in_frame = np.frombuffer(in_bytes, np.uint8).reshape(3, *self.resolution[::-1]) 176 | upper_part = in_frame[2, :, :] 177 | lower_coding = in_frame[0, :, :] 178 | upper_isodd = (upper_part & 1) == 1 179 | lower_part = lower_coding.copy() 180 | lower_part[upper_isodd] = 255 - lower_part[upper_isodd] 181 | frame = lower_part.astype(np.uint16) + (upper_part.astype(np.uint16) << 8) 182 | return frame 183 | 184 | def __del__(self): 185 | self.close() 186 | 187 | 188 | class Uint16Writer: 189 | """ 190 | Class for storing a sequence of uint16 arrays in H.264 encoded video 191 | """ 192 | 193 | def __init__(self, path: Union[str, Path], resolution: Tuple[int, int], preset: str = 'slow', fps: float = None): 194 | """ 195 | Args: 196 | path (str, Path): Path to output video 197 | resolution (Tuple[int, int]): Resolution of the input frames and output video (width, height) 198 | preset (str): H.264 compression preset 199 | fps (float): Target FPS. If None, will be set to ffmpeg's default 200 | """ 201 | ensure_encoder_presence() 202 | path = str(path) 203 | assert preset in H264_PRESETS, "Preset '{}' is not supported by libx264, supported presets are {}". \ 204 | format(preset, H264_PRESETS) 205 | input_params = dict(format='rawvideo', pix_fmt='yuv444p', s='{}x{}'.format(*resolution), loglevel='quiet') 206 | if fps is not None: 207 | input_params['framerate'] = fps 208 | ffmpeg_input = ffmpeg.input('pipe:', **input_params) 209 | encoding_params = {'c:v': 'libx264', 'preset': preset, 'profile:v': 'high444', 'crf': 0} 210 | self.ffmpeg_process = ( 211 | ffmpeg_input 212 | .output(path, pix_fmt='yuv444p', **encoding_params) 213 | .overwrite_output() 214 | .run_async(pipe_stdin=True) 215 | ) 216 | 217 | def write(self, data: np.ndarray): 218 | """ 219 | Write next portion of data 220 | Args: 221 | data (np.ndarray): data to write 222 | """ 223 | assert len(data.shape) == 2, "Multiple dimensions is not supported" 224 | assert data.dtype == np.uint16 or data.dtype == np.uint8, "Dtype {} is not supported".format(data.dtype) 225 | zeros = np.zeros(data.shape, dtype=np.uint8) 226 | if data.dtype == np.uint16: 227 | upper_part = (data >> 8).astype(np.uint8) 228 | lower_part = (data & 255).astype(np.uint8) 229 | upper_isodd = (upper_part & 1) == 1 230 | lower_coding = lower_part.copy() 231 | lower_coding[upper_isodd] = 255 - lower_coding[upper_isodd] 232 | data = np.stack([lower_coding, zeros, upper_part], axis=0) 233 | else: 234 | data = np.stack([data, zeros, zeros], axis=0) 235 | self.ffmpeg_process.stdin.write(data.tobytes()) 236 | 237 | def close(self): 238 | """ 239 | Finish video creation process and close video file 240 | """ 241 | if hasattr(self, "ffmpeg_process"): 242 | self.ffmpeg_process.stdin.close() 243 | self.ffmpeg_process.wait() 244 | 245 | def __enter__(self): 246 | return self 247 | 248 | def __exit__(self, exc_type, exc_val, exc_tb): 249 | self.close() 250 | 251 | def __del__(self): 252 | self.close() 253 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright 2020 Vladimir Guzov 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. -------------------------------------------------------------------------------- /videoio/video_rgb.py: -------------------------------------------------------------------------------- 1 | import os 2 | import numpy as np 3 | import ffmpeg 4 | import warnings 5 | from pathlib import Path 6 | from typing import Tuple, Dict, Union, Optional 7 | from .info import read_video_params, H264_PRESETS, ensure_encoder_presence 8 | 9 | 10 | def videoread(path: Union[str, Path], return_attributes: bool = False, stream_number: int = 0, 11 | output_resolution: Tuple[int, int] = None, output_fps: float = None, start_frame: int = 0, 12 | respect_original_timestamps: bool = False) \ 13 | -> Union[np.ndarray, Tuple[np.ndarray, Dict]]: 14 | """ 15 | Reads an input video to a NumPy array 16 | Args: 17 | path (str, Path): Path to input file 18 | return_attributes (bool): Whether to return a dictionary with original video _resolution and frame rate 19 | stream_number (int): Stream number to extract video parameters from 20 | output_resolution (Tuple[int, int]): Sets the _resolution of the result (width, height). 21 | If None, _resolution will be the same as _resolution of original video. 22 | output_fps (float): Sets the output framerate of the video to the given value. 23 | Useful to work with VFR (Variable Frame Rate) videos. If None, will keep the original framerate (whether variable or constant). 24 | start_frame (int): frame to start reading from. 25 | Correct behaviour is guaranteed only if input video was produced by videoio. 26 | If output_fps is set, the timing is calculated according to the output_fps, otherwise average framerate of the original video is used. 27 | respect_original_timestamps (bool): whether to read frames according to timestamps or not 28 | If True, frames will be extracted according to framerate and video timestamps, 29 | otherwise just a raw stream of frames will be read 30 | 31 | Returns: 32 | np.ndarray: (if return_attributes == False) Frames of the video 33 | tuple: (if return_attributes == True) Tuple containing: 34 | np.ndarray: Frames of the video 35 | dict: Parameter of the video (original height and width and frame rate) 36 | """ 37 | path = str(path) 38 | assert start_frame >= 0, "Starting frame should be positive" 39 | if not os.path.isfile(path): 40 | raise FileNotFoundError("{} does not exist".format(path)) 41 | 42 | video_params = read_video_params(path, stream_number=stream_number) 43 | resolution = np.array((video_params['width'], video_params['height'])) 44 | if start_frame != 0: 45 | if output_fps is None: 46 | start_frame_time = (start_frame - 0.5) / video_params['fps'] 47 | else: 48 | start_frame_time = (start_frame - 0.5) / output_fps 49 | ffmpeg_input = ffmpeg.input(path, loglevel='quiet', ss=start_frame_time) 50 | else: 51 | ffmpeg_input = ffmpeg.input(path, loglevel='quiet') 52 | if output_resolution is not None: 53 | resolution = output_resolution 54 | ffmpeg_input = ffmpeg_input.filter("scale", *resolution) 55 | if output_fps is not None: 56 | ffmpeg_input = ffmpeg_input.filter("fps", output_fps) 57 | respect_original_timestamps = True 58 | images = [] 59 | if respect_original_timestamps: 60 | ffmpeg_output = ffmpeg_input.output('pipe:', format='rawvideo', pix_fmt='rgb24') 61 | else: 62 | ffmpeg_output = ffmpeg_input.output('pipe:', format='rawvideo', pix_fmt='rgb24', vsync='0') 63 | ffmpeg_process = ffmpeg_output.global_args('-nostdin').run_async(pipe_stdout=True) 64 | try: 65 | while True: 66 | in_bytes = ffmpeg_process.stdout.read(np.prod(resolution) * 3) 67 | if not in_bytes: 68 | break 69 | in_frame = np.frombuffer(in_bytes, np.uint8).reshape(*resolution[::-1], 3) 70 | images.append(in_frame) 71 | finally: 72 | ffmpeg_process.stdout.close() 73 | ffmpeg_process.wait() 74 | images = np.stack(images, axis=0) 75 | if return_attributes: 76 | return images, video_params 77 | return images 78 | 79 | 80 | def videosave(path: Union[str, Path], images: np.ndarray, lossless: bool = False, preset: str = 'slow', fps: float = None): 81 | """ 82 | Saves the video with encoded with H.264 codec 83 | Args: 84 | path (str, Path): Path to output video 85 | images (np.ndarray): NumPy array of video frames 86 | lossless (bool): Whether to apply lossless encoding. 87 | Be aware: lossless format is still lossy due to RGB to YUV conversion inaccuracy 88 | preset (str): H.264 compression preset 89 | fps (float): Target FPS. If None, will be set to ffmpeg's default 90 | """ 91 | ensure_encoder_presence() 92 | path = str(path) 93 | assert images[0].shape[2] == 3, "Alpha channel is not supported" 94 | assert preset in H264_PRESETS, "Preset '{}' is not supported by libx264, supported presets are {}". \ 95 | format(preset, H264_PRESETS) 96 | resolution = images[0].shape[:2][::-1] 97 | input_params = dict(format='rawvideo', pix_fmt='rgb24', s='{}x{}'.format(*resolution), loglevel='quiet') 98 | if fps is not None: 99 | input_params['framerate'] = fps 100 | ffmpeg_input = ffmpeg.input('pipe:', **input_params) 101 | encoding_params = {"c:v": "libx264", "preset": preset} 102 | if lossless: 103 | encoding_params['profile:v'] = 'high444' 104 | encoding_params['crf'] = 0 105 | 106 | ffmpeg_process = ffmpeg_input.output(path, pix_fmt='yuv444p' if lossless else 'yuv420p', **encoding_params) 107 | 108 | ffmpeg_process = ffmpeg_process.overwrite_output().run_async(pipe_stdin=True) 109 | try: 110 | for color_frame in images: 111 | if color_frame.dtype == np.float16 or color_frame.dtype == np.float32 or color_frame.dtype == np.float64: 112 | color_frame = (color_frame * 255).astype(np.uint8) 113 | elif color_frame.dtype != np.uint8: 114 | raise NotImplementedError("Dtype {} is not supported".format(color_frame.dtype)) 115 | ffmpeg_process.stdin.write(color_frame.tobytes()) 116 | finally: 117 | ffmpeg_process.stdin.close() 118 | ffmpeg_process.wait() 119 | 120 | 121 | class VideoReader: 122 | """ 123 | Iterable class for reading video frame-by-frame 124 | """ 125 | 126 | def __init__(self, path: Union[str, Path], stream_number: int = 0, 127 | output_resolution: Tuple[int, int] = None, output_fps: float = None, start_frame: int = 0, 128 | respect_original_timestamps: bool = False): 129 | """ 130 | Args: 131 | path (str, Path): Path to input video 132 | stream_number (int): Stream number to extract video parameters from 133 | output_resolution (Tuple[int, int]): Sets the _resolution of the result (width, height). 134 | If None, _resolution will be the same as _resolution of original video. 135 | output_fps (float): Sets the output framerate of the video to the given value. 136 | Useful to work with VFR (Variable Frame Rate) videos. If None, will keep the original framerate (whether variable or constant). 137 | start_frame (int): frame to start reading from. 138 | Correct behaviour is guaranteed only if input video was produced by videoio. 139 | If output_fps is set, the timing is calculated according to the output_fps, otherwise average framerate of the original video is used. 140 | respect_original_timestamps (bool): whether to read frames according to timestamps or not 141 | If True, frames will be extracted according to framerate and video timestamps, 142 | otherwise just a raw stream of frames will be read 143 | """ 144 | path = str(path) 145 | assert start_frame >= 0, "Starting frame should be positive" 146 | self.path = path 147 | self.start_frame = start_frame 148 | self.respect_original_timestamps = respect_original_timestamps 149 | self.output_fps = output_fps 150 | if not os.path.isfile(path): 151 | raise FileNotFoundError("{} does not exist".format(path)) 152 | 153 | self.video_params = read_video_params(path, stream_number=stream_number) 154 | self._resolution = np.array((self.video_params['width'], self.video_params['height'])) 155 | if output_resolution is not None: 156 | self._resolution = output_resolution 157 | self.apply_scale = True 158 | else: 159 | self.apply_scale = False 160 | if self.output_fps is not None: 161 | self.respect_original_timestamps = True 162 | self.ffmpeg_process = None 163 | 164 | def __iter__(self): 165 | if self.start_frame != 0: 166 | if self.output_fps is None: 167 | start_frame_time = (self.start_frame - 0.5) / self.video_params['fps'] 168 | else: 169 | start_frame_time = (self.start_frame - 0.5) / self.output_fps 170 | ffmpeg_input = ffmpeg.input(self.path, loglevel='quiet', ss=start_frame_time) 171 | else: 172 | ffmpeg_input = ffmpeg.input(self.path, loglevel='quiet') 173 | if self.apply_scale: 174 | ffmpeg_input = ffmpeg_input.filter("scale", *self._resolution) 175 | if self.output_fps is not None: 176 | ffmpeg_input = ffmpeg_input.filter("fps", self.output_fps) 177 | if self.respect_original_timestamps: 178 | ffmpeg_output = ffmpeg_input.output('pipe:', format='rawvideo', pix_fmt='rgb24') 179 | else: 180 | ffmpeg_output = ffmpeg_input.output('pipe:', format='rawvideo', pix_fmt='rgb24', vsync='0') 181 | self.ffmpeg_process = ffmpeg_output.global_args('-nostdin').run_async(pipe_stdout=True) 182 | return self 183 | 184 | def __len__(self) -> int: 185 | if 'length' in self.video_params: 186 | return max(self.video_params['length'] - self.start_frame, 0) 187 | else: 188 | return 0 189 | 190 | @property 191 | def resolution(self) -> Tuple[int, int]: 192 | """ 193 | Output frame resolution 194 | Returns: 195 | Tuple[int, int]: resolution in pixels 196 | """ 197 | return self._resolution 198 | 199 | @property 200 | def fps(self) -> Optional[float]: 201 | """ 202 | Output framerate, 1/time difference between frames (average time for VFR videos) 203 | Returns: 204 | float: framerate (1/sec) or None if no framerate info is found 205 | """ 206 | if self.output_fps is not None: 207 | return self.output_fps 208 | else: 209 | if 'fps' in self.video_params: 210 | return self.video_params['fps'] 211 | else: 212 | return None 213 | 214 | def close(self): 215 | """ 216 | Close reader thread 217 | """ 218 | if hasattr(self, "ffmpeg_process") and self.ffmpeg_process is not None: 219 | self.ffmpeg_process.stdout.close() 220 | self.ffmpeg_process.wait() 221 | 222 | def __next__(self) -> np.ndarray: 223 | in_bytes = self.ffmpeg_process.stdout.read(np.prod(self._resolution) * 3) 224 | if not in_bytes: 225 | raise StopIteration 226 | in_frame = np.frombuffer(in_bytes, np.uint8).reshape(*self._resolution[::-1], 3) 227 | return in_frame 228 | 229 | def __del__(self): 230 | self.close() 231 | 232 | 233 | class VideoWriter: 234 | """ 235 | Class for writing a video frame-by-frame 236 | """ 237 | 238 | def __init__(self, path: Union[str, Path], resolution: Tuple[int, int], lossless: bool = False, 239 | preset: str = 'slow', fps: float = None): 240 | """ 241 | Args: 242 | path (str, Path): Path to output video 243 | resolution (Tuple[int, int]): Resolution of the input frames and output video (width, height) 244 | lossless (bool): Whether to apply lossless encoding. 245 | Be aware: lossless format is still lossy due to RGB to YUV conversion inaccuracy 246 | preset (str): H.264 compression preset 247 | fps (float): Target FPS. If None, will be set to ffmpeg's default 248 | """ 249 | ensure_encoder_presence() 250 | path = str(path) 251 | assert preset in H264_PRESETS, "Preset '{}' is not supported by libx264, supported presets are {}". \ 252 | format(preset, H264_PRESETS) 253 | self.resolution = resolution 254 | input_params = dict(format='rawvideo', pix_fmt='rgb24', s='{}x{}'.format(*resolution), loglevel='quiet') 255 | if fps is not None: 256 | input_params['framerate'] = fps 257 | ffmpeg_input = ffmpeg.input('pipe:', **input_params) 258 | encoding_params = {"c:v": "libx264", "preset": preset} 259 | if lossless: 260 | encoding_params['profile:v'] = 'high444' 261 | encoding_params['crf'] = 0 262 | 263 | ffmpeg_process = ffmpeg_input.output(path, pix_fmt='yuv444p' if lossless else 'yuv420p', **encoding_params) 264 | 265 | self.ffmpeg_process = ffmpeg_process.overwrite_output().run_async(pipe_stdin=True) 266 | 267 | def write(self, color_frame: np.ndarray): 268 | """ 269 | Write next frame 270 | Args: 271 | color_frame (np.ndarray): RGB frame to write 272 | """ 273 | assert color_frame.shape[2] == 3, "Alpha channel is not supported" 274 | assert all([self.resolution[i] == color_frame.shape[1 - i] for i in range(2)]), \ 275 | "Resolution of color frame does not match with video _resolution – expected {}, got {}". \ 276 | format(self.resolution, color_frame.shape[:2][::-1]) 277 | if color_frame.dtype == np.float16 or color_frame.dtype == np.float32 or color_frame.dtype == np.float64: 278 | color_frame = (color_frame * 255).astype(np.uint8) 279 | elif color_frame.dtype != np.uint8: 280 | raise NotImplementedError("Dtype {} is not supported".format(color_frame.dtype)) 281 | self.ffmpeg_process.stdin.write(color_frame.tobytes()) 282 | 283 | def close(self): 284 | """ 285 | Finish video creation process and close video file 286 | """ 287 | if hasattr(self, "ffmpeg_process"): 288 | self.ffmpeg_process.stdin.close() 289 | self.ffmpeg_process.wait() 290 | 291 | def __enter__(self): 292 | return self 293 | 294 | def __exit__(self, exc_type, exc_val, exc_tb): 295 | self.close() 296 | 297 | def __del__(self): 298 | self.close() 299 | --------------------------------------------------------------------------------