├── tests ├── __init__.py ├── assets │ ├── example.mp4 │ ├── corrupted.mp4 │ ├── example.webm │ ├── bad_colorspace.mp4 │ └── example-short.mp4 ├── test_metadata.py ├── test_regression.py └── test_frames.py ├── simple_video_utils ├── __init__.py ├── metadata.py └── frames.py ├── .gitignore ├── .github └── workflows │ ├── lint.yaml │ ├── test.yaml │ └── release.yaml ├── LICENSE ├── pyproject.toml └── README.md /tests/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /simple_video_utils/__init__.py: -------------------------------------------------------------------------------- 1 | """Simple video utilities for frame extraction and metadata.""" 2 | -------------------------------------------------------------------------------- /tests/assets/example.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sign/simple-video-utils/main/tests/assets/example.mp4 -------------------------------------------------------------------------------- /tests/assets/corrupted.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sign/simple-video-utils/main/tests/assets/corrupted.mp4 -------------------------------------------------------------------------------- /tests/assets/example.webm: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sign/simple-video-utils/main/tests/assets/example.webm -------------------------------------------------------------------------------- /tests/assets/bad_colorspace.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sign/simple-video-utils/main/tests/assets/bad_colorspace.mp4 -------------------------------------------------------------------------------- 
/tests/assets/example-short.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sign/simple-video-utils/main/tests/assets/example-short.mp4 -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .idea/ 2 | .claude/ 3 | *.egg-info 4 | build/ 5 | dist/ 6 | .env 7 | __pycache__/ 8 | *.pyc 9 | uv.lock 10 | .venv -------------------------------------------------------------------------------- /.github/workflows/lint.yaml: -------------------------------------------------------------------------------- 1 | name: Lint 2 | 3 | 4 | on: 5 | push: 6 | branches: [ main ] 7 | pull_request: 8 | branches: [ main ] 9 | 10 | 11 | jobs: 12 | lint: 13 | name: Lint 14 | runs-on: ubuntu-latest 15 | 16 | steps: 17 | - uses: actions/checkout@v6 18 | 19 | - name: Setup uv 20 | uses: astral-sh/setup-uv@v7 21 | with: 22 | python-version: "3.12" 23 | enable-cache: true 24 | activate-environment: true 25 | 26 | - name: Install dependencies 27 | run: uv pip install ".[dev]" 28 | 29 | - name: Lint code 30 | run: uv run ruff check . 
-------------------------------------------------------------------------------- /.github/workflows/test.yaml: -------------------------------------------------------------------------------- 1 | name: Test 2 | 3 | 4 | on: 5 | push: 6 | branches: [ main ] 7 | pull_request: 8 | branches: [ main ] 9 | 10 | 11 | jobs: 12 | test: 13 | name: Test 14 | runs-on: ubuntu-latest 15 | 16 | steps: 17 | - uses: actions/checkout@v6 18 | 19 | - name: Install ffmpeg 20 | run: sudo apt-get update && sudo apt-get install -y ffmpeg 21 | 22 | - name: Verify ffprobe 23 | run: ffprobe -version 24 | 25 | - name: Setup uv 26 | uses: astral-sh/setup-uv@v7 27 | with: 28 | python-version: "3.12" 29 | enable-cache: true 30 | activate-environment: true 31 | 32 | - name: Install dependencies 33 | run: uv pip install ".[dev]" 34 | 35 | - name: Test Code 36 | run: uv run pytest -n auto --dist loadscope 37 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2025 sign 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
name: Publish Python Package
on:
  release:
    types: [ created ]

jobs:
  pypi-publish:
    name: Upload release to PyPI
    runs-on: ubuntu-latest
    environment:
      name: pypi
      url: https://pypi.org/p/simple-video-utils
    permissions:
      # Required for PyPI trusted publishing (OIDC).
      id-token: write
    steps:
      # checkout@v6 to match the version used by lint.yaml and test.yaml
      - uses: actions/checkout@v6

      - uses: actions/setup-python@v6
        with:
          python-version: "3.12"

      # Derive the package version from the release tag
      # (e.g. refs/tags/1.2.3 -> 1.2.3) and expose it via the job env.
      - name: Extract release version
        id: get_version
        run: echo "version=${GITHUB_REF#refs/tags/}" >> $GITHUB_ENV

      # Stamp the tag-derived version into pyproject.toml before building.
      - name: Update version in pyproject.toml
        run: |
          sed -i 's/^version = .*/version = "${{ env.version }}"/' pyproject.toml

      - name: Install build dependencies
        run: pip install build

      - name: Build a binary wheel dist
        run: |
          rm -rf dist
          python -m build

      - name: Publish distribution 📦 to PyPI
        uses: pypa/gh-action-pypi-publish@release/v1
"""Video metadata extraction backed by PyAV."""

import io
from contextlib import contextmanager
from functools import lru_cache
from typing import NamedTuple

import av


class VideoMetadata(NamedTuple):
    """Key properties of the first video stream in a container."""

    # Frame dimensions in pixels.
    width: int
    height: int
    # Average frame rate; 0.0 when the stream reports none.
    fps: float
    # Total frame count, or None when the container does not report it.
    nb_frames: int | None
    # Stream time base as a fraction string (e.g. "1/15360"), or None.
    time_base: str | None


@contextmanager
def _open_container(source: str | io.BytesIO):
    """Open a PyAV container and guarantee it is closed on exit.

    Only failures of ``av.open`` itself are wrapped in RuntimeError.
    Exceptions raised by the caller's ``with`` body propagate unchanged —
    previously the ``yield`` sat inside the ``try``, so any error raised
    while using the container was masked as "Failed to open video".

    Raises:
        RuntimeError: if the source cannot be opened as a video.
    """
    try:
        container = av.open(source)
    except Exception as e:  # av.open raises backend-specific error types
        msg = "Failed to open video"
        raise RuntimeError(msg) from e
    try:
        yield container
    finally:
        container.close()


def _get_metadata_from_container(container: av.container.InputContainer) -> VideoMetadata:
    """Extract metadata from the first video stream of an open container."""
    stream = container.streams.video[0]
    # Fall back to 0.0 / None when the stream does not carry the field.
    fps = float(stream.average_rate) if stream.average_rate else 0.0
    nb_frames = stream.frames if stream.frames > 0 else None
    time_base = str(stream.time_base) if stream.time_base else None

    return VideoMetadata(
        width=stream.width,
        height=stream.height,
        fps=fps,
        nb_frames=nb_frames,
        time_base=time_base,
    )


def video_metadata_from_bytes(data: bytes) -> VideoMetadata:
    """Return key video stream metadata from in-memory video bytes."""
    with _open_container(io.BytesIO(data)) as container:
        return _get_metadata_from_container(container)


@lru_cache(maxsize=8)
def video_metadata(url_or_path: str) -> VideoMetadata:
    """Return key video stream metadata for a local path or remote URL.

    Results are cached per path/URL string; a file that changes on disk
    returns stale metadata until its cache entry is evicted.
    """
    with _open_container(url_or_path) as container:
        return _get_metadata_from_container(container)
12 | 13 | ## Installation 14 | 15 | ```bash 16 | pip install simple-video-utils 17 | ``` 18 | 19 | ## Usage 20 | 21 | ### Extract Video Metadata 22 | 23 | ```python 24 | from simple_video_utils.metadata import video_metadata 25 | 26 | meta = video_metadata("video.mp4") 27 | print(f"{meta.width}x{meta.height} @ {meta.fps} fps") 28 | # Output: VideoMetadata(width=1920, height=1080, fps=30.0, nb_frames=450, time_base='1/15360') 29 | ``` 30 | 31 | ### Read Frames from File 32 | 33 | ```python 34 | from simple_video_utils.frames import read_frames_exact 35 | 36 | # Read specific frame range (inclusive) 37 | frames = list(read_frames_exact("video.mp4", start_frame=0, end_frame=10)) 38 | # Returns 11 frames as numpy arrays (H, W, 3) in RGB format 39 | 40 | # Read from frame to end of video 41 | frames = list(read_frames_exact("video.mp4", start_frame=5, end_frame=None)) 42 | ``` 43 | 44 | ### Read Frames from Stream 45 | 46 | ```python 47 | from simple_video_utils.frames import read_frames_from_stream 48 | 49 | # Useful for uploaded files or in-memory video data 50 | with open("video.mp4", "rb") as f: 51 | meta, frames_gen = read_frames_from_stream(f) 52 | for frame in frames_gen: 53 | # Process each frame (numpy array) 54 | pass 55 | ``` 56 | 57 | ### Remote Videos 58 | 59 | ```python 60 | from simple_video_utils.metadata import video_metadata 61 | from simple_video_utils.frames import read_frames_exact 62 | 63 | # Works with remote URLs 64 | url = "https://example.com/video.mp4" 65 | meta = video_metadata(url) 66 | frames = list(read_frames_exact(url, 0, 5)) 67 | ``` 68 | 69 | ## Development 70 | 71 | ```bash 72 | pip install -e ".[dev]" 73 | pytest tests/ 74 | ruff check . 
75 | ``` 76 | 77 | -------------------------------------------------------------------------------- /tests/test_metadata.py: -------------------------------------------------------------------------------- 1 | from pathlib import Path 2 | 3 | import pytest 4 | 5 | from simple_video_utils.metadata import video_metadata, video_metadata_from_bytes 6 | 7 | 8 | class TestVideoMetadata: 9 | """Tests for video metadata extraction functions.""" 10 | 11 | @pytest.fixture 12 | def video_path(self): 13 | """Path to the example video file.""" 14 | return str(Path(__file__).parent / "assets" / "example.mp4") 15 | 16 | @pytest.fixture 17 | def video_bytes(self, video_path): 18 | """Load example video as bytes.""" 19 | return Path(video_path).read_bytes() 20 | 21 | def test_video_metadata(self, video_path): 22 | """Test that we can read video metadata.""" 23 | meta = video_metadata(video_path) 24 | 25 | assert meta.width > 0 26 | assert meta.height > 0 27 | assert meta.fps > 0 28 | assert isinstance(meta.width, int) 29 | assert isinstance(meta.height, int) 30 | assert isinstance(meta.fps, float) 31 | 32 | def test_video_metadata_from_bytes(self, video_bytes): 33 | """Test metadata extraction from video bytes.""" 34 | meta = video_metadata_from_bytes(video_bytes) 35 | 36 | assert meta.width > 0 37 | assert meta.height > 0 38 | assert meta.fps > 0 39 | assert isinstance(meta.width, int) 40 | assert isinstance(meta.height, int) 41 | assert isinstance(meta.fps, float) 42 | 43 | def test_video_metadata_from_bytes_matches_file(self, video_bytes, video_path): 44 | """Test that bytes-based metadata matches file-based metadata.""" 45 | meta_bytes = video_metadata_from_bytes(video_bytes) 46 | meta_file = video_metadata(video_path) 47 | 48 | assert meta_bytes.width == meta_file.width 49 | assert meta_bytes.height == meta_file.height 50 | assert meta_bytes.fps == meta_file.fps 51 | 52 | def test_bad_color_space_video(self): 53 | """Test metadata extraction from a video with unusual color 
"""Exact-range frame extraction from video files, URLs, and streams."""

import io
from collections.abc import Generator
from typing import BinaryIO

import av
import numpy as np

from simple_video_utils.metadata import VideoMetadata, _open_container, video_metadata_from_bytes


def _generate_frames(
    container: av.container.InputContainer,
    skip_frames: int = 0,
    max_frames: int | None = None,
) -> Generator[np.ndarray, None, None]:
    """
    Generate RGB frames from a container's current position.

    Decodes frames sequentially and yields them after skipping the
    requested number of leading frames.

    Args:
        container: Open PyAV container (may be seeked to any position).
        skip_frames: Number of frames to skip from current position before yielding.
        max_frames: Maximum number of frames to yield, or None for all remaining.

    Yields:
        RGB numpy arrays (H, W, 3) for frames after skipping.
    """
    frames_skipped = 0
    frames_yielded = 0

    for frame in container.decode(video=0):
        # Discard leading frames (e.g. frames decoded between a keyframe
        # seek and the actual target frame).
        if frames_skipped < skip_frames:
            frames_skipped += 1
            continue

        yield frame.to_ndarray(format='rgb24')
        frames_yielded += 1

        if max_frames is not None and frames_yielded >= max_frames:
            break


def _validate_parameters(
    start_frame: int | None,
    end_frame: int | None,
    start_time: float | None,
    end_time: float | None,
) -> tuple[bool, bool]:
    """Validate that time and frame parameters aren't mixed.

    Returns:
        Tuple of (has_frame_params, has_time_params).

    Raises:
        ValueError: if both frame-based and time-based parameters are given.
    """
    has_frame_params = start_frame is not None or end_frame is not None
    has_time_params = start_time is not None or end_time is not None

    if has_frame_params and has_time_params:
        msg = "Cannot mix frame-based and time-based parameters"
        raise ValueError(msg)

    return has_frame_params, has_time_params


def _convert_time_to_frames(
    start_time: float | None,
    end_time: float | None,
    fps: float,
) -> tuple[int, int | None]:
    """Convert time-based parameters to frame indices.

    Raises:
        ValueError: if the resulting frame range is inverted.
    """
    start = int((start_time or 0.0) * fps)
    end = int(end_time * fps) if end_time is not None else None

    if end is not None and end < start:
        msg = "invalid frame range"
        raise ValueError(msg)

    return start, end


def _normalize_frame_range(
    start_frame: int | None,
    end_frame: int | None,
) -> tuple[int, int | None]:
    """Normalize frame parameters with defaults and validation.

    Validation deliberately uses ``assert`` (callers and the test suite
    catch AssertionError), so it is not converted to ValueError here.
    """
    start = start_frame if start_frame is not None else 0

    if end_frame is not None:
        assert end_frame >= start >= 0, "invalid frame range"
    else:
        assert start >= 0, "start_frame must be non-negative"

    return start, end_frame


def _calculate_seek_position(
    target_start_frame: int,
    fps: float,
    stream,
    container,
) -> int:
    """
    Calculate and perform seeking if beneficial.

    Seeks directly to the target position in the file - does NOT decode
    or process frames before the seek position. This allows efficient
    extraction from any point in the video without reading the entire file.

    Returns the frame number where the container is positioned after seeking.
    """
    min_seek_seconds = 3.0  # Only seek if target is 3+ seconds from start
    seek_buffer_seconds = 1.0  # Seek 1 second before target

    target_time = target_start_frame / fps

    # Close to the start: decoding from frame 0 is cheaper than seeking.
    if target_time < min_seek_seconds:
        return 0  # Start from beginning

    # Seek to 1 second before target (jumps directly in file, no decoding)
    seek_time = target_time - seek_buffer_seconds
    seek_timestamp = int(seek_time / float(stream.time_base))
    container.seek(seek_timestamp, stream=stream)

    # NOTE(review): container.seek lands on a keyframe at or before the
    # requested timestamp; the return value assumes decoding resumes exactly
    # at seek_time. With sparse keyframes (> ~1s apart) the caller's skip
    # count could start early — confirm against regression tests for such
    # content.
    return int(seek_time * fps)


def read_frames_exact(
    src: str,
    start_frame: int | None = None,
    end_frame: int | None = None,
    start_time: float | None = None,
    end_time: float | None = None,
) -> Generator[np.ndarray, None, None]:
    """
    Return frames as RGB np.ndarrays from the specified range.

    Supports both frame-based and time-based range specification
    (mutually exclusive). Uses PyAV for efficient frame extraction.

    Args:
        src: Path to video file or URL.
        start_frame: Starting frame index (0-based). Mutually exclusive with start_time.
        end_frame: Ending frame index (inclusive), or None for end of video.
        start_time: Starting time in seconds. Mutually exclusive with start_frame.
        end_time: Ending time in seconds, or None for end of video.

    Yields:
        RGB numpy arrays (H, W, 3).

    Raises:
        ValueError: if frame- and time-based parameters are mixed, or the
            video reports no FPS information.
        AssertionError: on a negative or inverted frame range.

    Examples:
        # All frames
        frames = list(read_frames_exact("video.mp4"))

        # Frame-based
        frames = list(read_frames_exact("video.mp4", start_frame=0, end_frame=10))

        # Time-based
        frames = list(read_frames_exact("video.mp4", start_time=1.5, end_time=3.0))
    """
    # Validate parameters early (before opening the file).
    has_frame_params, has_time_params = _validate_parameters(
        start_frame, end_frame, start_time, end_time
    )

    # Early validation for frame-based parameters (before opening the file).
    if has_frame_params:
        _normalize_frame_range(start_frame, end_frame)

    with _open_container(src) as container:
        stream = container.streams.video[0]

        # FPS is required both for time->frame conversion and for seeking.
        if not stream.average_rate:
            msg = "Video has no FPS information"
            raise ValueError(msg)
        fps = float(stream.average_rate)

        # Convert parameters to frame indices.
        if has_time_params:
            target_start, target_end = _convert_time_to_frames(start_time, end_time, fps)
        else:
            target_start, target_end = _normalize_frame_range(start_frame, end_frame)

        # Seek to an approximate earlier position when far from the start.
        seek_position = _calculate_seek_position(target_start, fps, stream, container)

        # Frames to discard from the seek position, and how many to yield.
        skip_count = target_start - seek_position
        frame_count = (target_end - target_start + 1) if target_end is not None else None

        yield from _generate_frames(container, skip_count, frame_count)


def read_frames_from_stream(
    stream: BinaryIO,
    skip_frames: int = 0,
) -> tuple[VideoMetadata, Generator[np.ndarray, None, None]]:
    """
    Read frames from a video stream (file-like object).

    Args:
        stream: A file-like object containing video data (e.g., uploaded file).
        skip_frames: Number of initial frames to skip (for resume support).

    Returns:
        A tuple of (VideoMetadata, frame_generator).
        The generator yields np.ndarray frames in RGB format (H, W, 3).

    Note:
        The entire stream is buffered in memory, so very large inputs cost
        proportional RAM. PyAV handles format detection automatically and
        works with MP4, WebM, and other formats.
    """
    video_data = stream.read()
    meta = video_metadata_from_bytes(video_data)

    def frame_generator() -> Generator[np.ndarray, None, None]:
        """Yield decoded frames from the buffered video data."""
        with _open_container(io.BytesIO(video_data)) as container:
            # No seeking in stream mode - skip frames sequentially from start.
            yield from _generate_frames(container, skip_frames=skip_frames, max_frames=None)

    return meta, frame_generator()
def ffmpeg_read_frames_exact(  # noqa: C901
    src: str,
    start_frame: int,
    end_frame: int | None = None,
) -> Generator[np.ndarray, None, None]:
    """
    Return frames [start_frame, end_frame] inclusive as RGB np.ndarrays using ffmpeg.
    If end_frame is None, reads from start_frame to the end of the video.

    Ground-truth reference implementation for the regression tests: decodes
    via an ffmpeg subprocess writing raw rgb24 to stdout, one frame_bytes
    chunk per frame.

    Args:
        src: Path or URL of the video.
        start_frame: First frame index (0-based, inclusive); must be >= 0.
        end_frame: Last frame index (inclusive), or None for end of video.

    Yields:
        RGB numpy arrays of shape (height, width, 3), dtype uint8.

    Raises:
        AssertionError: on a negative or inverted frame range.
        RuntimeError: if FPS cannot be determined from the container.
    """
    if end_frame is not None:
        assert end_frame >= start_frame >= 0, "invalid frame range"
    else:
        assert start_frame >= 0, "start_frame must be non-negative"

    # Width/height/fps come from ffprobe (cached via lru_cache).
    meta = ffprobe(src)
    w, h, fps = meta.width, meta.height, meta.fps
    if fps <= 0:
        msg = "Could not determine FPS from container"
        raise RuntimeError(msg)

    # Size of one raw rgb24 frame on the pipe: 3 bytes per pixel.
    frame_bytes = w * h * 3

    # If start_frame is 0, don't seek at all for optimal performance
    if start_frame == 0:
        seek_time = None
        if end_frame is None:
            # Read entire video from start
            vf = ["setpts=N/FRAME_RATE/TB"]
        else:
            # Read from start to end_frame
            vf = [f"select='lte(n\\,{end_frame})'", "setpts=N/FRAME_RATE/TB"]
    else:
        # Seek *near* the target, then select by absolute frame index in that window.
        # We back off a small margin to ensure keyframe landing < start_frame.
        backoff_frames = 2 * int(round(fps))  # ~2 seconds
        seek_frame = max(0, start_frame - backoff_frames)
        seek_time = seek_frame / fps

        # After demuxer-level seek (-ss before -i), ffmpeg's select 'n' restarts at 0.
        # So we select frames [offset .. offset + N-1] relative to the seek point.
        relative_start = start_frame - seek_frame

        if end_frame is None:
            # Read from start_frame to end of video
            vf = [f"select='gte(n\\,{relative_start})'", "setpts=N/FRAME_RATE/TB"]
        else:
            relative_end = end_frame - seek_frame
            vf = [
                f"select='between(n\\,{relative_start}\\,{relative_end})'",
                "setpts=N/FRAME_RATE/TB",  # normalize PTS after select
            ]

    vf_str = ",".join(vf)

    cmd = [
        "ffmpeg",
        "-hide_banner",
        "-loglevel",
        "error",
    ]

    if seek_time is not None:
        cmd.extend(["-ss", f"{seek_time:.6f}"])  # fast demuxer seek close to target

    cmd.extend([
        "-i",
        src,
        "-vf",
        vf_str,
        "-vsync",
        "0",  # don't duplicate/drop after select
        "-f",
        "rawvideo",
        "-pix_fmt",
        "rgb24",
        "pipe:1",
    ])

    proc = subprocess.Popen(cmd, stdout=subprocess.PIPE)
    num_frames = 0
    # None means "until ffmpeg's stdout is exhausted".
    max_frames = None if end_frame is None else (end_frame - start_frame + 1)

    try:
        while True:
            buf = proc.stdout.read(frame_bytes)
            if not buf:
                break
            if len(buf) < frame_bytes:
                # truncated last read
                break
            yield np.frombuffer(buf, dtype=np.uint8).reshape(h, w, 3)

            num_frames += 1
            if max_frames is not None and num_frames >= max_frames:
                break
    finally:
        # Always reap the subprocess, even if the consumer abandons the
        # generator early (terminate is a no-op if ffmpeg already exited).
        if proc.stdout:
            proc.stdout.close()
        proc.terminate()
        proc.wait()
video_path(self): 161 | """Path to the example video file.""" 162 | return str(Path(__file__).parent / "assets" / "example.mp4") 163 | 164 | def test_metadata_matches_ffprobe(self, video_path): 165 | """Test that our metadata extraction matches ffprobe output.""" 166 | # Get metadata using ffprobe (ground truth) 167 | ffprobe_meta = ffprobe(video_path) 168 | 169 | # Get metadata using our implementation 170 | pyav_meta = pyav_video_metadata(video_path) 171 | 172 | # Compare all fields 173 | assert pyav_meta.width == ffprobe_meta.width, "Width mismatch" 174 | assert pyav_meta.height == ffprobe_meta.height, "Height mismatch" 175 | assert abs(pyav_meta.fps - ffprobe_meta.fps) < 0.01, "FPS mismatch" 176 | 177 | # nb_frames might be slightly different or missing, but should be close if both exist 178 | if pyav_meta.nb_frames is not None and ffprobe_meta.nb_frames is not None: 179 | assert abs(pyav_meta.nb_frames - ffprobe_meta.nb_frames) <= 1, "Frame count mismatch" 180 | 181 | def test_frames_match_ffmpeg_from_start(self, video_path): 182 | """Test that frames extracted from start match ffmpeg output.""" 183 | start_frame = 0 184 | end_frame = 10 185 | 186 | # Extract using ffmpeg (ground truth) 187 | ffmpeg_frames = list(ffmpeg_read_frames_exact(video_path, start_frame, end_frame)) 188 | 189 | # Extract using our implementation 190 | pyav_frames = list(pyav_read_frames_exact(video_path, start_frame=start_frame, end_frame=end_frame)) 191 | 192 | # Should have same number of frames 193 | assert len(pyav_frames) == len(ffmpeg_frames), ( 194 | f"Frame count mismatch: PyAV={len(pyav_frames)}, FFmpeg={len(ffmpeg_frames)}" 195 | ) 196 | 197 | # Every frame should be identical (pixel-perfect) 198 | for i, (pyav_frame, ffmpeg_frame) in enumerate(zip(pyav_frames, ffmpeg_frames, strict=False)): 199 | np.testing.assert_array_equal( 200 | pyav_frame, 201 | ffmpeg_frame, 202 | err_msg=f"Frame {i} differs between PyAV and FFmpeg", 203 | ) 204 | 205 | def 
test_frames_match_ffmpeg_with_seeking(self, video_path): 206 | """Test that frames extracted with seeking match ffmpeg output.""" 207 | start_frame = 50 208 | end_frame = 60 209 | 210 | # First, extract from start to verify ground truth 211 | ffmpeg_from_start = list(ffmpeg_read_frames_exact(video_path, 0, 65)) 212 | pyav_from_start = list(pyav_read_frames_exact(video_path, start_frame=0, end_frame=65)) 213 | 214 | # Verify our implementation matches ffmpeg when no seeking 215 | for i in range(start_frame, min(end_frame + 1, len(ffmpeg_from_start))): 216 | assert np.array_equal(pyav_from_start[i], ffmpeg_from_start[i]), ( 217 | f"Frame {i} differs from start - implementation issue" 218 | ) 219 | 220 | # Now test with seeking - Extract using ffmpeg 221 | ffmpeg_frames = list(ffmpeg_read_frames_exact(video_path, start_frame, end_frame)) 222 | 223 | # Extract using our implementation 224 | pyav_frames = list(pyav_read_frames_exact(video_path, start_frame=start_frame, end_frame=end_frame)) 225 | 226 | # Should have same number of frames 227 | assert len(pyav_frames) == len(ffmpeg_frames), ( 228 | f"Frame count mismatch: PyAV={len(pyav_frames)}, FFmpeg={len(ffmpeg_frames)}" 229 | ) 230 | 231 | # Check if PyAV matches the ground truth from start 232 | for i in range(len(pyav_frames)): 233 | frame_num = start_frame + i 234 | pyav_matches_start = np.array_equal(pyav_frames[i], pyav_from_start[frame_num]) 235 | ffmpeg_matches_start = np.array_equal(ffmpeg_frames[i], ffmpeg_from_start[frame_num]) 236 | 237 | if not pyav_matches_start: 238 | pytest.fail(f"PyAV frame {frame_num} with seeking doesn't match frame from start") 239 | if not ffmpeg_matches_start: 240 | pytest.skip( 241 | f"FFmpeg seeking inaccurate: frame {frame_num} doesn't match ground truth. " 242 | "This is a known limitation of the ffmpeg select filter with seeking." 
243 | ) 244 | 245 | def test_frames_match_ffmpeg_time_based(self, video_path): 246 | """Test that time-based extraction matches ffmpeg frame-based output.""" 247 | # Get FPS to convert time to frames 248 | meta = pyav_video_metadata(video_path) 249 | fps = meta.fps 250 | 251 | # Test 1-2 seconds 252 | start_time = 1.0 253 | end_time = 2.0 254 | start_frame = int(start_time * fps) 255 | end_frame = int(end_time * fps) 256 | 257 | # Extract using ffmpeg with frame indices (ground truth) 258 | ffmpeg_frames = list(ffmpeg_read_frames_exact(video_path, start_frame, end_frame)) 259 | 260 | # Extract using our time-based implementation 261 | pyav_frames = list(pyav_read_frames_exact(video_path, start_time=start_time, end_time=end_time)) 262 | 263 | # Should have same number of frames 264 | assert len(pyav_frames) == len(ffmpeg_frames), ( 265 | f"Frame count mismatch: PyAV={len(pyav_frames)}, FFmpeg={len(ffmpeg_frames)}" 266 | ) 267 | 268 | # Every frame should be identical 269 | for i, (pyav_frame, ffmpeg_frame) in enumerate(zip(pyav_frames, ffmpeg_frames, strict=False)): 270 | actual_frame_num = start_frame + i 271 | np.testing.assert_array_equal( 272 | pyav_frame, 273 | ffmpeg_frame, 274 | err_msg=f"Frame {actual_frame_num} ({start_time + i/fps:.3f}s) differs between PyAV and FFmpeg", 275 | ) 276 | 277 | def test_single_frame_matches_ffmpeg(self, video_path): 278 | """Test that single frame extraction matches ffmpeg.""" 279 | frame_idx = 42 280 | 281 | # Get ground truth from start 282 | ffmpeg_from_start = list(ffmpeg_read_frames_exact(video_path, 0, 50)) 283 | pyav_from_start = list(pyav_read_frames_exact(video_path, start_frame=0, end_frame=50)) 284 | 285 | # Extract using ffmpeg with seeking 286 | ffmpeg_frames = list(ffmpeg_read_frames_exact(video_path, frame_idx, frame_idx)) 287 | assert len(ffmpeg_frames) == 1 288 | 289 | # Extract using our implementation with seeking 290 | pyav_frames = list(pyav_read_frames_exact(video_path, start_frame=frame_idx, 
end_frame=frame_idx)) 291 | assert len(pyav_frames) == 1 292 | 293 | # Check if our implementation matches ground truth 294 | if not np.array_equal(pyav_frames[0], pyav_from_start[frame_idx]): 295 | pytest.fail(f"PyAV frame {frame_idx} with seeking doesn't match frame from start") 296 | 297 | # If ffmpeg doesn't match ground truth, skip (known limitation) 298 | if not np.array_equal(ffmpeg_frames[0], ffmpeg_from_start[frame_idx]): 299 | pytest.skip( 300 | f"FFmpeg seeking inaccurate for frame {frame_idx}. " 301 | "PyAV implementation is more accurate." 302 | ) 303 | 304 | 305 | if __name__ == "__main__": 306 | pytest.main([__file__, "-v"]) 307 | -------------------------------------------------------------------------------- /tests/test_frames.py: -------------------------------------------------------------------------------- 1 | from io import BytesIO 2 | from pathlib import Path 3 | 4 | import numpy as np 5 | import pytest 6 | 7 | from simple_video_utils.frames import read_frames_exact, read_frames_from_stream 8 | from simple_video_utils.metadata import video_metadata 9 | 10 | 11 | class TestReadFramesExact: 12 | """Tests for the read_frames_exact function using example.mp4.""" 13 | 14 | @pytest.fixture 15 | def video_path(self): 16 | """Path to the example video file.""" 17 | return str(Path(__file__).parent / "assets" / "example.mp4") 18 | 19 | def test_invalid_frame_range_negative_start(self): 20 | """Test that negative start frame raises AssertionError.""" 21 | with pytest.raises(AssertionError, match="invalid frame range"): 22 | list(read_frames_exact("example.mp4", -1, 5)) 23 | 24 | def test_invalid_frame_range_end_before_start(self): 25 | """Test that end_frame < start_frame raises AssertionError.""" 26 | with pytest.raises(AssertionError, match="invalid frame range"): 27 | list(read_frames_exact("example.mp4", 10, 5)) 28 | 29 | def test_read_single_frame(self, video_path): 30 | """Test reading a single frame from example.mp4.""" 31 | frames = 
list(read_frames_exact(video_path, 0, 0)) 32 | 33 | assert len(frames) == 1 34 | frame = frames[0] 35 | 36 | # Check frame properties 37 | assert isinstance(frame, np.ndarray) 38 | assert frame.dtype == np.uint8 39 | assert len(frame.shape) == 3 40 | assert frame.shape[2] == 3 # RGB channels 41 | 42 | # Check that frame contains actual image data (not all zeros) 43 | assert np.sum(frame) > 0 44 | 45 | def test_read_multiple_frames(self, video_path): 46 | """Test reading multiple consecutive frames.""" 47 | frames = list(read_frames_exact(video_path, 0, 2)) 48 | 49 | assert len(frames) == 3 # frames 0, 1, 2 (inclusive) 50 | 51 | for frame in frames: 52 | assert isinstance(frame, np.ndarray) 53 | assert frame.dtype == np.uint8 54 | assert len(frame.shape) == 3 55 | assert frame.shape[2] == 3 56 | 57 | def test_frame_range_consistency(self, video_path): 58 | """Test that reading the same frame multiple times gives consistent results.""" 59 | frame1 = list(read_frames_exact(video_path, 5, 5))[0] 60 | frame2 = list(read_frames_exact(video_path, 5, 5))[0] 61 | 62 | np.testing.assert_array_equal(frame1, frame2) 63 | 64 | def test_sequential_vs_range_reading(self, video_path): 65 | """Test that reading frames individually vs as range gives same results.""" 66 | # Read frames 1, 2, 3 as a range 67 | range_frames = list(read_frames_exact(video_path, 1, 3)) 68 | 69 | # Read each frame individually 70 | individual_frames = [ 71 | list(read_frames_exact(video_path, 1, 1))[0], 72 | list(read_frames_exact(video_path, 2, 2))[0], 73 | list(read_frames_exact(video_path, 3, 3))[0], 74 | ] 75 | 76 | assert len(range_frames) == len(individual_frames) == 3 77 | 78 | for range_frame, individual_frame in zip(range_frames, individual_frames, strict=False): 79 | np.testing.assert_array_equal(range_frame, individual_frame) 80 | 81 | def test_frames_are_different(self, video_path): 82 | """Test that consecutive frames are actually different (video has motion).""" 83 | frames = 
list(read_frames_exact(video_path, 0, 10)) 84 | 85 | if len(frames) >= 2: 86 | # Check that not all frames are identical 87 | differences = [] 88 | for i in range(len(frames) - 1): 89 | diff = np.sum(np.abs(frames[i].astype(np.int16) - frames[i + 1].astype(np.int16))) 90 | differences.append(diff) 91 | 92 | # At least some frames should be different 93 | assert max(differences) > 0, "All consecutive frames are identical" 94 | 95 | def test_large_frame_range(self, video_path): 96 | """Test reading a larger range of frames.""" 97 | # Get video metadata first to know how many frames we have 98 | meta = video_metadata(video_path) 99 | max_frames = meta.nb_frames or 30 # Default to 30 if unknown 100 | 101 | if max_frames and max_frames > 10: 102 | end_frame = min(max_frames - 1, 20) # Read up to frame 20 or video end 103 | frames = list(read_frames_exact(video_path, 0, end_frame)) 104 | 105 | expected_count = end_frame + 1 106 | assert len(frames) == expected_count 107 | 108 | # All frames should have same dimensions 109 | shapes = [frame.shape for frame in frames] 110 | assert all(shape == shapes[0] for shape in shapes) 111 | 112 | def test_end_frame_none_from_start(self, video_path): 113 | """Test reading from start to end of video with end_frame=None.""" 114 | # Read entire video from start 115 | frames_all = list(read_frames_exact(video_path, 0, None)) 116 | 117 | # Read first few frames with explicit end_frame 118 | frames_partial = list(read_frames_exact(video_path, 0, 5)) 119 | 120 | # All frames should be valid 121 | assert len(frames_all) > 0 122 | assert len(frames_all) >= len(frames_partial) 123 | 124 | # First frames should match 125 | for i in range(min(len(frames_all), len(frames_partial))): 126 | np.testing.assert_array_equal(frames_all[i], frames_partial[i]) 127 | 128 | def test_end_frame_none_from_middle(self, video_path): 129 | """Test reading from middle to end of video with end_frame=None.""" 130 | start_frame = 5 131 | 132 | # Read from middle to 
end with end_frame=None 133 | frames_to_end = list(read_frames_exact(video_path, start_frame, None)) 134 | 135 | # Should get some frames 136 | assert len(frames_to_end) > 0 137 | 138 | # Each frame should be valid 139 | for frame in frames_to_end: 140 | assert isinstance(frame, np.ndarray) 141 | assert frame.dtype == np.uint8 142 | assert len(frame.shape) == 3 143 | assert frame.shape[2] == 3 144 | 145 | def test_start_frame_zero_no_seeking(self, video_path): 146 | """Test that start_frame=0 optimization works correctly.""" 147 | # These should produce identical results 148 | frames_with_end = list(read_frames_exact(video_path, 0, 5)) 149 | frames_without_end = list(read_frames_exact(video_path, 0, None))[:6] # Take first 6 frames 150 | 151 | # Compare first 6 frames 152 | assert len(frames_with_end) == 6 # frames 0-5 inclusive 153 | assert len(frames_without_end) >= 6 154 | 155 | for i in range(6): 156 | np.testing.assert_array_equal(frames_with_end[i], frames_without_end[i]) 157 | 158 | def test_end_frame_none_consistency(self, video_path): 159 | """Test that end_frame=None gives consistent results.""" 160 | # Read twice with end_frame=None 161 | frames1 = list(read_frames_exact(video_path, 0, None)) 162 | frames2 = list(read_frames_exact(video_path, 0, None)) 163 | 164 | # Should get same number of frames 165 | assert len(frames1) == len(frames2) 166 | 167 | # Frames should be identical 168 | for f1, f2 in zip(frames1, frames2, strict=False): 169 | np.testing.assert_array_equal(f1, f2) 170 | 171 | def test_end_frame_none_vs_explicit_end(self, video_path): 172 | """Test end_frame=None vs explicit end_frame for entire video.""" 173 | # Get video metadata to find total frames 174 | meta = video_metadata(video_path) 175 | total_frames = meta.nb_frames 176 | 177 | if total_frames and total_frames > 10: 178 | # Read with end_frame=None 179 | frames_none = list(read_frames_exact(video_path, 0, None)) 180 | 181 | # Read with explicit end_frame (assuming we know total 
frames) 182 | frames_explicit = list(read_frames_exact(video_path, 0, total_frames - 1)) 183 | 184 | # Should get same number of frames (or close due to container metadata) 185 | # Allow small difference due to potential metadata inaccuracy 186 | assert abs(len(frames_none) - len(frames_explicit)) <= 1 187 | 188 | # First several frames should match 189 | min_len = min(len(frames_none), len(frames_explicit)) 190 | for i in range(min(min_len, 10)): # Compare first 10 frames 191 | np.testing.assert_array_equal(frames_none[i], frames_explicit[i]) 192 | 193 | def test_bad_color_space_video(self): 194 | """Test reading frames from a video with unusual color space metadata.""" 195 | strange_video = str(Path(__file__).parent / "assets" / "bad_colorspace.mp4") 196 | 197 | # Test reading frames (ffmpeg 8.0+ handles this video correctly) 198 | frames = list(read_frames_exact(strange_video, 0)) 199 | assert len(frames) == 182 200 | 201 | def test_webm_file(self): 202 | """Test reading frames from a WebM file.""" 203 | webm_video = str(Path(__file__).parent / "assets" / "example.webm") 204 | 205 | # Test reading frames 206 | frames = list(read_frames_exact(webm_video, 0)) 207 | assert len(frames) == 67 208 | 209 | def test_remote_video_url(self): 210 | """Test reading frames from a remote video URL.""" 211 | remote_url = "https://commondatastorage.googleapis.com/gtv-videos-bucket/sample/ForBiggerMeltdowns.mp4" 212 | 213 | # Test reading first frame 214 | frames = list(read_frames_exact(remote_url, 0, 0)) 215 | assert len(frames) == 1 216 | 217 | frame = frames[0] 218 | assert isinstance(frame, np.ndarray) 219 | assert frame.dtype == np.uint8 220 | assert len(frame.shape) == 3 221 | assert frame.shape[2] == 3 222 | assert np.sum(frame) > 0 223 | 224 | # Test reading multiple frames 225 | frames_multi = list(read_frames_exact(remote_url, 0, 2)) 226 | assert len(frames_multi) == 3 227 | 228 | def test_time_based_extraction(self, video_path): 229 | """Test reading frames using 
time-based parameters.""" 230 | # Read using time parameters 231 | frames = list(read_frames_exact(video_path, start_time=0.0, end_time=1.0)) 232 | 233 | # Should get approximately 1 second worth of frames 234 | meta = video_metadata(video_path) 235 | expected_frames = int(meta.fps) + 1 # +1 because end frame is inclusive 236 | # Allow some tolerance for frame extraction 237 | assert abs(len(frames) - expected_frames) <= 2 238 | 239 | def test_time_vs_frame_equivalence(self, video_path): 240 | """Test that time-based and frame-based extraction produce equivalent results.""" 241 | meta = video_metadata(video_path) 242 | fps = meta.fps 243 | 244 | # Extract frames 10-20 using frame indices 245 | frames_by_index = list(read_frames_exact(video_path, start_frame=10, end_frame=20)) 246 | 247 | # Extract same frames using time 248 | start_time = 10 / fps 249 | end_time = 20 / fps 250 | frames_by_time = list(read_frames_exact(video_path, start_time=start_time, end_time=end_time)) 251 | 252 | # Should get same number of frames 253 | assert len(frames_by_index) == len(frames_by_time) 254 | 255 | # Frames should be identical 256 | for i, (frame_idx, frame_time) in enumerate(zip(frames_by_index, frames_by_time, strict=False)): 257 | np.testing.assert_array_equal( 258 | frame_idx, 259 | frame_time, 260 | err_msg=f"Frame {i} differs between time and index extraction", 261 | ) 262 | 263 | def test_time_based_start_only(self, video_path): 264 | """Test time-based extraction with only start_time specified.""" 265 | frames = list(read_frames_exact(video_path, start_time=0.5)) 266 | 267 | # Should get frames from 0.5 seconds to end 268 | assert len(frames) > 0 269 | for frame in frames: 270 | assert isinstance(frame, np.ndarray) 271 | assert frame.dtype == np.uint8 272 | 273 | def test_time_based_end_only(self, video_path): 274 | """Test time-based extraction with only end_time specified.""" 275 | frames = list(read_frames_exact(video_path, end_time=1.0)) 276 | 277 | # Should get 
frames from start to 1.0 seconds 278 | meta = video_metadata(video_path) 279 | expected_frames = int(meta.fps) + 1 280 | assert abs(len(frames) - expected_frames) <= 2 281 | 282 | def test_cannot_mix_frame_and_time_params(self, video_path): 283 | """Test that mixing frame and time parameters raises ValueError.""" 284 | with pytest.raises(ValueError, match="Cannot mix frame-based and time-based"): 285 | list(read_frames_exact(video_path, start_frame=0, end_time=1.0)) 286 | 287 | with pytest.raises(ValueError, match="Cannot mix frame-based and time-based"): 288 | list(read_frames_exact(video_path, start_time=0.0, end_frame=10)) 289 | 290 | with pytest.raises(ValueError, match="Cannot mix frame-based and time-based"): 291 | list(read_frames_exact(video_path, start_frame=0, start_time=0.0)) 292 | 293 | def test_no_parameters_reads_all(self, video_path): 294 | """Test that calling with no parameters reads all frames from start.""" 295 | frames_no_params = list(read_frames_exact(video_path)) 296 | frames_explicit = list(read_frames_exact(video_path, start_frame=0)) 297 | 298 | # Should produce same result 299 | assert len(frames_no_params) == len(frames_explicit) 300 | for f1, f2 in zip(frames_no_params, frames_explicit, strict=False): 301 | np.testing.assert_array_equal(f1, f2) 302 | 303 | def test_time_vs_frame_seeking_precision_remote(self): 304 | """Test that time and frame seeking produce identical frames on a longer remote video.""" 305 | remote_url = "https://commondatastorage.googleapis.com/gtv-videos-bucket/sample/ForBiggerMeltdowns.mp4" 306 | 307 | # Get video metadata to calculate frame indices 308 | meta = video_metadata(remote_url) 309 | fps = meta.fps 310 | 311 | # Test 5-7 seconds 312 | start_time_sec = 5.0 313 | end_time_sec = 7.0 314 | 315 | # Calculate corresponding frame indices 316 | start_frame_idx = int(start_time_sec * fps) 317 | end_frame_idx = int(end_time_sec * fps) 318 | 319 | # Extract using time parameters 320 | frames_by_time = 
list(read_frames_exact(remote_url, start_time=start_time_sec, end_time=end_time_sec)) 321 | 322 | # Extract using frame indices 323 | frames_by_frame = list(read_frames_exact(remote_url, start_frame=start_frame_idx, end_frame=end_frame_idx)) 324 | 325 | # Should get same number of frames 326 | assert len(frames_by_time) == len(frames_by_frame), ( 327 | f"Frame count mismatch: time-based={len(frames_by_time)}, " 328 | f"frame-based={len(frames_by_frame)}" 329 | ) 330 | 331 | # Verify we got the expected number of frames 332 | expected_frame_count = end_frame_idx - start_frame_idx + 1 # +1 because end is inclusive 333 | assert len(frames_by_time) == expected_frame_count, ( 334 | f"Expected {expected_frame_count} frames (from frame {start_frame_idx} to {end_frame_idx}), " 335 | f"got {len(frames_by_time)}" 336 | ) 337 | 338 | # Every frame should be identical 339 | for i, (frame_time, frame_idx) in enumerate(zip(frames_by_time, frames_by_frame, strict=False)): 340 | actual_frame_num = start_frame_idx + i 341 | np.testing.assert_array_equal( 342 | frame_time, 343 | frame_idx, 344 | err_msg=f"Frame {actual_frame_num} differs between time-based and frame-based extraction", 345 | ) 346 | 347 | # Verify frames are not all identical (video has content) 348 | if len(frames_by_time) >= 2: 349 | diff = np.sum(np.abs(frames_by_time[0].astype(np.int16) - frames_by_time[-1].astype(np.int16))) 350 | assert diff > 0, "First and last frames are identical - video may not have motion" 351 | 352 | def test_corrupted_video_metadata_readable(self): 353 | """Test that metadata can be read from corrupted video (ffprobe passes).""" 354 | corrupted_path = str(Path(__file__).parent / "assets" / "corrupted.mp4") 355 | 356 | # Metadata should be readable even though video is corrupted 357 | meta = video_metadata(corrupted_path) 358 | assert meta.width == 256 359 | assert meta.height == 256 360 | assert meta.fps == 25.0 361 | 362 | def test_corrupted_video_full_read_fails(self): 363 | """Test 
that reading all frames from corrupted video raises RuntimeError.""" 364 | corrupted_path = str(Path(__file__).parent / "assets" / "corrupted.mp4") 365 | 366 | # Reading all frames should fail when hitting corrupted data 367 | with pytest.raises(RuntimeError, match="Failed to open video"): 368 | list(read_frames_exact(corrupted_path, 0, None)) 369 | 370 | 371 | class TestReadFramesFromStream: 372 | """Tests for streaming video input via read_frames_from_stream.""" 373 | 374 | @pytest.fixture 375 | def video_path(self): 376 | """Path to the example video file.""" 377 | return str(Path(__file__).parent / "assets" / "example.mp4") 378 | 379 | @pytest.fixture 380 | def video_bytes(self, video_path): 381 | """Load example video as bytes.""" 382 | return Path(video_path).read_bytes() 383 | 384 | def test_read_frames_from_stream_basic(self, video_bytes): 385 | """Test reading frames from a BytesIO stream.""" 386 | stream = BytesIO(video_bytes) 387 | meta, frames_gen = read_frames_from_stream(stream) 388 | 389 | # Check metadata 390 | assert meta.width > 0 391 | assert meta.height > 0 392 | assert meta.fps > 0 393 | 394 | # Read first frame 395 | frame = next(frames_gen) 396 | assert isinstance(frame, np.ndarray) 397 | assert frame.dtype == np.uint8 398 | assert frame.shape == (meta.height, meta.width, 3) 399 | assert np.sum(frame) > 0 400 | 401 | def test_read_frames_from_stream_all_frames(self, video_bytes, video_path): 402 | """Test that stream reading produces same frames as file reading.""" 403 | stream = BytesIO(video_bytes) 404 | meta, frames_gen = read_frames_from_stream(stream) 405 | 406 | stream_frames = list(frames_gen) 407 | file_frames = list(read_frames_exact(video_path, 0, None)) 408 | 409 | # Same number of frames 410 | assert len(stream_frames) == len(file_frames) 411 | 412 | # Frames should be identical 413 | for i, (stream_frame, file_frame) in enumerate(zip(stream_frames, file_frames, strict=False)): 414 | np.testing.assert_array_equal( 415 | 
stream_frame, 416 | file_frame, 417 | err_msg=f"Frame {i} differs between stream and file reading", 418 | ) 419 | 420 | def test_read_frames_from_stream_skip_frames(self, video_bytes, video_path): 421 | """Test skipping initial frames from stream.""" 422 | skip = 5 423 | 424 | stream = BytesIO(video_bytes) 425 | _, frames_gen = read_frames_from_stream(stream, skip_frames=skip) 426 | stream_frames = list(frames_gen) 427 | 428 | # Compare with file-based reading starting at frame 5 429 | file_frames = list(read_frames_exact(video_path, skip, None)) 430 | 431 | assert len(stream_frames) == len(file_frames) 432 | 433 | for i, (stream_frame, file_frame) in enumerate(zip(stream_frames, file_frames, strict=False)): 434 | np.testing.assert_array_equal( 435 | stream_frame, 436 | file_frame, 437 | err_msg=f"Frame {i} (skipped {skip}) differs", 438 | ) 439 | 440 | def test_read_frames_from_stream_metadata_matches(self, video_bytes, video_path): 441 | """Test that returned metadata matches expected values.""" 442 | stream = BytesIO(video_bytes) 443 | meta_stream, _ = read_frames_from_stream(stream) 444 | meta_file = video_metadata(video_path) 445 | 446 | assert meta_stream.width == meta_file.width 447 | assert meta_stream.height == meta_file.height 448 | assert meta_stream.fps == meta_file.fps 449 | 450 | def test_read_frames_from_stream_webm(self): 451 | """Test reading frames from a WebM stream.""" 452 | video_path = Path(__file__).parent / "assets" / "example.webm" 453 | video_bytes = video_path.read_bytes() 454 | 455 | stream = BytesIO(video_bytes) 456 | meta, frames_gen = read_frames_from_stream(stream) 457 | 458 | assert meta.width > 0 459 | assert meta.height > 0 460 | assert meta.fps > 0 461 | 462 | frames = list(frames_gen) 463 | assert len(frames) == 67 # Same as test_webm_file 464 | 465 | 466 | if __name__ == "__main__": 467 | pytest.main([__file__]) 468 | --------------------------------------------------------------------------------