├── version.txt ├── examples ├── decoding │ ├── README.rst │ └── audio_decoding.py ├── encoding │ ├── README.rst │ └── audio_encoding.py └── README.rst ├── .github ├── ISSUE_TEMPLATE │ ├── config.yml │ ├── documentation.yml │ ├── feature-request.yml │ └── bug-report.yml └── workflows │ ├── lint.yaml │ ├── reference_resources.yaml │ ├── cpp_tests.yaml │ ├── build_ffmpeg.yaml │ └── macos_wheel.yaml ├── test ├── resources │ ├── nasa_13013.mp4.stream3.frame000180.pt │ ├── testsrc2.mp4 │ ├── av1_video.mkv │ ├── h265_video.mp4 │ ├── nasa_13013.mp4 │ ├── h264_10bits.mp4 │ ├── h265_10bits.mp4 │ ├── sine_mono_s16.wav │ ├── sine_mono_s32.wav │ ├── testsrc2_h265.mp4 │ ├── testsrc2_vp8.webm │ ├── testsrc2_vp9.webm │ ├── testsrc2_mpeg4.avi │ ├── bt709_full_range.mp4 │ ├── sine_mono_s32_8000.wav │ ├── nasa_13013.mp4.audio.mp3 │ ├── sine_mono_s32_44100.wav │ ├── nasa_13013.mp4.audio_44100.mp3 │ ├── nasa_13013.mp4.time10.000000.pt │ ├── nasa_13013.mp4.time12.979633.pt │ ├── nasa_13013.mp4.time6.000000.pt │ ├── nasa_13013.mp4.time6.100000.pt │ ├── av1_video.mkv.stream0.frame000010.pt │ ├── h265_video.mp4.stream0.frame000005.pt │ ├── nasa_13013.mp4.stream0.frame000000.pt │ ├── nasa_13013.mp4.stream0.frame000001.pt │ ├── nasa_13013.mp4.stream0.frame000002.pt │ ├── nasa_13013.mp4.stream0.frame000003.pt │ ├── nasa_13013.mp4.stream0.frame000004.pt │ ├── nasa_13013.mp4.stream0.frame000005.pt │ ├── nasa_13013.mp4.stream0.frame000006.pt │ ├── nasa_13013.mp4.stream0.frame000007.pt │ ├── nasa_13013.mp4.stream0.frame000008.pt │ ├── nasa_13013.mp4.stream0.frame000009.pt │ ├── nasa_13013.mp4.stream0.frame000015.pt │ ├── nasa_13013.mp4.stream0.frame000020.pt │ ├── nasa_13013.mp4.stream0.frame000025.pt │ ├── nasa_13013.mp4.stream0.frame000030.pt │ ├── nasa_13013.mp4.stream0.frame000035.pt │ ├── nasa_13013.mp4.stream3.frame000000.pt │ ├── nasa_13013.mp4.stream3.frame000001.pt │ ├── nasa_13013.mp4.stream3.frame000002.pt │ ├── nasa_13013.mp4.stream3.frame000003.pt │ ├── nasa_13013.mp4.stream3.frame000004.pt │ ├── nasa_13013.mp4.stream3.frame000005.pt │ ├── nasa_13013.mp4.stream3.frame000006.pt │ ├── nasa_13013.mp4.stream3.frame000007.pt │ ├── nasa_13013.mp4.stream3.frame000008.pt │ ├── nasa_13013.mp4.stream3.frame000009.pt │ ├── nasa_13013.mp4.stream3.frame000015.pt │ ├── nasa_13013.mp4.stream3.frame000020.pt │ ├── nasa_13013.mp4.stream3.frame000025.pt │ ├── nasa_13013.mp4.stream3.frame000030.pt │ ├── nasa_13013.mp4.stream3.frame000035.pt │ ├── nasa_13013.mp4.stream3.frame000386.pt │ ├── nasa_13013.mp4.stream3.frame000387.pt │ ├── nasa_13013.mp4.stream3.frame000388.pt │ ├── nasa_13013.mp4.stream3.frame000389.pt │ ├── nasa_13013.mp4.stream4.all_frames.pt │ ├── sine_mono_s16.wav.stream0.all_frames.pt │ ├── sine_mono_s32.wav.stream0.all_frames.pt │ ├── nasa_13013.mp4.audio.mp3.stream0.all_frames.pt │ ├── nasa_13013.mp4.crop_300_200_50_35_exact_1.stream3.frame000000.pt │ ├── nasa_13013.mp4.crop_300_200_50_35_exact_1.stream3.frame000015.pt │ ├── nasa_13013.mp4.crop_300_200_50_35_exact_1.stream3.frame000200.pt │ ├── nasa_13013.mp4.crop_300_200_50_35_exact_1.stream3.frame000389.pt │ ├── nasa_13013.mp4.scale_240_135_flags_bilinear.stream3.frame000017.pt │ ├── nasa_13013.mp4.scale_240_135_flags_bilinear.stream3.frame000230.pt │ ├── nasa_13013.mp4.scale_240_135_flags_bilinear.stream3.frame000389.pt │ ├── sine_mono_s16.wav.stream0.all_frames_info.json │ └── sine_mono_s32_8000.wav.stream0.all_frames_info.json ├── test_version.py ├── __init__.py ├── test_policy.py ├── CMakeLists.txt ├── test_video_clip_sampler.py └── conftest.py ├── src 
└── torchcodec │ ├── encoders │ └── __init__.py │ ├── samplers │ ├── __init__.py │ └── _common.py │ ├── _samplers │ └── __init__.py │ ├── transforms │ └── __init__.py │ ├── _core │ ├── NVCUVIDRuntimeLoader.h │ ├── ValidationUtils.h │ ├── ValidationUtils.cpp │ ├── AVIOTensorContext.h │ ├── __init__.py │ ├── NVDECCache.cpp │ ├── CUDACommon.h │ ├── AVIOContextHolder.cpp │ ├── FilterGraph.h │ ├── Frame.cpp │ ├── pybind_ops.cpp │ ├── AVIOFileLikeContext.h │ ├── Frame.h │ ├── StreamOptions.h │ ├── CudaDeviceInterface.h │ ├── Transform.h │ ├── AVIOContextHolder.h │ ├── Metadata.h │ ├── NVDECCache.h │ ├── AVIOFileLikeContext.cpp │ ├── DeviceInterface.cpp │ ├── Transform.cpp │ ├── Metadata.cpp │ ├── Cache.h │ ├── AVIOTensorContext.cpp │ ├── fetch_and_expose_non_gpl_ffmpeg_libs.cmake │ └── CpuDeviceInterface.h │ ├── decoders │ ├── __init__.py │ └── _decoder_utils.py │ ├── __init__.py │ ├── _internally_replaced_utils.py │ └── share │ └── cmake │ └── TorchCodec │ └── TorchCodecConfig.cmake ├── benchmarks ├── decoders │ ├── benchmark_readme_chart.png │ ├── generate_readme_chart.py │ ├── memprofile_decoders.py │ ├── benchmark_audio_decoders.py │ └── generate_readme_data.py └── samplers │ └── benchmark_samplers.py ├── docs ├── source │ ├── _static │ │ ├── img │ │ │ ├── pytorch-logo-dark.png │ │ │ ├── pytorch-logo-flame.png │ │ │ ├── generic-pytorch-logo.png │ │ │ ├── card-background.svg │ │ │ ├── pytorch-logo-flame.svg │ │ │ └── pytorch-logo-dark.svg │ │ └── css │ │ │ └── custom_torchcodec.css │ ├── _templates │ │ ├── function.rst │ │ ├── class.rst │ │ ├── dataclass.rst │ │ └── layout.html │ ├── api_ref_torchcodec.rst │ ├── api_ref_encoders.rst │ ├── api_ref_transforms.rst │ ├── api_ref_samplers.rst │ ├── api_ref_decoders.rst │ ├── glossary.rst │ └── index.rst ├── requirements.txt └── Makefile ├── mypy.ini ├── .flake8 ├── MANIFEST.in ├── packaging ├── fake_smoke_test.py ├── pre_build_script.sh ├── build_ffmpeg.bat ├── helpers.sh ├── vc_env_helper.bat ├── post_build_script.sh └── check_glibcxx.py ├── CMakeLists.txt ├── .gitignore ├── .pre-commit-config.yaml ├── pyproject.toml ├── LICENSE ├── CODE_OF_CONDUCT.md ├── .clang-format └── CONTRIBUTING.md /version.txt: -------------------------------------------------------------------------------- 1 | 0.10.0a0 2 | -------------------------------------------------------------------------------- /examples/decoding/README.rst: -------------------------------------------------------------------------------- 1 | Decoding 2 | -------- 3 | -------------------------------------------------------------------------------- /examples/encoding/README.rst: -------------------------------------------------------------------------------- 1 | Encoding 2 | -------- 3 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/config.yml: -------------------------------------------------------------------------------- 1 | blank_issues_enabled: true 2 | -------------------------------------------------------------------------------- /test/resources/nasa_13013.mp4.stream3.frame000180.pt: -------------------------------------------------------------------------------- 1 | nasa_13013.mp4.time6.000000.pt -------------------------------------------------------------------------------- /examples/README.rst: -------------------------------------------------------------------------------- 1 | .. 
_gallery: 2 | 3 | Interactive examples 4 | ==================== 5 | -------------------------------------------------------------------------------- /test/resources/testsrc2.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/meta-pytorch/torchcodec/HEAD/test/resources/testsrc2.mp4 -------------------------------------------------------------------------------- /test/resources/av1_video.mkv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/meta-pytorch/torchcodec/HEAD/test/resources/av1_video.mkv -------------------------------------------------------------------------------- /test/resources/h265_video.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/meta-pytorch/torchcodec/HEAD/test/resources/h265_video.mp4 -------------------------------------------------------------------------------- /test/resources/nasa_13013.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/meta-pytorch/torchcodec/HEAD/test/resources/nasa_13013.mp4 -------------------------------------------------------------------------------- /test/test_version.py: -------------------------------------------------------------------------------- 1 | import torchcodec 2 | 3 | 4 | def test_version(): 5 | assert torchcodec.__version__ 6 | -------------------------------------------------------------------------------- /test/resources/h264_10bits.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/meta-pytorch/torchcodec/HEAD/test/resources/h264_10bits.mp4 -------------------------------------------------------------------------------- /test/resources/h265_10bits.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/meta-pytorch/torchcodec/HEAD/test/resources/h265_10bits.mp4 -------------------------------------------------------------------------------- /test/resources/sine_mono_s16.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/meta-pytorch/torchcodec/HEAD/test/resources/sine_mono_s16.wav -------------------------------------------------------------------------------- /test/resources/sine_mono_s32.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/meta-pytorch/torchcodec/HEAD/test/resources/sine_mono_s32.wav -------------------------------------------------------------------------------- /test/resources/testsrc2_h265.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/meta-pytorch/torchcodec/HEAD/test/resources/testsrc2_h265.mp4 -------------------------------------------------------------------------------- /test/resources/testsrc2_vp8.webm: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/meta-pytorch/torchcodec/HEAD/test/resources/testsrc2_vp8.webm -------------------------------------------------------------------------------- /test/resources/testsrc2_vp9.webm: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/meta-pytorch/torchcodec/HEAD/test/resources/testsrc2_vp9.webm 
-------------------------------------------------------------------------------- /test/resources/testsrc2_mpeg4.avi: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/meta-pytorch/torchcodec/HEAD/test/resources/testsrc2_mpeg4.avi -------------------------------------------------------------------------------- /test/resources/bt709_full_range.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/meta-pytorch/torchcodec/HEAD/test/resources/bt709_full_range.mp4 -------------------------------------------------------------------------------- /test/resources/sine_mono_s32_8000.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/meta-pytorch/torchcodec/HEAD/test/resources/sine_mono_s32_8000.wav -------------------------------------------------------------------------------- /test/resources/nasa_13013.mp4.audio.mp3: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/meta-pytorch/torchcodec/HEAD/test/resources/nasa_13013.mp4.audio.mp3 -------------------------------------------------------------------------------- /test/resources/sine_mono_s32_44100.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/meta-pytorch/torchcodec/HEAD/test/resources/sine_mono_s32_44100.wav -------------------------------------------------------------------------------- /src/torchcodec/encoders/__init__.py: -------------------------------------------------------------------------------- 1 | from ._audio_encoder import AudioEncoder # noqa 2 | from ._video_encoder import VideoEncoder # noqa 3 | -------------------------------------------------------------------------------- /benchmarks/decoders/benchmark_readme_chart.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/meta-pytorch/torchcodec/HEAD/benchmarks/decoders/benchmark_readme_chart.png -------------------------------------------------------------------------------- /docs/source/_static/img/pytorch-logo-dark.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/meta-pytorch/torchcodec/HEAD/docs/source/_static/img/pytorch-logo-dark.png -------------------------------------------------------------------------------- /docs/source/_static/img/pytorch-logo-flame.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/meta-pytorch/torchcodec/HEAD/docs/source/_static/img/pytorch-logo-flame.png -------------------------------------------------------------------------------- /test/resources/nasa_13013.mp4.audio_44100.mp3: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/meta-pytorch/torchcodec/HEAD/test/resources/nasa_13013.mp4.audio_44100.mp3 -------------------------------------------------------------------------------- /test/resources/nasa_13013.mp4.time10.000000.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/meta-pytorch/torchcodec/HEAD/test/resources/nasa_13013.mp4.time10.000000.pt -------------------------------------------------------------------------------- /test/resources/nasa_13013.mp4.time12.979633.pt: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/meta-pytorch/torchcodec/HEAD/test/resources/nasa_13013.mp4.time12.979633.pt -------------------------------------------------------------------------------- /test/resources/nasa_13013.mp4.time6.000000.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/meta-pytorch/torchcodec/HEAD/test/resources/nasa_13013.mp4.time6.000000.pt -------------------------------------------------------------------------------- /test/resources/nasa_13013.mp4.time6.100000.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/meta-pytorch/torchcodec/HEAD/test/resources/nasa_13013.mp4.time6.100000.pt -------------------------------------------------------------------------------- /docs/source/_static/img/generic-pytorch-logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/meta-pytorch/torchcodec/HEAD/docs/source/_static/img/generic-pytorch-logo.png -------------------------------------------------------------------------------- /mypy.ini: -------------------------------------------------------------------------------- 1 | [mypy] 2 | 3 | files = src/torchcodec 4 | show_error_codes = True 5 | pretty = True 6 | allow_redefinition = True 7 | follow_untyped_imports = True 8 | -------------------------------------------------------------------------------- /test/resources/av1_video.mkv.stream0.frame000010.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/meta-pytorch/torchcodec/HEAD/test/resources/av1_video.mkv.stream0.frame000010.pt -------------------------------------------------------------------------------- /test/resources/h265_video.mp4.stream0.frame000005.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/meta-pytorch/torchcodec/HEAD/test/resources/h265_video.mp4.stream0.frame000005.pt -------------------------------------------------------------------------------- /test/resources/nasa_13013.mp4.stream0.frame000000.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/meta-pytorch/torchcodec/HEAD/test/resources/nasa_13013.mp4.stream0.frame000000.pt -------------------------------------------------------------------------------- /test/resources/nasa_13013.mp4.stream0.frame000001.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/meta-pytorch/torchcodec/HEAD/test/resources/nasa_13013.mp4.stream0.frame000001.pt -------------------------------------------------------------------------------- /test/resources/nasa_13013.mp4.stream0.frame000002.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/meta-pytorch/torchcodec/HEAD/test/resources/nasa_13013.mp4.stream0.frame000002.pt -------------------------------------------------------------------------------- /test/resources/nasa_13013.mp4.stream0.frame000003.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/meta-pytorch/torchcodec/HEAD/test/resources/nasa_13013.mp4.stream0.frame000003.pt 
-------------------------------------------------------------------------------- /test/resources/nasa_13013.mp4.stream0.frame000004.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/meta-pytorch/torchcodec/HEAD/test/resources/nasa_13013.mp4.stream0.frame000004.pt -------------------------------------------------------------------------------- /test/resources/nasa_13013.mp4.stream0.frame000005.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/meta-pytorch/torchcodec/HEAD/test/resources/nasa_13013.mp4.stream0.frame000005.pt -------------------------------------------------------------------------------- /test/resources/nasa_13013.mp4.stream0.frame000006.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/meta-pytorch/torchcodec/HEAD/test/resources/nasa_13013.mp4.stream0.frame000006.pt -------------------------------------------------------------------------------- /test/resources/nasa_13013.mp4.stream0.frame000007.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/meta-pytorch/torchcodec/HEAD/test/resources/nasa_13013.mp4.stream0.frame000007.pt -------------------------------------------------------------------------------- /test/resources/nasa_13013.mp4.stream0.frame000008.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/meta-pytorch/torchcodec/HEAD/test/resources/nasa_13013.mp4.stream0.frame000008.pt -------------------------------------------------------------------------------- /test/resources/nasa_13013.mp4.stream0.frame000009.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/meta-pytorch/torchcodec/HEAD/test/resources/nasa_13013.mp4.stream0.frame000009.pt -------------------------------------------------------------------------------- /test/resources/nasa_13013.mp4.stream0.frame000015.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/meta-pytorch/torchcodec/HEAD/test/resources/nasa_13013.mp4.stream0.frame000015.pt -------------------------------------------------------------------------------- /test/resources/nasa_13013.mp4.stream0.frame000020.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/meta-pytorch/torchcodec/HEAD/test/resources/nasa_13013.mp4.stream0.frame000020.pt -------------------------------------------------------------------------------- /test/resources/nasa_13013.mp4.stream0.frame000025.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/meta-pytorch/torchcodec/HEAD/test/resources/nasa_13013.mp4.stream0.frame000025.pt -------------------------------------------------------------------------------- /test/resources/nasa_13013.mp4.stream0.frame000030.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/meta-pytorch/torchcodec/HEAD/test/resources/nasa_13013.mp4.stream0.frame000030.pt -------------------------------------------------------------------------------- /test/resources/nasa_13013.mp4.stream0.frame000035.pt: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/meta-pytorch/torchcodec/HEAD/test/resources/nasa_13013.mp4.stream0.frame000035.pt -------------------------------------------------------------------------------- /test/resources/nasa_13013.mp4.stream3.frame000000.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/meta-pytorch/torchcodec/HEAD/test/resources/nasa_13013.mp4.stream3.frame000000.pt -------------------------------------------------------------------------------- /test/resources/nasa_13013.mp4.stream3.frame000001.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/meta-pytorch/torchcodec/HEAD/test/resources/nasa_13013.mp4.stream3.frame000001.pt -------------------------------------------------------------------------------- /test/resources/nasa_13013.mp4.stream3.frame000002.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/meta-pytorch/torchcodec/HEAD/test/resources/nasa_13013.mp4.stream3.frame000002.pt -------------------------------------------------------------------------------- /test/resources/nasa_13013.mp4.stream3.frame000003.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/meta-pytorch/torchcodec/HEAD/test/resources/nasa_13013.mp4.stream3.frame000003.pt -------------------------------------------------------------------------------- /test/resources/nasa_13013.mp4.stream3.frame000004.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/meta-pytorch/torchcodec/HEAD/test/resources/nasa_13013.mp4.stream3.frame000004.pt -------------------------------------------------------------------------------- /test/resources/nasa_13013.mp4.stream3.frame000005.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/meta-pytorch/torchcodec/HEAD/test/resources/nasa_13013.mp4.stream3.frame000005.pt -------------------------------------------------------------------------------- /test/resources/nasa_13013.mp4.stream3.frame000006.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/meta-pytorch/torchcodec/HEAD/test/resources/nasa_13013.mp4.stream3.frame000006.pt -------------------------------------------------------------------------------- /test/resources/nasa_13013.mp4.stream3.frame000007.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/meta-pytorch/torchcodec/HEAD/test/resources/nasa_13013.mp4.stream3.frame000007.pt -------------------------------------------------------------------------------- /test/resources/nasa_13013.mp4.stream3.frame000008.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/meta-pytorch/torchcodec/HEAD/test/resources/nasa_13013.mp4.stream3.frame000008.pt -------------------------------------------------------------------------------- /test/resources/nasa_13013.mp4.stream3.frame000009.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/meta-pytorch/torchcodec/HEAD/test/resources/nasa_13013.mp4.stream3.frame000009.pt -------------------------------------------------------------------------------- 
/test/resources/nasa_13013.mp4.stream3.frame000015.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/meta-pytorch/torchcodec/HEAD/test/resources/nasa_13013.mp4.stream3.frame000015.pt -------------------------------------------------------------------------------- /test/resources/nasa_13013.mp4.stream3.frame000020.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/meta-pytorch/torchcodec/HEAD/test/resources/nasa_13013.mp4.stream3.frame000020.pt -------------------------------------------------------------------------------- /test/resources/nasa_13013.mp4.stream3.frame000025.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/meta-pytorch/torchcodec/HEAD/test/resources/nasa_13013.mp4.stream3.frame000025.pt -------------------------------------------------------------------------------- /test/resources/nasa_13013.mp4.stream3.frame000030.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/meta-pytorch/torchcodec/HEAD/test/resources/nasa_13013.mp4.stream3.frame000030.pt -------------------------------------------------------------------------------- /test/resources/nasa_13013.mp4.stream3.frame000035.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/meta-pytorch/torchcodec/HEAD/test/resources/nasa_13013.mp4.stream3.frame000035.pt -------------------------------------------------------------------------------- /test/resources/nasa_13013.mp4.stream3.frame000386.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/meta-pytorch/torchcodec/HEAD/test/resources/nasa_13013.mp4.stream3.frame000386.pt -------------------------------------------------------------------------------- /test/resources/nasa_13013.mp4.stream3.frame000387.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/meta-pytorch/torchcodec/HEAD/test/resources/nasa_13013.mp4.stream3.frame000387.pt -------------------------------------------------------------------------------- /test/resources/nasa_13013.mp4.stream3.frame000388.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/meta-pytorch/torchcodec/HEAD/test/resources/nasa_13013.mp4.stream3.frame000388.pt -------------------------------------------------------------------------------- /test/resources/nasa_13013.mp4.stream3.frame000389.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/meta-pytorch/torchcodec/HEAD/test/resources/nasa_13013.mp4.stream3.frame000389.pt -------------------------------------------------------------------------------- /test/resources/nasa_13013.mp4.stream4.all_frames.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/meta-pytorch/torchcodec/HEAD/test/resources/nasa_13013.mp4.stream4.all_frames.pt -------------------------------------------------------------------------------- /test/resources/sine_mono_s16.wav.stream0.all_frames.pt: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/meta-pytorch/torchcodec/HEAD/test/resources/sine_mono_s16.wav.stream0.all_frames.pt -------------------------------------------------------------------------------- /test/resources/sine_mono_s32.wav.stream0.all_frames.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/meta-pytorch/torchcodec/HEAD/test/resources/sine_mono_s32.wav.stream0.all_frames.pt -------------------------------------------------------------------------------- /.flake8: -------------------------------------------------------------------------------- 1 | [flake8] 2 | max-line-length = 120 3 | ignore = E203, E402, W503, W504, F821, E501, B, C4, EXE, E251, E202 4 | per-file-ignores = 5 | __init__.py: F401, F403, F405 6 | -------------------------------------------------------------------------------- /test/resources/nasa_13013.mp4.audio.mp3.stream0.all_frames.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/meta-pytorch/torchcodec/HEAD/test/resources/nasa_13013.mp4.audio.mp3.stream0.all_frames.pt -------------------------------------------------------------------------------- /docs/source/_templates/function.rst: -------------------------------------------------------------------------------- 1 | .. role:: hidden 2 | :class: hidden-section 3 | .. currentmodule:: {{ module }} 4 | 5 | 6 | {{ name | underline}} 7 | 8 | .. autofunction:: {{ name }} 9 | -------------------------------------------------------------------------------- /src/torchcodec/samplers/__init__.py: -------------------------------------------------------------------------------- 1 | from ._index_based import clips_at_random_indices, clips_at_regular_indices 2 | from ._time_based import clips_at_random_timestamps, clips_at_regular_timestamps 3 | -------------------------------------------------------------------------------- /test/resources/nasa_13013.mp4.crop_300_200_50_35_exact_1.stream3.frame000000.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/meta-pytorch/torchcodec/HEAD/test/resources/nasa_13013.mp4.crop_300_200_50_35_exact_1.stream3.frame000000.pt -------------------------------------------------------------------------------- /test/resources/nasa_13013.mp4.crop_300_200_50_35_exact_1.stream3.frame000015.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/meta-pytorch/torchcodec/HEAD/test/resources/nasa_13013.mp4.crop_300_200_50_35_exact_1.stream3.frame000015.pt -------------------------------------------------------------------------------- /test/resources/nasa_13013.mp4.crop_300_200_50_35_exact_1.stream3.frame000200.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/meta-pytorch/torchcodec/HEAD/test/resources/nasa_13013.mp4.crop_300_200_50_35_exact_1.stream3.frame000200.pt -------------------------------------------------------------------------------- /test/resources/nasa_13013.mp4.crop_300_200_50_35_exact_1.stream3.frame000389.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/meta-pytorch/torchcodec/HEAD/test/resources/nasa_13013.mp4.crop_300_200_50_35_exact_1.stream3.frame000389.pt -------------------------------------------------------------------------------- 
/test/resources/nasa_13013.mp4.scale_240_135_flags_bilinear.stream3.frame000017.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/meta-pytorch/torchcodec/HEAD/test/resources/nasa_13013.mp4.scale_240_135_flags_bilinear.stream3.frame000017.pt -------------------------------------------------------------------------------- /test/resources/nasa_13013.mp4.scale_240_135_flags_bilinear.stream3.frame000230.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/meta-pytorch/torchcodec/HEAD/test/resources/nasa_13013.mp4.scale_240_135_flags_bilinear.stream3.frame000230.pt -------------------------------------------------------------------------------- /test/resources/nasa_13013.mp4.scale_240_135_flags_bilinear.stream3.frame000389.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/meta-pytorch/torchcodec/HEAD/test/resources/nasa_13013.mp4.scale_240_135_flags_bilinear.stream3.frame000389.pt -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | include README.md 2 | include LICENSE 3 | 4 | include CMakeLists.txt 5 | recursive-include src * 6 | 7 | recursive-exclude * __pycache__ 8 | recursive-exclude src *.py[co] 9 | recursive-exclude src *.so 10 | -------------------------------------------------------------------------------- /test/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | # All rights reserved. 3 | # 4 | # This source code is licensed under the BSD-style license found in the 5 | # LICENSE file in the root directory of this source tree. 6 | -------------------------------------------------------------------------------- /docs/source/_templates/class.rst: -------------------------------------------------------------------------------- 1 | .. role:: hidden 2 | :class: hidden-section 3 | .. currentmodule:: {{ module }} 4 | 5 | 6 | {{ name | underline}} 7 | 8 | .. autoclass:: {{ name }} 9 | :members: 10 | :special-members: __getitem__ 11 | -------------------------------------------------------------------------------- /docs/source/_templates/dataclass.rst: -------------------------------------------------------------------------------- 1 | .. role:: hidden 2 | :class: hidden-section 3 | .. currentmodule:: {{ module }} 4 | 5 | 6 | {{ name | underline}} 7 | 8 | .. autoclass:: {{ name }} 9 | :members: 10 | :undoc-members: __init__ 11 | :inherited-members: 12 | -------------------------------------------------------------------------------- /docs/requirements.txt: -------------------------------------------------------------------------------- 1 | sphinx-gallery>0.11 2 | sphinx==5.0.0 3 | sphinx_design 4 | sphinx_copybutton 5 | sphinx-tabs 6 | sphinx-sitemap 7 | matplotlib 8 | torchvision 9 | ipython 10 | fsspec 11 | aiohttp 12 | joblib 13 | -e git+https://github.com/pytorch/pytorch_sphinx_theme.git#egg=pytorch_sphinx_theme 14 | -------------------------------------------------------------------------------- /src/torchcodec/_samplers/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | # All rights reserved. 
3 | # 4 | # This source code is licensed under the BSD-style license found in the 5 | # LICENSE file in the root directory of this source tree. 6 | 7 | from .video_clip_sampler import * # noqa 8 | -------------------------------------------------------------------------------- /docs/source/api_ref_torchcodec.rst: -------------------------------------------------------------------------------- 1 | .. _torchcodec: 2 | 3 | =================== 4 | torchcodec 5 | =================== 6 | 7 | .. currentmodule:: torchcodec 8 | 9 | 10 | .. autosummary:: 11 | :toctree: generated/ 12 | :nosignatures: 13 | :template: dataclass.rst 14 | 15 | Frame 16 | FrameBatch 17 | AudioSamples 18 | -------------------------------------------------------------------------------- /src/torchcodec/transforms/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | # All rights reserved. 3 | # 4 | # This source code is licensed under the BSD-style license found in the 5 | # LICENSE file in the root directory of this source tree. 6 | 7 | from ._decoder_transforms import ( # noqa 8 | CenterCrop, 9 | DecoderTransform, 10 | RandomCrop, 11 | Resize, 12 | ) 13 | -------------------------------------------------------------------------------- /packaging/fake_smoke_test.py: -------------------------------------------------------------------------------- 1 | # This is a fake smoke test that runs on the test-infra instances after we build 2 | # a wheel. We cannot run a real smoke test over there, because the machines are 3 | # too old to even install a proper ffmpeg version - and without ffmpeg, 4 | # importing torchcodec just fails. It's OK, we run our *entire* test suite on 5 | # those wheels anyway (on other machines). 6 | 7 | print("Success") 8 | -------------------------------------------------------------------------------- /CMakeLists.txt: -------------------------------------------------------------------------------- 1 | cmake_minimum_required(VERSION 3.18) 2 | project(TorchCodec) 3 | 4 | # Define LINUX platform variable globally 5 | if (UNIX AND NOT APPLE) 6 | set(LINUX TRUE) 7 | else() 8 | set(LINUX FALSE) 9 | endif() 10 | 11 | add_subdirectory(src/torchcodec/_core) 12 | 13 | 14 | option(BUILD_TESTS "Build tests" OFF) 15 | if(BUILD_TESTS) 16 | enable_testing() 17 | add_subdirectory(test) 18 | endif() 19 | -------------------------------------------------------------------------------- /docs/source/_static/img/card-background.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 11 | 12 | 13 | 14 | -------------------------------------------------------------------------------- /docs/source/api_ref_encoders.rst: -------------------------------------------------------------------------------- 1 | .. _encoders: 2 | 3 | =================== 4 | torchcodec.encoders 5 | =================== 6 | 7 | .. currentmodule:: torchcodec.encoders 8 | 9 | 10 | For an audio encoder tutorial, see: :ref:`sphx_glr_generated_examples_encoding_audio_encoding.py`. 11 | 12 | 13 | .. autosummary:: 14 | :toctree: generated/ 15 | :nosignatures: 16 | :template: class.rst 17 | 18 | AudioEncoder 19 | VideoEncoder 20 | -------------------------------------------------------------------------------- /docs/source/api_ref_transforms.rst: -------------------------------------------------------------------------------- 1 | ..
_transforms: 2 | 3 | ===================== 4 | torchcodec.transforms 5 | ===================== 6 | 7 | .. currentmodule:: torchcodec.transforms 8 | 9 | For a tutorial, see: TODO_DECODER_TRANSFORMS_TUTORIAL. 10 | 11 | .. autosummary:: 12 | :toctree: generated/ 13 | :nosignatures: 14 | :template: dataclass.rst 15 | 16 | DecoderTransform 17 | CenterCrop 18 | RandomCrop 19 | Resize 20 | -------------------------------------------------------------------------------- /src/torchcodec/_core/NVCUVIDRuntimeLoader.h: -------------------------------------------------------------------------------- 1 | // Copyright (c) Meta Platforms, Inc. and affiliates. 2 | // All rights reserved. 3 | // 4 | // This source code is licensed under the BSD-style license found in the 5 | // LICENSE file in the root directory of this source tree. 6 | 7 | #pragma once 8 | 9 | namespace facebook::torchcodec { 10 | 11 | // See note in corresponding cpp file 12 | bool loadNVCUVIDLibrary(); 13 | 14 | } // namespace facebook::torchcodec 15 | -------------------------------------------------------------------------------- /docs/source/api_ref_samplers.rst: -------------------------------------------------------------------------------- 1 | .. _samplers: 2 | 3 | =================== 4 | torchcodec.samplers 5 | =================== 6 | 7 | .. currentmodule:: torchcodec.samplers 8 | 9 | For a tutorial, see: :ref:`sphx_glr_generated_examples_decoding_sampling.py`. 10 | 11 | .. autosummary:: 12 | :toctree: generated/ 13 | :nosignatures: 14 | :template: function.rst 15 | 16 | clips_at_regular_indices 17 | clips_at_random_indices 18 | clips_at_regular_timestamps 19 | clips_at_random_timestamps 20 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/documentation.yml: -------------------------------------------------------------------------------- 1 | name: 📚 Documentation 2 | description: Report an issue related to the TorchCodec documentation 3 | 4 | body: 5 | - type: textarea 6 | attributes: 7 | label: 📚 The doc issue 8 | description: > 9 | Is something confusing or wrong? Let us know! Please provide URLs to the content in https://pytorch.org/torchcodec/stable/index.html that you're referring to. 10 | validations: 11 | required: true 12 | - type: markdown 13 | attributes: 14 | value: > 15 | Thanks for contributing 🎉! 16 | -------------------------------------------------------------------------------- /src/torchcodec/decoders/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | # All rights reserved. 3 | # 4 | # This source code is licensed under the BSD-style license found in the 5 | # LICENSE file in the root directory of this source tree. 6 | 7 | from .._core import AudioStreamMetadata, VideoStreamMetadata 8 | from ._audio_decoder import AudioDecoder # noqa 9 | from ._decoder_utils import set_cuda_backend # noqa 10 | from ._video_decoder import CpuFallbackStatus, VideoDecoder # noqa 11 | 12 | SimpleVideoDecoder = VideoDecoder 13 | -------------------------------------------------------------------------------- /packaging/pre_build_script.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # Copyright (c) Meta Platforms, Inc. and affiliates. 3 | # All rights reserved. 4 | # 5 | # This source code is licensed under the BSD-style license found in the 6 | # LICENSE file in the root directory of this source tree. 
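A minimal usage sketch of the public torchcodec.decoders API listed above. The file paths are placeholders and the exact method signatures are assumptions to be checked against the generated API reference:

    from torchcodec.decoders import AudioDecoder, VideoDecoder

    # Decode video frames by index; each frame is a uint8 tensor.
    decoder = VideoDecoder("video.mp4")  # placeholder path
    first_frame = decoder[0]
    print(decoder.metadata)

    # Decode an audio stream into a single block of samples.
    audio = AudioDecoder("audio.mp3")  # placeholder path
    samples = audio.get_all_samples()
    print(samples.data.shape, samples.sample_rate)
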
7 | 8 | set -ex 9 | 10 | # We need to install pybind11 because we need its CMake helpers in order to 11 | # compile correctly on Mac. Pybind11 is actually a C++ header-only library, 12 | # and PyTorch actually has it included. PyTorch, however, does not have the 13 | # CMake helpers. 14 | conda install -y pybind11 -c conda-forge 15 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/feature-request.yml: -------------------------------------------------------------------------------- 1 | name: 🚀 Feature request 2 | description: Submit a proposal/request for a new TorchCodec feature 3 | 4 | body: 5 | - type: textarea 6 | attributes: 7 | label: 🚀 The feature 8 | description: > 9 | What new functionality do you want? 10 | validations: 11 | required: true 12 | - type: textarea 13 | attributes: 14 | label: Motivation, pitch 15 | description: > 16 | Why do you want it? If this is related to another GitHub issue, please link that here. 17 | validations: 18 | required: false 19 | - type: markdown 20 | attributes: 21 | value: > 22 | Thanks for contributing 🎉! 23 | -------------------------------------------------------------------------------- /src/torchcodec/_core/ValidationUtils.h: -------------------------------------------------------------------------------- 1 | // Copyright (c) Meta Platforms, Inc. and affiliates. 2 | // All rights reserved. 3 | // 4 | // This source code is licensed under the BSD-style license found in the 5 | // LICENSE file in the root directory of this source tree. 6 | 7 | #pragma once 8 | 9 | #include 10 | #include 11 | #include 12 | 13 | namespace facebook::torchcodec { 14 | 15 | int validateInt64ToInt(int64_t value, const std::string& parameterName); 16 | 17 | std::optional validateOptionalInt64ToInt( 18 | const std::optional& value, 19 | const std::string& parameterName); 20 | 21 | } // namespace facebook::torchcodec 22 | -------------------------------------------------------------------------------- /packaging/build_ffmpeg.bat: -------------------------------------------------------------------------------- 1 | :: Copyright (c) Meta Platforms, Inc. and affiliates. 2 | :: All rights reserved. 3 | :: 4 | :: This source code is licensed under the BSD-style license found in the 5 | :: LICENSE file in the root directory of this source tree. 6 | 7 | :: Taken from torchaudio 8 | @echo off 9 | 10 | set PROJ_FOLDER=%cd% 11 | 12 | choco install -y --no-progress msys2 --package-parameters "/NoUpdate" 13 | C:\tools\msys64\usr\bin\env MSYSTEM=MINGW64 /bin/bash -l -c "pacman -S --noconfirm --needed base-devel mingw-w64-x86_64-toolchain diffutils" 14 | C:\tools\msys64\usr\bin\env MSYSTEM=MINGW64 /bin/bash -l -c "cd ${PROJ_FOLDER} && packaging/vc_env_helper.bat bash packaging/build_ffmpeg.sh" 15 | 16 | :end 17 | -------------------------------------------------------------------------------- /docs/source/_templates/layout.html: -------------------------------------------------------------------------------- 1 | {% extends "!layout.html" %} 2 | 3 | {% block sidebartitle %} 4 | 7 | {% include "searchbox.html" %} 8 | {% endblock %} 9 | 10 | 11 | {% block footer %} 12 | 13 | 17 | 18 | 21 | {% endblock %} 22 | -------------------------------------------------------------------------------- /benchmarks/decoders/generate_readme_chart.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | # All rights reserved. 
3 | # 4 | # This source code is licensed under the BSD-style license found in the 5 | # LICENSE file in the root directory of this source tree. 6 | 7 | import json 8 | 9 | from pathlib import Path 10 | 11 | from benchmark_decoders_library import plot_data 12 | 13 | 14 | def main() -> None: 15 | data_json = Path(__file__).parent / "benchmark_readme_data.json" 16 | with open(data_json, "r") as read_file: 17 | data_from_file = json.load(read_file) 18 | 19 | output_png = Path(__file__).parent / "benchmark_readme_chart.png" 20 | plot_data(data_from_file, output_png) 21 | 22 | 23 | if __name__ == "__main__": 24 | main() 25 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | build/ 2 | dist/ 3 | src/TorchCodec.egg-info/ 4 | */**/__pycache__ 5 | */__pycache__ 6 | */*.pyc 7 | */**/*.pyc 8 | */**/**/*.pyc 9 | */**/*~ 10 | *~ 11 | frame180.* # output from smoke test 12 | 13 | src/torchcodec/version.py 14 | 15 | docs/build 16 | # sphinx-gallery 17 | docs/source/generated_examples/ 18 | docs/source/gen_modules/ 19 | docs/source/generated/ 20 | docs/source/models/generated/ 21 | docs/source/sg_execution_times.rst 22 | # pytorch-sphinx-theme gets installed here 23 | docs/src 24 | 25 | .coverage 26 | htmlcov 27 | .*.swp 28 | *.so* 29 | *.dylib* 30 | */*.so* 31 | */*.dylib* 32 | *.swp 33 | *.swo 34 | gen.yml 35 | .mypy_cache 36 | .vscode/ 37 | .idea/ 38 | *.orig 39 | *-checkpoint.ipynb 40 | *.venv 41 | 42 | ## Xcode User settings 43 | xcuserdata/ 44 | 45 | # direnv 46 | .direnv 47 | .envrc 48 | -------------------------------------------------------------------------------- /test/test_policy.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | from torchcodec.samplers._common import _POLICY_FUNCTIONS 3 | 4 | 5 | @pytest.mark.parametrize( 6 | "policy, frame_indices, expected_frame_indices", 7 | ( 8 | ("repeat_last", [1, 2, 3], [1, 2, 3, 3, 3]), 9 | ("repeat_last", [1, 2, 3, 4, 5], [1, 2, 3, 4, 5]), 10 | ("wrap", [1, 2, 3], [1, 2, 3, 1, 2]), 11 | ("wrap", [1, 2, 3, 4, 5], [1, 2, 3, 4, 5]), 12 | ), 13 | ) 14 | def test_policy(policy, frame_indices, expected_frame_indices): 15 | policy_fun = _POLICY_FUNCTIONS[policy] 16 | assert policy_fun(frame_indices, desired_len=5) == expected_frame_indices 17 | 18 | 19 | def test_error_policy(): 20 | with pytest.raises(ValueError, match="beyond the number of frames"): 21 | _POLICY_FUNCTIONS["error"]([1, 2, 3], desired_len=5) 22 | -------------------------------------------------------------------------------- /docs/source/api_ref_decoders.rst: -------------------------------------------------------------------------------- 1 | .. _decoders: 2 | 3 | =================== 4 | torchcodec.decoders 5 | =================== 6 | 7 | .. currentmodule:: torchcodec.decoders 8 | 9 | 10 | For a video decoder tutorial, see: :ref:`sphx_glr_generated_examples_decoding_basic_example.py`. 11 | For an audio decoder tutorial, see: :ref:`sphx_glr_generated_examples_decoding_audio_decoding.py`. 12 | 13 | 14 | .. autosummary:: 15 | :toctree: generated/ 16 | :nosignatures: 17 | :template: class.rst 18 | 19 | VideoDecoder 20 | AudioDecoder 21 | 22 | .. autosummary:: 23 | :toctree: generated/ 24 | :nosignatures: 25 | :template: function.rst 26 | 27 | set_cuda_backend 28 | 29 | .. 
autosummary:: 30 | :toctree: generated/ 31 | :nosignatures: 32 | :template: dataclass.rst 33 | 34 | VideoStreamMetadata 35 | AudioStreamMetadata 36 | CpuFallbackStatus 37 | -------------------------------------------------------------------------------- /src/torchcodec/_core/ValidationUtils.cpp: -------------------------------------------------------------------------------- 1 | // Copyright (c) Meta Platforms, Inc. and affiliates. 2 | // All rights reserved. 3 | // 4 | // This source code is licensed under the BSD-style license found in the 5 | // LICENSE file in the root directory of this source tree. 6 | 7 | #include "ValidationUtils.h" 8 | #include 9 | #include "c10/util/Exception.h" 10 | 11 | namespace facebook::torchcodec { 12 | 13 | int validateInt64ToInt(int64_t value, const std::string& parameterName) { 14 | TORCH_CHECK( 15 | value >= std::numeric_limits::min() && 16 | value <= std::numeric_limits::max(), 17 | parameterName, 18 | "=", 19 | value, 20 | " is out of range for int type."); 21 | 22 | return static_cast(value); 23 | } 24 | 25 | std::optional validateOptionalInt64ToInt( 26 | const std::optional& value, 27 | const std::string& parameterName) { 28 | if (value.has_value()) { 29 | return validateInt64ToInt(value.value(), parameterName); 30 | } else { 31 | return std::nullopt; 32 | } 33 | } 34 | 35 | } // namespace facebook::torchcodec 36 | -------------------------------------------------------------------------------- /packaging/helpers.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | # Copyright (c) Meta Platforms, Inc. and affiliates. 3 | # All rights reserved. 4 | # 5 | # This source code is licensed under the BSD-style license found in the 6 | # LICENSE file in the root directory of this source tree. 7 | 8 | _list_wheel_files() { 9 | unzip -l "$1" | awk '{print $4}' 10 | } 11 | 12 | # $1 = path to wheel 13 | # $2 = pattern to grep for in wheel files 14 | # If files matching $2 are found in the wheel, the function errors. 15 | assert_not_in_wheel() { 16 | wheel_files=$(_list_wheel_files "$1") 17 | if grep -q "$2" <<< "$wheel_files" 18 | then 19 | echo "Found files in $1 that start with $2. Exiting!!" 20 | exit 1 21 | fi 22 | } 23 | 24 | # See assert_not_in_wheel 25 | assert_in_wheel() { 26 | wheel_files=$(_list_wheel_files "$1") 27 | if ! grep -q "$2" <<< "$wheel_files" 28 | then 29 | echo "Did not find files in $1 that start with $2. Exiting!!" 30 | exit 1 31 | fi 32 | } 33 | 34 | assert_ffmpeg_not_installed() { 35 | if command -v "ffmpeg" &> /dev/null 36 | then 37 | echo "ffmpeg is installed, but it shouldn't! Exiting!!" 38 | exit 1 39 | fi 40 | } 41 | -------------------------------------------------------------------------------- /src/torchcodec/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | # All rights reserved. 3 | # 4 | # This source code is licensed under the BSD-style license found in the 5 | # LICENSE file in the root directory of this source tree. 6 | 7 | from pathlib import Path 8 | 9 | # Note: usort wants to put Frame and FrameBatch after decoders and samplers, 10 | # but that results in circular import. 11 | from ._frame import AudioSamples, Frame, FrameBatch # usort:skip # noqa 12 | from . import decoders, encoders, samplers, transforms # noqa 13 | 14 | try: 15 | # Note that version.py is generated during install. 
16 | from .version import __version__ # noqa: F401 17 | except Exception: 18 | pass 19 | 20 | # cmake_prefix_path is needed for downstream cmake-based builds that use 21 | # torchcodec as a dependency to tell cmake where torchcodec is installed and where to find its 22 | # CMake configuration files. 23 | # Pytorch itself has a similar mechanism which we use in our setup.py! 24 | cmake_prefix_path = Path(__file__).parent / "share" / "cmake" 25 | # Similarly, these are exposed for downstream builds that use torchcodec as a 26 | # dependency. 27 | from ._core import core_library_path, ffmpeg_major_version # usort:skip 28 | -------------------------------------------------------------------------------- /src/torchcodec/_core/AVIOTensorContext.h: -------------------------------------------------------------------------------- 1 | // Copyright (c) Meta Platforms, Inc. and affiliates. 2 | // All rights reserved. 3 | // 4 | // This source code is licensed under the BSD-style license found in the 5 | // LICENSE file in the root directory of this source tree. 6 | 7 | #pragma once 8 | 9 | #include 10 | #include "AVIOContextHolder.h" 11 | 12 | namespace facebook::torchcodec { 13 | 14 | namespace detail { 15 | 16 | struct TensorContext { 17 | torch::Tensor data; 18 | int64_t current_pos; 19 | int64_t max_pos; 20 | }; 21 | 22 | } // namespace detail 23 | 24 | // For Decoding: enables users to pass in the entire video or audio as bytes. 25 | // Our read and seek functions then traverse the bytes in memory. 26 | class AVIOFromTensorContext : public AVIOContextHolder { 27 | public: 28 | explicit AVIOFromTensorContext(torch::Tensor data); 29 | 30 | private: 31 | detail::TensorContext tensorContext_; 32 | }; 33 | 34 | // For Encoding: used to encode into an output uint8 (bytes) tensor. 
35 | class AVIOToTensorContext : public AVIOContextHolder { 36 | public: 37 | explicit AVIOToTensorContext(); 38 | torch::Tensor getOutputTensor(); 39 | 40 | private: 41 | detail::TensorContext tensorContext_; 42 | }; 43 | 44 | } // namespace facebook::torchcodec 45 | -------------------------------------------------------------------------------- /.pre-commit-config.yaml: -------------------------------------------------------------------------------- 1 | repos: 2 | - repo: https://github.com/pre-commit/pre-commit-hooks 3 | rev: v4.6.0 4 | hooks: 5 | - id: check-docstring-first 6 | - id: trailing-whitespace 7 | - id: check-toml 8 | - id: check-yaml 9 | args: 10 | - --allow-multiple-documents 11 | - id: mixed-line-ending 12 | args: [--fix=lf] 13 | - id: end-of-file-fixer 14 | - id: check-added-large-files 15 | args: ['--maxkb=1000'] 16 | 17 | - repo: https://github.com/asottile/pyupgrade 18 | rev: v3.21.2 19 | hooks: 20 | - id: pyupgrade 21 | args: [--py310-plus] 22 | files: ^(test|src)/ 23 | exclude: ^examples/ 24 | 25 | - repo: https://github.com/omnilib/ufmt 26 | rev: v2.6.0 27 | hooks: 28 | - id: ufmt 29 | additional_dependencies: 30 | - black == 24.4.2 31 | - usort == 1.0.5 32 | 33 | - repo: https://github.com/PyCQA/flake8 34 | rev: 7.1.0 35 | hooks: 36 | - id: flake8 37 | args: [--config=.flake8] 38 | 39 | - repo: https://github.com/pre-commit/mirrors-clang-format 40 | rev: v18.1.3 41 | hooks: 42 | - id: clang-format 43 | name: clang-format 44 | files: \.(cpp|hpp|c|h)$ 45 | types: [file] 46 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [project] 2 | name = "torchcodec" 3 | description = "A video decoder for PyTorch" 4 | readme = "README.md" 5 | requires-python = ">=3.10" 6 | license-files = ["LICENSE"] 7 | authors = [ 8 | { name = "PyTorch Team", email = "packages@pytorch.org" }, 9 | ] 10 | dynamic = ["version"] 11 | 12 | [project.urls] 13 | GitHub = "https://github.com/pytorch/torchcodec" 14 | Documentation = "https://pytorch.org/torchcodec/stable/index.html" 15 | 16 | [tool.setuptools.dynamic] 17 | version = {file = "version.txt"} 18 | 19 | [build-system] 20 | requires = ["setuptools>=61.0"] 21 | build-backend = "setuptools.build_meta" 22 | 23 | [project.optional-dependencies] 24 | dev = [ 25 | "numpy", 26 | "pytest", 27 | "pillow", 28 | ] 29 | 30 | [tool.usort] 31 | # Needed for compatibility with internal linter 32 | first_party_detection = false 33 | 34 | [tool.black] 35 | target-version = ["py310"] 36 | 37 | [tool.ufmt] 38 | 39 | excludes = [ 40 | "examples", 41 | ] 42 | 43 | [tool.pytest.ini_options] 44 | markers = [ 45 | # defines a 'slow' mark to mark slow tests with `@pytest.mark.slow` 46 | "slow: mark test as slow" 47 | ] 48 | 49 | # We don't want to run the slow tests by default. These options are ignored in 50 | # the CI, where we definitely want the 'slow' tests to run. 51 | addopts = "-v -m 'not slow'" 52 | 53 | testpaths = ["test"] 54 | -------------------------------------------------------------------------------- /src/torchcodec/_core/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | # All rights reserved. 3 | # 4 | # This source code is licensed under the BSD-style license found in the 5 | # LICENSE file in the root directory of this source tree. 
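The tensor-backed AVIO contexts above are what enable in-memory I/O: decoding can start from bytes already held in RAM, and encoding can write into a uint8 tensor instead of a file (see the create_from_bytes/create_from_tensor and encode_*_to_tensor ops exported by _core below). A rough sketch of how this surfaces in the Python API — the paths are placeholders, and the constructor and to_tensor argument names are assumptions to verify against the docs:

    import torch
    from torchcodec.decoders import VideoDecoder
    from torchcodec.encoders import AudioEncoder

    # Decode directly from bytes already in memory rather than from a path.
    with open("video.mp4", "rb") as f:  # placeholder path
        raw_bytes = f.read()
    decoder = VideoDecoder(raw_bytes)

    # Encode audio samples into an in-memory tensor of encoded bytes.
    samples = torch.rand(1, 16_000)  # 1 channel, 1 second at 16 kHz
    encoded = AudioEncoder(samples, sample_rate=16_000).to_tensor(format="mp3")
    print(encoded.dtype)  # expected: torch.uint8
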
6 | 7 | 8 | from ._metadata import ( 9 | AudioStreamMetadata, 10 | ContainerMetadata, 11 | get_container_metadata, 12 | get_container_metadata_from_header, 13 | VideoStreamMetadata, 14 | ) 15 | from .ops import ( 16 | _add_video_stream, 17 | _get_backend_details, 18 | _get_key_frame_indices, 19 | _test_frame_pts_equality, 20 | add_audio_stream, 21 | add_video_stream, 22 | core_library_path, 23 | create_from_bytes, 24 | create_from_file, 25 | create_from_file_like, 26 | create_from_tensor, 27 | encode_audio_to_file, 28 | encode_audio_to_file_like, 29 | encode_audio_to_tensor, 30 | encode_video_to_file, 31 | encode_video_to_file_like, 32 | encode_video_to_tensor, 33 | ffmpeg_major_version, 34 | get_ffmpeg_library_versions, 35 | get_frame_at_index, 36 | get_frame_at_pts, 37 | get_frames_at_indices, 38 | get_frames_by_pts, 39 | get_frames_by_pts_in_range, 40 | get_frames_by_pts_in_range_audio, 41 | get_frames_in_range, 42 | get_json_metadata, 43 | get_next_frame, 44 | scan_all_streams_to_update_metadata, 45 | seek_to_pts, 46 | ) 47 | -------------------------------------------------------------------------------- /docs/source/glossary.rst: -------------------------------------------------------------------------------- 1 | Glossary 2 | ======== 3 | 4 | .. glossary:: 5 | 6 | pts 7 | Presentation Time Stamp. The time at which a frame or audio sample should be played. 8 | In TorchCodec, pts are expressed in seconds. 9 | 10 | best stream 11 | The notion of "best" stream is determined by FFmpeg. Quoting the `FFmpeg docs 12 | `_: 13 | 14 | *The best stream is determined according to various heuristics as the most likely to be what the user expects.* 15 | 16 | scan 17 | A scan corresponds to an entire pass over a video file, with the purpose 18 | of retrieving metadata about the different streams and frames. **It does 19 | not involve decoding**, so it is a lot cheaper than decoding the file. 20 | The :class:`~torchcodec.decoders.VideoDecoder` performs a scan when using 21 | ``seek_mode="exact"``, and doesn't scan when using 22 | ``seek_mode="approximate"``. 23 | 24 | clips 25 | A clip is a sequence of frames, usually in :term:`pts` order. The frames 26 | may not necessarily be consecutive. A clip is represented as a 4D 27 | :class:`~torchcodec.FrameBatch`. A group of clips, which is what the 28 | :ref:`samplers ` return, is represented as 5D 29 | :class:`~torchcodec.FrameBatch`. 30 | -------------------------------------------------------------------------------- /packaging/vc_env_helper.bat: -------------------------------------------------------------------------------- 1 | :: Copyright (c) Meta Platforms, Inc. and affiliates. 2 | :: All rights reserved. 3 | :: 4 | :: This source code is licensed under the BSD-style license found in the 5 | :: LICENSE file in the root directory of this source tree. 
6 | 7 | :: Taken from torchaudio 8 | @echo on 9 | 10 | set VC_VERSION_LOWER=17 11 | set VC_VERSION_UPPER=18 12 | 13 | for /f "usebackq tokens=*" %%i in (`"%ProgramFiles(x86)%\Microsoft Visual Studio\Installer\vswhere.exe" -legacy -products * -version [%VC_VERSION_LOWER%^,%VC_VERSION_UPPER%^) -property installationPath`) do ( 14 | if exist "%%i" if exist "%%i\VC\Auxiliary\Build\vcvarsall.bat" ( 15 | set "VS15INSTALLDIR=%%i" 16 | set "VS15VCVARSALL=%%i\VC\Auxiliary\Build\vcvarsall.bat" 17 | goto vswhere 18 | ) 19 | ) 20 | 21 | :vswhere 22 | if "%VSDEVCMD_ARGS%" == "" ( 23 | call "%VS15VCVARSALL%" x64 || exit /b 1 24 | ) else ( 25 | call "%VS15VCVARSALL%" x64 %VSDEVCMD_ARGS% || exit /b 1 26 | ) 27 | 28 | @echo on 29 | 30 | if "%CU_VERSION%" == "xpu" call "C:\Program Files (x86)\Intel\oneAPI\setvars.bat" 31 | 32 | set DISTUTILS_USE_SDK=1 33 | set BUILD_AGAINST_ALL_FFMPEG_FROM_S3=1 34 | 35 | set args=%1 36 | shift 37 | :start 38 | if [%1] == [] goto done 39 | set args=%args% %1 40 | shift 41 | goto start 42 | 43 | :done 44 | if "%args%" == "" ( 45 | echo Usage: vc_env_helper.bat [command] [args] 46 | echo e.g. vc_env_helper.bat cl /c test.cpp 47 | ) 48 | 49 | %args% || exit /b 1 50 | -------------------------------------------------------------------------------- /test/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | cmake_minimum_required(VERSION 3.14) 2 | include(CMakePrintHelpers) 3 | project(TorchCodecTests) 4 | set(CMAKE_CXX_STANDARD 17) 5 | set(CMAKE_CXX_STANDARD_REQUIRED) 6 | 7 | find_package(Torch REQUIRED) 8 | 9 | set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${TORCH_CXX_FLAGS}") 10 | 11 | 12 | include(FetchContent) 13 | FetchContent_Declare( 14 | googletest 15 | URL https://github.com/google/googletest/archive/refs/tags/v1.14.0.zip 16 | ) 17 | FetchContent_MakeAvailable(googletest) 18 | 19 | add_executable( 20 | VideoDecoderTest 21 | VideoDecoderTest.cpp 22 | ) 23 | 24 | target_include_directories(VideoDecoderTest SYSTEM PRIVATE ${TORCH_INCLUDE_DIRS}) 25 | target_include_directories(VideoDecoderTest SYSTEM PRIVATE ${libav_include_dirs}) 26 | target_include_directories(VideoDecoderTest PRIVATE ../) 27 | 28 | target_link_libraries( 29 | VideoDecoderTest 30 | ${libtorchcodec_library_name} 31 | ${libtorchcodec_custom_ops_name} 32 | GTest::gtest_main 33 | ) 34 | 35 | include(GoogleTest) 36 | gtest_discover_tests(VideoDecoderTest) 37 | 38 | 39 | add_executable( 40 | MetadataTest 41 | MetadataTest.cpp 42 | ) 43 | 44 | target_include_directories(MetadataTest SYSTEM PRIVATE ${TORCH_INCLUDE_DIRS}) 45 | target_include_directories(MetadataTest SYSTEM PRIVATE ${libav_include_dirs}) 46 | target_include_directories(MetadataTest PRIVATE ../) 47 | 48 | target_link_libraries( 49 | MetadataTest 50 | ${libtorchcodec_library_name} 51 | ${libtorchcodec_custom_ops_name} 52 | GTest::gtest_main 53 | ) 54 | 55 | gtest_discover_tests(MetadataTest) 56 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | BSD 3-Clause License 2 | 3 | Copyright 2024 Meta 4 | 5 | Redistribution and use in source and binary forms, with or without modification, 6 | are permitted provided that the following conditions are met: 7 | 8 | 1. Redistributions of source code must retain the above copyright notice,this list 9 | of conditions and the following disclaimer. 10 | 11 | 2. 
Redistributions in binary form must reproduce the above copyright notice, this 12 | list of conditions and the following disclaimer in the documentation 13 | and/or other materials provided with the distribution. 14 | 15 | 3. Neither the name of the copyright holder nor the names of its contributors may 16 | be used to endorse or promote products derived from this software without specific 17 | prior written permission. 18 | 19 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS “AS IS” AND ANY 20 | EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 21 | OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT 22 | SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, 23 | INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED 24 | TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR 25 | BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 26 | CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN 27 | ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH 28 | DAMAGE. 29 | -------------------------------------------------------------------------------- /src/torchcodec/_core/NVDECCache.cpp: -------------------------------------------------------------------------------- 1 | // Copyright (c) Meta Platforms, Inc. and affiliates. 2 | // All rights reserved. 3 | // 4 | // This source code is licensed under the BSD-style license found in the 5 | // LICENSE file in the root directory of this source tree. 6 | 7 | #include 8 | #include 9 | 10 | #include "CUDACommon.h" 11 | #include "FFMPEGCommon.h" 12 | #include "NVDECCache.h" 13 | 14 | #include // For cudaGetDevice 15 | 16 | extern "C" { 17 | #include 18 | #include 19 | } 20 | 21 | namespace facebook::torchcodec { 22 | 23 | NVDECCache& NVDECCache::getCache(const torch::Device& device) { 24 | static NVDECCache cacheInstances[MAX_CUDA_GPUS]; 25 | return cacheInstances[getDeviceIndex(device)]; 26 | } 27 | 28 | UniqueCUvideodecoder NVDECCache::getDecoder(CUVIDEOFORMAT* videoFormat) { 29 | CacheKey key(videoFormat); 30 | std::lock_guard lock(cacheLock_); 31 | 32 | auto it = cache_.find(key); 33 | if (it != cache_.end()) { 34 | auto decoder = std::move(it->second); 35 | cache_.erase(it); 36 | return decoder; 37 | } 38 | 39 | return nullptr; 40 | } 41 | 42 | bool NVDECCache::returnDecoder( 43 | CUVIDEOFORMAT* videoFormat, 44 | UniqueCUvideodecoder decoder) { 45 | if (!decoder) { 46 | return false; 47 | } 48 | 49 | CacheKey key(videoFormat); 50 | std::lock_guard lock(cacheLock_); 51 | 52 | if (cache_.size() >= MAX_CACHE_SIZE) { 53 | return false; 54 | } 55 | 56 | cache_[key] = std::move(decoder); 57 | return true; 58 | } 59 | 60 | } // namespace facebook::torchcodec 61 | -------------------------------------------------------------------------------- /docs/source/_static/img/pytorch-logo-flame.svg: -------------------------------------------------------------------------------- 1 | 2 | image/svg+xml 34 | -------------------------------------------------------------------------------- /src/torchcodec/_core/CUDACommon.h: -------------------------------------------------------------------------------- 1 | // Copyright (c) Meta Platforms, Inc. and affiliates. 2 | // All rights reserved. 
3 | // 4 | // This source code is licensed under the BSD-style license found in the 5 | // LICENSE file in the root directory of this source tree. 6 | 7 | #pragma once 8 | 9 | #include 10 | #include 11 | #include 12 | #include 13 | 14 | #include "FFMPEGCommon.h" 15 | #include "Frame.h" 16 | 17 | extern "C" { 18 | #include 19 | #include 20 | } 21 | 22 | namespace facebook::torchcodec { 23 | 24 | // Pytorch can only handle up to 128 GPUs. 25 | // https://github.com/pytorch/pytorch/blob/e30c55ee527b40d67555464b9e402b4b7ce03737/c10/cuda/CUDAMacros.h#L44 26 | constexpr int MAX_CUDA_GPUS = 128; 27 | 28 | void initializeCudaContextWithPytorch(const torch::Device& device); 29 | 30 | // Unique pointer type for NPP stream context 31 | using UniqueNppContext = std::unique_ptr; 32 | 33 | torch::Tensor convertNV12FrameToRGB( 34 | UniqueAVFrame& avFrame, 35 | const torch::Device& device, 36 | const UniqueNppContext& nppCtx, 37 | at::cuda::CUDAStream nvdecStream, 38 | std::optional preAllocatedOutputTensor = std::nullopt); 39 | 40 | UniqueNppContext getNppStreamContext(const torch::Device& device); 41 | void returnNppStreamContextToCache( 42 | const torch::Device& device, 43 | UniqueNppContext nppCtx); 44 | 45 | void validatePreAllocatedTensorShape( 46 | const std::optional& preAllocatedOutputTensor, 47 | const UniqueAVFrame& avFrame); 48 | 49 | int getDeviceIndex(const torch::Device& device); 50 | 51 | } // namespace facebook::torchcodec 52 | -------------------------------------------------------------------------------- /benchmarks/decoders/memprofile_decoders.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | # All rights reserved. 3 | # 4 | # This source code is licensed under the BSD-style license found in the 5 | # LICENSE file in the root directory of this source tree. 6 | 7 | import argparse 8 | import importlib 9 | 10 | import torch 11 | from memory_profiler import profile 12 | from torchcodec._core import add_video_stream, create_from_file, get_next_frame 13 | 14 | torch._dynamo.config.cache_size_limit = 100 15 | torch._dynamo.config.capture_dynamic_output_shape_ops = True 16 | 17 | 18 | @profile 19 | def torchcodec_create_next(video_file): 20 | video_decoder = create_from_file(video_file) 21 | add_video_stream(video_decoder) 22 | get_next_frame(video_decoder) 23 | return video_decoder 24 | 25 | 26 | def get_video_path_str(filename: str) -> str: 27 | resource = importlib.resources.files(__package__).joinpath(filename) 28 | with importlib.resources.as_file(resource) as path: 29 | return str(path) 30 | 31 | 32 | def main() -> None: 33 | """Memory leak check and profiling for decoders.""" 34 | parser = argparse.ArgumentParser() 35 | parser.add_argument( 36 | "--iterations", 37 | help="Number of times to invoke decoder operations.", 38 | type=int, 39 | default=10, 40 | ) 41 | args = parser.parse_args() 42 | 43 | large_video_path = get_video_path_str("853.mp4") 44 | 45 | # We call the same function several times, and each call will produce memory stats on 46 | # standard out. We rely on a human looking at the output to see if memory increases 47 | # on each run. 
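    # For reference, memory_profiler prints a per-line table for each decorated
    # call, roughly of the form (illustrative numbers only):
    #
    #   Line #    Mem usage    Increment  Occurrences   Line Contents
    #   ============================================================
    #       20     350.0 MiB     350.0 MiB          1   video_decoder = create_from_file(video_file)
    #
    # A growing "Mem usage" baseline across iterations suggests a leak.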
48 | for _ in range(args.iterations): 49 | torchcodec_create_next(large_video_path) 50 | 51 | 52 | if __name__ == "__main__": 53 | main() 54 | -------------------------------------------------------------------------------- /packaging/post_build_script.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # Copyright (c) Meta Platforms, Inc. and affiliates. 3 | # All rights reserved. 4 | # 5 | # This source code is licensed under the BSD-style license found in the 6 | # LICENSE file in the root directory of this source tree. 7 | 8 | set -ex 9 | 10 | source packaging/helpers.sh 11 | 12 | wheel_path=$(pwd)/$(find dist -type f -name "*.whl") 13 | echo "Wheel content:" 14 | unzip -l $wheel_path 15 | 16 | unamestr=$(uname) 17 | if [[ "$unamestr" == 'Linux' ]]; then 18 | ext="so" 19 | elif [[ "$unamestr" == 'Darwin' ]]; then 20 | ext="dylib" 21 | else 22 | echo "Unknown operating system: $unamestr" 23 | exit 1 24 | fi 25 | 26 | # TODO: Make ffmpeg4 work with nvcc. 27 | if [[ "$ENABLE_CUDA" -eq 1 ]]; then 28 | ffmpeg_versions=(5 6 7) 29 | fi 30 | 31 | for ffmpeg_major_version in ${ffmpeg_versions[@]}; do 32 | assert_in_wheel $wheel_path torchcodec/libtorchcodec${ffmpeg_major_version}.${ext} 33 | done 34 | assert_not_in_wheel $wheel_path libtorchcodec.${ext} 35 | 36 | for ffmpeg_ext in libavcodec.${ext} libavfilter.${ext} libavformat.${ext} libavutil.${ext} libavdevice.${ext} ; do 37 | assert_not_in_wheel $wheel_path $ffmpeg_ext 38 | done 39 | 40 | assert_not_in_wheel $wheel_path "^test" 41 | assert_not_in_wheel $wheel_path "^doc" 42 | assert_not_in_wheel $wheel_path "^benchmarks" 43 | assert_not_in_wheel $wheel_path "^packaging" 44 | 45 | if [[ "$unamestr" == 'Linux' ]]; then 46 | # See invoked python script below for details about this check. 47 | extracted_wheel_dir=$(mktemp -d) 48 | unzip -q $wheel_path -d $extracted_wheel_dir 49 | symbols_matches=$(find $extracted_wheel_dir | grep ".so$" | xargs objdump --syms | grep GLIBCXX_3.4.) 50 | python packaging/check_glibcxx.py "$symbols_matches" 51 | fi 52 | 53 | echo "ls dist" 54 | ls dist 55 | -------------------------------------------------------------------------------- /src/torchcodec/_core/AVIOContextHolder.cpp: -------------------------------------------------------------------------------- 1 | // Copyright (c) Meta Platforms, Inc. and affiliates. 2 | // All rights reserved. 3 | // 4 | // This source code is licensed under the BSD-style license found in the 5 | // LICENSE file in the root directory of this source tree. 
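// Hedged sketch of how a derived holder is expected to wire itself up, per the
// header's note that "Deriving classes should call this function in their
// constructor". MyHolder, MyState and the callback names are hypothetical:
//
//   MyHolder::MyHolder(MyState state) : state_(std::move(state)) {
//     createAVIOContext(
//         &MyHolder::read, /*write=*/nullptr, &MyHolder::seek,
//         &state_, /*isForWriting=*/false);
//   }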
6 | 7 | #include "AVIOContextHolder.h" 8 | #include 9 | 10 | namespace facebook::torchcodec { 11 | 12 | void AVIOContextHolder::createAVIOContext( 13 | AVIOReadFunction read, 14 | AVIOWriteFunction write, 15 | AVIOSeekFunction seek, 16 | void* heldData, 17 | bool isForWriting, 18 | int bufferSize) { 19 | TORCH_CHECK( 20 | bufferSize > 0, 21 | "Buffer size must be greater than 0; is " + std::to_string(bufferSize)); 22 | auto buffer = static_cast(av_malloc(bufferSize)); 23 | TORCH_CHECK( 24 | buffer != nullptr, 25 | "Failed to allocate buffer of size " + std::to_string(bufferSize)); 26 | 27 | TORCH_CHECK(seek != nullptr, "seek method must be defined"); 28 | 29 | if (isForWriting) { 30 | TORCH_CHECK(write != nullptr, "write method must be defined for writing"); 31 | } else { 32 | TORCH_CHECK(read != nullptr, "read method must be defined for reading"); 33 | } 34 | 35 | avioContext_.reset(avioAllocContext( 36 | buffer, 37 | bufferSize, 38 | /*write_flag=*/isForWriting, 39 | heldData, 40 | read, 41 | write, 42 | seek)); 43 | 44 | if (!avioContext_) { 45 | av_freep(&buffer); 46 | TORCH_CHECK(false, "Failed to allocate AVIOContext"); 47 | } 48 | } 49 | 50 | AVIOContextHolder::~AVIOContextHolder() { 51 | if (avioContext_) { 52 | av_freep(&avioContext_->buffer); 53 | } 54 | } 55 | 56 | AVIOContext* AVIOContextHolder::getAVIOContext() { 57 | return avioContext_.get(); 58 | } 59 | 60 | } // namespace facebook::torchcodec 61 | -------------------------------------------------------------------------------- /docs/Makefile: -------------------------------------------------------------------------------- 1 | # Minimal makefile for Sphinx documentation 2 | # 3 | 4 | ifneq ($(EXAMPLES_PATTERN),) 5 | EXAMPLES_PATTERN_OPTS := -D sphinx_gallery_conf.filename_pattern="$(EXAMPLES_PATTERN)" 6 | endif 7 | 8 | # You can set these variables from the command line. 9 | SPHINXOPTS = -W -j auto $(EXAMPLES_PATTERN_OPTS) 10 | SPHINXBUILD = sphinx-build 11 | SPHINXPROJ = torchcodec 12 | SOURCEDIR = source 13 | BUILDDIR = build 14 | 15 | # Put it first so that "make" without argument is like "make help". 16 | help: 17 | @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 18 | 19 | docset: html 20 | doc2dash --name $(SPHINXPROJ) --icon $(SOURCEDIR)/_static/img/pytorch-logo-flame.png --enable-js --online-redirect-url http://pytorch.org/vision/ --force $(BUILDDIR)/html/ 21 | 22 | # Manually fix because Zeal doesn't deal well with `icon.png`-only at 2x resolution. 23 | cp $(SPHINXPROJ).docset/icon.png $(SPHINXPROJ).docset/icon@2x.png 24 | convert $(SPHINXPROJ).docset/icon@2x.png -resize 16x16 $(SPHINXPROJ).docset/icon.png 25 | 26 | html-noplot: # Avoids running the gallery examples, which may take time 27 | $(SPHINXBUILD) -D plot_gallery=0 -b html "${SOURCEDIR}" "$(BUILDDIR)"/html 28 | @echo 29 | @echo "Build finished. The HTML pages are in $(BUILDDIR)/html." 30 | 31 | clean: 32 | rm -rf $(BUILDDIR)/* 33 | rm -rf $(SOURCEDIR)/generated_examples/ # sphinx-gallery 34 | rm -rf $(SOURCEDIR)/gen_modules/ # sphinx-gallery 35 | rm -rf $(SOURCEDIR)/sg_execution_times.rst # sphinx-gallery 36 | rm -rf $(SOURCEDIR)/generated/ # autosummary 37 | 38 | .PHONY: help Makefile docset 39 | 40 | # Catch-all target: route all unknown targets to Sphinx using the new 41 | # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). 
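# For example, `make html` has no explicit target above, so it falls through to
# this rule and is forwarded as `sphinx-build -M html "source" "build" ...`.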
42 | %: Makefile 43 | @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 44 | -------------------------------------------------------------------------------- /docs/source/_static/img/pytorch-logo-dark.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 | 5 | 10 | 13 | 14 | 16 | 17 | 18 | 20 | 23 | 24 | 25 | -------------------------------------------------------------------------------- /src/torchcodec/_core/FilterGraph.h: -------------------------------------------------------------------------------- 1 | // Copyright (c) Meta Platforms, Inc. and affiliates. 2 | // All rights reserved. 3 | // 4 | // This source code is licensed under the BSD-style license found in the 5 | // LICENSE file in the root directory of this source tree. 6 | 7 | #pragma once 8 | 9 | #include "FFMPEGCommon.h" 10 | #include "StreamOptions.h" 11 | 12 | namespace facebook::torchcodec { 13 | 14 | struct FiltersContext { 15 | int inputWidth = 0; 16 | int inputHeight = 0; 17 | AVPixelFormat inputFormat = AV_PIX_FMT_NONE; 18 | AVRational inputAspectRatio = {0, 0}; 19 | int outputWidth = 0; 20 | int outputHeight = 0; 21 | AVPixelFormat outputFormat = AV_PIX_FMT_NONE; 22 | std::string filtergraphStr; 23 | AVRational timeBase = {0, 0}; 24 | UniqueAVBufferRef hwFramesCtx; 25 | 26 | FiltersContext() = default; 27 | FiltersContext(FiltersContext&&) = default; 28 | FiltersContext& operator=(FiltersContext&&) = default; 29 | FiltersContext( 30 | int inputWidth, 31 | int inputHeight, 32 | AVPixelFormat inputFormat, 33 | AVRational inputAspectRatio, 34 | int outputWidth, 35 | int outputHeight, 36 | AVPixelFormat outputFormat, 37 | const std::string& filtergraphStr, 38 | AVRational timeBase, 39 | AVBufferRef* hwFramesCtx = nullptr); 40 | 41 | bool operator==(const FiltersContext&) const; 42 | bool operator!=(const FiltersContext&) const; 43 | }; 44 | 45 | class FilterGraph { 46 | public: 47 | FilterGraph( 48 | const FiltersContext& filtersContext, 49 | const VideoStreamOptions& videoStreamOptions); 50 | 51 | UniqueAVFrame convert(const UniqueAVFrame& avFrame); 52 | 53 | private: 54 | UniqueAVFilterGraph filterGraph_; 55 | AVFilterContext* sourceContext_ = nullptr; 56 | AVFilterContext* sinkContext_ = nullptr; 57 | }; 58 | 59 | } // namespace facebook::torchcodec 60 | -------------------------------------------------------------------------------- /src/torchcodec/_core/Frame.cpp: -------------------------------------------------------------------------------- 1 | // Copyright (c) Meta Platforms, Inc. and affiliates. 2 | // All rights reserved. 3 | // 4 | // This source code is licensed under the BSD-style license found in the 5 | // LICENSE file in the root directory of this source tree. 
6 | 7 | #include "Frame.h" 8 | 9 | namespace facebook::torchcodec { 10 | 11 | FrameDims::FrameDims(int height, int width) : height(height), width(width) { 12 | TORCH_CHECK(height > 0, "FrameDims.height must be > 0, got: ", height); 13 | TORCH_CHECK(width > 0, "FrameDims.width must be > 0, got: ", width); 14 | } 15 | 16 | FrameBatchOutput::FrameBatchOutput( 17 | int64_t numFrames, 18 | const FrameDims& outputDims, 19 | const torch::Device& device) 20 | : ptsSeconds(torch::empty({numFrames}, {torch::kFloat64})), 21 | durationSeconds(torch::empty({numFrames}, {torch::kFloat64})) { 22 | data = allocateEmptyHWCTensor(outputDims, device, numFrames); 23 | } 24 | 25 | torch::Tensor allocateEmptyHWCTensor( 26 | const FrameDims& frameDims, 27 | const torch::Device& device, 28 | std::optional numFrames) { 29 | auto tensorOptions = torch::TensorOptions() 30 | .dtype(torch::kUInt8) 31 | .layout(torch::kStrided) 32 | .device(device); 33 | TORCH_CHECK( 34 | frameDims.height > 0, "height must be > 0, got: ", frameDims.height); 35 | TORCH_CHECK(frameDims.width > 0, "width must be > 0, got: ", frameDims.width); 36 | if (numFrames.has_value()) { 37 | auto numFramesValue = numFrames.value(); 38 | TORCH_CHECK( 39 | numFramesValue >= 0, "numFrames must be >= 0, got: ", numFramesValue); 40 | return torch::empty( 41 | {numFramesValue, frameDims.height, frameDims.width, 3}, tensorOptions); 42 | } else { 43 | return torch::empty({frameDims.height, frameDims.width, 3}, tensorOptions); 44 | } 45 | } 46 | 47 | } // namespace facebook::torchcodec 48 | -------------------------------------------------------------------------------- /test/test_video_clip_sampler.py: -------------------------------------------------------------------------------- 1 | # (c) Meta Platforms, Inc. and affiliates. Confidential and proprietary. 2 | 3 | 4 | import pytest 5 | import torch 6 | from torchcodec._samplers import ( 7 | DEPRECATED_VideoClipSampler, 8 | IndexBasedSamplerArgs, 9 | TimeBasedSamplerArgs, 10 | VideoArgs, 11 | ) 12 | 13 | from .utils import NASA_VIDEO 14 | 15 | 16 | @pytest.mark.parametrize( 17 | ("sampler_args"), 18 | [ 19 | TimeBasedSamplerArgs( 20 | sampler_type="random", clips_per_video=2, frames_per_clip=4 21 | ), 22 | IndexBasedSamplerArgs( 23 | sampler_type="random", clips_per_video=2, frames_per_clip=4 24 | ), 25 | TimeBasedSamplerArgs( 26 | sampler_type="uniform", clips_per_video=3, frames_per_clip=4 27 | ), 28 | IndexBasedSamplerArgs( 29 | sampler_type="uniform", clips_per_video=3, frames_per_clip=4 30 | ), 31 | ], 32 | ) 33 | def test_sampler(sampler_args): 34 | torch.manual_seed(0) 35 | desired_width, desired_height = 320, 240 36 | video_args = VideoArgs(desired_width=desired_width, desired_height=desired_height) 37 | sampler = DEPRECATED_VideoClipSampler(video_args, sampler_args) 38 | clips = sampler(NASA_VIDEO.to_tensor()) 39 | assert len(clips) == sampler_args.clips_per_video 40 | clip = clips[0] 41 | if isinstance(sampler_args, TimeBasedSamplerArgs): 42 | # Note: Looks like we have an API inconsistency. 43 | # With time-based sampler, `clip` is a tensor but with index-based 44 | # samplers `clip` is a list. 45 | # Below manually convert that list to a tensor for the `.shape` check to 46 | # be unified, but this block should be removed eventually. 
47 | clip = torch.stack(clip) 48 | assert clip.shape == ( 49 | sampler_args.frames_per_clip, 50 | 3, 51 | desired_height, 52 | desired_width, 53 | ) 54 | 55 | 56 | if __name__ == "__main__": 57 | pytest.main() 58 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/bug-report.yml: -------------------------------------------------------------------------------- 1 | name: 🐛 Bug Report 2 | description: Create a report to help us reproduce and fix the bug 3 | 4 | body: 5 | - type: markdown 6 | attributes: 7 | value: > 8 | #### Your bug may have already been reported! Please check [the existing and past issues](https://github.com/pytorch/torchcodec/issues?q=sort%3Aupdated-desc+is%3Aissue). 9 | - type: textarea 10 | attributes: 11 | label: 🐛 Describe the bug 12 | description: | 13 | What broke? What behavior did you see versus what did you expect? Please provide any relevant code, error messages and exception tracebacks. 14 | 15 | A minimal code example will help us help you faster! The ideal code example is a small chunk of code that we can copy-paste to see the same error you see. For example: 16 | 17 | ```python 18 | # All necessary imports at the beginning 19 | import torch 20 | import torchcodec 21 | from torchcodec.decoders import VideoDecoder 22 | 23 | # A succinct reproducing example trimmed down to the essential parts: 24 | decoder = VideoDecoder("path/to/video.mp4") # Help! This fails! 25 | # ... 26 | ``` 27 | 28 | If the code is long, put it in a public gist and link it in the issue: https://gist.github.com. Please also paste any error messages and full exception tracebacks in ```` ```triple quotes blocks``` ````. 29 | validations: 30 | required: true 31 | - type: textarea 32 | attributes: 33 | label: Versions 34 | description: | 35 | We support a wide variety of platforms and versions, and many bugs are verison-dependent. Knowing your setup will help us help you faster! Please run the following and paste the output below. 36 | ```sh 37 | wget https://raw.githubusercontent.com/pytorch/pytorch/main/torch/utils/collect_env.py 38 | # For security purposes, please check the contents of collect_env.py before running it. 39 | python collect_env.py 40 | ``` 41 | validations: 42 | required: true 43 | - type: markdown 44 | attributes: 45 | value: > 46 | Thanks for contributing 🎉! 47 | -------------------------------------------------------------------------------- /src/torchcodec/_core/pybind_ops.cpp: -------------------------------------------------------------------------------- 1 | // Copyright (c) Meta Platforms, Inc. and affiliates. 2 | // All rights reserved. 3 | // 4 | // This source code is licensed under the BSD-style license found in the 5 | // LICENSE file in the root directory of this source tree. 6 | 7 | #include 8 | #include 9 | #include 10 | 11 | #include "AVIOFileLikeContext.h" 12 | 13 | namespace py = pybind11; 14 | 15 | namespace facebook::torchcodec { 16 | 17 | // Note: It's not immediately obvous why we need both custom_ops.cpp and 18 | // pybind_ops.cpp. We do all other Python to C++ bridging in 19 | // custom_ops.cpp, and that even depends on pybind11, so why have an 20 | // explicit pybind-only file? 21 | // 22 | // The reason is that we want to accept OWNERSHIP of a file-like object 23 | // from the Python side. In order to do that, we need a proper 24 | // py::object. 
For raw bytes, we can launder that through a tensor on the 25 | // custom_ops.cpp side, but we can't launder a proper Python object 26 | // through a tensor. Custom ops can't accept a proper Python object 27 | // through py::object, so we have to do direct pybind11 here. 28 | // 29 | // TODO: Investigate if we can do something better here. See: 30 | // https://github.com/pytorch/torchcodec/issues/896 31 | // Short version is that we're laundering a pointer through an int, the 32 | // Python side forwards that to decoder creation functions in 33 | // custom_ops.cpp and we do another cast on that side to get a pointer 34 | // again. We want to investigate if we can do something cleaner by 35 | // defining proper pybind objects. 36 | int64_t create_file_like_context(py::object file_like, bool is_for_writing) { 37 | AVIOFileLikeContext* context = 38 | new AVIOFileLikeContext(file_like, is_for_writing); 39 | return reinterpret_cast(context); 40 | } 41 | 42 | #ifndef PYBIND_OPS_MODULE_NAME 43 | #error PYBIND_OPS_MODULE_NAME must be defined! 44 | #endif 45 | 46 | PYBIND11_MODULE(PYBIND_OPS_MODULE_NAME, m) { 47 | m.def("create_file_like_context", &create_file_like_context); 48 | } 49 | 50 | } // namespace facebook::torchcodec 51 | -------------------------------------------------------------------------------- /src/torchcodec/_core/AVIOFileLikeContext.h: -------------------------------------------------------------------------------- 1 | // Copyright (c) Meta Platforms, Inc. and affiliates. 2 | // All rights reserved. 3 | // 4 | // This source code is licensed under the BSD-style license found in the 5 | // LICENSE file in the root directory of this source tree. 6 | 7 | #pragma once 8 | 9 | #include 10 | #include 11 | 12 | #include "AVIOContextHolder.h" 13 | 14 | namespace py = pybind11; 15 | 16 | namespace facebook::torchcodec { 17 | 18 | // Enables uers to pass in a Python file-like object. We then forward all read 19 | // and seek calls back up to the methods on the Python object. 20 | class AVIOFileLikeContext : public AVIOContextHolder { 21 | public: 22 | explicit AVIOFileLikeContext(const py::object& fileLike, bool isForWriting); 23 | 24 | private: 25 | static int read(void* opaque, uint8_t* buf, int buf_size); 26 | static int64_t seek(void* opaque, int64_t offset, int whence); 27 | static int write(void* opaque, const uint8_t* buf, int buf_size); 28 | 29 | // Note that we dynamically allocate the Python object because we need to 30 | // strictly control when its destructor is called. We must hold the GIL 31 | // when its destructor gets called, as it needs to update the reference 32 | // count. It's easiest to control that when it's dynamic memory. Otherwise, 33 | // we'd have to ensure whatever enclosing scope holds the object has the GIL, 34 | // and that's, at least, hard. For all of the common pitfalls, see: 35 | // 36 | // https://pybind11.readthedocs.io/en/stable/advanced/misc.html#common-sources-of-global-interpreter-lock-errors 37 | // 38 | // We maintain a reference to the file-like object because the file-like 39 | // object that was created on the Python side must live as long as our 40 | // potential use. That is, even if there are no more references to the object 41 | // on the Python side, we require that the object is still live. 
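// A hedged sketch of what the Python side can hand to this class: any object
// exposing read() and seek() (plus write() when isForWriting is true), such as
// an open binary file or an io.BytesIO. Roughly (exact Python signatures are
// not shown in this header):
//
//   f = open("video.mp4", "rb")              # or io.BytesIO(raw_bytes)
//   decoder = create_from_file_like(f, ...)  # op re-exported by _core/__init__.py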
42 | struct PyObjectDeleter { 43 | inline void operator()(py::object* obj) const { 44 | if (obj) { 45 | py::gil_scoped_acquire gil; 46 | delete obj; 47 | } 48 | } 49 | }; 50 | 51 | using UniquePyObject = std::unique_ptr; 52 | UniquePyObject fileLike_; 53 | }; 54 | 55 | } // namespace facebook::torchcodec 56 | -------------------------------------------------------------------------------- /src/torchcodec/_core/Frame.h: -------------------------------------------------------------------------------- 1 | // Copyright (c) Meta Platforms, Inc. and affiliates. 2 | // All rights reserved. 3 | // 4 | // This source code is licensed under the BSD-style license found in the 5 | // LICENSE file in the root directory of this source tree. 6 | 7 | #pragma once 8 | 9 | #include 10 | #include "FFMPEGCommon.h" 11 | #include "Metadata.h" 12 | #include "StreamOptions.h" 13 | 14 | namespace facebook::torchcodec { 15 | 16 | struct FrameDims { 17 | int height = 0; 18 | int width = 0; 19 | 20 | FrameDims() = default; 21 | 22 | FrameDims(int h, int w); 23 | }; 24 | 25 | // All public video decoding entry points return either a FrameOutput or a 26 | // FrameBatchOutput. 27 | // They are the equivalent of the user-facing Frame and FrameBatch classes in 28 | // Python. They contain RGB decoded frames along with some associated data 29 | // like PTS and duration. 30 | // FrameOutput is also relevant for audio decoding, typically as the output of 31 | // getNextFrame(), or as a temporary output variable. 32 | struct FrameOutput { 33 | // data shape is: 34 | // - 3D (C, H, W) or (H, W, C) for videos 35 | // - 2D (numChannels, numSamples) for audio 36 | torch::Tensor data; 37 | double ptsSeconds; 38 | double durationSeconds; 39 | }; 40 | 41 | struct FrameBatchOutput { 42 | torch::Tensor data; // 4D: of shape NCHW or NHWC. 43 | torch::Tensor ptsSeconds; // 1D of shape (N,) 44 | torch::Tensor durationSeconds; // 1D of shape (N,) 45 | 46 | FrameBatchOutput( 47 | int64_t numFrames, 48 | const FrameDims& outputDims, 49 | const torch::Device& device); 50 | }; 51 | 52 | struct AudioFramesOutput { 53 | torch::Tensor data; // shape is (numChannels, numSamples) 54 | double ptsSeconds; 55 | }; 56 | 57 | // -------------------------------------------------------------------------- 58 | // FRAME TENSOR ALLOCATION APIs 59 | // -------------------------------------------------------------------------- 60 | 61 | // Note [Frame Tensor allocation] 62 | // 63 | // We always allocate [N]HWC tensors. The low-level decoding functions all 64 | // assume HWC tensors, since this is what FFmpeg natively handles. It's up to 65 | // the high-level decoding entry-points to permute that back to CHW, by calling 66 | // maybePermuteHWC2CHW(). 67 | torch::Tensor allocateEmptyHWCTensor( 68 | const FrameDims& frameDims, 69 | const torch::Device& device, 70 | std::optional numFrames = std::nullopt); 71 | 72 | } // namespace facebook::torchcodec 73 | -------------------------------------------------------------------------------- /src/torchcodec/_core/StreamOptions.h: -------------------------------------------------------------------------------- 1 | // Copyright (c) Meta Platforms, Inc. and affiliates. 2 | // All rights reserved. 3 | // 4 | // This source code is licensed under the BSD-style license found in the 5 | // LICENSE file in the root directory of this source tree. 
6 | 7 | #pragma once 8 | 9 | #include 10 | #include 11 | #include 12 | #include 13 | #include 14 | 15 | namespace facebook::torchcodec { 16 | 17 | enum ColorConversionLibrary { 18 | // Use the libavfilter library for color conversion. 19 | FILTERGRAPH, 20 | // Use the libswscale library for color conversion. 21 | SWSCALE 22 | }; 23 | 24 | struct VideoStreamOptions { 25 | VideoStreamOptions() {} 26 | 27 | // Number of threads we pass to FFMPEG for decoding. 28 | // 0 means FFMPEG will choose the number of threads automatically to fully 29 | // utilize all cores. If not set, it will be the default FFMPEG behavior for 30 | // the given codec. 31 | std::optional ffmpegThreadCount; 32 | 33 | // Currently the dimension order can be either NHWC or NCHW. 34 | // H=height, W=width, C=channel. 35 | std::string dimensionOrder = "NCHW"; 36 | 37 | // By default we have to use filtergraph, as it is more general. We can only 38 | // use swscale when we have met strict requirements. See 39 | // CpuDeviceInterface::initialze() for the logic. 40 | ColorConversionLibrary colorConversionLibrary = 41 | ColorConversionLibrary::FILTERGRAPH; 42 | 43 | // By default we use CPU for decoding for both C++ and python users. 44 | // Note: This is not used for video encoding, because device is determined by 45 | // the device of the input frame tensor. 46 | torch::Device device = torch::kCPU; 47 | // Device variant (e.g., "ffmpeg", "beta", etc.) 48 | std::string_view deviceVariant = "ffmpeg"; 49 | 50 | // Encoding options 51 | std::optional codec; 52 | // Optional pixel format for video encoding (e.g., "yuv420p", "yuv444p") 53 | // If not specified, uses codec's default format. 54 | std::optional pixelFormat; 55 | std::optional crf; 56 | std::optional preset; 57 | std::optional> extraOptions; 58 | }; 59 | 60 | struct AudioStreamOptions { 61 | AudioStreamOptions() {} 62 | 63 | // Encoding only 64 | std::optional bitRate; 65 | // Decoding and encoding: 66 | std::optional numChannels; 67 | std::optional sampleRate; 68 | }; 69 | 70 | } // namespace facebook::torchcodec 71 | -------------------------------------------------------------------------------- /src/torchcodec/_internally_replaced_utils.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | # All rights reserved. 3 | # 4 | # This source code is licensed under the BSD-style license found in the 5 | # LICENSE file in the root directory of this source tree. 
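# Hedged sketch of how the helpers below fit together; the real call sites
# (presumably in _core/ops.py) are not shown here, and the library name is
# illustrative only:
#
#   lib_path = _get_extension_path("libtorchcodec_core7")
#   pybind_ops = _load_pybind11_module(
#       _get_pybind_ops_module_name(ffmpeg_major_version=7), lib_path
#   )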
6 | 7 | import importlib 8 | import importlib.util 9 | import sys 10 | from pathlib import Path 11 | from types import ModuleType 12 | 13 | 14 | # Copy pasted from torchvision 15 | # https://github.com/pytorch/vision/blob/947ae1dc71867f28021d5bc0ff3a19c249236e2a/torchvision/_internally_replaced_utils.py#L25 16 | def _get_extension_path(lib_name: str) -> str: 17 | extension_suffixes = [] 18 | if sys.platform == "linux": 19 | extension_suffixes = importlib.machinery.EXTENSION_SUFFIXES 20 | elif sys.platform == "darwin": 21 | extension_suffixes = importlib.machinery.EXTENSION_SUFFIXES + [".dylib"] 22 | elif sys.platform in ("win32", "cygwin"): 23 | extension_suffixes = importlib.machinery.EXTENSION_SUFFIXES + [".dll", ".pyd"] 24 | else: 25 | raise NotImplementedError(f"{sys.platform = } is not not supported") 26 | loader_details = ( 27 | importlib.machinery.ExtensionFileLoader, 28 | extension_suffixes, 29 | ) 30 | 31 | extfinder = importlib.machinery.FileFinder( 32 | str(Path(__file__).parent), loader_details 33 | ) 34 | ext_specs = extfinder.find_spec(lib_name) 35 | if ext_specs is None: 36 | raise ImportError(f"No spec found for {lib_name}") 37 | 38 | if ext_specs.origin is None: 39 | raise ImportError(f"Existing spec found for {lib_name} does not have an origin") 40 | 41 | return ext_specs.origin 42 | 43 | 44 | def _load_pybind11_module(module_name: str, library_path: str) -> ModuleType: 45 | spec = importlib.util.spec_from_file_location( 46 | module_name, 47 | library_path, 48 | ) 49 | if spec is None or spec.loader is None: 50 | raise ImportError( 51 | f"Unable to load spec or spec.loader for module {module_name} from path {library_path}" 52 | ) 53 | 54 | mod = importlib.util.module_from_spec(spec) 55 | spec.loader.exec_module(mod) 56 | 57 | return mod 58 | 59 | 60 | # Note that the return value from this function must match the value used as 61 | # PYBIND_OPS_MODULE_NAME when we compile _core/pybind_ops.cpp. If the values 62 | # do not match, we will not be able to import the C++ shared library as a 63 | # Python module at runtime. 64 | # 65 | # The parameter ffmpeg_major_version is unused externally, but used 66 | # internally. 67 | def _get_pybind_ops_module_name(ffmpeg_major_version: int) -> str: 68 | return "core_pybind_ops" 69 | -------------------------------------------------------------------------------- /src/torchcodec/_core/CudaDeviceInterface.h: -------------------------------------------------------------------------------- 1 | // Copyright (c) Meta Platforms, Inc. and affiliates. 2 | // All rights reserved. 3 | // 4 | // This source code is licensed under the BSD-style license found in the 5 | // LICENSE file in the root directory of this source tree. 
6 | 7 | #pragma once 8 | 9 | #include "CUDACommon.h" 10 | #include "DeviceInterface.h" 11 | #include "FilterGraph.h" 12 | 13 | namespace facebook::torchcodec { 14 | 15 | class CudaDeviceInterface : public DeviceInterface { 16 | public: 17 | CudaDeviceInterface(const torch::Device& device); 18 | 19 | virtual ~CudaDeviceInterface(); 20 | 21 | std::optional findCodec( 22 | const AVCodecID& codecId, 23 | bool isDecoder = true) override; 24 | 25 | void initialize( 26 | const AVStream* avStream, 27 | const UniqueDecodingAVFormatContext& avFormatCtx, 28 | const SharedAVCodecContext& codecContext) override; 29 | 30 | void initializeVideo( 31 | const VideoStreamOptions& videoStreamOptions, 32 | [[maybe_unused]] const std::vector>& 33 | transforms, 34 | [[maybe_unused]] const std::optional& resizedOutputDims) 35 | override; 36 | 37 | void registerHardwareDeviceWithCodec(AVCodecContext* codecContext) override; 38 | 39 | void convertAVFrameToFrameOutput( 40 | UniqueAVFrame& avFrame, 41 | FrameOutput& frameOutput, 42 | std::optional preAllocatedOutputTensor) override; 43 | 44 | std::string getDetails() override; 45 | 46 | UniqueAVFrame convertCUDATensorToAVFrameForEncoding( 47 | const torch::Tensor& tensor, 48 | int frameIndex, 49 | AVCodecContext* codecContext) override; 50 | 51 | void setupHardwareFrameContextForEncoding( 52 | AVCodecContext* codecContext) override; 53 | 54 | private: 55 | // Our CUDA decoding code assumes NV12 format. In order to handle other 56 | // kinds of input, we need to convert them to NV12. Our current implementation 57 | // does this using filtergraph. 58 | UniqueAVFrame maybeConvertAVFrameToNV12OrRGB24(UniqueAVFrame& avFrame); 59 | 60 | // We sometimes encounter frames that cannot be decoded on the CUDA device. 61 | // Rather than erroring out, we decode them on the CPU. 62 | std::unique_ptr cpuInterface_; 63 | 64 | VideoStreamOptions videoStreamOptions_; 65 | AVRational timeBase_; 66 | 67 | UniqueAVBufferRef hardwareDeviceCtx_; 68 | UniqueNppContext nppCtx_; 69 | 70 | // This filtergraph instance is only used for NV12 format conversion in 71 | // maybeConvertAVFrameToNV12(). 72 | std::unique_ptr nv12ConversionContext_; 73 | std::unique_ptr nv12Conversion_; 74 | 75 | bool usingCPUFallback_ = false; 76 | bool hasDecodedFrame_ = false; 77 | }; 78 | 79 | } // namespace facebook::torchcodec 80 | -------------------------------------------------------------------------------- /src/torchcodec/_core/Transform.h: -------------------------------------------------------------------------------- 1 | // Copyright (c) Meta Platforms, Inc. and affiliates. 2 | // All rights reserved. 3 | // 4 | // This source code is licensed under the BSD-style license found in the 5 | // LICENSE file in the root directory of this source tree. 6 | 7 | #pragma once 8 | 9 | #include 10 | #include 11 | #include "Frame.h" 12 | #include "Metadata.h" 13 | 14 | namespace facebook::torchcodec { 15 | 16 | class Transform { 17 | public: 18 | virtual std::string getFilterGraphCpu() const = 0; 19 | virtual ~Transform() = default; 20 | 21 | // If the transformation does not change the output frame dimensions, then 22 | // there is no need to override this member function. The default 23 | // implementation returns an empty optional, indicating that the output frame 24 | // has the same dimensions as the input frame. 25 | // 26 | // If the transformation does change the output frame dimensions, then it 27 | // must override this member function and return the output frame dimensions. 
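  // For example, ResizeTransform below overrides this and returns the target
  // dimensions it was constructed with. (Its getFilterGraphCpu() presumably
  // emits a scale filter along the lines of "scale=240:135:flags=bilinear",
  // judging from test resource names like
  // nasa_13013.mp4.scale_240_135_flags_bilinear -- the exact string is an
  // assumption, not something this header specifies.)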
28 | virtual std::optional getOutputFrameDims() const { 29 | return std::nullopt; 30 | } 31 | 32 | // The validity of some transforms depends on the characteristics of the 33 | // AVStream they're being applied to. For example, some transforms will 34 | // specify coordinates inside a frame, we need to validate that those are 35 | // within the frame's bounds. 36 | // 37 | // Note that the validation function does not return anything. We expect 38 | // invalid configurations to throw an exception. 39 | virtual void validate([[maybe_unused]] const FrameDims& inputDims) const {} 40 | }; 41 | 42 | class ResizeTransform : public Transform { 43 | public: 44 | enum class InterpolationMode { BILINEAR }; 45 | 46 | explicit ResizeTransform(const FrameDims& dims) 47 | : outputDims_(dims), interpolationMode_(InterpolationMode::BILINEAR) {} 48 | 49 | ResizeTransform(const FrameDims& dims, InterpolationMode interpolationMode) 50 | : outputDims_(dims), interpolationMode_(interpolationMode) {} 51 | 52 | std::string getFilterGraphCpu() const override; 53 | std::optional getOutputFrameDims() const override; 54 | 55 | private: 56 | FrameDims outputDims_; 57 | InterpolationMode interpolationMode_; 58 | }; 59 | 60 | class CropTransform : public Transform { 61 | public: 62 | CropTransform(const FrameDims& dims, int x, int y); 63 | 64 | // Becomes a center crop if x and y are not specified. 65 | explicit CropTransform(const FrameDims& dims); 66 | 67 | std::string getFilterGraphCpu() const override; 68 | std::optional getOutputFrameDims() const override; 69 | void validate(const FrameDims& inputDims) const override; 70 | 71 | private: 72 | FrameDims outputDims_; 73 | std::optional x_; 74 | std::optional y_; 75 | }; 76 | 77 | } // namespace facebook::torchcodec 78 | -------------------------------------------------------------------------------- /src/torchcodec/_core/AVIOContextHolder.h: -------------------------------------------------------------------------------- 1 | // Copyright (c) Meta Platforms, Inc. and affiliates. 2 | // All rights reserved. 3 | // 4 | // This source code is licensed under the BSD-style license found in the 5 | // LICENSE file in the root directory of this source tree. 6 | 7 | #pragma once 8 | 9 | #include "FFMPEGCommon.h" 10 | 11 | namespace facebook::torchcodec { 12 | 13 | // The AVIOContextHolder serves several purposes: 14 | // 15 | // 1. It is a smart pointer for the AVIOContext. It has the logic to create 16 | // a new AVIOContext and will appropriately free the AVIOContext when it 17 | // goes out of scope. Note that this requires more than just having a 18 | // UniqueAVIOContext, as the AVIOContext points to a buffer which must be 19 | // freed. 20 | // 2. It is a base class for AVIOContext specializations. When specializing a 21 | // AVIOContext, we need to provide four things: 22 | // 1. A read callback function, for decoding. 23 | // 2. A seek callback function, for decoding and encoding. 24 | // 3. A write callback function, for encoding. 25 | // 4. A pointer to some context object that has the same lifetime as the 26 | // AVIOContext itself. This context object holds the custom state that 27 | // tracks the custom behavior of reading, seeking and writing. It is 28 | // provided upon AVIOContext creation and to the read, seek and 29 | // write callback functions. 30 | // The callback functions do not need to be members of the derived class, 31 | // but the derived class must have access to them. The context object must 32 | // be a member of the derived class. 
Derived classes need to call 33 | // createAVIOContext(), ideally in their constructor. 34 | // 3. A generic handle for those that just need to manage having access to an 35 | // AVIOContext, but aren't necessarily concerned with how it was customized: 36 | // typically, the SingleStreamDecoder. 37 | class AVIOContextHolder { 38 | public: 39 | virtual ~AVIOContextHolder(); 40 | AVIOContext* getAVIOContext(); 41 | 42 | protected: 43 | // Make constructor protected to prevent anyone from constructing 44 | // an AVIOContextHolder without deriving it. (Ordinarily this would be 45 | // enforced by having a pure virtual methods, but we don't have any.) 46 | AVIOContextHolder() = default; 47 | 48 | // Deriving classes should call this function in their constructor. 49 | void createAVIOContext( 50 | AVIOReadFunction read, 51 | AVIOWriteFunction write, 52 | AVIOSeekFunction seek, 53 | void* heldData, 54 | bool isForWriting, 55 | int bufferSize = defaultBufferSize); 56 | 57 | private: 58 | UniqueAVIOContext avioContext_; 59 | 60 | // Defaults to 64 KB 61 | static const int defaultBufferSize = 64 * 1024; 62 | }; 63 | 64 | } // namespace facebook::torchcodec 65 | -------------------------------------------------------------------------------- /test/resources/sine_mono_s16.wav.stream0.all_frames_info.json: -------------------------------------------------------------------------------- 1 | [ 2 | { 3 | "duration_time": "0.128000", 4 | "pts_time": "0.000000" 5 | }, 6 | { 7 | "duration_time": "0.128000", 8 | "pts_time": "0.128000" 9 | }, 10 | { 11 | "duration_time": "0.128000", 12 | "pts_time": "0.256000" 13 | }, 14 | { 15 | "duration_time": "0.128000", 16 | "pts_time": "0.384000" 17 | }, 18 | { 19 | "duration_time": "0.128000", 20 | "pts_time": "0.512000" 21 | }, 22 | { 23 | "duration_time": "0.128000", 24 | "pts_time": "0.640000" 25 | }, 26 | { 27 | "duration_time": "0.128000", 28 | "pts_time": "0.768000" 29 | }, 30 | { 31 | "duration_time": "0.128000", 32 | "pts_time": "0.896000" 33 | }, 34 | { 35 | "duration_time": "0.128000", 36 | "pts_time": "1.024000" 37 | }, 38 | { 39 | "duration_time": "0.128000", 40 | "pts_time": "1.152000" 41 | }, 42 | { 43 | "duration_time": "0.128000", 44 | "pts_time": "1.280000" 45 | }, 46 | { 47 | "duration_time": "0.128000", 48 | "pts_time": "1.408000" 49 | }, 50 | { 51 | "duration_time": "0.128000", 52 | "pts_time": "1.536000" 53 | }, 54 | { 55 | "duration_time": "0.128000", 56 | "pts_time": "1.664000" 57 | }, 58 | { 59 | "duration_time": "0.128000", 60 | "pts_time": "1.792000" 61 | }, 62 | { 63 | "duration_time": "0.128000", 64 | "pts_time": "1.920000" 65 | }, 66 | { 67 | "duration_time": "0.128000", 68 | "pts_time": "2.048000" 69 | }, 70 | { 71 | "duration_time": "0.128000", 72 | "pts_time": "2.176000" 73 | }, 74 | { 75 | "duration_time": "0.128000", 76 | "pts_time": "2.304000" 77 | }, 78 | { 79 | "duration_time": "0.128000", 80 | "pts_time": "2.432000" 81 | }, 82 | { 83 | "duration_time": "0.128000", 84 | "pts_time": "2.560000" 85 | }, 86 | { 87 | "duration_time": "0.128000", 88 | "pts_time": "2.688000" 89 | }, 90 | { 91 | "duration_time": "0.128000", 92 | "pts_time": "2.816000" 93 | }, 94 | { 95 | "duration_time": "0.128000", 96 | "pts_time": "2.944000" 97 | }, 98 | { 99 | "duration_time": "0.128000", 100 | "pts_time": "3.072000" 101 | }, 102 | { 103 | "duration_time": "0.128000", 104 | "pts_time": "3.200000" 105 | }, 106 | { 107 | "duration_time": "0.128000", 108 | "pts_time": "3.328000" 109 | }, 110 | { 111 | "duration_time": "0.128000", 112 | "pts_time": "3.456000" 
113 | }, 114 | { 115 | "duration_time": "0.128000", 116 | "pts_time": "3.584000" 117 | }, 118 | { 119 | "duration_time": "0.128000", 120 | "pts_time": "3.712000" 121 | }, 122 | { 123 | "duration_time": "0.128000", 124 | "pts_time": "3.840000" 125 | }, 126 | { 127 | "duration_time": "0.032000", 128 | "pts_time": "3.968000" 129 | } 130 | ] 131 | -------------------------------------------------------------------------------- /test/resources/sine_mono_s32_8000.wav.stream0.all_frames_info.json: -------------------------------------------------------------------------------- 1 | [ 2 | { 3 | "duration_time": "0.128000", 4 | "pts_time": "0.000000" 5 | }, 6 | { 7 | "duration_time": "0.128000", 8 | "pts_time": "0.128000" 9 | }, 10 | { 11 | "duration_time": "0.128000", 12 | "pts_time": "0.256000" 13 | }, 14 | { 15 | "duration_time": "0.128000", 16 | "pts_time": "0.384000" 17 | }, 18 | { 19 | "duration_time": "0.128000", 20 | "pts_time": "0.512000" 21 | }, 22 | { 23 | "duration_time": "0.128000", 24 | "pts_time": "0.640000" 25 | }, 26 | { 27 | "duration_time": "0.128000", 28 | "pts_time": "0.768000" 29 | }, 30 | { 31 | "duration_time": "0.128000", 32 | "pts_time": "0.896000" 33 | }, 34 | { 35 | "duration_time": "0.128000", 36 | "pts_time": "1.024000" 37 | }, 38 | { 39 | "duration_time": "0.128000", 40 | "pts_time": "1.152000" 41 | }, 42 | { 43 | "duration_time": "0.128000", 44 | "pts_time": "1.280000" 45 | }, 46 | { 47 | "duration_time": "0.128000", 48 | "pts_time": "1.408000" 49 | }, 50 | { 51 | "duration_time": "0.128000", 52 | "pts_time": "1.536000" 53 | }, 54 | { 55 | "duration_time": "0.128000", 56 | "pts_time": "1.664000" 57 | }, 58 | { 59 | "duration_time": "0.128000", 60 | "pts_time": "1.792000" 61 | }, 62 | { 63 | "duration_time": "0.128000", 64 | "pts_time": "1.920000" 65 | }, 66 | { 67 | "duration_time": "0.128000", 68 | "pts_time": "2.048000" 69 | }, 70 | { 71 | "duration_time": "0.128000", 72 | "pts_time": "2.176000" 73 | }, 74 | { 75 | "duration_time": "0.128000", 76 | "pts_time": "2.304000" 77 | }, 78 | { 79 | "duration_time": "0.128000", 80 | "pts_time": "2.432000" 81 | }, 82 | { 83 | "duration_time": "0.128000", 84 | "pts_time": "2.560000" 85 | }, 86 | { 87 | "duration_time": "0.128000", 88 | "pts_time": "2.688000" 89 | }, 90 | { 91 | "duration_time": "0.128000", 92 | "pts_time": "2.816000" 93 | }, 94 | { 95 | "duration_time": "0.128000", 96 | "pts_time": "2.944000" 97 | }, 98 | { 99 | "duration_time": "0.128000", 100 | "pts_time": "3.072000" 101 | }, 102 | { 103 | "duration_time": "0.128000", 104 | "pts_time": "3.200000" 105 | }, 106 | { 107 | "duration_time": "0.128000", 108 | "pts_time": "3.328000" 109 | }, 110 | { 111 | "duration_time": "0.128000", 112 | "pts_time": "3.456000" 113 | }, 114 | { 115 | "duration_time": "0.128000", 116 | "pts_time": "3.584000" 117 | }, 118 | { 119 | "duration_time": "0.128000", 120 | "pts_time": "3.712000" 121 | }, 122 | { 123 | "duration_time": "0.128000", 124 | "pts_time": "3.840000" 125 | }, 126 | { 127 | "duration_time": "0.032000", 128 | "pts_time": "3.968000" 129 | } 130 | ] 131 | -------------------------------------------------------------------------------- /.github/workflows/lint.yaml: -------------------------------------------------------------------------------- 1 | name: Lint 2 | 3 | on: 4 | push: 5 | branches: [ main ] 6 | pull_request: 7 | 8 | concurrency: 9 | group: unit-test${{ github.workflow }}-${{ github.ref == 'refs/heads/main' && github.run_number || github.ref }} 10 | cancel-in-progress: true 11 | 12 | defaults: 13 | run: 14 | 
shell: bash -l -eo pipefail {0} 15 | 16 | jobs: 17 | pre-commit-checks: 18 | runs-on: ubuntu-latest 19 | strategy: 20 | fail-fast: false 21 | matrix: 22 | python-version: ['3.12'] 23 | steps: 24 | - name: Check out repo 25 | uses: actions/checkout@v3 26 | - name: Setup conda env 27 | uses: conda-incubator/setup-miniconda@v2 28 | with: 29 | auto-update-conda: true 30 | miniconda-version: "latest" 31 | activate-environment: test 32 | python-version: ${{ matrix.python-version }} 33 | - name: Update pip 34 | run: python -m pip install --upgrade pip 35 | - name: Install pre-commit 36 | run: | 37 | python -m pip install pre-commit 38 | - name: Run pre-commit checks 39 | run: | 40 | pre-commit run --all-files 41 | - name: Check to see what files pre-commit modified 42 | run: | 43 | git diff 44 | 45 | mypy: 46 | runs-on: ubuntu-latest 47 | strategy: 48 | fail-fast: false 49 | matrix: 50 | python-version: ['3.12'] 51 | steps: 52 | - name: Check out repo 53 | uses: actions/checkout@v3 54 | - name: Setup conda env 55 | uses: conda-incubator/setup-miniconda@v2 56 | with: 57 | auto-update-conda: true 58 | miniconda-version: "latest" 59 | activate-environment: test 60 | python-version: ${{ matrix.python-version }} 61 | - name: Update pip 62 | run: python -m pip install --upgrade pip 63 | - name: Install dependencies and FFmpeg 64 | run: | 65 | # If we're in a release branch or in a PR against a release branch, 66 | # we install the PyTorch RCs from the test channel. Otherwise, e.g. in 67 | # `main` or in PRs against `main`, we install the nightly builds. 68 | # Note that the `test` RCs are 69 | if [[ (${GITHUB_EVENT_NAME} = 'pull_request' && (${GITHUB_BASE_REF} = 'release'*)) || (${GITHUB_REF} = 'refs/heads/release'*) ]]; then 70 | CHANNEL=test 71 | else 72 | CHANNEL=nightly 73 | fi 74 | python -m pip install --pre torch torchvision --index-url https://download.pytorch.org/whl/${CHANNEL}/cpu 75 | conda install "ffmpeg=7.0.1" pkg-config pybind11 -c conda-forge 76 | ffmpeg -version 77 | - name: Build and install torchcodec 78 | run: | 79 | python -m pip install -e ".[dev]" --no-build-isolation -vvv 80 | - name: Install mypy 81 | run: | 82 | python -m pip install mypy 83 | - name: Run mypy 84 | run: | 85 | mypy --install-types --non-interactive --config-file mypy.ini 86 | -------------------------------------------------------------------------------- /src/torchcodec/samplers/_common.py: -------------------------------------------------------------------------------- 1 | from collections.abc import Callable 2 | 3 | from torchcodec import FrameBatch 4 | 5 | _LIST_OF_INT_OR_FLOAT = list[int] | list[float] 6 | 7 | 8 | def _repeat_last_policy( 9 | values: _LIST_OF_INT_OR_FLOAT, desired_len: int 10 | ) -> _LIST_OF_INT_OR_FLOAT: 11 | # values = [1, 2, 3], desired_len = 5 12 | # output = [1, 2, 3, 3, 3] 13 | values += [values[-1]] * (desired_len - len(values)) 14 | return values 15 | 16 | 17 | def _wrap_policy( 18 | values: _LIST_OF_INT_OR_FLOAT, desired_len: int 19 | ) -> _LIST_OF_INT_OR_FLOAT: 20 | # values = [1, 2, 3], desired_len = 5 21 | # output = [1, 2, 3, 1, 2] 22 | return (values * (desired_len // len(values) + 1))[:desired_len] 23 | 24 | 25 | def _error_policy( 26 | frames_indices: _LIST_OF_INT_OR_FLOAT, desired_len: int 27 | ) -> _LIST_OF_INT_OR_FLOAT: 28 | raise ValueError( 29 | "You set the 'error' policy, and the sampler tried to decode a frame " 30 | "that is beyond the number of frames in the video. " 31 | "Try to leave sampling_range_end to its default value?" 
32 | ) 33 | 34 | 35 | _POLICY_FUNCTION_TYPE = Callable[[_LIST_OF_INT_OR_FLOAT, int], _LIST_OF_INT_OR_FLOAT] 36 | 37 | _POLICY_FUNCTIONS: dict[str, _POLICY_FUNCTION_TYPE] = { 38 | "repeat_last": _repeat_last_policy, 39 | "wrap": _wrap_policy, 40 | "error": _error_policy, 41 | } 42 | 43 | 44 | def _validate_common_params(*, decoder, num_frames_per_clip, policy): 45 | if len(decoder) < 1: 46 | raise ValueError( 47 | f"Decoder must have at least one frame, found {len(decoder)} frames." 48 | ) 49 | 50 | if num_frames_per_clip <= 0: 51 | raise ValueError( 52 | f"num_frames_per_clip ({num_frames_per_clip}) must be strictly positive" 53 | ) 54 | if policy not in _POLICY_FUNCTIONS.keys(): 55 | raise ValueError( 56 | f"Invalid policy ({policy}). Supported values are {_POLICY_FUNCTIONS.keys()}." 57 | ) 58 | 59 | 60 | def _reshape_4d_framebatch_into_5d( 61 | *, 62 | frames: FrameBatch, 63 | num_clips: int, 64 | num_frames_per_clip: int, 65 | ) -> FrameBatch: 66 | last_3_dims = frames.data.shape[-3:] 67 | return FrameBatch( 68 | data=frames.data.view(num_clips, num_frames_per_clip, *last_3_dims), 69 | pts_seconds=frames.pts_seconds.view(num_clips, num_frames_per_clip), 70 | duration_seconds=frames.duration_seconds.view(num_clips, num_frames_per_clip), 71 | ) 72 | 73 | 74 | _FRAMEBATCH_RETURN_DOCS = """ 75 | Returns: 76 | FrameBatch: 77 | The sampled :term:`clips`, as a 5D :class:`~torchcodec.FrameBatch`. 78 | The shape of the ``data`` field is (``num_clips``, 79 | ``num_frames_per_clips``, ...) where ... is (H, W, C) or (C, H, W) 80 | depending on the ``dimension_order`` parameter of 81 | :class:`~torchcodec.decoders.VideoDecoder`. The shape of the 82 | ``pts_seconds`` and ``duration_seconds`` fields is (``num_clips``, 83 | ``num_frames_per_clips``). 84 | """ 85 | -------------------------------------------------------------------------------- /src/torchcodec/share/cmake/TorchCodec/TorchCodecConfig.cmake: -------------------------------------------------------------------------------- 1 | # FindTorchCodec 2 | # -------------- 3 | # 4 | # Finds the TorchCodec library 5 | # 6 | # This will define the following variables: 7 | # 8 | # TORCHCODEC_FOUND: True if the system has the TorchCodec library 9 | # TORCHCODEC_VARIANTS: list of TorchCodec variants. A variant is a supported 10 | # FFmpeg major version. 11 | # 12 | # and the following imported targets: 13 | # 14 | # torchcodec::ffmpeg${N} 15 | # torchcodec::core${N} 16 | # 17 | # where N is a TorchCodec variant (FFmpeg major version) from 18 | # TORCHCODEC_VARIANTS list. 19 | 20 | include(FindPackageHandleStandardArgs) 21 | include("${CMAKE_CURRENT_LIST_DIR}/ffmpeg_versions.cmake") 22 | 23 | # Assume we are in /share/cmake/TorchCodec/TorchCodecConfig.cmake 24 | get_filename_component(CMAKE_CURRENT_LIST_DIR "${CMAKE_CURRENT_LIST_FILE}" PATH) 25 | get_filename_component(TORCHCODEC_INSTALL_PREFIX "${CMAKE_CURRENT_LIST_DIR}/../../../" ABSOLUTE) 26 | 27 | # Include directories. 
28 | set(TORCHCODEC_INCLUDE_DIRS ${TORCHCODEC_INSTALL_PREFIX}/_core) 29 | set(TORCHCODEC_VARIANTS "") 30 | 31 | function(add_torchcodec_target ffmpeg_major_version) 32 | set(target torchcodec::core${ffmpeg_major_version}) 33 | 34 | if (NOT TARGET torchcodec::ffmpeg${ffmpeg_major_version}) 35 | message(FATAL_ERROR "torchcodec::ffmpeg${ffmpeg_major_version} target is not defined") 36 | endif() 37 | 38 | find_library(lib_path torchcodec_core${ffmpeg_major_version} 39 | PATHS "${TORCHCODEC_INSTALL_PREFIX}" NO_CACHE NO_DEFAULT_PATH) 40 | if (NOT lib_path) 41 | message(FATAL_ERROR "torchcodec_core${ffmpeg_major_version} shared library is missing") 42 | endif() 43 | 44 | message("Adding ${target} target") 45 | add_library(${target} SHARED IMPORTED) 46 | add_dependencies(${target} torchcodec::ffmpeg${ffmpeg_major_version}) 47 | set_target_properties(${target} PROPERTIES 48 | INTERFACE_INCLUDE_DIRECTORIES ${TORCHCODEC_INCLUDE_DIRS} 49 | IMPORTED_LOCATION ${lib_path} 50 | ) 51 | 52 | list(APPEND TORCHCODEC_VARIANTS "${ffmpeg_major_version}") 53 | set(TORCHCODEC_VARIANTS "${TORCHCODEC_VARIANTS}" PARENT_SCOPE) 54 | endfunction() 55 | 56 | # If any of the TORCHCODEC_FFMPEG${N}_INSTALL_PREFIX environment variables 57 | # are defined, use them to locate the corresponding FFmpeg and TorchCodec targets. 58 | # Otherwise, fall back to pkg-config to find FFmpeg. 59 | set(use_pkg_config TRUE) 60 | foreach(ffmpeg_major_version IN LISTS TORCHCODEC_SUPPORTED_FFMPEG_VERSIONS) 61 | if (DEFINED ENV{TORCHCODEC_FFMPEG${ffmpeg_major_version}_INSTALL_PREFIX}) 62 | add_ffmpeg_target( 63 | "${ffmpeg_major_version}" 64 | "$ENV{TORCHCODEC_FFMPEG${ffmpeg_major_version}_INSTALL_PREFIX}" 65 | ) 66 | add_torchcodec_target(${ffmpeg_major_version}) 67 | set(use_pkg_config FALSE) 68 | endif() 69 | endforeach() 70 | 71 | if (use_pkg_config) 72 | add_ffmpeg_target_with_pkg_config(ffmpeg_major_version) 73 | add_torchcodec_target(${ffmpeg_major_version}) 74 | endif() 75 | 76 | find_package_handle_standard_args(TorchCodec DEFAULT_MSG TORCHCODEC_VARIANTS) 77 | -------------------------------------------------------------------------------- /packaging/check_glibcxx.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | # All rights reserved. 3 | # 4 | # This source code is licensed under the BSD-style license found in the 5 | # LICENSE file in the root directory of this source tree. 6 | 7 | """ 8 | The goal of this script is to ensure that the .so files we ship do not contain 9 | symbol versions from libstdc++ that are too recent. This is a very manual way of 10 | doing the checks that `auditwheel repair` would do (but using auditwheel isn't 11 | necessarily easy either). 12 | 13 | Why this is needed: during development, we observed the following general 14 | scenario in various local development setups: 15 | - torchcodec is compiled with a given (somewhat recent) c++ toolchain (say 16 | gcc11) 17 | - because the toolchain is recent, some recent symbol versions from libstdc++ 18 | are added as dependencies in the torchcodec?.so files, e.g. GLIBCXX_3.4.29 19 | (this is normal) 20 | - at runtime, for whatever reason, the libstdc++.so that gets loaded is *not* 21 | the one that was used when building. The libstdc++.so that is loaded can be 22 | older than the toolchain one, and it doesn't contain the more recent symbols 23 | that torchcodec?.so depends on, which leads to a runtime error. 
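(Illustrative only, and not part of the packaging flow — the library name below is a placeholder: the GLIBCXX versions that a built extension actually references can be listed with something like `objdump -T libtorchcodec_core7.so | grep -o 'GLIBCXX_[0-9.]*' | sort -u`. Output of that kind, one match per line, is what this script expects as its single argument.)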
24 | 25 | The reasons why a different libstdc++.so is loaded at runtime can be multiple 26 | (and mysterious! https://hackmd.io/@_NznxihTSmC-IgW4cgnlyQ/HJXc4BEHR). 27 | 28 | This script doesn't try to prevent *that* (it's impossible anyway, as we don't 29 | control users' environments). Instead, it prevents the dependency of torchcodec 30 | on recent symbol versions, which ensures that torchcodec can run on both recent 31 | *and* older runtimes. 32 | The most recent symbol on the manylinux torch 2.3.1 wheel is 33 | GLIBCXX_3.4.19, so as long as torchcodec doesn't ship a symbol that is higher 34 | than that, torchcodec should be fine. 35 | 36 | The easiest way to avoid recent symbols is simply to use an old-enough 37 | toolchain. Relying on the test-infra runners should be enough. 38 | """ 39 | 40 | import re 41 | import sys 42 | 43 | if len(sys.argv) != 2: 44 | raise ValueError("Wrong usage: python check_glibcxx.py .") 45 | 46 | MAX_ALLOWED = (3, 4, 24) 47 | 48 | symbol_matches = sys.argv[1].split("\n") 49 | all_symbols = set() 50 | for line in symbol_matches: 51 | # We search for GLIBCXX_major.minor.micro 52 | if match := re.search(r"GLIBCXX_\d+\.\d+\.\d+", line): 53 | all_symbols.add(match.group(0)) 54 | 55 | if not all_symbols: 56 | raise ValueError( 57 | f"No GLIBCXX symbols found in {symbol_matches}. Something is wrong." 58 | ) 59 | 60 | all_versions = (symbol.split("_")[1].split(".") for symbol in all_symbols) 61 | all_versions = (tuple(int(v) for v in version) for version in all_versions) 62 | max_version = max(all_versions) 63 | 64 | print(f"Found the following GLIBCXX symbol versions: {all_symbols}.") 65 | print(f"The max version is {max_version}. Max allowed is {MAX_ALLOWED}.") 66 | 67 | if max_version > MAX_ALLOWED: 68 | raise AssertionError( 69 | "The max version is greater than the max allowed! " 70 | "That may lead to compatibility issues. " 71 | "Was the wheel compiled with an old-enough toolchain?"
72 | ) 73 | 74 | print("All good.") 75 | -------------------------------------------------------------------------------- /benchmarks/decoders/benchmark_audio_decoders.py: -------------------------------------------------------------------------------- 1 | from argparse import ArgumentParser 2 | from datetime import timedelta 3 | from pathlib import Path 4 | from time import perf_counter_ns 5 | 6 | import torch 7 | import torchaudio 8 | from torch import Tensor 9 | from torchaudio.io import StreamReader 10 | from torchcodec.decoders._audio_decoder import AudioDecoder 11 | 12 | DEFAULT_NUM_EXP = 30 13 | 14 | 15 | def bench(f, *args, num_exp=DEFAULT_NUM_EXP, warmup=1, **kwargs) -> Tensor: 16 | 17 | for _ in range(warmup): 18 | f(*args, **kwargs) 19 | 20 | times = [] 21 | for _ in range(num_exp): 22 | start = perf_counter_ns() 23 | f(*args, **kwargs) 24 | end = perf_counter_ns() 25 | times.append(end - start) 26 | return torch.tensor(times).float() 27 | 28 | 29 | def report_stats(times: Tensor, unit: str = "ms", prefix: str = "") -> float: 30 | mul = { 31 | "ns": 1, 32 | "µs": 1e-3, 33 | "ms": 1e-6, 34 | "s": 1e-9, 35 | }[unit] 36 | times = times * mul 37 | std = times.std().item() 38 | med = times.median().item() 39 | mean = times.mean().item() 40 | min = times.min().item() 41 | max = times.max().item() 42 | print( 43 | f"{prefix:<40} {med = :.2f}, {mean = :.2f} +- {std:.2f}, {min = :.2f}, {max = :.2f} - in {unit}" 44 | ) 45 | 46 | 47 | def decode_with_torchcodec(path: Path) -> None: 48 | AudioDecoder(path).get_all_samples() 49 | 50 | 51 | def decode_with_torchaudio_StreamReader(path: Path) -> None: 52 | reader = StreamReader(path) 53 | reader.add_audio_stream(frames_per_chunk=1024) 54 | for _ in reader.stream(): 55 | pass 56 | 57 | 58 | def decode_with_torchaudio_load(path: Path, backend: str) -> None: 59 | torchaudio.load(str(path), backend=backend) 60 | 61 | 62 | parser = ArgumentParser() 63 | parser.add_argument("--path", type=str, help="path to file", required=True) 64 | parser.add_argument( 65 | "--num-exp", 66 | type=int, 67 | default=DEFAULT_NUM_EXP, 68 | help="number of runs to average over", 69 | ) 70 | 71 | args = parser.parse_args() 72 | path = Path(args.path) 73 | 74 | metadata = AudioDecoder(path).metadata 75 | duration = str(timedelta(seconds=metadata.duration_seconds_from_header)).split(".")[0] 76 | 77 | print( 78 | f"Benchmarking {path.name}, duration: {duration}, codec: {metadata.codec}, format: {metadata.sample_format}, averaging over {args.num_exp} runs:" 79 | ) 80 | 81 | for decode_f, kwargs, prefix in ( 82 | (decode_with_torchcodec, {}, "torchcodec.AudioDecoder"), 83 | ( 84 | decode_with_torchaudio_load, 85 | {"backend": "ffmpeg"}, 86 | "torchaudio.load(backend='ffmpeg')", 87 | ), 88 | (decode_with_torchaudio_load, {"backend": "sox"}, "torchaudio.load(backend='sox')"), 89 | ( 90 | decode_with_torchaudio_load, 91 | {"backend": "soundfile"}, 92 | "torchaudio.load(backend='soundfile')", 93 | ), 94 | (decode_with_torchaudio_StreamReader, {}, "torchaudio.StreamReader"), 95 | ): 96 | 97 | try: 98 | times = bench(decode_f, path, **kwargs, num_exp=args.num_exp) 99 | report_stats(times, prefix=prefix) 100 | except RuntimeError: 101 | print(f"{prefix:<40} Not supported") 102 | -------------------------------------------------------------------------------- /test/conftest.py: -------------------------------------------------------------------------------- 1 | import os 2 | import random 3 | 4 | import pytest 5 | import torch 6 | 7 | from .utils import in_fbcode 8 | 9 | 10 | def 
pytest_configure(config): 11 | # register an additional marker (see pytest_collection_modifyitems) 12 | config.addinivalue_line( 13 | "markers", "needs_cuda: mark for tests that rely on a CUDA device" 14 | ) 15 | config.addinivalue_line( 16 | "markers", "needs_ffmpeg_cli: mark for tests that rely on ffmpeg" 17 | ) 18 | 19 | 20 | def pytest_collection_modifyitems(items): 21 | # This hook is called by pytest after it has collected the tests (google its 22 | # name to check out its doc!). We can ignore some tests as we see fit here, 23 | # or add marks, such as a skip mark. 24 | 25 | out_items = [] 26 | for item in items: 27 | # The needs_cuda mark will exist if the test was explicitly decorated 28 | # with the @needs_cuda decorator. It will also exist if it was 29 | # parametrized with a parameter that has the mark: for example if a test 30 | # is parametrized with 31 | # @pytest.mark.parametrize('device', all_supported_devices()) 32 | # the "instances" of the tests where device == 'cuda' will have the 33 | # 'needs_cuda' mark, and the ones with device == 'cpu' won't have the 34 | # mark. 35 | needs_cuda = item.get_closest_marker("needs_cuda") is not None 36 | needs_ffmpeg_cli = item.get_closest_marker("needs_ffmpeg_cli") is not None 37 | has_skip_marker = item.get_closest_marker("skip") is not None 38 | has_skipif_marker = item.get_closest_marker("skipif") is not None 39 | 40 | if in_fbcode(): 41 | # fbcode doesn't like skipping tests, so instead we just don't collect the test 42 | # so that they don't even "exist", hence the continue statements. 43 | if needs_ffmpeg_cli or has_skip_marker or has_skipif_marker: 44 | continue 45 | 46 | if ( 47 | needs_cuda 48 | and not torch.cuda.is_available() 49 | and os.environ.get("FAIL_WITHOUT_CUDA") is None 50 | ): 51 | # We skip CUDA tests on non-CUDA machines, but only if the 52 | # FAIL_WITHOUT_CUDA env var wasn't set. If it's set, the test will 53 | # typically fail with a "Unsupported device: cuda" error. This is 54 | # normal and desirable: this env var is set on CI jobs that are 55 | # supposed to run the CUDA tests, so if CUDA isn't available on 56 | # those for whatever reason, we need to know. 57 | item.add_marker(pytest.mark.skip(reason="CUDA not available.")) 58 | 59 | out_items.append(item) 60 | 61 | items[:] = out_items 62 | 63 | 64 | @pytest.fixture(autouse=True) 65 | def prevent_leaking_rng(): 66 | # Prevent each test from leaking the rng to all other test when they call 67 | # torch.manual_seed() or random.seed(). 68 | 69 | torch_rng_state = torch.get_rng_state() 70 | builtin_rng_state = random.getstate() 71 | if torch.cuda.is_available(): 72 | cuda_rng_state = torch.cuda.get_rng_state() 73 | 74 | yield 75 | 76 | torch.set_rng_state(torch_rng_state) 77 | random.setstate(builtin_rng_state) 78 | if torch.cuda.is_available(): 79 | torch.cuda.set_rng_state(cuda_rng_state) 80 | -------------------------------------------------------------------------------- /src/torchcodec/_core/Metadata.h: -------------------------------------------------------------------------------- 1 | // Copyright (c) Meta Platforms, Inc. and affiliates. 2 | // All rights reserved. 3 | // 4 | // This source code is licensed under the BSD-style license found in the 5 | // LICENSE file in the root directory of this source tree. 
6 | 7 | #pragma once 8 | 9 | #include 10 | #include 11 | #include 12 | 13 | extern "C" { 14 | #include 15 | #include 16 | #include 17 | } 18 | 19 | namespace facebook::torchcodec { 20 | 21 | enum class SeekMode { exact, approximate, custom_frame_mappings }; 22 | 23 | struct StreamMetadata { 24 | // Common (video and audio) fields derived from the AVStream. 25 | int streamIndex = -1; 26 | 27 | // See this link for what various values are available: 28 | // https://ffmpeg.org/doxygen/trunk/group__lavu__misc.html#ga9a84bba4713dfced21a1a56163be1f48 29 | AVMediaType mediaType = AVMEDIA_TYPE_UNKNOWN; 30 | 31 | std::optional codecId; 32 | std::optional codecName; 33 | std::optional durationSecondsFromHeader; 34 | std::optional beginStreamSecondsFromHeader; 35 | std::optional numFramesFromHeader; 36 | std::optional numKeyFrames; 37 | std::optional averageFpsFromHeader; 38 | std::optional bitRate; 39 | 40 | // Used as fallback in approximate mode when stream duration is unavailable. 41 | std::optional durationSecondsFromContainer; 42 | 43 | // More accurate duration, obtained by scanning the file. 44 | // These presentation timestamps are in time base. 45 | std::optional beginStreamPtsFromContent; 46 | std::optional endStreamPtsFromContent; 47 | 48 | // These presentation timestamps are in seconds. 49 | std::optional beginStreamPtsSecondsFromContent; 50 | std::optional endStreamPtsSecondsFromContent; 51 | 52 | // This can be useful for index-based seeking. 53 | std::optional numFramesFromContent; 54 | 55 | // Video-only fields 56 | std::optional width; 57 | std::optional height; 58 | std::optional sampleAspectRatio; 59 | 60 | // Audio-only fields 61 | std::optional sampleRate; 62 | std::optional numChannels; 63 | std::optional sampleFormat; 64 | 65 | // Computed methods with fallback logic 66 | std::optional getDurationSeconds(SeekMode seekMode) const; 67 | double getBeginStreamSeconds(SeekMode seekMode) const; 68 | std::optional getEndStreamSeconds(SeekMode seekMode) const; 69 | std::optional getNumFrames(SeekMode seekMode) const; 70 | std::optional getAverageFps(SeekMode seekMode) const; 71 | }; 72 | 73 | struct ContainerMetadata { 74 | std::vector allStreamMetadata; 75 | int numAudioStreams = 0; 76 | int numVideoStreams = 0; 77 | 78 | // Note that this is the container-level duration, which is usually the max 79 | // of all stream durations available in the container. 80 | std::optional durationSecondsFromHeader; 81 | 82 | // Total BitRate level information at the container level in bit/s 83 | std::optional bitRate; 84 | 85 | // If set, this is the index to the default audio stream. 86 | std::optional bestAudioStreamIndex; 87 | 88 | // If set, this is the index to the default video stream. 
89 | std::optional bestVideoStreamIndex; 90 | }; 91 | 92 | } // namespace facebook::torchcodec 93 | -------------------------------------------------------------------------------- /.github/workflows/reference_resources.yaml: -------------------------------------------------------------------------------- 1 | name: Reference resource generation tests 2 | 3 | on: 4 | workflow_dispatch: 5 | pull_request: 6 | paths: 7 | - test/generate_reference_resources.py 8 | - .github/workflows/reference_resources.yaml # self reference 9 | schedule: 10 | - cron: '0 0 * * 0' # on sunday 11 | 12 | defaults: 13 | run: 14 | shell: bash -l -eo pipefail {0} 15 | 16 | jobs: 17 | generate-matrix: 18 | uses: pytorch/test-infra/.github/workflows/generate_binary_build_matrix.yml@main 19 | with: 20 | package-type: wheel 21 | os: linux 22 | test-infra-repository: pytorch/test-infra 23 | test-infra-ref: main 24 | with-xpu: disable 25 | with-rocm: disable 26 | with-cuda: disable 27 | build-python-only: "disable" 28 | 29 | build: 30 | needs: generate-matrix 31 | strategy: 32 | fail-fast: false 33 | name: Build and Upload Linux wheel 34 | uses: pytorch/test-infra/.github/workflows/build_wheels_linux.yml@main 35 | with: 36 | repository: meta-pytorch/torchcodec 37 | ref: "" 38 | test-infra-repository: pytorch/test-infra 39 | test-infra-ref: main 40 | build-matrix: ${{ needs.generate-matrix.outputs.matrix }} 41 | pre-script: packaging/pre_build_script.sh 42 | post-script: packaging/post_build_script.sh 43 | smoke-test-script: packaging/fake_smoke_test.py 44 | package-name: torchcodec 45 | trigger-event: ${{ github.event_name }} 46 | build-platform: "python-build-package" 47 | build-command: "BUILD_AGAINST_ALL_FFMPEG_FROM_S3=1 python -m build --wheel -vvv --no-isolation" 48 | 49 | test-reference-resource-generation: 50 | needs: build 51 | runs-on: ubuntu-latest 52 | strategy: 53 | fail-fast: false 54 | matrix: 55 | python-version: ['3.10'] 56 | ffmpeg-version-for-tests: ['4.4.2', '5.1.2', '6.1.1', '7.0.1'] 57 | steps: 58 | - uses: actions/download-artifact@v4 59 | with: 60 | name: meta-pytorch_torchcodec__${{ matrix.python-version }}_cpu_x86_64 61 | path: pytorch/torchcodec/dist/ 62 | - name: Setup conda env 63 | uses: conda-incubator/setup-miniconda@v2 64 | with: 65 | auto-update-conda: true 66 | miniconda-version: "latest" 67 | activate-environment: test 68 | python-version: ${{ matrix.python-version }} 69 | 70 | - name: Install ffmpeg 71 | run: | 72 | conda install "ffmpeg=${{ matrix.ffmpeg-version-for-tests }}" -c conda-forge 73 | ffmpeg -version 74 | 75 | - name: Update pip 76 | run: python -m pip install --upgrade pip 77 | 78 | - name: Install generation dependencies 79 | run: | 80 | # Note that we're installing stable - this is for running a script where we're a normal PyTorch 81 | # user, not for building TorchCodec.
82 | python -m pip install torch --index-url https://download.pytorch.org/whl/cpu 83 | python -m pip install numpy pillow pytest 84 | 85 | - name: Install torchcodec from the wheel 86 | run: | 87 | wheel_path=`find pytorch/torchcodec/dist -type f -name "*.whl"` 88 | echo Installing $wheel_path 89 | python -m pip install $wheel_path -vvv 90 | - name: Check out repo 91 | uses: actions/checkout@v3 92 | 93 | - name: Run generation reference resources 94 | run: | 95 | python -m test.generate_reference_resources 96 | -------------------------------------------------------------------------------- /src/torchcodec/_core/NVDECCache.h: -------------------------------------------------------------------------------- 1 | // Copyright (c) Meta Platforms, Inc. and affiliates. 2 | // All rights reserved. 3 | // 4 | // This source code is licensed under the BSD-style license found in the 5 | // LICENSE file in the root directory of this source tree. 6 | 7 | #pragma once 8 | 9 | #include 10 | #include 11 | #include 12 | 13 | #include 14 | #include 15 | 16 | #include "NVCUVIDRuntimeLoader.h" 17 | #include "nvcuvid_include/cuviddec.h" 18 | #include "nvcuvid_include/nvcuvid.h" 19 | 20 | namespace facebook::torchcodec { 21 | 22 | // This file implements a cache for NVDEC decoders. 23 | // TODONVDEC P3: Consider merging this with Cache.h. The main difference is that 24 | // this NVDEC Cache involves a cache key (the decoder parameters). 25 | 26 | struct CUvideoDecoderDeleter { 27 | void operator()(CUvideodecoder* decoderPtr) const { 28 | if (decoderPtr && *decoderPtr) { 29 | cuvidDestroyDecoder(*decoderPtr); 30 | delete decoderPtr; 31 | } 32 | } 33 | }; 34 | 35 | using UniqueCUvideodecoder = 36 | std::unique_ptr; 37 | 38 | // A per-device cache for NVDEC decoders. There is one instance of this class 39 | // per GPU device, and it is accessed through the static getCache() method. 40 | class NVDECCache { 41 | public: 42 | static NVDECCache& getCache(const torch::Device& device); 43 | 44 | // Get decoder from cache - returns nullptr if none available 45 | UniqueCUvideodecoder getDecoder(CUVIDEOFORMAT* videoFormat); 46 | 47 | // Return decoder to cache - returns true if added to cache 48 | bool returnDecoder(CUVIDEOFORMAT* videoFormat, UniqueCUvideodecoder decoder); 49 | 50 | private: 51 | // Cache key struct: a decoder can be reused and taken from the cache only if 52 | // all these parameters match. 
53 | struct CacheKey { 54 | cudaVideoCodec codecType; 55 | uint32_t width; 56 | uint32_t height; 57 | cudaVideoChromaFormat chromaFormat; 58 | uint32_t bitDepthLumaMinus8; 59 | uint8_t numDecodeSurfaces; 60 | 61 | CacheKey() = delete; 62 | 63 | explicit CacheKey(CUVIDEOFORMAT* videoFormat) 64 | : codecType(videoFormat->codec), 65 | width(videoFormat->coded_width), 66 | height(videoFormat->coded_height), 67 | chromaFormat(videoFormat->chroma_format), 68 | bitDepthLumaMinus8(videoFormat->bit_depth_luma_minus8), 69 | numDecodeSurfaces(videoFormat->min_num_decode_surfaces) {} 70 | 71 | CacheKey(const CacheKey&) = default; 72 | CacheKey& operator=(const CacheKey&) = default; 73 | 74 | bool operator<(const CacheKey& other) const { 75 | return std::tie( 76 | codecType, 77 | width, 78 | height, 79 | chromaFormat, 80 | bitDepthLumaMinus8, 81 | numDecodeSurfaces) < 82 | std::tie( 83 | other.codecType, 84 | other.width, 85 | other.height, 86 | other.chromaFormat, 87 | other.bitDepthLumaMinus8, 88 | other.numDecodeSurfaces); 89 | } 90 | }; 91 | 92 | NVDECCache() = default; 93 | ~NVDECCache() = default; 94 | 95 | std::map cache_; 96 | std::mutex cacheLock_; 97 | 98 | // Max number of cached decoders, per device 99 | static constexpr int MAX_CACHE_SIZE = 20; 100 | }; 101 | 102 | } // namespace facebook::torchcodec 103 | -------------------------------------------------------------------------------- /CODE_OF_CONDUCT.md: -------------------------------------------------------------------------------- 1 | # Code of Conduct 2 | 3 | ## Our Pledge 4 | 5 | In the interest of fostering an open and welcoming environment, we as 6 | contributors and maintainers pledge to make participation in our project and 7 | our community a harassment-free experience for everyone, regardless of age, body 8 | size, disability, ethnicity, sex characteristics, gender identity and expression, 9 | level of experience, education, socio-economic status, nationality, personal 10 | appearance, race, religion, or sexual identity and orientation. 11 | 12 | ## Our Standards 13 | 14 | Examples of behavior that contributes to creating a positive environment 15 | include: 16 | 17 | * Using welcoming and inclusive language 18 | * Being respectful of differing viewpoints and experiences 19 | * Gracefully accepting constructive criticism 20 | * Focusing on what is best for the community 21 | * Showing empathy towards other community members 22 | 23 | Examples of unacceptable behavior by participants include: 24 | 25 | * The use of sexualized language or imagery and unwelcome sexual attention or 26 | advances 27 | * Trolling, insulting/derogatory comments, and personal or political attacks 28 | * Public or private harassment 29 | * Publishing others' private information, such as a physical or electronic 30 | address, without explicit permission 31 | * Other conduct which could reasonably be considered inappropriate in a 32 | professional setting 33 | 34 | ## Our Responsibilities 35 | 36 | Project maintainers are responsible for clarifying the standards of acceptable 37 | behavior and are expected to take appropriate and fair corrective action in 38 | response to any instances of unacceptable behavior. 
39 | 40 | Project maintainers have the right and responsibility to remove, edit, or 41 | reject comments, commits, code, wiki edits, issues, and other contributions 42 | that are not aligned to this Code of Conduct, or to ban temporarily or 43 | permanently any contributor for other behaviors that they deem inappropriate, 44 | threatening, offensive, or harmful. 45 | 46 | ## Scope 47 | 48 | This Code of Conduct applies within all project spaces, and it also applies when 49 | an individual is representing the project or its community in public spaces. 50 | Examples of representing a project or community include using an official 51 | project e-mail address, posting via an official social media account, or acting 52 | as an appointed representative at an online or offline event. Representation of 53 | a project may be further defined and clarified by project maintainers. 54 | 55 | ## Enforcement 56 | 57 | Instances of abusive, harassing, or otherwise unacceptable behavior may be 58 | reported by contacting the project team at . All 59 | complaints will be reviewed and investigated and will result in a response that 60 | is deemed necessary and appropriate to the circumstances. The project team is 61 | obligated to maintain confidentiality with regard to the reporter of an incident. 62 | Further details of specific enforcement policies may be posted separately. 63 | 64 | Project maintainers who do not follow or enforce the Code of Conduct in good 65 | faith may face temporary or permanent repercussions as determined by other 66 | members of the project's leadership. 67 | 68 | ## Attribution 69 | 70 | This Code of Conduct is adapted from the [Contributor Covenant][homepage], version 1.4, 71 | available at https://www.contributor-covenant.org/version/1/4/code-of-conduct.html 72 | 73 | [homepage]: https://www.contributor-covenant.org 74 | 75 | For answers to common questions about this code of conduct, see 76 | https://www.contributor-covenant.org/faq 77 | -------------------------------------------------------------------------------- /src/torchcodec/_core/AVIOFileLikeContext.cpp: -------------------------------------------------------------------------------- 1 | // Copyright (c) Meta Platforms, Inc. and affiliates. 2 | // All rights reserved. 3 | // 4 | // This source code is licensed under the BSD-style license found in the 5 | // LICENSE file in the root directory of this source tree. 6 | 7 | #include "AVIOFileLikeContext.h" 8 | #include 9 | 10 | namespace facebook::torchcodec { 11 | 12 | AVIOFileLikeContext::AVIOFileLikeContext( 13 | const py::object& fileLike, 14 | bool isForWriting) 15 | : fileLike_{UniquePyObject(new py::object(fileLike))} { 16 | { 17 | // TODO: Is it necessary to acquire the GIL here? Is it maybe even 18 | // harmful? At the moment, this is only called from within a pybind 19 | // function, and pybind guarantees we have the GIL. 
20 | py::gil_scoped_acquire gil; 21 | 22 | if (isForWriting) { 23 | TORCH_CHECK( 24 | py::hasattr(fileLike, "write"), 25 | "File like object must implement a write method for writing."); 26 | } else { 27 | TORCH_CHECK( 28 | py::hasattr(fileLike, "read"), 29 | "File like object must implement a read method for reading."); 30 | } 31 | 32 | TORCH_CHECK( 33 | py::hasattr(fileLike, "seek"), 34 | "File like object must implement a seek method."); 35 | } 36 | createAVIOContext(&read, &write, &seek, &fileLike_, isForWriting); 37 | } 38 | 39 | int AVIOFileLikeContext::read(void* opaque, uint8_t* buf, int buf_size) { 40 | auto fileLike = static_cast(opaque); 41 | 42 | // Note that we acquire the GIL outside of the loop. This is likely more 43 | // efficient than releasing and acquiring it each loop iteration. 44 | py::gil_scoped_acquire gil; 45 | 46 | int totalNumRead = 0; 47 | while (totalNumRead < buf_size) { 48 | int request = buf_size - totalNumRead; 49 | 50 | // The Python method returns the actual bytes, which we access through the 51 | // py::bytes wrapper. That wrapper, however, does not provide us access to 52 | // the underlying data pointer, which we need for the memcpy below. So we 53 | // convert the bytes to a string_view to get access to the data pointer. 54 | // Because it's a view and not a copy, it should be cheap. 55 | auto bytesRead = static_cast((*fileLike)->attr("read")(request)); 56 | auto bytesView = static_cast(bytesRead); 57 | 58 | int numBytesRead = static_cast(bytesView.size()); 59 | if (numBytesRead == 0) { 60 | break; 61 | } 62 | 63 | TORCH_CHECK( 64 | numBytesRead <= request, 65 | "Requested up to ", 66 | request, 67 | " bytes, but received ", 68 | numBytesRead, 69 | " bytes. The given object does not conform to the read protocol of a file object."); 70 | 71 | std::memcpy(buf, bytesView.data(), numBytesRead); 72 | buf += numBytesRead; 73 | totalNumRead += numBytesRead; 74 | } 75 | 76 | return totalNumRead == 0 ? AVERROR_EOF : totalNumRead; 77 | } 78 | 79 | int64_t AVIOFileLikeContext::seek(void* opaque, int64_t offset, int whence) { 80 | // We do not know the file size. 81 | if (whence == AVSEEK_SIZE) { 82 | return AVERROR(EIO); 83 | } 84 | 85 | auto fileLike = static_cast(opaque); 86 | py::gil_scoped_acquire gil; 87 | return py::cast((*fileLike)->attr("seek")(offset, whence)); 88 | } 89 | 90 | int AVIOFileLikeContext::write(void* opaque, const uint8_t* buf, int buf_size) { 91 | auto fileLike = static_cast(opaque); 92 | py::gil_scoped_acquire gil; 93 | py::bytes bytes_obj(reinterpret_cast(buf), buf_size); 94 | 95 | return py::cast((*fileLike)->attr("write")(bytes_obj)); 96 | } 97 | 98 | } // namespace facebook::torchcodec 99 | -------------------------------------------------------------------------------- /examples/encoding/audio_encoding.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | # All rights reserved. 3 | # 4 | # This source code is licensed under the BSD-style license found in the 5 | # LICENSE file in the root directory of this source tree. 6 | 7 | """ 8 | ======================================== 9 | Encoding audio samples with AudioEncoder 10 | ======================================== 11 | 12 | In this example, we'll learn how to encode audio samples to a file or to raw 13 | bytes using the :class:`~torchcodec.encoders.AudioEncoder` class. 14 | """ 15 | 16 | # %% 17 | # Let's first generate some samples to be encoded.
The data to be encoded could 18 | # also just come from an :class:`~torchcodec.decoders.AudioDecoder`! 19 | import torch 20 | from IPython.display import Audio as play_audio 21 | 22 | 23 | def make_sinewave() -> tuple[torch.Tensor, int]: 24 | freq_A = 440 # Hz 25 | sample_rate = 16000 # Hz 26 | duration_seconds = 3 # seconds 27 | t = torch.linspace(0, duration_seconds, int(sample_rate * duration_seconds), dtype=torch.float32) 28 | return torch.sin(2 * torch.pi * freq_A * t), sample_rate 29 | 30 | 31 | samples, sample_rate = make_sinewave() 32 | 33 | print(f"Encoding samples with {samples.shape = } and {sample_rate = }") 34 | play_audio(samples, rate=sample_rate) 35 | 36 | # %% 37 | # We first instantiate an :class:`~torchcodec.encoders.AudioEncoder`. We pass it 38 | # the samples to be encoded. The samples must be a 2D tensors of shape 39 | # ``(num_channels, num_samples)``, or in this case, a 1D tensor where 40 | # ``num_channels`` is assumed to be 1. The values must be float values 41 | # normalized in ``[-1, 1]``: this is also what the 42 | # :class:`~torchcodec.decoders.AudioDecoder` would return. 43 | # 44 | # .. note:: 45 | # 46 | # The ``sample_rate`` parameter corresponds to the sample rate of the 47 | # *input*, not the desired encoded sample rate. 48 | from torchcodec.encoders import AudioEncoder 49 | 50 | encoder = AudioEncoder(samples=samples, sample_rate=sample_rate) 51 | 52 | 53 | # %% 54 | # :class:`~torchcodec.encoders.AudioEncoder` supports encoding samples into a 55 | # file via the :meth:`~torchcodec.encoders.AudioEncoder.to_file` method, or to 56 | # raw bytes via :meth:`~torchcodec.encoders.AudioEncoder.to_tensor`. For the 57 | # purpose of this tutorial we'll use 58 | # :meth:`~torchcodec.encoders.AudioEncoder.to_tensor`, so that we can easily 59 | # re-decode the encoded samples and check their properies. The 60 | # :meth:`~torchcodec.encoders.AudioEncoder.to_file` method works very similarly. 61 | 62 | encoded_samples = encoder.to_tensor(format="mp3") 63 | print(f"{encoded_samples.shape = }, {encoded_samples.dtype = }") 64 | 65 | 66 | # %% 67 | # That's it! 68 | # 69 | # Now that we have our encoded data, we can decode it back, to make sure it 70 | # looks and sounds as expected: 71 | from torchcodec.decoders import AudioDecoder 72 | 73 | samples_back = AudioDecoder(encoded_samples).get_all_samples() 74 | 75 | print(samples_back) 76 | play_audio(samples_back.data, rate=samples_back.sample_rate) 77 | 78 | # %% 79 | # The encoder supports some encoding options that allow you to change how to 80 | # data is encoded. For example, we can decide to encode our mono data (1 81 | # channel) into stereo data (2 channels), and to specify an output sample rate: 82 | 83 | desired_sample_rate = 32000 84 | encoded_samples = encoder.to_tensor(format="wav", num_channels=2, sample_rate=desired_sample_rate) 85 | 86 | stereo_samples_back = AudioDecoder(encoded_samples).get_all_samples() 87 | 88 | print(stereo_samples_back) 89 | play_audio(stereo_samples_back.data, rate=desired_sample_rate) 90 | 91 | # %% 92 | # Check the docstring of the encoding methods to learn about the different 93 | # encoding options. 94 | -------------------------------------------------------------------------------- /examples/decoding/audio_decoding.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | # All rights reserved. 
3 | # 4 | # This source code is licensed under the BSD-style license found in the 5 | # LICENSE file in the root directory of this source tree. 6 | 7 | """ 8 | ======================================== 9 | Decoding audio streams with AudioDecoder 10 | ======================================== 11 | 12 | In this example, we'll learn how to decode an audio file using the 13 | :class:`~torchcodec.decoders.AudioDecoder` class. 14 | """ 15 | 16 | # %% 17 | # First, a bit of boilerplate: we'll download an audio file from the web and 18 | # define an audio playing utility. You can ignore that part and jump right 19 | # below to :ref:`creating_decoder_audio`. 20 | import requests 21 | from IPython.display import Audio 22 | 23 | 24 | def play_audio(samples): 25 | return Audio(samples.data, rate=samples.sample_rate) 26 | 27 | 28 | # Audio source is CC0: https://opengameart.org/content/town-theme-rpg 29 | # Attribution: cynicmusic.com pixelsphere.org 30 | url = "https://opengameart.org/sites/default/files/TownTheme.mp3" 31 | response = requests.get(url, headers={"User-Agent": ""}) 32 | if response.status_code != 200: 33 | raise RuntimeError(f"Failed to download audio. {response.status_code = }.") 34 | 35 | raw_audio_bytes = response.content 36 | 37 | # %% 38 | # .. _creating_decoder_audio: 39 | # 40 | # Creating a decoder 41 | # ------------------ 42 | # 43 | # We can now create a decoder from the raw (encoded) audio bytes. You can of 44 | # course use a local audio file and pass the path as input. You can also decode 45 | # audio streams from videos! 46 | 47 | from torchcodec.decoders import AudioDecoder 48 | 49 | decoder = AudioDecoder(raw_audio_bytes) 50 | 51 | # %% 52 | # The audio has not yet been decoded by the decoder, but we already have access to 53 | # some metadata via the ``metadata`` attribute, which is an 54 | # :class:`~torchcodec.decoders.AudioStreamMetadata` object. 55 | print(decoder.metadata) 56 | 57 | # %% 58 | # Decoding samples 59 | # ---------------- 60 | # 61 | # To get decoded samples, we just need to call the 62 | # :meth:`~torchcodec.decoders.AudioDecoder.get_all_samples` method, 63 | # which returns an :class:`~torchcodec.AudioSamples` object: 64 | 65 | samples = decoder.get_all_samples() 66 | 67 | print(samples) 68 | play_audio(samples) 69 | 70 | # %% 71 | # The ``.data`` field is a tensor of shape ``(num_channels, num_samples)`` and 72 | # of float dtype with values in [-1, 1]. 73 | # 74 | # The ``.pts_seconds`` field indicates the starting time of the output samples. 75 | # Here it's 0.025 seconds, even though we asked for samples starting from 0. Not 76 | # all streams start exactly at 0! This is not a bug in TorchCodec, this is a 77 | # property of the file that was defined when it was encoded. 78 | # 79 | # Specifying a range 80 | # ------------------ 81 | # 82 | # If we don't need all the samples, we can use 83 | # :meth:`~torchcodec.decoders.AudioDecoder.get_samples_played_in_range` to 84 | # decode the samples within a custom range: 85 | 86 | samples = decoder.get_samples_played_in_range(start_seconds=10, stop_seconds=70) 87 | 88 | print(samples) 89 | play_audio(samples) 90 | 91 | # %% 92 | # Custom sample rate 93 | # ------------------ 94 | # 95 | # We can also decode the samples into a desired sample rate using the 96 | # ``sample_rate`` parameter of :class:`~torchcodec.decoders.AudioDecoder`.
The 97 | # ouput will sound similar, but note that the number of samples greatly 98 | # decreased: 99 | 100 | decoder = AudioDecoder(raw_audio_bytes, sample_rate=16_000) 101 | samples = decoder.get_all_samples() 102 | 103 | print(samples) 104 | play_audio(samples) 105 | -------------------------------------------------------------------------------- /.clang-format: -------------------------------------------------------------------------------- 1 | --- 2 | AccessModifierOffset: -1 3 | AlignAfterOpenBracket: AlwaysBreak 4 | AlignConsecutiveMacros: false 5 | AlignConsecutiveAssignments: false 6 | AlignConsecutiveBitFields: false 7 | AlignConsecutiveDeclarations: false 8 | AlignEscapedNewlines: Left 9 | AlignOperands: DontAlign 10 | AlignTrailingComments: false 11 | AllowAllArgumentsOnNextLine: true 12 | AllowAllConstructorInitializersOnNextLine: true 13 | AllowAllParametersOfDeclarationOnNextLine: false 14 | AllowShortEnumsOnASingleLine: true 15 | AllowShortBlocksOnASingleLine: Never 16 | AllowShortCaseLabelsOnASingleLine: false 17 | AllowShortFunctionsOnASingleLine: Empty 18 | AllowShortLambdasOnASingleLine: All 19 | AllowShortIfStatementsOnASingleLine: Never 20 | AllowShortLoopsOnASingleLine: false 21 | AlwaysBreakAfterReturnType: None 22 | AlwaysBreakBeforeMultilineStrings: true 23 | AlwaysBreakTemplateDeclarations: Yes 24 | BinPackArguments: false 25 | BinPackParameters: false 26 | BreakBeforeBinaryOperators: None 27 | BreakBeforeBraces: Attach 28 | BreakInheritanceList: BeforeColon 29 | BreakBeforeTernaryOperators: true 30 | BreakConstructorInitializers: BeforeColon 31 | BreakAfterJavaFieldAnnotations: false 32 | BreakStringLiterals: false 33 | ColumnLimit: 80 34 | CommentPragmas: '^ IWYU pragma:' 35 | CompactNamespaces: false 36 | ConstructorInitializerAllOnOneLineOrOnePerLine: true 37 | ConstructorInitializerIndentWidth: 4 38 | ContinuationIndentWidth: 4 39 | Cpp11BracedListStyle: true 40 | DeriveLineEnding: true 41 | DerivePointerAlignment: false 42 | DisableFormat: false 43 | FixNamespaceComments: true 44 | ForEachMacros: 45 | - FOR_EACH 46 | - FOR_EACH_R 47 | - FOR_EACH_RANGE 48 | IncludeBlocks: Preserve 49 | IncludeCategories: 50 | - Regex: '^<.*\.h(pp)?>' 51 | Priority: 1 52 | - Regex: '^<.*' 53 | Priority: 2 54 | - Regex: '.*' 55 | Priority: 3 56 | IndentCaseLabels: true 57 | IndentCaseBlocks: false 58 | IndentGotoLabels: true 59 | IndentPPDirectives: None 60 | IndentExternBlock: AfterExternBlock 61 | IndentWidth: 2 62 | IndentWrappedFunctionNames: false 63 | InsertTrailingCommas: None 64 | JavaScriptQuotes: Leave 65 | JavaScriptWrapImports: true 66 | KeepEmptyLinesAtTheStartOfBlocks: false 67 | MacroBlockBegin: '' 68 | MacroBlockEnd: '' 69 | MaxEmptyLinesToKeep: 1 70 | NamespaceIndentation: None 71 | ObjCBinPackProtocolList: Auto 72 | ObjCBlockIndentWidth: 2 73 | ObjCBreakBeforeNestedBlockParam: true 74 | ObjCSpaceAfterProperty: false 75 | ObjCSpaceBeforeProtocolList: false 76 | PenaltyBreakAssignment: 2 77 | PenaltyBreakBeforeFirstCallParameter: 1 78 | PenaltyBreakComment: 300 79 | PenaltyBreakFirstLessLess: 120 80 | PenaltyBreakString: 1000 81 | PenaltyBreakTemplateDeclaration: 10 82 | PenaltyExcessCharacter: 1000000 83 | PenaltyReturnTypeOnItsOwnLine: 200 84 | PointerAlignment: Left 85 | ReflowComments: true 86 | SeparateDefinitionBlocks: Always 87 | SortIncludes: true 88 | SortUsingDeclarations: true 89 | SpaceAfterCStyleCast: false 90 | SpaceAfterLogicalNot: false 91 | SpaceAfterTemplateKeyword: true 92 | SpaceBeforeAssignmentOperators: true 93 | 
SpaceBeforeCpp11BracedList: false 94 | SpaceBeforeCtorInitializerColon: true 95 | SpaceBeforeInheritanceColon: true 96 | SpaceBeforeParens: ControlStatements 97 | SpaceBeforeRangeBasedForLoopColon: true 98 | SpaceInEmptyBlock: false 99 | SpaceInEmptyParentheses: false 100 | SpacesBeforeTrailingComments: 1 101 | SpacesInAngles: false 102 | SpacesInConditionalStatement: false 103 | SpacesInContainerLiterals: true 104 | SpacesInCStyleCastParentheses: false 105 | SpacesInParentheses: false 106 | SpacesInSquareBrackets: false 107 | SpaceBeforeSquareBrackets: false 108 | Standard: Latest 109 | TabWidth: 8 110 | UseCRLF: false 111 | UseTab: Never 112 | ... 113 | -------------------------------------------------------------------------------- /src/torchcodec/_core/DeviceInterface.cpp: -------------------------------------------------------------------------------- 1 | // Copyright (c) Meta Platforms, Inc. and affiliates. 2 | // All rights reserved. 3 | // 4 | // This source code is licensed under the BSD-style license found in the 5 | // LICENSE file in the root directory of this source tree. 6 | 7 | #include "DeviceInterface.h" 8 | #include 9 | #include 10 | 11 | namespace facebook::torchcodec { 12 | 13 | namespace { 14 | using DeviceInterfaceMap = 15 | std::map; 16 | static std::mutex g_interface_mutex; 17 | 18 | DeviceInterfaceMap& getDeviceMap() { 19 | static DeviceInterfaceMap deviceMap; 20 | return deviceMap; 21 | } 22 | 23 | std::string getDeviceType(const std::string& device) { 24 | size_t pos = device.find(':'); 25 | if (pos == std::string::npos) { 26 | return device; 27 | } 28 | return device.substr(0, pos); 29 | } 30 | 31 | } // namespace 32 | 33 | bool registerDeviceInterface( 34 | const DeviceInterfaceKey& key, 35 | CreateDeviceInterfaceFn createInterface) { 36 | std::scoped_lock lock(g_interface_mutex); 37 | DeviceInterfaceMap& deviceMap = getDeviceMap(); 38 | 39 | TORCH_CHECK( 40 | deviceMap.find(key) == deviceMap.end(), 41 | "Device interface already registered for device type ", 42 | key.deviceType, 43 | " variant '", 44 | key.variant, 45 | "'"); 46 | deviceMap.insert({key, createInterface}); 47 | 48 | return true; 49 | } 50 | 51 | void validateDeviceInterface( 52 | const std::string device, 53 | const std::string variant) { 54 | std::scoped_lock lock(g_interface_mutex); 55 | std::string deviceType = getDeviceType(device); 56 | 57 | DeviceInterfaceMap& deviceMap = getDeviceMap(); 58 | 59 | // Find device interface that matches device type and variant 60 | torch::DeviceType deviceTypeEnum = torch::Device(deviceType).type(); 61 | 62 | auto deviceInterface = std::find_if( 63 | deviceMap.begin(), 64 | deviceMap.end(), 65 | [&](const std::pair& arg) { 66 | return arg.first.deviceType == deviceTypeEnum && 67 | arg.first.variant == variant; 68 | }); 69 | 70 | TORCH_CHECK( 71 | deviceInterface != deviceMap.end(), 72 | "Unsupported device: ", 73 | device, 74 | " (device type: ", 75 | deviceType, 76 | ", variant: ", 77 | variant, 78 | ")"); 79 | } 80 | 81 | std::unique_ptr createDeviceInterface( 82 | const torch::Device& device, 83 | const std::string_view variant) { 84 | DeviceInterfaceKey key(device.type(), variant); 85 | std::scoped_lock lock(g_interface_mutex); 86 | DeviceInterfaceMap& deviceMap = getDeviceMap(); 87 | 88 | auto it = deviceMap.find(key); 89 | if (it != deviceMap.end()) { 90 | return std::unique_ptr(it->second(device)); 91 | } 92 | 93 | TORCH_CHECK( 94 | false, 95 | "No device interface found for device type: ", 96 | device.type(), 97 | " variant: '", 98 | variant, 99 | 
"'"); 100 | } 101 | 102 | torch::Tensor rgbAVFrameToTensor(const UniqueAVFrame& avFrame) { 103 | TORCH_CHECK_EQ(avFrame->format, AV_PIX_FMT_RGB24); 104 | 105 | int height = avFrame->height; 106 | int width = avFrame->width; 107 | std::vector shape = {height, width, 3}; 108 | std::vector strides = {avFrame->linesize[0], 3, 1}; 109 | AVFrame* avFrameClone = av_frame_clone(avFrame.get()); 110 | auto deleter = [avFrameClone](void*) { 111 | UniqueAVFrame avFrameToDelete(avFrameClone); 112 | }; 113 | return torch::from_blob( 114 | avFrameClone->data[0], shape, strides, deleter, {torch::kUInt8}); 115 | } 116 | 117 | } // namespace facebook::torchcodec 118 | -------------------------------------------------------------------------------- /src/torchcodec/_core/Transform.cpp: -------------------------------------------------------------------------------- 1 | // Copyright (c) Meta Platforms, Inc. and affiliates. 2 | // All rights reserved. 3 | // 4 | // This source code is licensed under the BSD-style license found in the 5 | // LICENSE file in the root directory of this source tree. 6 | 7 | #include "Transform.h" 8 | #include 9 | #include "FFMPEGCommon.h" 10 | 11 | namespace facebook::torchcodec { 12 | 13 | namespace { 14 | 15 | std::string toFilterGraphInterpolation( 16 | ResizeTransform::InterpolationMode mode) { 17 | switch (mode) { 18 | case ResizeTransform::InterpolationMode::BILINEAR: 19 | return "bilinear"; 20 | default: 21 | TORCH_CHECK( 22 | false, 23 | "Unknown interpolation mode: " + 24 | std::to_string(static_cast(mode))); 25 | } 26 | } 27 | 28 | } // namespace 29 | 30 | std::string ResizeTransform::getFilterGraphCpu() const { 31 | return "scale=" + std::to_string(outputDims_.width) + ":" + 32 | std::to_string(outputDims_.height) + 33 | ":flags=" + toFilterGraphInterpolation(interpolationMode_); 34 | } 35 | 36 | std::optional ResizeTransform::getOutputFrameDims() const { 37 | return outputDims_; 38 | } 39 | 40 | CropTransform::CropTransform(const FrameDims& dims) : outputDims_(dims) {} 41 | 42 | CropTransform::CropTransform(const FrameDims& dims, int x, int y) 43 | : outputDims_(dims), x_(x), y_(y) { 44 | TORCH_CHECK(x_ >= 0, "Crop x position must be >= 0, got: ", x_); 45 | TORCH_CHECK(y_ >= 0, "Crop y position must be >= 0, got: ", y_); 46 | } 47 | 48 | std::string CropTransform::getFilterGraphCpu() const { 49 | // For the FFmpeg filter crop, if the x and y coordinates are left 50 | // unspecified, it defaults to a center crop. 51 | std::string coordinates = x_.has_value() 52 | ? 
(":" + std::to_string(x_.value()) + ":" + std::to_string(y_.value())) 53 | : ""; 54 | return "crop=" + std::to_string(outputDims_.width) + ":" + 55 | std::to_string(outputDims_.height) + coordinates + ":exact=1"; 56 | } 57 | 58 | std::optional CropTransform::getOutputFrameDims() const { 59 | return outputDims_; 60 | } 61 | 62 | void CropTransform::validate(const FrameDims& inputDims) const { 63 | TORCH_CHECK( 64 | outputDims_.height <= inputDims.height, 65 | "Crop output height (", 66 | outputDims_.height, 67 | ") is greater than input height (", 68 | inputDims.height, 69 | ")"); 70 | TORCH_CHECK( 71 | outputDims_.width <= inputDims.width, 72 | "Crop output width (", 73 | outputDims_.width, 74 | ") is greater than input width (", 75 | inputDims.width, 76 | ")"); 77 | TORCH_CHECK( 78 | x_.has_value() == y_.has_value(), 79 | "Crop x and y values must be both set or both unset"); 80 | if (x_.has_value()) { 81 | TORCH_CHECK( 82 | x_.value() <= inputDims.width, 83 | "Crop x start position, ", 84 | x_.value(), 85 | ", out of bounds of input width, ", 86 | inputDims.width); 87 | TORCH_CHECK( 88 | x_.value() + outputDims_.width <= inputDims.width, 89 | "Crop x end position, ", 90 | x_.value() + outputDims_.width, 91 | ", out of bounds of input width ", 92 | inputDims.width); 93 | TORCH_CHECK( 94 | y_.value() <= inputDims.height, 95 | "Crop y start position, ", 96 | y_.value(), 97 | ", out of bounds of input height, ", 98 | inputDims.height); 99 | TORCH_CHECK( 100 | y_.value() + outputDims_.height <= inputDims.height, 101 | "Crop y end position, ", 102 | y_.value() + outputDims_.height, 103 | ", out of bounds of input height ", 104 | inputDims.height); 105 | } 106 | } 107 | 108 | } // namespace facebook::torchcodec 109 | -------------------------------------------------------------------------------- /.github/workflows/cpp_tests.yaml: -------------------------------------------------------------------------------- 1 | name: CPP tests 2 | 3 | on: 4 | push: 5 | branches: [ main ] 6 | pull_request: 7 | 8 | concurrency: 9 | group: unit-test${{ github.workflow }}-${{ github.ref == 'refs/heads/main' && github.run_number || github.ref }} 10 | cancel-in-progress: true 11 | 12 | defaults: 13 | run: 14 | shell: bash -l -eo pipefail {0} 15 | 16 | jobs: 17 | Cpp-tests: 18 | runs-on: ubuntu-latest 19 | strategy: 20 | fail-fast: false 21 | matrix: 22 | ffmpeg-version-for-tests: ['4.4.2', '5.1.2', '6.1.1', '7.0.1'] 23 | steps: 24 | - name: Check out repo 25 | uses: actions/checkout@v3 26 | - name: Setup conda env 27 | uses: conda-incubator/setup-miniconda@v3 28 | with: 29 | auto-update-conda: true 30 | # Using miniforge instead of miniconda ensures that the default 31 | # conda channel is conda-forge instead of main/default. This ensures 32 | # ABI consistency between dependencies: 33 | # https://conda-forge.org/docs/user/transitioning_from_defaults/ 34 | miniforge-version: latest 35 | activate-environment: test 36 | python-version: '3.12' 37 | - name: Update pip 38 | run: python -m pip install --upgrade pip 39 | - name: Install torch dependencies 40 | run: | 41 | # If we're in a release branch or in a PR against a release branch, 42 | # we install the PyTorch RCs from the test channel. Otherwise, e.g. in 43 | # `main` or in PRs against `main`, we install the nightly builds. 
44 | # Note that the `test` RCs are 45 | if [[ (${GITHUB_EVENT_NAME} = 'pull_request' && (${GITHUB_BASE_REF} = 'release'*)) || (${GITHUB_REF} = 'refs/heads/release'*) ]]; then 46 | CHANNEL=test 47 | else 48 | CHANNEL=nightly 49 | fi 50 | python -m pip install --pre torch --index-url https://download.pytorch.org/whl/${CHANNEL}/cpu 51 | - name: Install ffmpeg, pkg-config and pybind11 52 | run: | 53 | conda install "ffmpeg=${{ matrix.ffmpeg-version-for-tests }}" pkg-config pybind11 -c conda-forge 54 | ffmpeg -version 55 | - name: Build and run C++ tests 56 | run: | 57 | # Note: we're not setting BUILD_AGAINST_ALL_FFMPEG_FROM_S3 here, so 58 | # we're building libtorchcodec against the installed FFmpeg version 59 | # (from conda-forge) instead of building against our pre-built non-GPL 60 | # FFmpeg libraries. 61 | # The reason we need this is because the C++ tests decode x264 files. 62 | # x264 support is not LGPL, os it is not supported by our 63 | # pre-built non-GPL FFmpeg libraries. And if we were to build against 64 | # those, this is also what the tests would be loading at run time, 65 | # then failing when we try to decode x264. 66 | # To remediate that, we build against the FFmpeg that we installed 67 | # from conda-forge (which is able to decode x264), and that's also 68 | # what gets loaded at run time. 69 | # The Python tests are also decoding x264 files, and are built against 70 | # our non-GPL FFmpeg. And yet they pass. This is because in Python 71 | # we're able to distinguish between build-time (non-GPL FFmpeg) and 72 | # run time (conda-forge FFmpeg). 73 | 74 | build_tests_dir="${PWD}/build_tests" 75 | mkdir $build_tests_dir 76 | pushd $build_tests_dir 77 | TORCH_PATH=$(python -c "import pathlib, torch; print(pathlib.Path(torch.__path__[0]))") 78 | Torch_DIR="${TORCH_PATH}/share/cmake/Torch" 79 | cmake .. -DTorch_DIR=$Torch_DIR -DCMAKE_BUILD_TYPE=Debug -DBUILD_TESTS=ON -DCMAKE_VERBOSE_MAKEFILE=ON 80 | cmake --build . 81 | ctest --output-on-failure 82 | popd 83 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # Contributing to TorchCodec 2 | 3 | You can contribute to this project by writing code, fixing issues or simply by 4 | using the library and reporting your feedback. 5 | 6 | Below are instructions to build TorchCodec from source, as well as the usual 7 | contribution guidelines (code formatting, testing, etc). To submit a PR, please 8 | follow the [official GitHub 9 | guidelines](https://docs.github.com/en/pull-requests/collaborating-with-pull-requests/proposing-changes-to-your-work-with-pull-requests/creating-a-pull-request-from-a-fork). 10 | 11 | ## Building TorchCodec from source 12 | 13 | ### Installing dependencies 14 | 15 | The instructions below assume you are using a conda environment, but the steps 16 | are easily adaptable to other kind of virtual environments. To build, run and 17 | test locally you will need the following dependencies: 18 | 19 | - A C++ compiler+linker. This is typically available on a baseline Linux 20 | installation already. 21 | - cmake 22 | - pkg-config 23 | - pybind11 24 | - FFmpeg 25 | - PyTorch nightly 26 | 27 | Start by installing the **nightly** build of PyTorch following the 28 | [official instructions](https://pytorch.org/get-started/locally/). 
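For example, a CPU-only nightly build can typically be installed with a command along these lines (check the official instructions for the exact variant matching your platform and CUDA setup):

```bash
pip install --pre torch --index-url https://download.pytorch.org/whl/nightly/cpu
```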
29 | 30 | Then, the easiest way to install the rest of the dependencies is to run: 31 | 32 | ```bash 33 | conda install cmake pkg-config pybind11 "ffmpeg" -c conda-forge 34 | ``` 35 | 36 | ### Clone and build 37 | 38 | To clone and install the repo, run: 39 | 40 | ```bash 41 | git clone git@github.com:pytorch/torchcodec.git 42 | # Or, using https instead of ssh: git clone https://github.com/pytorch/torchcodec.git 43 | cd torchcodec 44 | 45 | # Optional, but recommended: define a persistent build directory which speeds-up 46 | # subsequent builds. 47 | export TORCHCODEC_CMAKE_BUILD_DIR="${PWD}/build" 48 | 49 | pip install -e ".[dev]" --no-build-isolation -vv 50 | # Or, for cuda support: ENABLE_CUDA=1 pip install -e ".[dev]" --no-build-isolation -vv 51 | ``` 52 | 53 | ### Running unit tests 54 | 55 | To run python tests run: 56 | 57 | ```bash 58 | pytest 59 | ``` 60 | 61 | Some tests are marked as 'slow' and aren't run by default. You can use `pytest 62 | -m slow` to run those, or `pytest -m ""` to run all tests, slow or not. 63 | 64 | To run the C++ tests run: 65 | 66 | ```bash 67 | mkdir build 68 | cd build 69 | cmake -DCMAKE_BUILD_TYPE=Debug -DBUILD_TESTS=1 -DCMAKE_PREFIX_PATH=$(python3 -c 'import torch;print(torch.utils.cmake_prefix_path)') .. 70 | cmake --build . -- VERBOSE=1 71 | ctest --rerun-failed --output-on-failure 72 | ``` 73 | 74 | ### Code formatting and type checking 75 | 76 | We use `pre-commit` to enforce code formatting and `mypy` for type checking. 77 | Install both with 78 | 79 | ```bash 80 | pip install pre-commit mypy 81 | ``` 82 | 83 | To run pre-commit hooks before each commit, run `pre-commit install`. You may 84 | prefer to run these checks manually, in which case you can just use `pre-commit 85 | run --all-files`. 86 | 87 | For `mypy` we recommend the following command: 88 | 89 | ```bash 90 | mypy --install-types --non-interactive --config-file mypy.ini 91 | ``` 92 | 93 | ### Building the docs 94 | 95 | First install from source, then install the doc dependencies: 96 | 97 | ```bash 98 | cd docs 99 | pip install -r requirements.txt 100 | ``` 101 | 102 | Then, still from within the `docs` directory: 103 | 104 | ```bash 105 | make html 106 | ``` 107 | 108 | The built docs will be in `build/html`. Open in your browser to view them. 109 | 110 | To avoid building the examples (which execute python code and can take time) you 111 | can use `make html-noplot`. To build a subset of specific examples instead of 112 | all of them, you can use a regex like 113 | `EXAMPLES_PATTERN="plot_the_best_example*" make html`. 114 | 115 | Run `make clean` from time to time if you encounter issues. 116 | 117 | ## License 118 | 119 | By contributing to TorchCodec, you agree that your contributions will be 120 | licensed under the LICENSE file in the root directory of this source tree. 121 | 122 | Contributors are also required to 123 | [sign our Contributor License Agreement](https://code.facebook.com/cla). 124 | -------------------------------------------------------------------------------- /benchmarks/decoders/generate_readme_data.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | # All rights reserved. 3 | # 4 | # This source code is licensed under the BSD-style license found in the 5 | # LICENSE file in the root directory of this source tree. 
6 | 7 | import json 8 | import os 9 | import platform 10 | import shutil 11 | from pathlib import Path 12 | 13 | import torch 14 | 15 | from benchmark_decoders_library import ( 16 | BatchParameters, 17 | DataLoaderInspiredWorkloadParameters, 18 | generate_videos, 19 | retrieve_videos, 20 | run_benchmarks, 21 | TorchAudioDecoder, 22 | TorchCodecPublic, 23 | TorchVision, 24 | ) 25 | 26 | NASA_URL = "https://download.pytorch.org/torchaudio/tutorial-assets/stream-api/NASAs_Most_Scientifically_Complex_Space_Observatory_Requires_Precision-MP4_small.mp4" 27 | 28 | 29 | def main() -> None: 30 | """Benchmarks the performance of a few video decoders on synthetic videos""" 31 | 32 | videos_dir_path = "/tmp/torchcodec_benchmarking_videos" 33 | if not os.path.exists(videos_dir_path): 34 | shutil.rmtree(videos_dir_path, ignore_errors=True) 35 | os.makedirs(videos_dir_path) 36 | 37 | resolutions = ["1920x1080"] 38 | encodings = ["libx264"] 39 | patterns = ["mandelbrot"] 40 | fpses = [60] 41 | gop_sizes = [600] 42 | durations = [10, 120] 43 | pix_fmts = ["yuv420p"] 44 | ffmpeg_path = "ffmpeg" 45 | generate_videos( 46 | resolutions, 47 | encodings, 48 | patterns, 49 | fpses, 50 | gop_sizes, 51 | durations, 52 | pix_fmts, 53 | ffmpeg_path, 54 | videos_dir_path, 55 | ) 56 | 57 | urls_and_dest_paths = [ 58 | (NASA_URL, f"{videos_dir_path}/nasa_960x540_206s_30fps_yuv420p.mp4") 59 | ] 60 | retrieve_videos(urls_and_dest_paths) 61 | 62 | decoder_dict = {} 63 | decoder_dict["torchcodec"] = TorchCodecPublic() 64 | decoder_dict["torchcodec[approx]"] = TorchCodecPublic(seek_mode="approximate") 65 | if torch.cuda.is_available(): 66 | decoder_dict["torchcodec[cuda]"] = TorchCodecPublic(device="cuda") 67 | decoder_dict["torchcodec[cuda,approx]"] = TorchCodecPublic( 68 | device="cuda", seek_mode="approximate" 69 | ) 70 | decoder_dict["torchvision[video_reader]"] = TorchVision("video_reader") 71 | decoder_dict["torchaudio"] = TorchAudioDecoder() 72 | 73 | # These are the number of uniform seeks we do in the seek+decode benchmark. 
74 | num_samples = 10 75 | video_files_paths = list(Path(videos_dir_path).glob("*.mp4")) 76 | assert len(video_files_paths) == 3, "Expected exactly 3 videos" 77 | results = run_benchmarks( 78 | decoder_dict, 79 | video_files_paths, 80 | num_samples, 81 | num_sequential_frames_from_start=[100], 82 | min_runtime_seconds=30, 83 | benchmark_video_creation=False, 84 | dataloader_parameters=DataLoaderInspiredWorkloadParameters( 85 | batch_parameters=BatchParameters(batch_size=50, num_threads=10), 86 | resize_height=256, 87 | resize_width=256, 88 | resize_device="cuda" if torch.cuda.is_available() else "cpu", 89 | ), 90 | ) 91 | data_for_writing = { 92 | "experiments": results, 93 | "system_metadata": { 94 | "cpu_count": os.cpu_count(), 95 | "system": platform.system(), 96 | "machine": platform.machine(), 97 | "python_version": str(platform.python_version()), 98 | "cuda": ( 99 | torch.cuda.get_device_properties(0).name 100 | if torch.cuda.is_available() 101 | else "not available" 102 | ), 103 | }, 104 | } 105 | 106 | data_json = Path(__file__).parent / "benchmark_readme_data.json" 107 | with open(data_json, "w") as write_file: 108 | json.dump(data_for_writing, write_file, sort_keys=True, indent=4) 109 | 110 | 111 | if __name__ == "__main__": 112 | main() 113 | -------------------------------------------------------------------------------- /benchmarks/samplers/benchmark_samplers.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | from pathlib import Path 3 | from time import perf_counter_ns 4 | 5 | import torch 6 | from torchcodec.decoders import VideoDecoder 7 | from torchcodec.samplers import ( 8 | clips_at_random_indices, 9 | clips_at_random_timestamps, 10 | clips_at_regular_indices, 11 | clips_at_regular_timestamps, 12 | ) 13 | 14 | DEFAULT_VIDEO_PATH = Path(__file__).parent / "../../test/resources/nasa_13013.mp4" 15 | DEFAULT_NUM_EXP = 30 16 | 17 | 18 | def bench(f, *args, num_exp, warmup=0, seed, **kwargs): 19 | 20 | for _ in range(warmup): 21 | f(*args, **kwargs) 22 | 23 | num_frames = None 24 | times = [] 25 | for _ in range(num_exp): 26 | if seed is not None: 27 | torch.manual_seed(seed) 28 | start = perf_counter_ns() 29 | clips = f(*args, **kwargs) 30 | end = perf_counter_ns() 31 | times.append(end - start) 32 | num_frames = ( 33 | clips.data.shape[0] * clips.data.shape[1] 34 | ) # should be constant across calls 35 | return torch.tensor(times).float(), num_frames 36 | 37 | 38 | def report_stats(times, num_frames, unit="ms"): 39 | fps = num_frames * 1e9 / torch.median(times) 40 | 41 | mul = { 42 | "ns": 1, 43 | "µs": 1e-3, 44 | "ms": 1e-6, 45 | "s": 1e-9, 46 | }[unit] 47 | times = times * mul 48 | std = times.std().item() 49 | med = times.median().item() 50 | print(f"{med = :.2f}{unit} +- {std:.2f} med fps = {fps:.1f}") 51 | return med, fps 52 | 53 | 54 | def sample(decoder, sampler, **kwargs): 55 | return sampler( 56 | decoder, 57 | num_frames_per_clip=10, 58 | **kwargs, 59 | ) 60 | 61 | 62 | def run_sampler_benchmarks(device, video, num_experiments, torch_seed): 63 | 64 | for num_clips in (1, 50): 65 | print("-" * 10) 66 | print(f"{num_clips = }") 67 | 68 | print("clips_at_random_indices ", end="") 69 | decoder = VideoDecoder(video, device=device) 70 | times, num_frames = bench( 71 | sample, 72 | decoder, 73 | clips_at_random_indices, 74 | num_clips=num_clips, 75 | num_exp=num_experiments, 76 | warmup=2, 77 | seed=torch_seed, 78 | ) 79 | report_stats(times, num_frames, unit="ms") 80 | 81 | print("clips_at_regular_indices ", 
end="")
82 | times, num_frames = bench(
83 | sample,
84 | decoder,
85 | clips_at_regular_indices,
86 | num_clips=num_clips,
87 | num_exp=num_experiments,
88 | warmup=2,
89 | seed=torch_seed,
90 | )
91 | report_stats(times, num_frames, unit="ms")
92 | 
93 | print("clips_at_random_timestamps ", end="")
94 | times, num_frames = bench(
95 | sample,
96 | decoder,
97 | clips_at_random_timestamps,
98 | num_clips=num_clips,
99 | num_exp=num_experiments,
100 | warmup=2,
101 | seed=torch_seed,
102 | )
103 | report_stats(times, num_frames, unit="ms")
104 | 
105 | print("clips_at_regular_timestamps ", end="")
106 | seconds_between_clip_starts = 13 / num_clips  # approximate. video is 13s long
107 | times, num_frames = bench(
108 | sample,
109 | decoder,
110 | clips_at_regular_timestamps,
111 | seconds_between_clip_starts=seconds_between_clip_starts,
112 | num_exp=num_experiments,
113 | warmup=2,
114 | seed=torch_seed,
115 | )
116 | report_stats(times, num_frames, unit="ms")
117 | 
118 | 
119 | def main():
120 | parser = argparse.ArgumentParser()
121 | parser.add_argument("--device", type=str, default="cpu")
122 | parser.add_argument("--video", type=str, default=str(DEFAULT_VIDEO_PATH))
123 | parser.add_argument("--num_experiments", type=int, default=DEFAULT_NUM_EXP)
124 | parser.add_argument("--torch_seed", type=int)
125 | args = parser.parse_args()
126 | run_sampler_benchmarks(
127 | args.device, args.video, args.num_experiments, args.torch_seed
128 | )
129 | 
130 | 
131 | if __name__ == "__main__":
132 | main()
133 | 
--------------------------------------------------------------------------------
/.github/workflows/build_ffmpeg.yaml:
--------------------------------------------------------------------------------
1 | # Taken and adapted from torchaudio.
2 | # Ref: https://github.com/pytorch/audio/blob/main/.github/workflows/ffmpeg.yml
3 | # This job is not directly related to the regular CI pipeline.
4 | # It is intended to create the FFmpeg binaries that we upload to S3,
5 | # which are then used during the build process, both in CI and locally.
6 | #
7 | # This job does not include the uploading part.
8 | # Upload needs to be done manually, and it should be done only once
9 | # per new major release of FFmpeg.
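# The uploaded archives are the ones that
# src/torchcodec/_core/fetch_and_expose_non_gpl_ffmpeg_libs.cmake later downloads
# from the torchcodec S3 bucket when building against the pre-built non-GPL
# FFmpeg libraries, so the FFmpeg versions listed in the matrix below should stay
# in sync with the versions referenced in that file.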
10 | name: Build non-GPL FFmpeg from source 11 | 12 | on: 13 | workflow_dispatch: 14 | pull_request: 15 | paths: 16 | - packaging/build_ffmpeg.sh 17 | - .github/workflows/build_ffmpeg.yaml # self reference 18 | schedule: 19 | - cron: '0 0 * * 0' # on sunday 20 | 21 | defaults: 22 | run: 23 | shell: bash -l -eo pipefail {0} 24 | 25 | jobs: 26 | LGPL-Linux-x86_64: 27 | strategy: 28 | fail-fast: false 29 | matrix: 30 | ffmpeg-version: ["4.4.4", "5.1.4", "6.1.1", "7.0.1", "8.0"] 31 | uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main 32 | permissions: 33 | id-token: write 34 | contents: read 35 | with: 36 | job-name: Build 37 | upload-artifact: ffmpeg-lgpl-linux_x86_64-${{ matrix.ffmpeg-version }} 38 | repository: meta-pytorch/torchcodec 39 | script: | 40 | export FFMPEG_VERSION="${{ matrix.ffmpeg-version }}" 41 | export FFMPEG_ROOT="${PWD}/ffmpeg" 42 | 43 | packaging/build_ffmpeg.sh 44 | 45 | tar -cf ffmpeg.tar.gz ffmpeg/include ffmpeg/lib 46 | 47 | artifact_dir="${RUNNER_ARTIFACT_DIR}/$(date +%Y-%m-%d)/linux_x86_64" 48 | mkdir -p "${artifact_dir}" 49 | mv ffmpeg.tar.gz "${artifact_dir}/${FFMPEG_VERSION}.tar.gz" 50 | 51 | LGPL-Linux-aarch64: 52 | strategy: 53 | fail-fast: false 54 | matrix: 55 | ffmpeg-version: ["4.4.4", "5.1.4", "6.1.1", "7.0.1", "8.0"] 56 | uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main 57 | permissions: 58 | id-token: write 59 | contents: read 60 | with: 61 | job-name: Build 62 | upload-artifact: ffmpeg-lgpl-linux_aarch64-${{ matrix.ffmpeg-version }} 63 | repository: meta-pytorch/torchcodec 64 | runner: linux.arm64.2xlarge 65 | docker-image: pytorch/manylinux2_28_aarch64-builder:cpu-aarch64 66 | script: | 67 | export FFMPEG_VERSION="${{ matrix.ffmpeg-version }}" 68 | export FFMPEG_ROOT="${PWD}/ffmpeg" 69 | 70 | packaging/build_ffmpeg.sh 71 | 72 | tar -cf ffmpeg.tar.gz ffmpeg/include ffmpeg/lib 73 | 74 | artifact_dir="${RUNNER_ARTIFACT_DIR}/$(date +%Y-%m-%d)/linux_aarch64" 75 | mkdir -p "${artifact_dir}" 76 | mv ffmpeg.tar.gz "${artifact_dir}/${FFMPEG_VERSION}.tar.gz" 77 | 78 | LGPL-macOS: 79 | strategy: 80 | fail-fast: false 81 | matrix: 82 | ffmpeg-version: ["4.4.4", "5.1.4", "6.1.1", "7.0.1", "8.0"] 83 | uses: pytorch/test-infra/.github/workflows/macos_job.yml@main 84 | with: 85 | job-name: Build 86 | upload-artifact: ffmpeg-lgpl-macos-${{ matrix.ffmpeg-version }} 87 | repository: meta-pytorch/torchcodec 88 | runner: macos-14-xlarge 89 | script: | 90 | export FFMPEG_VERSION="${{ matrix.ffmpeg-version }}" 91 | export FFMPEG_ROOT="${PWD}/ffmpeg" 92 | 93 | packaging/build_ffmpeg.sh 94 | 95 | tar -cf ffmpeg.tar.gz ffmpeg/include ffmpeg/lib 96 | 97 | artifact_dir="${RUNNER_ARTIFACT_DIR}/$(date +%Y-%m-%d)/macos_$(uname -m)" 98 | mkdir -p "${artifact_dir}" 99 | mv ffmpeg.tar.gz "${artifact_dir}/${FFMPEG_VERSION}.tar.gz" 100 | 101 | LGPL-Windows: 102 | strategy: 103 | fail-fast: false 104 | matrix: 105 | ffmpeg-version: ["4.4.4", "5.1.4", "6.1.1", "7.0.1", "8.0"] 106 | uses: pytorch/test-infra/.github/workflows/windows_job.yml@main 107 | with: 108 | job-name: Build 109 | upload-artifact: ffmpeg-lgpl-windows_x86_64-${{ matrix.ffmpeg-version }} 110 | repository: meta-pytorch/torchcodec 111 | script: | 112 | export FFMPEG_VERSION="${{ matrix.ffmpeg-version }}" 113 | export FFMPEG_ROOT="${PWD}/ffmpeg" 114 | 115 | packaging/build_ffmpeg.bat 116 | 117 | tar -cf ffmpeg.tar.gz ffmpeg/include ffmpeg/bin 118 | 119 | artifact_dir="${RUNNER_ARTIFACT_DIR}/$(date +%Y-%m-%d)/windows_$(uname -m)" 120 | mkdir -p "${artifact_dir}" 121 | mv ffmpeg.tar.gz 
"${artifact_dir}/${FFMPEG_VERSION}.tar.gz" 122 | -------------------------------------------------------------------------------- /src/torchcodec/_core/Metadata.cpp: -------------------------------------------------------------------------------- 1 | // Copyright (c) Meta Platforms, Inc. and affiliates. 2 | // All rights reserved. 3 | // 4 | // This source code is licensed under the BSD-style license found in the 5 | // LICENSE file in the root directory of this source tree. 6 | 7 | #include "Metadata.h" 8 | #include "torch/types.h" 9 | 10 | namespace facebook::torchcodec { 11 | 12 | std::optional StreamMetadata::getDurationSeconds( 13 | SeekMode seekMode) const { 14 | switch (seekMode) { 15 | case SeekMode::custom_frame_mappings: 16 | case SeekMode::exact: 17 | TORCH_CHECK( 18 | endStreamPtsSecondsFromContent.has_value() && 19 | beginStreamPtsSecondsFromContent.has_value(), 20 | "Missing beginStreamPtsSecondsFromContent or endStreamPtsSecondsFromContent"); 21 | return endStreamPtsSecondsFromContent.value() - 22 | beginStreamPtsSecondsFromContent.value(); 23 | case SeekMode::approximate: 24 | if (durationSecondsFromHeader.has_value()) { 25 | return durationSecondsFromHeader.value(); 26 | } 27 | if (numFramesFromHeader.has_value() && averageFpsFromHeader.has_value() && 28 | averageFpsFromHeader.value() != 0.0) { 29 | return static_cast(numFramesFromHeader.value()) / 30 | averageFpsFromHeader.value(); 31 | } 32 | if (durationSecondsFromContainer.has_value()) { 33 | return durationSecondsFromContainer.value(); 34 | } 35 | return std::nullopt; 36 | default: 37 | TORCH_CHECK(false, "Unknown SeekMode"); 38 | } 39 | } 40 | 41 | double StreamMetadata::getBeginStreamSeconds(SeekMode seekMode) const { 42 | switch (seekMode) { 43 | case SeekMode::custom_frame_mappings: 44 | case SeekMode::exact: 45 | TORCH_CHECK( 46 | beginStreamPtsSecondsFromContent.has_value(), 47 | "Missing beginStreamPtsSecondsFromContent"); 48 | return beginStreamPtsSecondsFromContent.value(); 49 | case SeekMode::approximate: 50 | if (beginStreamPtsSecondsFromContent.has_value()) { 51 | return beginStreamPtsSecondsFromContent.value(); 52 | } 53 | return 0.0; 54 | default: 55 | TORCH_CHECK(false, "Unknown SeekMode"); 56 | } 57 | } 58 | 59 | std::optional StreamMetadata::getEndStreamSeconds( 60 | SeekMode seekMode) const { 61 | switch (seekMode) { 62 | case SeekMode::custom_frame_mappings: 63 | case SeekMode::exact: 64 | TORCH_CHECK( 65 | endStreamPtsSecondsFromContent.has_value(), 66 | "Missing endStreamPtsSecondsFromContent"); 67 | return endStreamPtsSecondsFromContent.value(); 68 | case SeekMode::approximate: 69 | if (endStreamPtsSecondsFromContent.has_value()) { 70 | return endStreamPtsSecondsFromContent.value(); 71 | } 72 | return getDurationSeconds(seekMode); 73 | default: 74 | TORCH_CHECK(false, "Unknown SeekMode"); 75 | } 76 | } 77 | 78 | std::optional StreamMetadata::getNumFrames(SeekMode seekMode) const { 79 | switch (seekMode) { 80 | case SeekMode::custom_frame_mappings: 81 | case SeekMode::exact: 82 | TORCH_CHECK( 83 | numFramesFromContent.has_value(), "Missing numFramesFromContent"); 84 | return numFramesFromContent.value(); 85 | case SeekMode::approximate: { 86 | auto durationSeconds = getDurationSeconds(seekMode); 87 | if (numFramesFromHeader.has_value()) { 88 | return numFramesFromHeader.value(); 89 | } 90 | if (averageFpsFromHeader.has_value() && durationSeconds.has_value()) { 91 | return static_cast( 92 | averageFpsFromHeader.value() * durationSeconds.value()); 93 | } 94 | return std::nullopt; 95 | } 96 | 
default: 97 | TORCH_CHECK(false, "Unknown SeekMode"); 98 | } 99 | } 100 | 101 | std::optional StreamMetadata::getAverageFps(SeekMode seekMode) const { 102 | switch (seekMode) { 103 | case SeekMode::custom_frame_mappings: 104 | case SeekMode::exact: { 105 | auto numFrames = getNumFrames(seekMode); 106 | if (numFrames.has_value() && 107 | beginStreamPtsSecondsFromContent.has_value() && 108 | endStreamPtsSecondsFromContent.has_value()) { 109 | double duration = endStreamPtsSecondsFromContent.value() - 110 | beginStreamPtsSecondsFromContent.value(); 111 | if (duration != 0.0) { 112 | return static_cast(numFrames.value()) / duration; 113 | } 114 | } 115 | return averageFpsFromHeader; 116 | } 117 | case SeekMode::approximate: 118 | return averageFpsFromHeader; 119 | default: 120 | TORCH_CHECK(false, "Unknown SeekMode"); 121 | } 122 | } 123 | 124 | } // namespace facebook::torchcodec 125 | -------------------------------------------------------------------------------- /src/torchcodec/decoders/_decoder_utils.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | # All rights reserved. 3 | # 4 | # This source code is licensed under the BSD-style license found in the 5 | # LICENSE file in the root directory of this source tree. 6 | 7 | 8 | import contextvars 9 | import io 10 | 11 | from collections.abc import Generator 12 | from contextlib import contextmanager 13 | from pathlib import Path 14 | 15 | from torch import Tensor 16 | from torchcodec import _core as core 17 | 18 | ERROR_REPORTING_INSTRUCTIONS = """ 19 | This should never happen. Please report an issue following the steps in 20 | https://github.com/pytorch/torchcodec/issues/new?assignees=&labels=&projects=&template=bug-report.yml. 21 | """ 22 | 23 | 24 | def create_decoder( 25 | *, 26 | source: str | Path | io.RawIOBase | io.BufferedReader | bytes | Tensor, 27 | seek_mode: str, 28 | ) -> Tensor: 29 | if isinstance(source, str): 30 | return core.create_from_file(source, seek_mode) 31 | elif isinstance(source, Path): 32 | return core.create_from_file(str(source), seek_mode) 33 | elif isinstance(source, io.RawIOBase) or isinstance(source, io.BufferedReader): 34 | return core.create_from_file_like(source, seek_mode) 35 | elif isinstance(source, bytes): 36 | return core.create_from_bytes(source, seek_mode) 37 | elif isinstance(source, Tensor): 38 | return core.create_from_tensor(source, seek_mode) 39 | elif isinstance(source, io.TextIOBase): 40 | raise TypeError( 41 | "source is for reading text, likely from open(..., 'r'). Try with 'rb' for binary reading?" 42 | ) 43 | elif hasattr(source, "read") and hasattr(source, "seek"): 44 | # This check must be after checking for text-based reading. Also placing 45 | # it last in general to be defensive: hasattr is a blunt instrument. We 46 | # could use the inspect module to check for methods with the right 47 | # signature. 48 | return core.create_from_file_like(source, seek_mode) 49 | 50 | raise TypeError( 51 | f"Unknown source type: {type(source)}. " 52 | "Supported types are str, Path, bytes, Tensor and file-like objects with " 53 | "read(self, size: int) -> bytes and " 54 | "seek(self, offset: int, whence: int) -> int methods." 
55 | )
56 | 
57 | 
58 | # Thread-local and async-safe storage for the current CUDA backend
59 | _CUDA_BACKEND: contextvars.ContextVar[str] = contextvars.ContextVar(
60 | "_CUDA_BACKEND", default="ffmpeg"
61 | )
62 | 
63 | 
64 | @contextmanager
65 | def set_cuda_backend(backend: str) -> Generator[None, None, None]:
66 | """Context manager to set the CUDA backend for :class:`~torchcodec.decoders.VideoDecoder`.
67 | 
68 | This context manager allows you to specify which CUDA backend implementation
69 | to use when creating :class:`~torchcodec.decoders.VideoDecoder` instances
70 | with CUDA devices.
71 | 
72 | .. note::
73 | **We recommend trying the "beta" backend instead of the default "ffmpeg"
74 | backend!** The beta backend is faster, and will eventually become the
75 | default in future versions. It may have rough edges that we'll polish
76 | over time, but it's already quite stable and ready for adoption. Let us
77 | know what you think!
78 | 
79 | Only the creation of the decoder needs to be inside the context manager; the
80 | decoding methods can be called outside of it. You still need to pass
81 | ``device="cuda"`` when creating the
82 | :class:`~torchcodec.decoders.VideoDecoder` instance. If a CUDA device isn't
83 | specified, this context manager will have no effect. See example below.
84 | 
85 | This is thread-safe and async-safe.
86 | 
87 | Args:
88 | backend (str): The CUDA backend to use. Can be "ffmpeg" (default) or
89 | "beta". We recommend trying "beta" as it's faster!
90 | 
91 | Example:
92 | >>> with set_cuda_backend("beta"):
93 | ... decoder = VideoDecoder("video.mp4", device="cuda")
94 | ...
95 | ... # Only the decoder creation needs to be part of the context manager.
96 | ... # The decoder will now use the beta CUDA implementation:
97 | ... decoder.get_frame_at(0)
98 | """
99 | backend = backend.lower()
100 | if backend not in ("ffmpeg", "beta"):
101 | raise ValueError(
102 | f"Invalid CUDA backend ({backend}). Supported values are 'ffmpeg' and 'beta'."
103 | )
104 | 
105 | previous_state = _CUDA_BACKEND.set(backend)
106 | try:
107 | yield
108 | finally:
109 | _CUDA_BACKEND.reset(previous_state)
110 | 
111 | 
112 | def _get_cuda_backend() -> str:
113 | return _CUDA_BACKEND.get()
114 | 
--------------------------------------------------------------------------------
/src/torchcodec/_core/Cache.h:
--------------------------------------------------------------------------------
1 | // Copyright (c) Meta Platforms, Inc. and affiliates.
2 | // All rights reserved.
3 | //
4 | // This source code is licensed under the BSD-style license found in the
5 | // LICENSE file in the root directory of this source tree.
6 | 
7 | #pragma once
8 | 
9 | #include
10 | #include
11 | #include
12 | 
13 | namespace facebook::torchcodec {
14 | 
15 | // This header defines simple cache class primitives to store reusable objects
16 | // across TorchCodec stream instances. The intended usage is to store hardware
17 | // contexts whose creation is expensive. The cache mechanism is as follows:
18 | // 1. 'PerGpuCache' provides a dynamic cache with the specified maximum capacity
19 | // for the given number of GPUs.
20 | // 2. When a stream object (e.g. SingleStreamDecoder) is destroyed, the cachable
21 | // object must be released to the cache. The cache will accept the object if
22 | // it is not full.
23 | // 3. When a stream object (e.g. SingleStreamDecoder) is created, the cachable
24 | // object must first be queried from the cache. If the cache is empty then a
25 | // new object must be created.
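//
// A minimal usage sketch of the mechanism described above (the context type,
// cache instance and helper below are made up for illustration only):
//
//   static PerGpuCache<MyHwContext> gHwContextCache(MAX_GPUS, CAPACITY_PER_GPU);
//
//   // On stream creation (step 3): try to reuse a cached context first.
//   auto ctx = gHwContextCache.get(device);
//   if (!ctx) {
//     ctx = createHwContext(device);  // expensive; only when the cache is empty
//   }
//
//   // On stream destruction (step 2): hand the context back if there is room.
//   gHwContextCache.addIfCacheHasCapacity(device, std::move(ctx));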
26 | 27 | template > 28 | class Cache { 29 | public: 30 | using element_type = std::unique_ptr; 31 | 32 | explicit Cache(int capacity) : capacity_(capacity) {} 33 | 34 | // Adds an object to the cache if the cache has capacity. Returns true 35 | // if object was added and false otherwise. 36 | bool addIfCacheHasCapacity(element_type&& obj); 37 | 38 | // Returns an object from the cache. Cache does not hold a reference 39 | // to the object after this call. 40 | element_type get(); 41 | 42 | private: 43 | int capacity_; 44 | std::mutex mutex_; 45 | std::vector cache_; 46 | }; 47 | 48 | template 49 | bool Cache::addIfCacheHasCapacity(element_type&& obj) { 50 | std::scoped_lock lock(mutex_); 51 | if (capacity_ >= 0 && cache_.size() >= static_cast(capacity_)) { 52 | return false; 53 | } 54 | cache_.push_back(std::move(obj)); 55 | return true; 56 | } 57 | 58 | template 59 | typename Cache::element_type Cache::get() { 60 | std::scoped_lock lock(mutex_); 61 | if (cache_.empty()) { 62 | return nullptr; 63 | } 64 | 65 | element_type obj = std::move(cache_.back()); 66 | cache_.pop_back(); 67 | return obj; 68 | } 69 | 70 | template > 71 | class PerGpuCache { 72 | public: 73 | using element_type = typename Cache::element_type; 74 | 75 | // Initializes 'maxGpus' number of caches. Each cache can hold no 76 | // more than 'capacity' items. If 'capacity' <0 cache size is unlimited. 77 | PerGpuCache(int maxGpus, int capacity) { 78 | TORCH_CHECK(maxGpus > 0, "maxGpus for PerGpuCache must be >0"); 79 | for (int i = 0; i < maxGpus; ++i) { 80 | cache_.emplace_back(std::make_unique>(capacity)); 81 | } 82 | } 83 | 84 | // Adds an object to the specified device cache if the cache has 85 | // capacity. Returns true if object was added and false otherwise. 86 | bool addIfCacheHasCapacity(const torch::Device& device, element_type&& obj); 87 | 88 | // Returns an object from the cache of the specified device. Cache 89 | // does not hold a reference to the object after this call. 90 | element_type get(const torch::Device& device); 91 | 92 | private: 93 | // 'Cache' class implementation contains mutex which makes it non-movable 94 | // and non-copyable, so we need to wrap it in std::unique_ptr. 95 | std::vector>> cache_; 96 | }; 97 | 98 | // Forward declaration of getDeviceIndex which exists in CUDACommon.h 99 | // This avoids circular dependency between Cache.h and CUDACommon.cpp which also 100 | // needs to include Cache.h 101 | int getDeviceIndex(const torch::Device& device); 102 | 103 | template 104 | bool PerGpuCache::addIfCacheHasCapacity( 105 | const torch::Device& device, 106 | element_type&& obj) { 107 | int deviceIndex = getDeviceIndex(device); 108 | TORCH_CHECK( 109 | static_cast(deviceIndex) < cache_.size(), 110 | "Device index out of range"); 111 | return cache_[deviceIndex]->addIfCacheHasCapacity(std::move(obj)); 112 | } 113 | 114 | template 115 | typename PerGpuCache::element_type PerGpuCache::get( 116 | const torch::Device& device) { 117 | int deviceIndex = getDeviceIndex(device); 118 | TORCH_CHECK( 119 | static_cast(deviceIndex) < cache_.size(), 120 | "Device index out of range"); 121 | return cache_[deviceIndex]->get(); 122 | } 123 | 124 | } // namespace facebook::torchcodec 125 | -------------------------------------------------------------------------------- /src/torchcodec/_core/AVIOTensorContext.cpp: -------------------------------------------------------------------------------- 1 | // Copyright (c) Meta Platforms, Inc. and affiliates. 2 | // All rights reserved. 
3 | // 4 | // This source code is licensed under the BSD-style license found in the 5 | // LICENSE file in the root directory of this source tree. 6 | 7 | #include "AVIOTensorContext.h" 8 | #include 9 | 10 | namespace facebook::torchcodec { 11 | 12 | namespace { 13 | 14 | constexpr int64_t INITIAL_TENSOR_SIZE = 10'000'000; // 10 MB 15 | constexpr int64_t MAX_TENSOR_SIZE = 320'000'000; // 320 MB 16 | 17 | // The signature of this function is defined by FFMPEG. 18 | int read(void* opaque, uint8_t* buf, int buf_size) { 19 | auto tensorContext = static_cast(opaque); 20 | TORCH_CHECK( 21 | tensorContext->current_pos <= tensorContext->data.numel(), 22 | "Tried to read outside of the buffer: current_pos=", 23 | tensorContext->current_pos, 24 | ", size=", 25 | tensorContext->data.numel()); 26 | 27 | int64_t numBytesRead = std::min( 28 | static_cast(buf_size), 29 | tensorContext->data.numel() - tensorContext->current_pos); 30 | 31 | TORCH_CHECK( 32 | numBytesRead >= 0, 33 | "Tried to read negative bytes: numBytesRead=", 34 | numBytesRead, 35 | ", size=", 36 | tensorContext->data.numel(), 37 | ", current_pos=", 38 | tensorContext->current_pos); 39 | 40 | if (numBytesRead == 0) { 41 | return AVERROR_EOF; 42 | } 43 | 44 | std::memcpy( 45 | buf, 46 | tensorContext->data.data_ptr() + tensorContext->current_pos, 47 | numBytesRead); 48 | tensorContext->current_pos += numBytesRead; 49 | return numBytesRead; 50 | } 51 | 52 | // The signature of this function is defined by FFMPEG. 53 | int write(void* opaque, const uint8_t* buf, int buf_size) { 54 | auto tensorContext = static_cast(opaque); 55 | 56 | int64_t bufSize = static_cast(buf_size); 57 | if (tensorContext->current_pos + bufSize > tensorContext->data.numel()) { 58 | TORCH_CHECK( 59 | tensorContext->data.numel() * 2 <= MAX_TENSOR_SIZE, 60 | "We tried to allocate an output encoded tensor larger than ", 61 | MAX_TENSOR_SIZE, 62 | " bytes. If you think this should be supported, please report."); 63 | 64 | // We double the size of the outpout tensor. Calling cat() may not be the 65 | // most efficient, but it's simple. 66 | tensorContext->data = 67 | torch::cat({tensorContext->data, tensorContext->data}); 68 | } 69 | 70 | TORCH_CHECK( 71 | tensorContext->current_pos + bufSize <= tensorContext->data.numel(), 72 | "Re-allocation of the output tensor didn't work. ", 73 | "This should not happen, please report on TorchCodec bug tracker"); 74 | 75 | uint8_t* outputTensorData = tensorContext->data.data_ptr(); 76 | std::memcpy(outputTensorData + tensorContext->current_pos, buf, bufSize); 77 | tensorContext->current_pos += bufSize; 78 | // Track the maximum position written so getOutputTensor's narrow() does not 79 | // truncate the file if final seek was backwards 80 | tensorContext->max_pos = 81 | std::max(tensorContext->current_pos, tensorContext->max_pos); 82 | return buf_size; 83 | } 84 | 85 | // The signature of this function is defined by FFMPEG. 
86 | int64_t seek(void* opaque, int64_t offset, int whence) { 87 | auto tensorContext = static_cast(opaque); 88 | int64_t ret = -1; 89 | 90 | switch (whence) { 91 | case AVSEEK_SIZE: 92 | ret = tensorContext->data.numel(); 93 | break; 94 | case SEEK_SET: 95 | tensorContext->current_pos = offset; 96 | ret = offset; 97 | break; 98 | default: 99 | break; 100 | } 101 | 102 | return ret; 103 | } 104 | 105 | } // namespace 106 | 107 | AVIOFromTensorContext::AVIOFromTensorContext(torch::Tensor data) 108 | : tensorContext_{data, 0, 0} { 109 | TORCH_CHECK(data.numel() > 0, "data must not be empty"); 110 | TORCH_CHECK(data.is_contiguous(), "data must be contiguous"); 111 | TORCH_CHECK(data.scalar_type() == torch::kUInt8, "data must be kUInt8"); 112 | createAVIOContext( 113 | &read, nullptr, &seek, &tensorContext_, /*isForWriting=*/false); 114 | } 115 | 116 | AVIOToTensorContext::AVIOToTensorContext() 117 | : tensorContext_{ 118 | torch::empty({INITIAL_TENSOR_SIZE}, {torch::kUInt8}), 119 | 0, 120 | 0} { 121 | createAVIOContext( 122 | nullptr, &write, &seek, &tensorContext_, /*isForWriting=*/true); 123 | } 124 | 125 | torch::Tensor AVIOToTensorContext::getOutputTensor() { 126 | return tensorContext_.data.narrow( 127 | /*dim=*/0, /*start=*/0, /*length=*/tensorContext_.max_pos); 128 | } 129 | 130 | } // namespace facebook::torchcodec 131 | -------------------------------------------------------------------------------- /docs/source/_static/css/custom_torchcodec.css: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (c) Meta Platforms, Inc. and affiliates. 3 | * All rights reserved. 4 | * 5 | * This source code is licensed under the BSD-style license found in the 6 | * LICENSE file in the root directory of this source tree. 
7 | */ 8 | 9 | /* sphinx-design styles for cards/tabs */ 10 | 11 | 12 | :root { 13 | --sd-color-info: #ee4c2c; 14 | --sd-color-primary: #6c6c6d; 15 | --sd-color-primary-highlight: #f3f4f7; 16 | --sd-color-card-border-hover: #ee4c2c; 17 | --sd-color-card-border: #f3f4f7; 18 | --sd-color-card-background: #fff; 19 | --sd-color-card-text: inherit; 20 | --sd-color-card-header: transparent; 21 | --sd-color-card-footer: transparent; 22 | --sd-color-tabs-label-active: #ee4c2c; 23 | --sd-color-tabs-label-hover: #ee4c2c; 24 | --sd-color-tabs-label-inactive: #6c6c6d; 25 | --sd-color-tabs-underline-active: #ee4c2c; 26 | --sd-color-tabs-underline-hover: #fabdbd; 27 | --sd-color-tabs-underline-inactive: transparent; 28 | --sd-color-tabs-overline: rgb(222, 222, 222); 29 | --sd-color-tabs-underline: rgb(222, 222, 222); 30 | } 31 | 32 | .sd-text-info { 33 | color: #ee4c2c; 34 | } 35 | 36 | .sd-card-img-top { 37 | background: #ee4c2c; 38 | height: 5px !important; 39 | } 40 | 41 | .sd-card { 42 | position: relative; 43 | background-color: #fff; 44 | opacity: 1.0; 45 | border-radius: 0px; 46 | width: 30%; 47 | border: none; 48 | padding-bottom: 0px; 49 | } 50 | 51 | 52 | .sd-card-img:hover { 53 | opacity: 1.0; 54 | background-color: #f3f4f7; 55 | } 56 | 57 | 58 | .sd-card:after { 59 | display: block; 60 | opacity: 1; 61 | content: ''; 62 | border-bottom: solid 1px #ee4c2c; 63 | background-color: #fff; 64 | transform: scaleX(0); 65 | transition: transform .250s ease-in-out; 66 | transform-origin: 0% 50%; 67 | } 68 | 69 | .sd-card:hover { 70 | background-color: #fff; 71 | opacity: 1; 72 | border-top: 1px solid #f3f4f7; 73 | border-left: 1px solid #f3f4f7; 74 | border-right: 1px solid #f3f4f7; 75 | } 76 | 77 | .sd-card:hover:after { 78 | transform: scaleX(1); 79 | } 80 | 81 | .card-prerequisites:hover { 82 | transition: none; 83 | border: none; 84 | } 85 | 86 | .card-prerequisites:hover:after { 87 | transition: none; 88 | transform: none; 89 | } 90 | 91 | .card-prerequisites:after { 92 | display: block; 93 | content: ''; 94 | border-bottom: none; 95 | background-color: #fff; 96 | transform: none; 97 | transition: none; 98 | transform-origin: none; 99 | } 100 | 101 | 102 | details.sd-dropdown { 103 | font-weight: 300; 104 | width: auto; 105 | } 106 | 107 | details.sd-dropdown:after { 108 | border: none; 109 | transition: none; 110 | } 111 | 112 | details.sd-dropdown:hover { 113 | border: none; 114 | transition: none; 115 | } 116 | 117 | details.sd-dropdown .sd-summary-content { 118 | font-weight: 300; 119 | } 120 | 121 | details.sd-dropdown .highlight .n { 122 | font-weight: normal; 123 | } 124 | 125 | .et-page-column1 { 126 | float: left; 127 | width: 70%; 128 | font-size: 1rem; 129 | } 130 | 131 | .et-page-column2 { 132 | float: right; 133 | padding-top: 40px; 134 | padding-left: 60px; 135 | padding-right: 60px; 136 | padding-bottom: 60px; 137 | width: 30%; 138 | } 139 | 140 | .et-page-column-row:after { 141 | content: ""; 142 | display: table; 143 | clear: both; 144 | } 145 | 146 | /* For screens smaller than 768px (typical mobile devices) */ 147 | @media screen and (max-width: 768px) { 148 | .et-page-column1, .et-page-column2 { 149 | float: none; /* Remove floats */ 150 | width: 100%; /* Full width for both columns */ 151 | padding: 0; 152 | font-size: 1rem; 153 | } 154 | 155 | .et-page-column2 img { 156 | display: none; 157 | } 158 | .et-page-column-row:after { 159 | content: ""; 160 | display: table; 161 | clear: both; 162 | } 163 | } 164 | 165 | article.pytorch-article .class .method dt { 166 | border-top: 
none; 167 | } 168 | 169 | article.pytorch-article .class .simple dt { 170 | border-top: none; 171 | } 172 | 173 | article.pytorch-article .function dt.sig { 174 | border-top: none; 175 | } 176 | 177 | /* Fix for Sphinx gallery thumbnails. 178 | See https://github.com/sphinx-gallery/sphinx-gallery/issues/990 179 | */ 180 | article.pytorch-article .sphx-glr-thumbnails .sphx-glr-thumbcontainer { 181 | width: unset; 182 | margin-right: 0; 183 | margin-left: 0; 184 | } 185 | article.pytorch-article div.section div.wy-table-responsive tbody td { 186 | width: 50%; 187 | } 188 | 189 | article.pytorch-article section#glossary dl.simple.glossary dt { 190 | font-weight: bold; 191 | font-size: x-large; 192 | } 193 | -------------------------------------------------------------------------------- /docs/source/index.rst: -------------------------------------------------------------------------------- 1 | Welcome to the TorchCodec documentation! 2 | ======================================== 3 | 4 | TorchCodec is a Python library for decoding video and audio data into PyTorch 5 | tensors, on CPU and CUDA GPU. It also supports audio and video encoding! 6 | It aims to be fast, easy to use, and well integrated into the PyTorch ecosystem. 7 | If you want to use PyTorch to train ML models on videos and audio, TorchCodec is 8 | how you turn these into data. 9 | 10 | We achieve these capabilities through: 11 | 12 | * Pythonic APIs that mirror Python and PyTorch conventions. 13 | * Relying on `FFmpeg `_ to do the decoding / encoding. 14 | TorchCodec uses the version of FFmpeg you already have installed. FFmpeg is a 15 | mature library with broad coverage available on most systems. It is, however, 16 | not easy to use. TorchCodec abstracts FFmpeg's complexity to ensure it is 17 | used correctly and efficiently. 18 | * Returning data as PyTorch tensors, ready to be fed into PyTorch transforms 19 | or used directly to train models. 20 | 21 | Installation instructions 22 | ^^^^^^^^^^^^^^^^^^^^^^^^^ 23 | 24 | .. grid:: 3 25 | 26 | .. grid-item-card:: :octicon:`file-code;1em` 27 | Installation instructions 28 | :img-top: _static/img/card-background.svg 29 | :link: https://github.com/pytorch/torchcodec?tab=readme-ov-file#installing-torchcodec 30 | :link-type: url 31 | 32 | How to install TorchCodec 33 | 34 | Decoding 35 | ^^^^^^^^ 36 | 37 | .. grid:: 3 38 | 39 | .. grid-item-card:: :octicon:`file-code;1em` 40 | Getting Started with TorchCodec 41 | :img-top: _static/img/card-background.svg 42 | :link: generated_examples/decoding/basic_example.html 43 | :link-type: url 44 | 45 | A simple video decoding example 46 | 47 | .. grid-item-card:: :octicon:`file-code;1em` 48 | Audio Decoding 49 | :img-top: _static/img/card-background.svg 50 | :link: generated_examples/decoding/audio_decoding.html 51 | :link-type: url 52 | 53 | A simple audio decoding example 54 | 55 | .. grid-item-card:: :octicon:`file-code;1em` 56 | GPU decoding 57 | :img-top: _static/img/card-background.svg 58 | :link: generated_examples/decoding/basic_cuda_example.html 59 | :link-type: url 60 | 61 | A simple example demonstrating CUDA GPU decoding 62 | 63 | .. grid-item-card:: :octicon:`file-code;1em` 64 | Streaming video 65 | :img-top: _static/img/card-background.svg 66 | :link: generated_examples/decoding/file_like.html 67 | :link-type: url 68 | 69 | How to efficiently decode videos from the cloud 70 | 71 | .. 
grid-item-card:: :octicon:`file-code;1em`
72 | Parallel decoding
73 | :img-top: _static/img/card-background.svg
74 | :link: generated_examples/decoding/parallel_decoding.html
75 | :link-type: url
76 | 
77 | How to decode a video with multiple processes or threads.
78 | 
79 | .. grid-item-card:: :octicon:`file-code;1em`
80 | Clip sampling
81 | :img-top: _static/img/card-background.svg
82 | :link: generated_examples/decoding/sampling.html
83 | :link-type: url
84 | 
85 | How to sample regular and random clips from a video
86 | 
87 | .. grid-item-card:: :octicon:`file-code;1em`
88 | Performance Tips
89 | :img-top: _static/img/card-background.svg
90 | :link: generated_examples/decoding/performance_tips.html
91 | :link-type: url
92 | 
93 | Tips for optimizing video decoding performance
94 | 
95 | 
96 | Encoding
97 | ^^^^^^^^
98 | 
99 | .. grid:: 3
100 | 
101 | .. grid-item-card:: :octicon:`file-code;1em`
102 | Audio Encoding
103 | :img-top: _static/img/card-background.svg
104 | :link: generated_examples/encoding/audio_encoding.html
105 | :link-type: url
106 | 
107 | How to encode audio samples
108 | 
109 | .. grid-item-card:: :octicon:`file-code;1em`
110 | Video Encoding
111 | :img-top: _static/img/card-background.svg
112 | :link: generated_examples/encoding/video_encoding.html
113 | :link-type: url
114 | 
115 | How to encode video frames
116 | 
117 | .. toctree::
118 | :maxdepth: 1
119 | :caption: TorchCodec documentation
120 | :hidden:
121 | 
122 | Home
123 | glossary
124 | 
125 | .. toctree::
126 | :maxdepth: 1
127 | :caption: Examples and tutorials
128 | :hidden:
129 | 
130 | Installation instructions
131 | generated_examples/index
132 | 
133 | 
134 | .. toctree::
135 | :glob:
136 | :maxdepth: 1
137 | :caption: API Reference
138 | :hidden:
139 | 
140 | api_ref_torchcodec
141 | api_ref_decoders
142 | api_ref_encoders
143 | api_ref_samplers
144 | api_ref_transforms
145 | 
--------------------------------------------------------------------------------
/src/torchcodec/_core/fetch_and_expose_non_gpl_ffmpeg_libs.cmake:
--------------------------------------------------------------------------------
1 | # This file fetches the non-GPL ffmpeg libraries from the torchcodec S3 bucket,
2 | # and exposes them as CMake targets so we can dynamically link against them.
3 | # These libraries were built on the CI via the build_ffmpeg.yaml workflow.
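# The layout on S3 is one dated folder per build, containing one tar.gz archive
# per FFmpeg major version and platform (see base_url and platform_url below).
# For each version we declare a FetchContent download with its expected SHA256
# checksum and then expose the extracted libraries through add_ffmpeg_target(),
# so the rest of the build can link against FFmpeg 4 through 8.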
4 | 5 | # Avoid warning: see https://cmake.org/cmake/help/latest/policy/CMP0135.html 6 | if (CMAKE_VERSION VERSION_GREATER_EQUAL "3.24.0") 7 | cmake_policy(SET CMP0135 NEW) 8 | endif() 9 | 10 | include(FetchContent) 11 | 12 | set( 13 | base_url 14 | https://pytorch.s3.amazonaws.com/torchcodec/ffmpeg/2025-03-14 15 | ) 16 | 17 | if (LINUX) 18 | if (CMAKE_SYSTEM_PROCESSOR MATCHES "aarch64|arm64|ARM64") 19 | set( 20 | platform_url 21 | ${base_url}/linux_aarch64 22 | ) 23 | 24 | set( 25 | f4_sha256 26 | a310a2ed9ffe555fd3278dae15065541098dd35e124564671dcda6a6620ac842 27 | ) 28 | set( 29 | f5_sha256 30 | 89ca7996bccbc2db49adaa401d20fdbabffe0e1b4e07a0f81d6b143e858b7c8d 31 | ) 32 | set( 33 | f6_sha256 34 | ae44c67b4587d061b8e9cc8990ca891ee013fe52ad79e5016ba29871562621da 35 | ) 36 | set( 37 | f7_sha256 38 | 948e2cac66ca6f68ff526d5e84138e94bce0f1a7c83f502d15d85d0bd3ddc112 39 | ) 40 | set( 41 | f8_sha256 42 | b9cfd99ae75a14e58300854967d4dc49de0b3daa551df51ea1f52a3f08d2c8af 43 | ) 44 | elseif (LINUX) # assume x86_64 45 | set( 46 | platform_url 47 | ${base_url}/linux_x86_64 48 | ) 49 | 50 | set( 51 | f4_sha256 52 | 1a083f1922443bedb5243d04896383b8c606778a7ddb9d886c8303e55339fe0c 53 | ) 54 | set( 55 | f5_sha256 56 | 65d6ad54082d94dcb3f801d73df2265e0e1bb303c7afbce7723e3b77ccd0e207 57 | ) 58 | set( 59 | f6_sha256 60 | 8bd5939c2f4a4b072e837e7870c13fe7d13824e5ff087ab534e4db4e90b7be9c 61 | ) 62 | set( 63 | f7_sha256 64 | 1cb946d8b7c6393c2c3ebe1f900b8de7a2885fe614c45d4ec32c9833084f2f26 65 | ) 66 | set( 67 | f8_sha256 68 | c55b3c1a4b5e4d5fdd7c632bea3ab6f45b4e37cc8e0999dda3f84a8ed8defad8 69 | ) 70 | endif() 71 | elseif (APPLE) 72 | set( 73 | platform_url 74 | ${base_url}/macos_arm64 75 | ) 76 | set( 77 | f4_sha256 78 | f0335434529d9e19359eae0fe912dd9e747667534a1c92e662f5219a55dfad8c 79 | ) 80 | set( 81 | f5_sha256 82 | cfc3449c9af6863731a431ce89e32c08c5f8ece94b306fb6b695828502a76166 83 | ) 84 | set( 85 | f6_sha256 86 | ec47b4783c342038e720e33b2fdfa55a9a490afb1cf37a26467733983688647e 87 | ) 88 | set( 89 | f7_sha256 90 | 48a4fc8ce098305cfd4a58f40889249c523ca3c285f66ba704b5bad0e3ada53a 91 | ) 92 | set( 93 | f8_sha256 94 | beb936b76f25d2621228a12cdb67c9ae3d1eff7aa713ef8d1167ebf0c25bd5ec 95 | ) 96 | elseif (WIN32) 97 | set( 98 | platform_url 99 | ${base_url}/windows_x86_64 100 | ) 101 | set( 102 | f4_sha256 103 | 270a1aa8892225267e68a7eb87c417931da30dccbf08ee2bde8833e659cab5cb 104 | ) 105 | set( 106 | f5_sha256 107 | b8b2a349a847e56a6da875b066dff1cae53cb8ee7cf5ba9321ec1243dea0cde0 108 | ) 109 | set( 110 | f6_sha256 111 | 5d9f8c76dc55f790fa31d825985e9270bf9e498b8bfec21a0ad3a1feb1fa053a 112 | ) 113 | set( 114 | f7_sha256 115 | ae391ace382330e912793b70b68529ee7c91026d2869b4df7e7c3e7d3656bdd5 116 | ) 117 | set( 118 | f8_sha256 119 | bac845ac79876b104959cb0e7b9dec772a261116344dd17d2f97e7ddfac4a73f 120 | ) 121 | else() 122 | message( 123 | FATAL_ERROR 124 | "Unsupported operating system: ${CMAKE_SYSTEM_NAME}" 125 | ) 126 | endif() 127 | 128 | FetchContent_Declare( 129 | f4 130 | URL ${platform_url}/4.4.4.tar.gz 131 | URL_HASH 132 | SHA256=${f4_sha256} 133 | ) 134 | FetchContent_Declare( 135 | f5 136 | URL ${platform_url}/5.1.4.tar.gz 137 | URL_HASH 138 | SHA256=${f5_sha256} 139 | ) 140 | FetchContent_Declare( 141 | f6 142 | URL ${platform_url}/6.1.1.tar.gz 143 | URL_HASH 144 | SHA256=${f6_sha256} 145 | ) 146 | FetchContent_Declare( 147 | f7 148 | URL ${platform_url}/7.0.1.tar.gz 149 | URL_HASH 150 | SHA256=${f7_sha256} 151 | ) 152 | FetchContent_Declare( 153 | f8 154 | URL ${platform_url}/8.0.tar.gz 155 | URL_HASH 156 | 
SHA256=${f8_sha256} 157 | ) 158 | 159 | FetchContent_MakeAvailable(f4 f5 f6 f7 f8) 160 | 161 | # makes add_ffmpeg_target available 162 | include("${CMAKE_CURRENT_SOURCE_DIR}/../share/cmake/TorchCodec/ffmpeg_versions.cmake") 163 | 164 | # Note: the f?_SOURCE_DIR variables were set by FetchContent_MakeAvailable 165 | add_ffmpeg_target(4 "${f4_SOURCE_DIR}") 166 | add_ffmpeg_target(5 "${f5_SOURCE_DIR}") 167 | add_ffmpeg_target(6 "${f6_SOURCE_DIR}") 168 | add_ffmpeg_target(7 "${f7_SOURCE_DIR}") 169 | add_ffmpeg_target(8 "${f8_SOURCE_DIR}") 170 | -------------------------------------------------------------------------------- /.github/workflows/macos_wheel.yaml: -------------------------------------------------------------------------------- 1 | name: Build and test MacOS wheel 2 | 3 | on: 4 | pull_request: 5 | push: 6 | branches: 7 | - nightly 8 | - main 9 | - release/* 10 | tags: 11 | - v[0-9]+.[0-9]+.[0-9]+-rc[0-9]+ 12 | workflow_dispatch: 13 | 14 | concurrency: 15 | group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref_name }}-${{ github.ref_type == 'branch' && github.sha }}-${{ github.event_name == 'workflow_dispatch' }} 16 | cancel-in-progress: true 17 | 18 | permissions: 19 | id-token: write 20 | contents: write 21 | 22 | defaults: 23 | run: 24 | shell: bash -l -eo pipefail {0} 25 | 26 | jobs: 27 | 28 | generate-matrix: 29 | uses: pytorch/test-infra/.github/workflows/generate_binary_build_matrix.yml@main 30 | with: 31 | package-type: wheel 32 | os: macos-arm64 33 | test-infra-repository: pytorch/test-infra 34 | test-infra-ref: main 35 | with-xpu: disable 36 | with-rocm: disable 37 | with-cuda: disable 38 | build-python-only: "disable" 39 | 40 | build: 41 | needs: generate-matrix 42 | strategy: 43 | fail-fast: false 44 | name: Build and Upload Mac wheel 45 | uses: pytorch/test-infra/.github/workflows/build_wheels_macos.yml@main 46 | with: 47 | repository: meta-pytorch/torchcodec 48 | ref: "" 49 | test-infra-repository: pytorch/test-infra 50 | test-infra-ref: main 51 | build-matrix: ${{ needs.generate-matrix.outputs.matrix }} 52 | pre-script: packaging/pre_build_script.sh 53 | post-script: packaging/post_build_script.sh 54 | smoke-test-script: packaging/fake_smoke_test.py 55 | runner-type: macos-14 56 | setup-miniconda: true 57 | package-name: torchcodec 58 | trigger-event: ${{ github.event_name }} 59 | build-platform: "python-build-package" 60 | build-command: "BUILD_AGAINST_ALL_FFMPEG_FROM_S3=1 python -m build --wheel -vvv --no-isolation" 61 | 62 | install-and-test: 63 | runs-on: macos-14-xlarge 64 | strategy: 65 | fail-fast: false 66 | matrix: 67 | python-version: ['3.10'] 68 | ffmpeg-version-for-tests: ['4.4.2', '5.1.2', '6.1.1', '7.0.1', '8.0'] 69 | needs: build 70 | steps: 71 | - name: Download wheel 72 | uses: actions/download-artifact@v4 73 | with: 74 | name: meta-pytorch_torchcodec__${{ matrix.python-version }}_cpu_ 75 | path: pytorch/torchcodec/dist/ 76 | 77 | - name: Setup conda env 78 | uses: conda-incubator/setup-miniconda@v3 79 | with: 80 | auto-update-conda: true 81 | miniconda-version: "latest" 82 | activate-environment: test 83 | python-version: ${{ matrix.python-version }} 84 | - name: Update pip 85 | run: python -m pip install --upgrade pip 86 | 87 | - name: Install PyTorch 88 | run: | 89 | # If we're in a release branch or in a PR against a release branch, 90 | # we install the PyTorch RCs from the test channel. Otherwise, e.g. in 91 | # `main` or in PRs against `main`, we install the nightly builds. 
92 | # Note that the `test` RCs are 93 | if [[ (${GITHUB_EVENT_NAME} = 'pull_request' && (${GITHUB_BASE_REF} = 'release'*)) || (${GITHUB_REF} = 'refs/heads/release'*) ]]; then 94 | CHANNEL=test 95 | else 96 | CHANNEL=nightly 97 | fi 98 | python -m pip install --pre torch torchvision --index-url https://download.pytorch.org/whl/${CHANNEL}/cpu 99 | 100 | - name: Install torchcodec from the wheel 101 | run: | 102 | wheel_path=`find pytorch/torchcodec/dist -type f -name "*.whl"` 103 | echo Installing $wheel_path 104 | python -m pip install $wheel_path -vvv 105 | 106 | - name: Check out torchcodec repo 107 | uses: actions/checkout@v3 108 | 109 | - name: Install ffmpeg 110 | run: | 111 | conda install "ffmpeg=${{ matrix.ffmpeg-version-for-tests }}" -c conda-forge 112 | ffmpeg -version 113 | 114 | - name: Install test dependencies 115 | run: | 116 | python -m pip install numpy pytest pillow 117 | 118 | - name: Delete the src/ folder just for fun 119 | run: | 120 | # The only reason we checked-out the repo is to get access to the 121 | # tests. We don't care about the rest. Out of precaution, we delete 122 | # the src/ folder to be extra sure that we're running the code from 123 | # the installed wheel rather than from the source. 124 | # This is just to be extra cautious and very overkill because a) 125 | # there's no way the `torchcodec` package from src/ can be found from 126 | # the PythonPath: the main point of `src/` is precisely to protect 127 | # against that and b) if we ever were to execute code from 128 | # `src/torchcodec`, it would fail loudly because the built .so files 129 | # aren't present there. 130 | rm -r src/ 131 | ls -lh 132 | 133 | - name: Run Python tests 134 | run: | 135 | pytest --override-ini="addopts=-v" test 136 | -------------------------------------------------------------------------------- /src/torchcodec/_core/CpuDeviceInterface.h: -------------------------------------------------------------------------------- 1 | // Copyright (c) Meta Platforms, Inc. and affiliates. 2 | // All rights reserved. 3 | // 4 | // This source code is licensed under the BSD-style license found in the 5 | // LICENSE file in the root directory of this source tree. 
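//
// CPU implementation of DeviceInterface. Decoded AVFrames are converted into
// output tensors on the CPU: color conversion of video frames goes through
// either swscale or FFmpeg's filtergraph API (see getColorConversionLibrary()
// below), and audio resampling is handled with swresample. User-supplied
// transforms are translated into a filtergraph filter string during
// initialization.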
6 | 7 | #pragma once 8 | 9 | #include "DeviceInterface.h" 10 | #include "FFMPEGCommon.h" 11 | #include "FilterGraph.h" 12 | 13 | namespace facebook::torchcodec { 14 | 15 | class CpuDeviceInterface : public DeviceInterface { 16 | public: 17 | CpuDeviceInterface(const torch::Device& device); 18 | 19 | virtual ~CpuDeviceInterface() {} 20 | 21 | std::optional findCodec( 22 | [[maybe_unused]] const AVCodecID& codecId, 23 | [[maybe_unused]] bool isDecoder = true) override { 24 | return std::nullopt; 25 | } 26 | 27 | virtual void initialize( 28 | const AVStream* avStream, 29 | const UniqueDecodingAVFormatContext& avFormatCtx, 30 | const SharedAVCodecContext& codecContext) override; 31 | 32 | virtual void initializeVideo( 33 | const VideoStreamOptions& videoStreamOptions, 34 | const std::vector>& transforms, 35 | const std::optional& resizedOutputDims) override; 36 | 37 | virtual void initializeAudio( 38 | const AudioStreamOptions& audioStreamOptions) override; 39 | 40 | virtual std::optional maybeFlushAudioBuffers() override; 41 | 42 | void convertAVFrameToFrameOutput( 43 | UniqueAVFrame& avFrame, 44 | FrameOutput& frameOutput, 45 | std::optional preAllocatedOutputTensor) override; 46 | 47 | std::string getDetails() override; 48 | 49 | private: 50 | void convertAudioAVFrameToFrameOutput( 51 | UniqueAVFrame& srcAVFrame, 52 | FrameOutput& frameOutput); 53 | 54 | void convertVideoAVFrameToFrameOutput( 55 | UniqueAVFrame& avFrame, 56 | FrameOutput& frameOutput, 57 | std::optional preAllocatedOutputTensor); 58 | 59 | int convertAVFrameToTensorUsingSwScale( 60 | const UniqueAVFrame& avFrame, 61 | torch::Tensor& outputTensor, 62 | const FrameDims& outputDims); 63 | 64 | torch::Tensor convertAVFrameToTensorUsingFilterGraph( 65 | const UniqueAVFrame& avFrame, 66 | const FrameDims& outputDims); 67 | 68 | ColorConversionLibrary getColorConversionLibrary( 69 | const FrameDims& inputFrameDims) const; 70 | 71 | VideoStreamOptions videoStreamOptions_; 72 | AVRational timeBase_; 73 | 74 | // If the resized output dimensions are present, then we always use those as 75 | // the output frame's dimensions. If they are not present, then we use the 76 | // dimensions of the raw decoded frame. Note that we do not know the 77 | // dimensions of the raw decoded frame until very late; we learn it in 78 | // convertAVFrameToFrameOutput(). Deciding the final output frame's actual 79 | // dimensions late allows us to handle video streams with variable 80 | // resolutions. 81 | std::optional resizedOutputDims_; 82 | 83 | // Color-conversion objects. Only one of filterGraph_ and swsContext_ should 84 | // be non-null. Which one we use is determined dynamically in 85 | // getColorConversionLibrary() each time we decode a frame. 86 | // 87 | // Creating both filterGraph_ and swsContext_ is relatively expensive, so we 88 | // reuse them across frames. However, it is possbile that subsequent frames 89 | // are different enough (change in dimensions) that we can't reuse the color 90 | // conversion object. We store the relevant frame context from the frame used 91 | // to create the object last time. We always compare the current frame's info 92 | // against the previous one to determine if we need to recreate the color 93 | // conversion object. 94 | // 95 | // TODO: The names of these fields is confusing, as the actual color 96 | // conversion object for Sws has "context" in the name, and we use 97 | // "context" for the structs we store to know if we need to recreate a 98 | // color conversion object. We should clean that up. 
99 | std::unique_ptr<FilterGraph> filterGraph_;
100 | FiltersContext prevFiltersContext_;
101 | UniqueSwsContext swsContext_;
102 | SwsFrameContext prevSwsFrameContext_;
103 | 
104 | // We pass these filters to FFmpeg's filtergraph API. It is a simple pipeline
105 | // of what FFmpeg calls "filters" to apply to decoded frames before returning
106 | // them. In the PyTorch ecosystem, we call these "transforms". During
107 | // initialization, we convert the user-supplied transforms into this string of
108 | // filters.
109 | //
110 | // Note that if there are no user-supplied transforms, then the default filter
111 | // we use is the copy filter, which is just an identity: it emits the output
112 | // frame unchanged. We supply such a filter because we can't supply just the
113 | // empty string; we must supply SOME filter.
114 | //
115 | // See also [Transform and Format Conversion Order] for more on filters.
116 | std::string filters_ = "copy";
117 | 
118 | // Values set during initialization and referred to in
119 | // getColorConversionLibrary().
120 | bool areTransformsSwScaleCompatible_;
121 | bool userRequestedSwScale_;
122 | 
123 | bool initialized_ = false;
124 | 
125 | // Audio-specific members
126 | AudioStreamOptions audioStreamOptions_;
127 | UniqueSwrContext swrContext_;
128 | };
129 | 
130 | } // namespace facebook::torchcodec
131 | 
--------------------------------------------------------------------------------