├── version.txt ├── examples ├── decoding │ ├── README.rst │ └── audio_decoding.py ├── encoding │ ├── README.rst │ └── audio_encoding.py └── README.rst ├── .github ├── ISSUE_TEMPLATE │ ├── config.yml │ ├── documentation.yml │ ├── feature-request.yml │ └── bug-report.yml └── workflows │ ├── lint.yaml │ ├── reference_resources.yaml │ ├── cpp_tests.yaml │ ├── build_ffmpeg.yaml │ └── macos_wheel.yaml ├── test ├── resources │ ├── nasa_13013.mp4.stream3.frame000180.pt │ ├── testsrc2.mp4 │ ├── av1_video.mkv │ ├── h265_video.mp4 │ ├── nasa_13013.mp4 │ ├── h264_10bits.mp4 │ ├── h265_10bits.mp4 │ ├── sine_mono_s16.wav │ ├── sine_mono_s32.wav │ ├── testsrc2_h265.mp4 │ ├── testsrc2_vp8.webm │ ├── testsrc2_vp9.webm │ ├── testsrc2_mpeg4.avi │ ├── bt709_full_range.mp4 │ ├── sine_mono_s32_8000.wav │ ├── nasa_13013.mp4.audio.mp3 │ ├── sine_mono_s32_44100.wav │ ├── nasa_13013.mp4.audio_44100.mp3 │ ├── nasa_13013.mp4.time10.000000.pt │ ├── nasa_13013.mp4.time12.979633.pt │ ├── nasa_13013.mp4.time6.000000.pt │ ├── nasa_13013.mp4.time6.100000.pt │ ├── av1_video.mkv.stream0.frame000010.pt │ ├── h265_video.mp4.stream0.frame000005.pt │ ├── nasa_13013.mp4.stream0.frame000000.pt │ ├── nasa_13013.mp4.stream0.frame000001.pt │ ├── nasa_13013.mp4.stream0.frame000002.pt │ ├── nasa_13013.mp4.stream0.frame000003.pt │ ├── nasa_13013.mp4.stream0.frame000004.pt │ ├── nasa_13013.mp4.stream0.frame000005.pt │ ├── nasa_13013.mp4.stream0.frame000006.pt │ ├── nasa_13013.mp4.stream0.frame000007.pt │ ├── nasa_13013.mp4.stream0.frame000008.pt │ ├── nasa_13013.mp4.stream0.frame000009.pt │ ├── nasa_13013.mp4.stream0.frame000015.pt │ ├── nasa_13013.mp4.stream0.frame000020.pt │ ├── nasa_13013.mp4.stream0.frame000025.pt │ ├── nasa_13013.mp4.stream0.frame000030.pt │ ├── nasa_13013.mp4.stream0.frame000035.pt │ ├── nasa_13013.mp4.stream3.frame000000.pt │ ├── nasa_13013.mp4.stream3.frame000001.pt │ ├── nasa_13013.mp4.stream3.frame000002.pt │ ├── nasa_13013.mp4.stream3.frame000003.pt │ ├── nasa_13013.mp4.stream3.frame000004.pt │ ├── nasa_13013.mp4.stream3.frame000005.pt │ ├── nasa_13013.mp4.stream3.frame000006.pt │ ├── nasa_13013.mp4.stream3.frame000007.pt │ ├── nasa_13013.mp4.stream3.frame000008.pt │ ├── nasa_13013.mp4.stream3.frame000009.pt │ ├── nasa_13013.mp4.stream3.frame000015.pt │ ├── nasa_13013.mp4.stream3.frame000020.pt │ ├── nasa_13013.mp4.stream3.frame000025.pt │ ├── nasa_13013.mp4.stream3.frame000030.pt │ ├── nasa_13013.mp4.stream3.frame000035.pt │ ├── nasa_13013.mp4.stream3.frame000386.pt │ ├── nasa_13013.mp4.stream3.frame000387.pt │ ├── nasa_13013.mp4.stream3.frame000388.pt │ ├── nasa_13013.mp4.stream3.frame000389.pt │ ├── nasa_13013.mp4.stream4.all_frames.pt │ ├── sine_mono_s16.wav.stream0.all_frames.pt │ ├── sine_mono_s32.wav.stream0.all_frames.pt │ ├── nasa_13013.mp4.audio.mp3.stream0.all_frames.pt │ ├── nasa_13013.mp4.crop_300_200_50_35_exact_1.stream3.frame000000.pt │ ├── nasa_13013.mp4.crop_300_200_50_35_exact_1.stream3.frame000015.pt │ ├── nasa_13013.mp4.crop_300_200_50_35_exact_1.stream3.frame000200.pt │ ├── nasa_13013.mp4.crop_300_200_50_35_exact_1.stream3.frame000389.pt │ ├── nasa_13013.mp4.scale_240_135_flags_bilinear.stream3.frame000017.pt │ ├── nasa_13013.mp4.scale_240_135_flags_bilinear.stream3.frame000230.pt │ ├── nasa_13013.mp4.scale_240_135_flags_bilinear.stream3.frame000389.pt │ ├── sine_mono_s16.wav.stream0.all_frames_info.json │ └── sine_mono_s32_8000.wav.stream0.all_frames_info.json ├── test_version.py ├── __init__.py ├── test_policy.py ├── CMakeLists.txt ├── test_video_clip_sampler.py └── conftest.py ├── src 
└── torchcodec │ ├── encoders │ └── __init__.py │ ├── samplers │ ├── __init__.py │ └── _common.py │ ├── _samplers │ └── __init__.py │ ├── transforms │ └── __init__.py │ ├── _core │ ├── NVCUVIDRuntimeLoader.h │ ├── ValidationUtils.h │ ├── ValidationUtils.cpp │ ├── AVIOTensorContext.h │ ├── __init__.py │ ├── NVDECCache.cpp │ ├── CUDACommon.h │ ├── AVIOContextHolder.cpp │ ├── FilterGraph.h │ ├── Frame.cpp │ ├── pybind_ops.cpp │ ├── AVIOFileLikeContext.h │ ├── Frame.h │ ├── StreamOptions.h │ ├── CudaDeviceInterface.h │ ├── Transform.h │ ├── AVIOContextHolder.h │ ├── Metadata.h │ ├── NVDECCache.h │ ├── AVIOFileLikeContext.cpp │ ├── DeviceInterface.cpp │ ├── Transform.cpp │ ├── Metadata.cpp │ ├── Cache.h │ ├── AVIOTensorContext.cpp │ ├── fetch_and_expose_non_gpl_ffmpeg_libs.cmake │ └── CpuDeviceInterface.h │ ├── decoders │ ├── __init__.py │ └── _decoder_utils.py │ ├── __init__.py │ ├── _internally_replaced_utils.py │ └── share │ └── cmake │ └── TorchCodec │ └── TorchCodecConfig.cmake ├── benchmarks ├── decoders │ ├── benchmark_readme_chart.png │ ├── generate_readme_chart.py │ ├── memprofile_decoders.py │ ├── benchmark_audio_decoders.py │ └── generate_readme_data.py └── samplers │ └── benchmark_samplers.py ├── docs ├── source │ ├── _static │ │ ├── img │ │ │ ├── pytorch-logo-dark.png │ │ │ ├── pytorch-logo-flame.png │ │ │ ├── generic-pytorch-logo.png │ │ │ ├── card-background.svg │ │ │ ├── pytorch-logo-flame.svg │ │ │ └── pytorch-logo-dark.svg │ │ └── css │ │ │ └── custom_torchcodec.css │ ├── _templates │ │ ├── function.rst │ │ ├── class.rst │ │ ├── dataclass.rst │ │ └── layout.html │ ├── api_ref_torchcodec.rst │ ├── api_ref_encoders.rst │ ├── api_ref_transforms.rst │ ├── api_ref_samplers.rst │ ├── api_ref_decoders.rst │ ├── glossary.rst │ └── index.rst ├── requirements.txt └── Makefile ├── mypy.ini ├── .flake8 ├── MANIFEST.in ├── packaging ├── fake_smoke_test.py ├── pre_build_script.sh ├── build_ffmpeg.bat ├── helpers.sh ├── vc_env_helper.bat ├── post_build_script.sh └── check_glibcxx.py ├── CMakeLists.txt ├── .gitignore ├── .pre-commit-config.yaml ├── pyproject.toml ├── LICENSE ├── CODE_OF_CONDUCT.md ├── .clang-format └── CONTRIBUTING.md /version.txt: -------------------------------------------------------------------------------- 1 | 0.10.0a0 2 | -------------------------------------------------------------------------------- /examples/decoding/README.rst: -------------------------------------------------------------------------------- 1 | Decoding 2 | -------- 3 | -------------------------------------------------------------------------------- /examples/encoding/README.rst: -------------------------------------------------------------------------------- 1 | Encoding 2 | -------- 3 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/config.yml: -------------------------------------------------------------------------------- 1 | blank_issues_enabled: true 2 | -------------------------------------------------------------------------------- /test/resources/nasa_13013.mp4.stream3.frame000180.pt: -------------------------------------------------------------------------------- 1 | nasa_13013.mp4.time6.000000.pt -------------------------------------------------------------------------------- /examples/README.rst: -------------------------------------------------------------------------------- 1 | .. 
_gallery: 2 | 3 | Interactive examples 4 | ==================== 5 | -------------------------------------------------------------------------------- /test/resources/testsrc2.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/meta-pytorch/torchcodec/HEAD/test/resources/testsrc2.mp4 -------------------------------------------------------------------------------- /test/resources/av1_video.mkv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/meta-pytorch/torchcodec/HEAD/test/resources/av1_video.mkv -------------------------------------------------------------------------------- /test/resources/h265_video.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/meta-pytorch/torchcodec/HEAD/test/resources/h265_video.mp4 -------------------------------------------------------------------------------- /test/resources/nasa_13013.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/meta-pytorch/torchcodec/HEAD/test/resources/nasa_13013.mp4 -------------------------------------------------------------------------------- /test/test_version.py: -------------------------------------------------------------------------------- 1 | import torchcodec 2 | 3 | 4 | def test_version(): 5 | assert torchcodec.__version__ 6 | -------------------------------------------------------------------------------- /test/resources/h264_10bits.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/meta-pytorch/torchcodec/HEAD/test/resources/h264_10bits.mp4 -------------------------------------------------------------------------------- /test/resources/h265_10bits.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/meta-pytorch/torchcodec/HEAD/test/resources/h265_10bits.mp4 -------------------------------------------------------------------------------- /test/resources/sine_mono_s16.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/meta-pytorch/torchcodec/HEAD/test/resources/sine_mono_s16.wav -------------------------------------------------------------------------------- /test/resources/sine_mono_s32.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/meta-pytorch/torchcodec/HEAD/test/resources/sine_mono_s32.wav -------------------------------------------------------------------------------- /test/resources/testsrc2_h265.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/meta-pytorch/torchcodec/HEAD/test/resources/testsrc2_h265.mp4 -------------------------------------------------------------------------------- /test/resources/testsrc2_vp8.webm: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/meta-pytorch/torchcodec/HEAD/test/resources/testsrc2_vp8.webm -------------------------------------------------------------------------------- /test/resources/testsrc2_vp9.webm: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/meta-pytorch/torchcodec/HEAD/test/resources/testsrc2_vp9.webm 
-------------------------------------------------------------------------------- /test/resources/testsrc2_mpeg4.avi: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/meta-pytorch/torchcodec/HEAD/test/resources/testsrc2_mpeg4.avi -------------------------------------------------------------------------------- /test/resources/bt709_full_range.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/meta-pytorch/torchcodec/HEAD/test/resources/bt709_full_range.mp4 -------------------------------------------------------------------------------- /test/resources/sine_mono_s32_8000.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/meta-pytorch/torchcodec/HEAD/test/resources/sine_mono_s32_8000.wav -------------------------------------------------------------------------------- /test/resources/nasa_13013.mp4.audio.mp3: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/meta-pytorch/torchcodec/HEAD/test/resources/nasa_13013.mp4.audio.mp3 -------------------------------------------------------------------------------- /test/resources/sine_mono_s32_44100.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/meta-pytorch/torchcodec/HEAD/test/resources/sine_mono_s32_44100.wav -------------------------------------------------------------------------------- /src/torchcodec/encoders/__init__.py: -------------------------------------------------------------------------------- 1 | from ._audio_encoder import AudioEncoder # noqa 2 | from ._video_encoder import VideoEncoder # noqa 3 | -------------------------------------------------------------------------------- /benchmarks/decoders/benchmark_readme_chart.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/meta-pytorch/torchcodec/HEAD/benchmarks/decoders/benchmark_readme_chart.png -------------------------------------------------------------------------------- /docs/source/_static/img/pytorch-logo-dark.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/meta-pytorch/torchcodec/HEAD/docs/source/_static/img/pytorch-logo-dark.png -------------------------------------------------------------------------------- /docs/source/_static/img/pytorch-logo-flame.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/meta-pytorch/torchcodec/HEAD/docs/source/_static/img/pytorch-logo-flame.png -------------------------------------------------------------------------------- /test/resources/nasa_13013.mp4.audio_44100.mp3: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/meta-pytorch/torchcodec/HEAD/test/resources/nasa_13013.mp4.audio_44100.mp3 -------------------------------------------------------------------------------- /test/resources/nasa_13013.mp4.time10.000000.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/meta-pytorch/torchcodec/HEAD/test/resources/nasa_13013.mp4.time10.000000.pt -------------------------------------------------------------------------------- /test/resources/nasa_13013.mp4.time12.979633.pt: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/meta-pytorch/torchcodec/HEAD/test/resources/nasa_13013.mp4.time12.979633.pt -------------------------------------------------------------------------------- /test/resources/nasa_13013.mp4.time6.000000.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/meta-pytorch/torchcodec/HEAD/test/resources/nasa_13013.mp4.time6.000000.pt -------------------------------------------------------------------------------- /test/resources/nasa_13013.mp4.time6.100000.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/meta-pytorch/torchcodec/HEAD/test/resources/nasa_13013.mp4.time6.100000.pt -------------------------------------------------------------------------------- /docs/source/_static/img/generic-pytorch-logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/meta-pytorch/torchcodec/HEAD/docs/source/_static/img/generic-pytorch-logo.png -------------------------------------------------------------------------------- /mypy.ini: -------------------------------------------------------------------------------- 1 | [mypy] 2 | 3 | files = src/torchcodec 4 | show_error_codes = True 5 | pretty = True 6 | allow_redefinition = True 7 | follow_untyped_imports = True 8 | -------------------------------------------------------------------------------- /test/resources/av1_video.mkv.stream0.frame000010.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/meta-pytorch/torchcodec/HEAD/test/resources/av1_video.mkv.stream0.frame000010.pt -------------------------------------------------------------------------------- /test/resources/h265_video.mp4.stream0.frame000005.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/meta-pytorch/torchcodec/HEAD/test/resources/h265_video.mp4.stream0.frame000005.pt -------------------------------------------------------------------------------- /test/resources/nasa_13013.mp4.stream0.frame000000.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/meta-pytorch/torchcodec/HEAD/test/resources/nasa_13013.mp4.stream0.frame000000.pt -------------------------------------------------------------------------------- /test/resources/nasa_13013.mp4.stream0.frame000001.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/meta-pytorch/torchcodec/HEAD/test/resources/nasa_13013.mp4.stream0.frame000001.pt -------------------------------------------------------------------------------- /test/resources/nasa_13013.mp4.stream0.frame000002.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/meta-pytorch/torchcodec/HEAD/test/resources/nasa_13013.mp4.stream0.frame000002.pt -------------------------------------------------------------------------------- /test/resources/nasa_13013.mp4.stream0.frame000003.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/meta-pytorch/torchcodec/HEAD/test/resources/nasa_13013.mp4.stream0.frame000003.pt 
-------------------------------------------------------------------------------- /test/resources/nasa_13013.mp4.stream0.frame000004.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/meta-pytorch/torchcodec/HEAD/test/resources/nasa_13013.mp4.stream0.frame000004.pt -------------------------------------------------------------------------------- /test/resources/nasa_13013.mp4.stream0.frame000005.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/meta-pytorch/torchcodec/HEAD/test/resources/nasa_13013.mp4.stream0.frame000005.pt -------------------------------------------------------------------------------- /test/resources/nasa_13013.mp4.stream0.frame000006.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/meta-pytorch/torchcodec/HEAD/test/resources/nasa_13013.mp4.stream0.frame000006.pt -------------------------------------------------------------------------------- /test/resources/nasa_13013.mp4.stream0.frame000007.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/meta-pytorch/torchcodec/HEAD/test/resources/nasa_13013.mp4.stream0.frame000007.pt -------------------------------------------------------------------------------- /test/resources/nasa_13013.mp4.stream0.frame000008.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/meta-pytorch/torchcodec/HEAD/test/resources/nasa_13013.mp4.stream0.frame000008.pt -------------------------------------------------------------------------------- /test/resources/nasa_13013.mp4.stream0.frame000009.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/meta-pytorch/torchcodec/HEAD/test/resources/nasa_13013.mp4.stream0.frame000009.pt -------------------------------------------------------------------------------- /test/resources/nasa_13013.mp4.stream0.frame000015.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/meta-pytorch/torchcodec/HEAD/test/resources/nasa_13013.mp4.stream0.frame000015.pt -------------------------------------------------------------------------------- /test/resources/nasa_13013.mp4.stream0.frame000020.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/meta-pytorch/torchcodec/HEAD/test/resources/nasa_13013.mp4.stream0.frame000020.pt -------------------------------------------------------------------------------- /test/resources/nasa_13013.mp4.stream0.frame000025.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/meta-pytorch/torchcodec/HEAD/test/resources/nasa_13013.mp4.stream0.frame000025.pt -------------------------------------------------------------------------------- /test/resources/nasa_13013.mp4.stream0.frame000030.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/meta-pytorch/torchcodec/HEAD/test/resources/nasa_13013.mp4.stream0.frame000030.pt -------------------------------------------------------------------------------- /test/resources/nasa_13013.mp4.stream0.frame000035.pt: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/meta-pytorch/torchcodec/HEAD/test/resources/nasa_13013.mp4.stream0.frame000035.pt -------------------------------------------------------------------------------- /test/resources/nasa_13013.mp4.stream3.frame000000.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/meta-pytorch/torchcodec/HEAD/test/resources/nasa_13013.mp4.stream3.frame000000.pt -------------------------------------------------------------------------------- /test/resources/nasa_13013.mp4.stream3.frame000001.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/meta-pytorch/torchcodec/HEAD/test/resources/nasa_13013.mp4.stream3.frame000001.pt -------------------------------------------------------------------------------- /test/resources/nasa_13013.mp4.stream3.frame000002.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/meta-pytorch/torchcodec/HEAD/test/resources/nasa_13013.mp4.stream3.frame000002.pt -------------------------------------------------------------------------------- /test/resources/nasa_13013.mp4.stream3.frame000003.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/meta-pytorch/torchcodec/HEAD/test/resources/nasa_13013.mp4.stream3.frame000003.pt -------------------------------------------------------------------------------- /test/resources/nasa_13013.mp4.stream3.frame000004.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/meta-pytorch/torchcodec/HEAD/test/resources/nasa_13013.mp4.stream3.frame000004.pt -------------------------------------------------------------------------------- /test/resources/nasa_13013.mp4.stream3.frame000005.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/meta-pytorch/torchcodec/HEAD/test/resources/nasa_13013.mp4.stream3.frame000005.pt -------------------------------------------------------------------------------- /test/resources/nasa_13013.mp4.stream3.frame000006.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/meta-pytorch/torchcodec/HEAD/test/resources/nasa_13013.mp4.stream3.frame000006.pt -------------------------------------------------------------------------------- /test/resources/nasa_13013.mp4.stream3.frame000007.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/meta-pytorch/torchcodec/HEAD/test/resources/nasa_13013.mp4.stream3.frame000007.pt -------------------------------------------------------------------------------- /test/resources/nasa_13013.mp4.stream3.frame000008.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/meta-pytorch/torchcodec/HEAD/test/resources/nasa_13013.mp4.stream3.frame000008.pt -------------------------------------------------------------------------------- /test/resources/nasa_13013.mp4.stream3.frame000009.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/meta-pytorch/torchcodec/HEAD/test/resources/nasa_13013.mp4.stream3.frame000009.pt -------------------------------------------------------------------------------- 
/test/resources/nasa_13013.mp4.stream3.frame000015.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/meta-pytorch/torchcodec/HEAD/test/resources/nasa_13013.mp4.stream3.frame000015.pt -------------------------------------------------------------------------------- /test/resources/nasa_13013.mp4.stream3.frame000020.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/meta-pytorch/torchcodec/HEAD/test/resources/nasa_13013.mp4.stream3.frame000020.pt -------------------------------------------------------------------------------- /test/resources/nasa_13013.mp4.stream3.frame000025.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/meta-pytorch/torchcodec/HEAD/test/resources/nasa_13013.mp4.stream3.frame000025.pt -------------------------------------------------------------------------------- /test/resources/nasa_13013.mp4.stream3.frame000030.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/meta-pytorch/torchcodec/HEAD/test/resources/nasa_13013.mp4.stream3.frame000030.pt -------------------------------------------------------------------------------- /test/resources/nasa_13013.mp4.stream3.frame000035.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/meta-pytorch/torchcodec/HEAD/test/resources/nasa_13013.mp4.stream3.frame000035.pt -------------------------------------------------------------------------------- /test/resources/nasa_13013.mp4.stream3.frame000386.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/meta-pytorch/torchcodec/HEAD/test/resources/nasa_13013.mp4.stream3.frame000386.pt -------------------------------------------------------------------------------- /test/resources/nasa_13013.mp4.stream3.frame000387.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/meta-pytorch/torchcodec/HEAD/test/resources/nasa_13013.mp4.stream3.frame000387.pt -------------------------------------------------------------------------------- /test/resources/nasa_13013.mp4.stream3.frame000388.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/meta-pytorch/torchcodec/HEAD/test/resources/nasa_13013.mp4.stream3.frame000388.pt -------------------------------------------------------------------------------- /test/resources/nasa_13013.mp4.stream3.frame000389.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/meta-pytorch/torchcodec/HEAD/test/resources/nasa_13013.mp4.stream3.frame000389.pt -------------------------------------------------------------------------------- /test/resources/nasa_13013.mp4.stream4.all_frames.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/meta-pytorch/torchcodec/HEAD/test/resources/nasa_13013.mp4.stream4.all_frames.pt -------------------------------------------------------------------------------- /test/resources/sine_mono_s16.wav.stream0.all_frames.pt: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/meta-pytorch/torchcodec/HEAD/test/resources/sine_mono_s16.wav.stream0.all_frames.pt -------------------------------------------------------------------------------- /test/resources/sine_mono_s32.wav.stream0.all_frames.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/meta-pytorch/torchcodec/HEAD/test/resources/sine_mono_s32.wav.stream0.all_frames.pt -------------------------------------------------------------------------------- /.flake8: -------------------------------------------------------------------------------- 1 | [flake8] 2 | max-line-length = 120 3 | ignore = E203, E402, W503, W504, F821, E501, B, C4, EXE, E251, E202 4 | per-file-ignores = 5 | __init__.py: F401, F403, F405 6 | -------------------------------------------------------------------------------- /test/resources/nasa_13013.mp4.audio.mp3.stream0.all_frames.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/meta-pytorch/torchcodec/HEAD/test/resources/nasa_13013.mp4.audio.mp3.stream0.all_frames.pt -------------------------------------------------------------------------------- /docs/source/_templates/function.rst: -------------------------------------------------------------------------------- 1 | .. role:: hidden 2 | :class: hidden-section 3 | .. currentmodule:: {{ module }} 4 | 5 | 6 | {{ name | underline}} 7 | 8 | .. autofunction:: {{ name }} 9 | -------------------------------------------------------------------------------- /src/torchcodec/samplers/__init__.py: -------------------------------------------------------------------------------- 1 | from ._index_based import clips_at_random_indices, clips_at_regular_indices 2 | from ._time_based import clips_at_random_timestamps, clips_at_regular_timestamps 3 | -------------------------------------------------------------------------------- /test/resources/nasa_13013.mp4.crop_300_200_50_35_exact_1.stream3.frame000000.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/meta-pytorch/torchcodec/HEAD/test/resources/nasa_13013.mp4.crop_300_200_50_35_exact_1.stream3.frame000000.pt -------------------------------------------------------------------------------- /test/resources/nasa_13013.mp4.crop_300_200_50_35_exact_1.stream3.frame000015.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/meta-pytorch/torchcodec/HEAD/test/resources/nasa_13013.mp4.crop_300_200_50_35_exact_1.stream3.frame000015.pt -------------------------------------------------------------------------------- /test/resources/nasa_13013.mp4.crop_300_200_50_35_exact_1.stream3.frame000200.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/meta-pytorch/torchcodec/HEAD/test/resources/nasa_13013.mp4.crop_300_200_50_35_exact_1.stream3.frame000200.pt -------------------------------------------------------------------------------- /test/resources/nasa_13013.mp4.crop_300_200_50_35_exact_1.stream3.frame000389.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/meta-pytorch/torchcodec/HEAD/test/resources/nasa_13013.mp4.crop_300_200_50_35_exact_1.stream3.frame000389.pt -------------------------------------------------------------------------------- 
/test/resources/nasa_13013.mp4.scale_240_135_flags_bilinear.stream3.frame000017.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/meta-pytorch/torchcodec/HEAD/test/resources/nasa_13013.mp4.scale_240_135_flags_bilinear.stream3.frame000017.pt -------------------------------------------------------------------------------- /test/resources/nasa_13013.mp4.scale_240_135_flags_bilinear.stream3.frame000230.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/meta-pytorch/torchcodec/HEAD/test/resources/nasa_13013.mp4.scale_240_135_flags_bilinear.stream3.frame000230.pt -------------------------------------------------------------------------------- /test/resources/nasa_13013.mp4.scale_240_135_flags_bilinear.stream3.frame000389.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/meta-pytorch/torchcodec/HEAD/test/resources/nasa_13013.mp4.scale_240_135_flags_bilinear.stream3.frame000389.pt -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | include README.md 2 | include LICENSE 3 | 4 | include CMakeLists.txt 5 | recursive-include src * 6 | 7 | recursive-exclude * __pycache__ 8 | recursive-exclude src *.py[co] 9 | recursive-exclude src *.so 10 | -------------------------------------------------------------------------------- /test/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | # All rights reserved. 3 | # 4 | # This source code is licensed under the BSD-style license found in the 5 | # LICENSE file in the root directory of this source tree. 6 | -------------------------------------------------------------------------------- /docs/source/_templates/class.rst: -------------------------------------------------------------------------------- 1 | .. role:: hidden 2 | :class: hidden-section 3 | .. currentmodule:: {{ module }} 4 | 5 | 6 | {{ name | underline}} 7 | 8 | .. autoclass:: {{ name }} 9 | :members: 10 | :special-members: __getitem__ 11 | -------------------------------------------------------------------------------- /docs/source/_templates/dataclass.rst: -------------------------------------------------------------------------------- 1 | .. role:: hidden 2 | :class: hidden-section 3 | .. currentmodule:: {{ module }} 4 | 5 | 6 | {{ name | underline}} 7 | 8 | .. autoclass:: {{ name }} 9 | :members: 10 | :undoc-members: __init__ 11 | :inherited-members: 12 | -------------------------------------------------------------------------------- /docs/requirements.txt: -------------------------------------------------------------------------------- 1 | sphinx-gallery>0.11 2 | sphinx==5.0.0 3 | sphinx_design 4 | sphinx_copybutton 5 | sphinx-tabs 6 | sphinx-sitemap 7 | matplotlib 8 | torchvision 9 | ipython 10 | fsspec 11 | aiohttp 12 | joblib 13 | -e git+https://github.com/pytorch/pytorch_sphinx_theme.git#egg=pytorch_sphinx_theme 14 | -------------------------------------------------------------------------------- /src/torchcodec/_samplers/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | # All rights reserved. 
3 | # 4 | # This source code is licensed under the BSD-style license found in the 5 | # LICENSE file in the root directory of this source tree. 6 | 7 | from .video_clip_sampler import * # noqa 8 | -------------------------------------------------------------------------------- /docs/source/api_ref_torchcodec.rst: -------------------------------------------------------------------------------- 1 | .. _torchcodec: 2 | 3 | =================== 4 | torchcodec 5 | =================== 6 | 7 | .. currentmodule:: torchcodec 8 | 9 | 10 | .. autosummary:: 11 | :toctree: generated/ 12 | :nosignatures: 13 | :template: dataclass.rst 14 | 15 | Frame 16 | FrameBatch 17 | AudioSamples 18 | -------------------------------------------------------------------------------- /src/torchcodec/transforms/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | # All rights reserved. 3 | # 4 | # This source code is licensed under the BSD-style license found in the 5 | # LICENSE file in the root directory of this source tree. 6 | 7 | from ._decoder_transforms import ( # noqa 8 | CenterCrop, 9 | DecoderTransform, 10 | RandomCrop, 11 | Resize, 12 | ) 13 | -------------------------------------------------------------------------------- /packaging/fake_smoke_test.py: -------------------------------------------------------------------------------- 1 | # This is a fake smoke test that runs on the test-infra instances after we build 2 | # a wheel. We cannot run a real smoke test over there, because the machines are 3 | # too old to even install a proper ffmpeg version - and without ffmpeg, 4 | # importing torchcodec just fails. It's OK, we run our *entire* test suite on 5 | # those wheels anyway (on other machines). 6 | 7 | print("Success") 8 | -------------------------------------------------------------------------------- /CMakeLists.txt: -------------------------------------------------------------------------------- 1 | cmake_minimum_required(VERSION 3.18) 2 | project(TorchCodec) 3 | 4 | # Define LINUX platform variable globally 5 | if (UNIX AND NOT APPLE) 6 | set(LINUX TRUE) 7 | else() 8 | set(LINUX FALSE) 9 | endif() 10 | 11 | add_subdirectory(src/torchcodec/_core) 12 | 13 | 14 | option(BUILD_TESTS "Build tests" OFF) 15 | if(BUILD_TESTS) 16 | enable_testing() 17 | add_subdirectory(test) 18 | endif() 19 | -------------------------------------------------------------------------------- /docs/source/_static/img/card-background.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 11 | 12 | 13 | 14 | -------------------------------------------------------------------------------- /docs/source/api_ref_encoders.rst: -------------------------------------------------------------------------------- 1 | .. _encoders: 2 | 3 | =================== 4 | torchcodec.encoders 5 | =================== 6 | 7 | .. currentmodule:: torchcodec.encoders 8 | 9 | 10 | For an audio encoder tutorial, see: :ref:`sphx_glr_generated_examples_encoding_audio_encoding.py`. 11 | 12 | 13 | .. autosummary:: 14 | :toctree: generated/ 15 | :nosignatures: 16 | :template: class.rst 17 | 18 | AudioEncoder 19 | VideoEncoder 20 | -------------------------------------------------------------------------------- /docs/source/api_ref_transforms.rst: -------------------------------------------------------------------------------- 1 | ..
_transforms: 2 | 3 | ===================== 4 | torchcodec.transforms 5 | ===================== 6 | 7 | .. currentmodule:: torchcodec.transforms 8 | 9 | For a tutorial, see: TODO_DECODER_TRANSFORMS_TUTORIAL. 10 | 11 | .. autosummary:: 12 | :toctree: generated/ 13 | :nosignatures: 14 | :template: dataclass.rst 15 | 16 | DecoderTransform 17 | CenterCrop 18 | RandomCrop 19 | Resize 20 | -------------------------------------------------------------------------------- /src/torchcodec/_core/NVCUVIDRuntimeLoader.h: -------------------------------------------------------------------------------- 1 | // Copyright (c) Meta Platforms, Inc. and affiliates. 2 | // All rights reserved. 3 | // 4 | // This source code is licensed under the BSD-style license found in the 5 | // LICENSE file in the root directory of this source tree. 6 | 7 | #pragma once 8 | 9 | namespace facebook::torchcodec { 10 | 11 | // See note in corresponding cpp file 12 | bool loadNVCUVIDLibrary(); 13 | 14 | } // namespace facebook::torchcodec 15 | -------------------------------------------------------------------------------- /docs/source/api_ref_samplers.rst: -------------------------------------------------------------------------------- 1 | .. _samplers: 2 | 3 | =================== 4 | torchcodec.samplers 5 | =================== 6 | 7 | .. currentmodule:: torchcodec.samplers 8 | 9 | For a tutorial, see: :ref:`sphx_glr_generated_examples_decoding_sampling.py`. 10 | 11 | .. autosummary:: 12 | :toctree: generated/ 13 | :nosignatures: 14 | :template: function.rst 15 | 16 | clips_at_regular_indices 17 | clips_at_random_indices 18 | clips_at_regular_timestamps 19 | clips_at_random_timestamps 20 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/documentation.yml: -------------------------------------------------------------------------------- 1 | name: 📚 Documentation 2 | description: Report an issue related to the TorchCodec documentation 3 | 4 | body: 5 | - type: textarea 6 | attributes: 7 | label: 📚 The doc issue 8 | description: > 9 | Is something confusing or wrong? Let us know! Please provide URLs to the content in https://pytorch.org/torchcodec/stable/index.html that you're referring to. 10 | validations: 11 | required: true 12 | - type: markdown 13 | attributes: 14 | value: > 15 | Thanks for contributing 🎉! 16 | -------------------------------------------------------------------------------- /src/torchcodec/decoders/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | # All rights reserved. 3 | # 4 | # This source code is licensed under the BSD-style license found in the 5 | # LICENSE file in the root directory of this source tree. 6 | 7 | from .._core import AudioStreamMetadata, VideoStreamMetadata 8 | from ._audio_decoder import AudioDecoder # noqa 9 | from ._decoder_utils import set_cuda_backend # noqa 10 | from ._video_decoder import CpuFallbackStatus, VideoDecoder # noqa 11 | 12 | SimpleVideoDecoder = VideoDecoder 13 | -------------------------------------------------------------------------------- /packaging/pre_build_script.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # Copyright (c) Meta Platforms, Inc. and affiliates. 3 | # All rights reserved. 4 | # 5 | # This source code is licensed under the BSD-style license found in the 6 | # LICENSE file in the root directory of this source tree. 
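A minimal usage sketch of the public torchcodec.decoders API listed above. The file paths are placeholders and the exact method signatures are assumptions to be checked against the generated API reference:

    from torchcodec.decoders import AudioDecoder, VideoDecoder

    # Decode video frames by index; each frame is a uint8 tensor.
    decoder = VideoDecoder("video.mp4")  # placeholder path
    first_frame = decoder[0]
    print(decoder.metadata)

    # Decode an audio stream into a single block of samples.
    audio = AudioDecoder("audio.mp3")  # placeholder path
    samples = audio.get_all_samples()
    print(samples.data.shape, samples.sample_rate)
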
7 | 8 | set -ex 9 | 10 | # We need to install pybind11 because we need its CMake helpers in order to 11 | # compile correctly on Mac. Pybind11 is actually a C++ header-only library, 12 | # and PyTorch actually has it included. PyTorch, however, does not have the 13 | # CMake helpers. 14 | conda install -y pybind11 -c conda-forge 15 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/feature-request.yml: -------------------------------------------------------------------------------- 1 | name: 🚀 Feature request 2 | description: Submit a proposal/request for a new TorchCodec feature 3 | 4 | body: 5 | - type: textarea 6 | attributes: 7 | label: 🚀 The feature 8 | description: > 9 | What new functionality do you want? 10 | validations: 11 | required: true 12 | - type: textarea 13 | attributes: 14 | label: Motivation, pitch 15 | description: > 16 | Why do you want it? If this is related to another GitHub issue, please link that here. 17 | validations: 18 | required: false 19 | - type: markdown 20 | attributes: 21 | value: > 22 | Thanks for contributing 🎉! 23 | -------------------------------------------------------------------------------- /src/torchcodec/_core/ValidationUtils.h: -------------------------------------------------------------------------------- 1 | // Copyright (c) Meta Platforms, Inc. and affiliates. 2 | // All rights reserved. 3 | // 4 | // This source code is licensed under the BSD-style license found in the 5 | // LICENSE file in the root directory of this source tree. 6 | 7 | #pragma once 8 | 9 | #include 10 | #include 11 | #include 12 | 13 | namespace facebook::torchcodec { 14 | 15 | int validateInt64ToInt(int64_t value, const std::string& parameterName); 16 | 17 | std::optional validateOptionalInt64ToInt( 18 | const std::optional& value, 19 | const std::string& parameterName); 20 | 21 | } // namespace facebook::torchcodec 22 | -------------------------------------------------------------------------------- /packaging/build_ffmpeg.bat: -------------------------------------------------------------------------------- 1 | :: Copyright (c) Meta Platforms, Inc. and affiliates. 2 | :: All rights reserved. 3 | :: 4 | :: This source code is licensed under the BSD-style license found in the 5 | :: LICENSE file in the root directory of this source tree. 6 | 7 | :: Taken from torchaudio 8 | @echo off 9 | 10 | set PROJ_FOLDER=%cd% 11 | 12 | choco install -y --no-progress msys2 --package-parameters "/NoUpdate" 13 | C:\tools\msys64\usr\bin\env MSYSTEM=MINGW64 /bin/bash -l -c "pacman -S --noconfirm --needed base-devel mingw-w64-x86_64-toolchain diffutils" 14 | C:\tools\msys64\usr\bin\env MSYSTEM=MINGW64 /bin/bash -l -c "cd ${PROJ_FOLDER} && packaging/vc_env_helper.bat bash packaging/build_ffmpeg.sh" 15 | 16 | :end 17 | -------------------------------------------------------------------------------- /docs/source/_templates/layout.html: -------------------------------------------------------------------------------- 1 | {% extends "!layout.html" %} 2 | 3 | {% block sidebartitle %} 4 | 7 | {% include "searchbox.html" %} 8 | {% endblock %} 9 | 10 | 11 | {% block footer %} 12 | 13 | 17 | 18 | 21 | {% endblock %} 22 | -------------------------------------------------------------------------------- /benchmarks/decoders/generate_readme_chart.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | # All rights reserved. 
3 | # 4 | # This source code is licensed under the BSD-style license found in the 5 | # LICENSE file in the root directory of this source tree. 6 | 7 | import json 8 | 9 | from pathlib import Path 10 | 11 | from benchmark_decoders_library import plot_data 12 | 13 | 14 | def main() -> None: 15 | data_json = Path(__file__).parent / "benchmark_readme_data.json" 16 | with open(data_json, "r") as read_file: 17 | data_from_file = json.load(read_file) 18 | 19 | output_png = Path(__file__).parent / "benchmark_readme_chart.png" 20 | plot_data(data_from_file, output_png) 21 | 22 | 23 | if __name__ == "__main__": 24 | main() 25 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | build/ 2 | dist/ 3 | src/TorchCodec.egg-info/ 4 | */**/__pycache__ 5 | */__pycache__ 6 | */*.pyc 7 | */**/*.pyc 8 | */**/**/*.pyc 9 | */**/*~ 10 | *~ 11 | frame180.* # output from smoke test 12 | 13 | src/torchcodec/version.py 14 | 15 | docs/build 16 | # sphinx-gallery 17 | docs/source/generated_examples/ 18 | docs/source/gen_modules/ 19 | docs/source/generated/ 20 | docs/source/models/generated/ 21 | docs/source/sg_execution_times.rst 22 | # pytorch-sphinx-theme gets installed here 23 | docs/src 24 | 25 | .coverage 26 | htmlcov 27 | .*.swp 28 | *.so* 29 | *.dylib* 30 | */*.so* 31 | */*.dylib* 32 | *.swp 33 | *.swo 34 | gen.yml 35 | .mypy_cache 36 | .vscode/ 37 | .idea/ 38 | *.orig 39 | *-checkpoint.ipynb 40 | *.venv 41 | 42 | ## Xcode User settings 43 | xcuserdata/ 44 | 45 | # direnv 46 | .direnv 47 | .envrc 48 | -------------------------------------------------------------------------------- /test/test_policy.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | from torchcodec.samplers._common import _POLICY_FUNCTIONS 3 | 4 | 5 | @pytest.mark.parametrize( 6 | "policy, frame_indices, expected_frame_indices", 7 | ( 8 | ("repeat_last", [1, 2, 3], [1, 2, 3, 3, 3]), 9 | ("repeat_last", [1, 2, 3, 4, 5], [1, 2, 3, 4, 5]), 10 | ("wrap", [1, 2, 3], [1, 2, 3, 1, 2]), 11 | ("wrap", [1, 2, 3, 4, 5], [1, 2, 3, 4, 5]), 12 | ), 13 | ) 14 | def test_policy(policy, frame_indices, expected_frame_indices): 15 | policy_fun = _POLICY_FUNCTIONS[policy] 16 | assert policy_fun(frame_indices, desired_len=5) == expected_frame_indices 17 | 18 | 19 | def test_error_policy(): 20 | with pytest.raises(ValueError, match="beyond the number of frames"): 21 | _POLICY_FUNCTIONS["error"]([1, 2, 3], desired_len=5) 22 | -------------------------------------------------------------------------------- /docs/source/api_ref_decoders.rst: -------------------------------------------------------------------------------- 1 | .. _decoders: 2 | 3 | =================== 4 | torchcodec.decoders 5 | =================== 6 | 7 | .. currentmodule:: torchcodec.decoders 8 | 9 | 10 | For a video decoder tutorial, see: :ref:`sphx_glr_generated_examples_decoding_basic_example.py`. 11 | For an audio decoder tutorial, see: :ref:`sphx_glr_generated_examples_decoding_audio_decoding.py`. 12 | 13 | 14 | .. autosummary:: 15 | :toctree: generated/ 16 | :nosignatures: 17 | :template: class.rst 18 | 19 | VideoDecoder 20 | AudioDecoder 21 | 22 | .. autosummary:: 23 | :toctree: generated/ 24 | :nosignatures: 25 | :template: function.rst 26 | 27 | set_cuda_backend 28 | 29 | .. 
autosummary:: 30 | :toctree: generated/ 31 | :nosignatures: 32 | :template: dataclass.rst 33 | 34 | VideoStreamMetadata 35 | AudioStreamMetadata 36 | CpuFallbackStatus 37 | -------------------------------------------------------------------------------- /src/torchcodec/_core/ValidationUtils.cpp: -------------------------------------------------------------------------------- 1 | // Copyright (c) Meta Platforms, Inc. and affiliates. 2 | // All rights reserved. 3 | // 4 | // This source code is licensed under the BSD-style license found in the 5 | // LICENSE file in the root directory of this source tree. 6 | 7 | #include "ValidationUtils.h" 8 | #include 9 | #include "c10/util/Exception.h" 10 | 11 | namespace facebook::torchcodec { 12 | 13 | int validateInt64ToInt(int64_t value, const std::string& parameterName) { 14 | TORCH_CHECK( 15 | value >= std::numeric_limits::min() && 16 | value <= std::numeric_limits::max(), 17 | parameterName, 18 | "=", 19 | value, 20 | " is out of range for int type."); 21 | 22 | return static_cast(value); 23 | } 24 | 25 | std::optional validateOptionalInt64ToInt( 26 | const std::optional& value, 27 | const std::string& parameterName) { 28 | if (value.has_value()) { 29 | return validateInt64ToInt(value.value(), parameterName); 30 | } else { 31 | return std::nullopt; 32 | } 33 | } 34 | 35 | } // namespace facebook::torchcodec 36 | -------------------------------------------------------------------------------- /packaging/helpers.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | # Copyright (c) Meta Platforms, Inc. and affiliates. 3 | # All rights reserved. 4 | # 5 | # This source code is licensed under the BSD-style license found in the 6 | # LICENSE file in the root directory of this source tree. 7 | 8 | _list_wheel_files() { 9 | unzip -l "$1" | awk '{print $4}' 10 | } 11 | 12 | # $1 = path to wheel 13 | # $2 = pattern to grep for in wheel files 14 | # If files matching $2 are found in the wheel, the function errors. 15 | assert_not_in_wheel() { 16 | wheel_files=$(_list_wheel_files "$1") 17 | if grep -q "$2" <<< "$wheel_files" 18 | then 19 | echo "Found files in $1 that start with $2. Exiting!!" 20 | exit 1 21 | fi 22 | } 23 | 24 | # See assert_not_in_wheel 25 | assert_in_wheel() { 26 | wheel_files=$(_list_wheel_files "$1") 27 | if ! grep -q "$2" <<< "$wheel_files" 28 | then 29 | echo "Did not find files in $1 that start with $2. Exiting!!" 30 | exit 1 31 | fi 32 | } 33 | 34 | assert_ffmpeg_not_installed() { 35 | if command -v "ffmpeg" &> /dev/null 36 | then 37 | echo "ffmpeg is installed, but it shouldn't! Exiting!!" 38 | exit 1 39 | fi 40 | } 41 | -------------------------------------------------------------------------------- /src/torchcodec/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | # All rights reserved. 3 | # 4 | # This source code is licensed under the BSD-style license found in the 5 | # LICENSE file in the root directory of this source tree. 6 | 7 | from pathlib import Path 8 | 9 | # Note: usort wants to put Frame and FrameBatch after decoders and samplers, 10 | # but that results in circular import. 11 | from ._frame import AudioSamples, Frame, FrameBatch # usort:skip # noqa 12 | from . import decoders, encoders, samplers, transforms # noqa 13 | 14 | try: 15 | # Note that version.py is generated during install. 
16 | from .version import __version__ # noqa: F401 17 | except Exception: 18 | pass 19 | 20 | # cmake_prefix_path is needed for downstream cmake-based builds that use 21 | # torchcodec as a dependency to tell cmake where torchcodec is installed and where to find its 22 | # CMake configuration files. 23 | # Pytorch itself has a similar mechanism which we use in our setup.py! 24 | cmake_prefix_path = Path(__file__).parent / "share" / "cmake" 25 | # Similarly, these are exposed for downstream builds that use torchcodec as a 26 | # dependency. 27 | from ._core import core_library_path, ffmpeg_major_version # usort:skip 28 | -------------------------------------------------------------------------------- /src/torchcodec/_core/AVIOTensorContext.h: -------------------------------------------------------------------------------- 1 | // Copyright (c) Meta Platforms, Inc. and affiliates. 2 | // All rights reserved. 3 | // 4 | // This source code is licensed under the BSD-style license found in the 5 | // LICENSE file in the root directory of this source tree. 6 | 7 | #pragma once 8 | 9 | #include 10 | #include "AVIOContextHolder.h" 11 | 12 | namespace facebook::torchcodec { 13 | 14 | namespace detail { 15 | 16 | struct TensorContext { 17 | torch::Tensor data; 18 | int64_t current_pos; 19 | int64_t max_pos; 20 | }; 21 | 22 | } // namespace detail 23 | 24 | // For Decoding: enables users to pass in the entire video or audio as bytes. 25 | // Our read and seek functions then traverse the bytes in memory. 26 | class AVIOFromTensorContext : public AVIOContextHolder { 27 | public: 28 | explicit AVIOFromTensorContext(torch::Tensor data); 29 | 30 | private: 31 | detail::TensorContext tensorContext_; 32 | }; 33 | 34 | // For Encoding: used to encode into an output uint8 (bytes) tensor. 
35 | class AVIOToTensorContext : public AVIOContextHolder { 36 | public: 37 | explicit AVIOToTensorContext(); 38 | torch::Tensor getOutputTensor(); 39 | 40 | private: 41 | detail::TensorContext tensorContext_; 42 | }; 43 | 44 | } // namespace facebook::torchcodec 45 | -------------------------------------------------------------------------------- /.pre-commit-config.yaml: -------------------------------------------------------------------------------- 1 | repos: 2 | - repo: https://github.com/pre-commit/pre-commit-hooks 3 | rev: v4.6.0 4 | hooks: 5 | - id: check-docstring-first 6 | - id: trailing-whitespace 7 | - id: check-toml 8 | - id: check-yaml 9 | args: 10 | - --allow-multiple-documents 11 | - id: mixed-line-ending 12 | args: [--fix=lf] 13 | - id: end-of-file-fixer 14 | - id: check-added-large-files 15 | args: ['--maxkb=1000'] 16 | 17 | - repo: https://github.com/asottile/pyupgrade 18 | rev: v3.21.2 19 | hooks: 20 | - id: pyupgrade 21 | args: [--py310-plus] 22 | files: ^(test|src)/ 23 | exclude: ^examples/ 24 | 25 | - repo: https://github.com/omnilib/ufmt 26 | rev: v2.6.0 27 | hooks: 28 | - id: ufmt 29 | additional_dependencies: 30 | - black == 24.4.2 31 | - usort == 1.0.5 32 | 33 | - repo: https://github.com/PyCQA/flake8 34 | rev: 7.1.0 35 | hooks: 36 | - id: flake8 37 | args: [--config=.flake8] 38 | 39 | - repo: https://github.com/pre-commit/mirrors-clang-format 40 | rev: v18.1.3 41 | hooks: 42 | - id: clang-format 43 | name: clang-format 44 | files: \.(cpp|hpp|c|h)$ 45 | types: [file] 46 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [project] 2 | name = "torchcodec" 3 | description = "A video decoder for PyTorch" 4 | readme = "README.md" 5 | requires-python = ">=3.10" 6 | license-files = ["LICENSE"] 7 | authors = [ 8 | { name = "PyTorch Team", email = "packages@pytorch.org" }, 9 | ] 10 | dynamic = ["version"] 11 | 12 | [project.urls] 13 | GitHub = "https://github.com/pytorch/torchcodec" 14 | Documentation = "https://pytorch.org/torchcodec/stable/index.html" 15 | 16 | [tool.setuptools.dynamic] 17 | version = {file = "version.txt"} 18 | 19 | [build-system] 20 | requires = ["setuptools>=61.0"] 21 | build-backend = "setuptools.build_meta" 22 | 23 | [project.optional-dependencies] 24 | dev = [ 25 | "numpy", 26 | "pytest", 27 | "pillow", 28 | ] 29 | 30 | [tool.usort] 31 | # Needed for compatibility with internal linter 32 | first_party_detection = false 33 | 34 | [tool.black] 35 | target-version = ["py310"] 36 | 37 | [tool.ufmt] 38 | 39 | excludes = [ 40 | "examples", 41 | ] 42 | 43 | [tool.pytest.ini_options] 44 | markers = [ 45 | # defines a 'slow' mark to mark slow tests with `@pytest.mark.slow` 46 | "slow: mark test as slow" 47 | ] 48 | 49 | # We don't want to run the slow tests by default. These options are ignored in 50 | # the CI, where we definitely want the 'slow' tests to run. 51 | addopts = "-v -m 'not slow'" 52 | 53 | testpaths = ["test"] 54 | -------------------------------------------------------------------------------- /src/torchcodec/_core/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | # All rights reserved. 3 | # 4 | # This source code is licensed under the BSD-style license found in the 5 | # LICENSE file in the root directory of this source tree. 
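The tensor-backed AVIO contexts above are what enable in-memory I/O: decoding can start from bytes already held in RAM, and encoding can write into a uint8 tensor instead of a file (see the create_from_bytes/create_from_tensor and encode_*_to_tensor ops exported by _core below). A rough sketch of how this surfaces in the Python API — the paths are placeholders, and the constructor and to_tensor argument names are assumptions to verify against the docs:

    import torch
    from torchcodec.decoders import VideoDecoder
    from torchcodec.encoders import AudioEncoder

    # Decode directly from bytes already in memory rather than from a path.
    with open("video.mp4", "rb") as f:  # placeholder path
        raw_bytes = f.read()
    decoder = VideoDecoder(raw_bytes)

    # Encode audio samples into an in-memory tensor of encoded bytes.
    samples = torch.rand(1, 16_000)  # 1 channel, 1 second at 16 kHz
    encoded = AudioEncoder(samples, sample_rate=16_000).to_tensor(format="mp3")
    print(encoded.dtype)  # expected: torch.uint8
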
6 | 7 | 8 | from ._metadata import ( 9 | AudioStreamMetadata, 10 | ContainerMetadata, 11 | get_container_metadata, 12 | get_container_metadata_from_header, 13 | VideoStreamMetadata, 14 | ) 15 | from .ops import ( 16 | _add_video_stream, 17 | _get_backend_details, 18 | _get_key_frame_indices, 19 | _test_frame_pts_equality, 20 | add_audio_stream, 21 | add_video_stream, 22 | core_library_path, 23 | create_from_bytes, 24 | create_from_file, 25 | create_from_file_like, 26 | create_from_tensor, 27 | encode_audio_to_file, 28 | encode_audio_to_file_like, 29 | encode_audio_to_tensor, 30 | encode_video_to_file, 31 | encode_video_to_file_like, 32 | encode_video_to_tensor, 33 | ffmpeg_major_version, 34 | get_ffmpeg_library_versions, 35 | get_frame_at_index, 36 | get_frame_at_pts, 37 | get_frames_at_indices, 38 | get_frames_by_pts, 39 | get_frames_by_pts_in_range, 40 | get_frames_by_pts_in_range_audio, 41 | get_frames_in_range, 42 | get_json_metadata, 43 | get_next_frame, 44 | scan_all_streams_to_update_metadata, 45 | seek_to_pts, 46 | ) 47 | -------------------------------------------------------------------------------- /docs/source/glossary.rst: -------------------------------------------------------------------------------- 1 | Glossary 2 | ======== 3 | 4 | .. glossary:: 5 | 6 | pts 7 | Presentation Time Stamp. The time at which a frame or audio sample should be played. 8 | In TorchCodec, pts are expressed in seconds. 9 | 10 | best stream 11 | The notion of "best" stream is determined by FFmpeg. Quoting the `FFmpeg docs 12 | `_: 13 | 14 | *The best stream is determined according to various heuristics as the most likely to be what the user expects.* 15 | 16 | scan 17 | A scan corresponds to an entire pass over a video file, with the purpose 18 | of retrieving metadata about the different streams and frames. **It does 19 | not involve decoding**, so it is a lot cheaper than decoding the file. 20 | The :class:`~torchcodec.decoders.VideoDecoder` performs a scan when using 21 | ``seek_mode="exact"``, and doesn't scan when using 22 | ``seek_mode="approximate"``. 23 | 24 | clips 25 | A clip is a sequence of frames, usually in :term:`pts` order. The frames 26 | may not necessarily be consecutive. A clip is represented as a 4D 27 | :class:`~torchcodec.FrameBatch`. A group of clips, which is what the 28 | :ref:`samplers ` return, is represented as 5D 29 | :class:`~torchcodec.FrameBatch`. 30 | -------------------------------------------------------------------------------- /packaging/vc_env_helper.bat: -------------------------------------------------------------------------------- 1 | :: Copyright (c) Meta Platforms, Inc. and affiliates. 2 | :: All rights reserved. 3 | :: 4 | :: This source code is licensed under the BSD-style license found in the 5 | :: LICENSE file in the root directory of this source tree. 
6 | 7 | :: Taken from torchaudio 8 | @echo on 9 | 10 | set VC_VERSION_LOWER=17 11 | set VC_VERSION_UPPER=18 12 | 13 | for /f "usebackq tokens=*" %%i in (`"%ProgramFiles(x86)%\Microsoft Visual Studio\Installer\vswhere.exe" -legacy -products * -version [%VC_VERSION_LOWER%^,%VC_VERSION_UPPER%^) -property installationPath`) do ( 14 | if exist "%%i" if exist "%%i\VC\Auxiliary\Build\vcvarsall.bat" ( 15 | set "VS15INSTALLDIR=%%i" 16 | set "VS15VCVARSALL=%%i\VC\Auxiliary\Build\vcvarsall.bat" 17 | goto vswhere 18 | ) 19 | ) 20 | 21 | :vswhere 22 | if "%VSDEVCMD_ARGS%" == "" ( 23 | call "%VS15VCVARSALL%" x64 || exit /b 1 24 | ) else ( 25 | call "%VS15VCVARSALL%" x64 %VSDEVCMD_ARGS% || exit /b 1 26 | ) 27 | 28 | @echo on 29 | 30 | if "%CU_VERSION%" == "xpu" call "C:\Program Files (x86)\Intel\oneAPI\setvars.bat" 31 | 32 | set DISTUTILS_USE_SDK=1 33 | set BUILD_AGAINST_ALL_FFMPEG_FROM_S3=1 34 | 35 | set args=%1 36 | shift 37 | :start 38 | if [%1] == [] goto done 39 | set args=%args% %1 40 | shift 41 | goto start 42 | 43 | :done 44 | if "%args%" == "" ( 45 | echo Usage: vc_env_helper.bat [command] [args] 46 | echo e.g. vc_env_helper.bat cl /c test.cpp 47 | ) 48 | 49 | %args% || exit /b 1 50 | -------------------------------------------------------------------------------- /test/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | cmake_minimum_required(VERSION 3.14) 2 | include(CMakePrintHelpers) 3 | project(TorchCodecTests) 4 | set(CMAKE_CXX_STANDARD 17) 5 | set(CMAKE_CXX_STANDARD_REQUIRED) 6 | 7 | find_package(Torch REQUIRED) 8 | 9 | set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${TORCH_CXX_FLAGS}") 10 | 11 | 12 | include(FetchContent) 13 | FetchContent_Declare( 14 | googletest 15 | URL https://github.com/google/googletest/archive/refs/tags/v1.14.0.zip 16 | ) 17 | FetchContent_MakeAvailable(googletest) 18 | 19 | add_executable( 20 | VideoDecoderTest 21 | VideoDecoderTest.cpp 22 | ) 23 | 24 | target_include_directories(VideoDecoderTest SYSTEM PRIVATE ${TORCH_INCLUDE_DIRS}) 25 | target_include_directories(VideoDecoderTest SYSTEM PRIVATE ${libav_include_dirs}) 26 | target_include_directories(VideoDecoderTest PRIVATE ../) 27 | 28 | target_link_libraries( 29 | VideoDecoderTest 30 | ${libtorchcodec_library_name} 31 | ${libtorchcodec_custom_ops_name} 32 | GTest::gtest_main 33 | ) 34 | 35 | include(GoogleTest) 36 | gtest_discover_tests(VideoDecoderTest) 37 | 38 | 39 | add_executable( 40 | MetadataTest 41 | MetadataTest.cpp 42 | ) 43 | 44 | target_include_directories(MetadataTest SYSTEM PRIVATE ${TORCH_INCLUDE_DIRS}) 45 | target_include_directories(MetadataTest SYSTEM PRIVATE ${libav_include_dirs}) 46 | target_include_directories(MetadataTest PRIVATE ../) 47 | 48 | target_link_libraries( 49 | MetadataTest 50 | ${libtorchcodec_library_name} 51 | ${libtorchcodec_custom_ops_name} 52 | GTest::gtest_main 53 | ) 54 | 55 | gtest_discover_tests(MetadataTest) 56 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | BSD 3-Clause License 2 | 3 | Copyright 2024 Meta 4 | 5 | Redistribution and use in source and binary forms, with or without modification, 6 | are permitted provided that the following conditions are met: 7 | 8 | 1. Redistributions of source code must retain the above copyright notice,this list 9 | of conditions and the following disclaimer. 10 | 11 | 2. 
Redistributions in binary form must reproduce the above copyright notice, this 12 | list of conditions and the following disclaimer in the documentation 13 | and/or other materials provided with the distribution. 14 | 15 | 3. Neither the name of the copyright holder nor the names of its contributors may 16 | be used to endorse or promote products derived from this software without specific 17 | prior written permission. 18 | 19 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS “AS IS” AND ANY 20 | EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 21 | OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT 22 | SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, 23 | INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED 24 | TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR 25 | BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 26 | CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN 27 | ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH 28 | DAMAGE. 29 | -------------------------------------------------------------------------------- /src/torchcodec/_core/NVDECCache.cpp: -------------------------------------------------------------------------------- 1 | // Copyright (c) Meta Platforms, Inc. and affiliates. 2 | // All rights reserved. 3 | // 4 | // This source code is licensed under the BSD-style license found in the 5 | // LICENSE file in the root directory of this source tree. 6 | 7 | #include 8 | #include 9 | 10 | #include "CUDACommon.h" 11 | #include "FFMPEGCommon.h" 12 | #include "NVDECCache.h" 13 | 14 | #include // For cudaGetDevice 15 | 16 | extern "C" { 17 | #include 18 | #include 19 | } 20 | 21 | namespace facebook::torchcodec { 22 | 23 | NVDECCache& NVDECCache::getCache(const torch::Device& device) { 24 | static NVDECCache cacheInstances[MAX_CUDA_GPUS]; 25 | return cacheInstances[getDeviceIndex(device)]; 26 | } 27 | 28 | UniqueCUvideodecoder NVDECCache::getDecoder(CUVIDEOFORMAT* videoFormat) { 29 | CacheKey key(videoFormat); 30 | std::lock_guard lock(cacheLock_); 31 | 32 | auto it = cache_.find(key); 33 | if (it != cache_.end()) { 34 | auto decoder = std::move(it->second); 35 | cache_.erase(it); 36 | return decoder; 37 | } 38 | 39 | return nullptr; 40 | } 41 | 42 | bool NVDECCache::returnDecoder( 43 | CUVIDEOFORMAT* videoFormat, 44 | UniqueCUvideodecoder decoder) { 45 | if (!decoder) { 46 | return false; 47 | } 48 | 49 | CacheKey key(videoFormat); 50 | std::lock_guard lock(cacheLock_); 51 | 52 | if (cache_.size() >= MAX_CACHE_SIZE) { 53 | return false; 54 | } 55 | 56 | cache_[key] = std::move(decoder); 57 | return true; 58 | } 59 | 60 | } // namespace facebook::torchcodec 61 | -------------------------------------------------------------------------------- /docs/source/_static/img/pytorch-logo-flame.svg: -------------------------------------------------------------------------------- 1 | 2 | image/svg+xml 34 | -------------------------------------------------------------------------------- /src/torchcodec/_core/CUDACommon.h: -------------------------------------------------------------------------------- 1 | // Copyright (c) Meta Platforms, Inc. and affiliates. 2 | // All rights reserved. 
3 | // 4 | // This source code is licensed under the BSD-style license found in the 5 | // LICENSE file in the root directory of this source tree. 6 | 7 | #pragma once 8 | 9 | #include 10 | #include 11 | #include 12 | #include 13 | 14 | #include "FFMPEGCommon.h" 15 | #include "Frame.h" 16 | 17 | extern "C" { 18 | #include 19 | #include 20 | } 21 | 22 | namespace facebook::torchcodec { 23 | 24 | // Pytorch can only handle up to 128 GPUs. 25 | // https://github.com/pytorch/pytorch/blob/e30c55ee527b40d67555464b9e402b4b7ce03737/c10/cuda/CUDAMacros.h#L44 26 | constexpr int MAX_CUDA_GPUS = 128; 27 | 28 | void initializeCudaContextWithPytorch(const torch::Device& device); 29 | 30 | // Unique pointer type for NPP stream context 31 | using UniqueNppContext = std::unique_ptr; 32 | 33 | torch::Tensor convertNV12FrameToRGB( 34 | UniqueAVFrame& avFrame, 35 | const torch::Device& device, 36 | const UniqueNppContext& nppCtx, 37 | at::cuda::CUDAStream nvdecStream, 38 | std::optional preAllocatedOutputTensor = std::nullopt); 39 | 40 | UniqueNppContext getNppStreamContext(const torch::Device& device); 41 | void returnNppStreamContextToCache( 42 | const torch::Device& device, 43 | UniqueNppContext nppCtx); 44 | 45 | void validatePreAllocatedTensorShape( 46 | const std::optional& preAllocatedOutputTensor, 47 | const UniqueAVFrame& avFrame); 48 | 49 | int getDeviceIndex(const torch::Device& device); 50 | 51 | } // namespace facebook::torchcodec 52 | -------------------------------------------------------------------------------- /benchmarks/decoders/memprofile_decoders.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | # All rights reserved. 3 | # 4 | # This source code is licensed under the BSD-style license found in the 5 | # LICENSE file in the root directory of this source tree. 6 | 7 | import argparse 8 | import importlib 9 | 10 | import torch 11 | from memory_profiler import profile 12 | from torchcodec._core import add_video_stream, create_from_file, get_next_frame 13 | 14 | torch._dynamo.config.cache_size_limit = 100 15 | torch._dynamo.config.capture_dynamic_output_shape_ops = True 16 | 17 | 18 | @profile 19 | def torchcodec_create_next(video_file): 20 | video_decoder = create_from_file(video_file) 21 | add_video_stream(video_decoder) 22 | get_next_frame(video_decoder) 23 | return video_decoder 24 | 25 | 26 | def get_video_path_str(filename: str) -> str: 27 | resource = importlib.resources.files(__package__).joinpath(filename) 28 | with importlib.resources.as_file(resource) as path: 29 | return str(path) 30 | 31 | 32 | def main() -> None: 33 | """Memory leak check and profiling for decoders.""" 34 | parser = argparse.ArgumentParser() 35 | parser.add_argument( 36 | "--iterations", 37 | help="Number of times to invoke decoder operations.", 38 | type=int, 39 | default=10, 40 | ) 41 | args = parser.parse_args() 42 | 43 | large_video_path = get_video_path_str("853.mp4") 44 | 45 | # We call the same function several times, and each call will produce memory stats on 46 | # standard out. We rely on a human looking at the output to see if memory increases 47 | # on each run. 
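    # For reference, memory_profiler prints a per-line table for each decorated
    # call, roughly of the form (illustrative numbers only):
    #
    #   Line #    Mem usage    Increment  Occurrences   Line Contents
    #   ============================================================
    #       20     350.0 MiB     350.0 MiB          1   video_decoder = create_from_file(video_file)
    #
    # A growing "Mem usage" baseline across iterations suggests a leak.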
48 | for _ in range(args.iterations): 49 | torchcodec_create_next(large_video_path) 50 | 51 | 52 | if __name__ == "__main__": 53 | main() 54 | -------------------------------------------------------------------------------- /packaging/post_build_script.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # Copyright (c) Meta Platforms, Inc. and affiliates. 3 | # All rights reserved. 4 | # 5 | # This source code is licensed under the BSD-style license found in the 6 | # LICENSE file in the root directory of this source tree. 7 | 8 | set -ex 9 | 10 | source packaging/helpers.sh 11 | 12 | wheel_path=$(pwd)/$(find dist -type f -name "*.whl") 13 | echo "Wheel content:" 14 | unzip -l $wheel_path 15 | 16 | unamestr=$(uname) 17 | if [[ "$unamestr" == 'Linux' ]]; then 18 | ext="so" 19 | elif [[ "$unamestr" == 'Darwin' ]]; then 20 | ext="dylib" 21 | else 22 | echo "Unknown operating system: $unamestr" 23 | exit 1 24 | fi 25 | 26 | # TODO: Make ffmpeg4 work with nvcc. 27 | if [[ "$ENABLE_CUDA" -eq 1 ]]; then 28 | ffmpeg_versions=(5 6 7) 29 | fi 30 | 31 | for ffmpeg_major_version in ${ffmpeg_versions[@]}; do 32 | assert_in_wheel $wheel_path torchcodec/libtorchcodec${ffmpeg_major_version}.${ext} 33 | done 34 | assert_not_in_wheel $wheel_path libtorchcodec.${ext} 35 | 36 | for ffmpeg_ext in libavcodec.${ext} libavfilter.${ext} libavformat.${ext} libavutil.${ext} libavdevice.${ext} ; do 37 | assert_not_in_wheel $wheel_path $ffmpeg_ext 38 | done 39 | 40 | assert_not_in_wheel $wheel_path "^test" 41 | assert_not_in_wheel $wheel_path "^doc" 42 | assert_not_in_wheel $wheel_path "^benchmarks" 43 | assert_not_in_wheel $wheel_path "^packaging" 44 | 45 | if [[ "$unamestr" == 'Linux' ]]; then 46 | # See invoked python script below for details about this check. 47 | extracted_wheel_dir=$(mktemp -d) 48 | unzip -q $wheel_path -d $extracted_wheel_dir 49 | symbols_matches=$(find $extracted_wheel_dir | grep ".so$" | xargs objdump --syms | grep GLIBCXX_3.4.) 50 | python packaging/check_glibcxx.py "$symbols_matches" 51 | fi 52 | 53 | echo "ls dist" 54 | ls dist 55 | -------------------------------------------------------------------------------- /src/torchcodec/_core/AVIOContextHolder.cpp: -------------------------------------------------------------------------------- 1 | // Copyright (c) Meta Platforms, Inc. and affiliates. 2 | // All rights reserved. 3 | // 4 | // This source code is licensed under the BSD-style license found in the 5 | // LICENSE file in the root directory of this source tree. 
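// Hedged sketch of how a derived holder is expected to wire itself up, per the
// header's note that "Deriving classes should call this function in their
// constructor". MyHolder, MyState and the callback names are hypothetical:
//
//   MyHolder::MyHolder(MyState state) : state_(std::move(state)) {
//     createAVIOContext(
//         &MyHolder::read, /*write=*/nullptr, &MyHolder::seek,
//         &state_, /*isForWriting=*/false);
//   }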
6 | 7 | #include "AVIOContextHolder.h" 8 | #include 9 | 10 | namespace facebook::torchcodec { 11 | 12 | void AVIOContextHolder::createAVIOContext( 13 | AVIOReadFunction read, 14 | AVIOWriteFunction write, 15 | AVIOSeekFunction seek, 16 | void* heldData, 17 | bool isForWriting, 18 | int bufferSize) { 19 | TORCH_CHECK( 20 | bufferSize > 0, 21 | "Buffer size must be greater than 0; is " + std::to_string(bufferSize)); 22 | auto buffer = static_cast(av_malloc(bufferSize)); 23 | TORCH_CHECK( 24 | buffer != nullptr, 25 | "Failed to allocate buffer of size " + std::to_string(bufferSize)); 26 | 27 | TORCH_CHECK(seek != nullptr, "seek method must be defined"); 28 | 29 | if (isForWriting) { 30 | TORCH_CHECK(write != nullptr, "write method must be defined for writing"); 31 | } else { 32 | TORCH_CHECK(read != nullptr, "read method must be defined for reading"); 33 | } 34 | 35 | avioContext_.reset(avioAllocContext( 36 | buffer, 37 | bufferSize, 38 | /*write_flag=*/isForWriting, 39 | heldData, 40 | read, 41 | write, 42 | seek)); 43 | 44 | if (!avioContext_) { 45 | av_freep(&buffer); 46 | TORCH_CHECK(false, "Failed to allocate AVIOContext"); 47 | } 48 | } 49 | 50 | AVIOContextHolder::~AVIOContextHolder() { 51 | if (avioContext_) { 52 | av_freep(&avioContext_->buffer); 53 | } 54 | } 55 | 56 | AVIOContext* AVIOContextHolder::getAVIOContext() { 57 | return avioContext_.get(); 58 | } 59 | 60 | } // namespace facebook::torchcodec 61 | -------------------------------------------------------------------------------- /docs/Makefile: -------------------------------------------------------------------------------- 1 | # Minimal makefile for Sphinx documentation 2 | # 3 | 4 | ifneq ($(EXAMPLES_PATTERN),) 5 | EXAMPLES_PATTERN_OPTS := -D sphinx_gallery_conf.filename_pattern="$(EXAMPLES_PATTERN)" 6 | endif 7 | 8 | # You can set these variables from the command line. 9 | SPHINXOPTS = -W -j auto $(EXAMPLES_PATTERN_OPTS) 10 | SPHINXBUILD = sphinx-build 11 | SPHINXPROJ = torchcodec 12 | SOURCEDIR = source 13 | BUILDDIR = build 14 | 15 | # Put it first so that "make" without argument is like "make help". 16 | help: 17 | @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 18 | 19 | docset: html 20 | doc2dash --name $(SPHINXPROJ) --icon $(SOURCEDIR)/_static/img/pytorch-logo-flame.png --enable-js --online-redirect-url http://pytorch.org/vision/ --force $(BUILDDIR)/html/ 21 | 22 | # Manually fix because Zeal doesn't deal well with `icon.png`-only at 2x resolution. 23 | cp $(SPHINXPROJ).docset/icon.png $(SPHINXPROJ).docset/icon@2x.png 24 | convert $(SPHINXPROJ).docset/icon@2x.png -resize 16x16 $(SPHINXPROJ).docset/icon.png 25 | 26 | html-noplot: # Avoids running the gallery examples, which may take time 27 | $(SPHINXBUILD) -D plot_gallery=0 -b html "${SOURCEDIR}" "$(BUILDDIR)"/html 28 | @echo 29 | @echo "Build finished. The HTML pages are in $(BUILDDIR)/html." 30 | 31 | clean: 32 | rm -rf $(BUILDDIR)/* 33 | rm -rf $(SOURCEDIR)/generated_examples/ # sphinx-gallery 34 | rm -rf $(SOURCEDIR)/gen_modules/ # sphinx-gallery 35 | rm -rf $(SOURCEDIR)/sg_execution_times.rst # sphinx-gallery 36 | rm -rf $(SOURCEDIR)/generated/ # autosummary 37 | 38 | .PHONY: help Makefile docset 39 | 40 | # Catch-all target: route all unknown targets to Sphinx using the new 41 | # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). 
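# For example, `make html` has no explicit target above, so it falls through to
# this rule and is forwarded as `sphinx-build -M html "source" "build" ...`.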
42 | %: Makefile 43 | @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 44 | -------------------------------------------------------------------------------- /docs/source/_static/img/pytorch-logo-dark.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 | 5 | 10 | 13 | 14 | 16 | 17 | 18 | 20 | 23 | 24 | 25 | -------------------------------------------------------------------------------- /src/torchcodec/_core/FilterGraph.h: -------------------------------------------------------------------------------- 1 | // Copyright (c) Meta Platforms, Inc. and affiliates. 2 | // All rights reserved. 3 | // 4 | // This source code is licensed under the BSD-style license found in the 5 | // LICENSE file in the root directory of this source tree. 6 | 7 | #pragma once 8 | 9 | #include "FFMPEGCommon.h" 10 | #include "StreamOptions.h" 11 | 12 | namespace facebook::torchcodec { 13 | 14 | struct FiltersContext { 15 | int inputWidth = 0; 16 | int inputHeight = 0; 17 | AVPixelFormat inputFormat = AV_PIX_FMT_NONE; 18 | AVRational inputAspectRatio = {0, 0}; 19 | int outputWidth = 0; 20 | int outputHeight = 0; 21 | AVPixelFormat outputFormat = AV_PIX_FMT_NONE; 22 | std::string filtergraphStr; 23 | AVRational timeBase = {0, 0}; 24 | UniqueAVBufferRef hwFramesCtx; 25 | 26 | FiltersContext() = default; 27 | FiltersContext(FiltersContext&&) = default; 28 | FiltersContext& operator=(FiltersContext&&) = default; 29 | FiltersContext( 30 | int inputWidth, 31 | int inputHeight, 32 | AVPixelFormat inputFormat, 33 | AVRational inputAspectRatio, 34 | int outputWidth, 35 | int outputHeight, 36 | AVPixelFormat outputFormat, 37 | const std::string& filtergraphStr, 38 | AVRational timeBase, 39 | AVBufferRef* hwFramesCtx = nullptr); 40 | 41 | bool operator==(const FiltersContext&) const; 42 | bool operator!=(const FiltersContext&) const; 43 | }; 44 | 45 | class FilterGraph { 46 | public: 47 | FilterGraph( 48 | const FiltersContext& filtersContext, 49 | const VideoStreamOptions& videoStreamOptions); 50 | 51 | UniqueAVFrame convert(const UniqueAVFrame& avFrame); 52 | 53 | private: 54 | UniqueAVFilterGraph filterGraph_; 55 | AVFilterContext* sourceContext_ = nullptr; 56 | AVFilterContext* sinkContext_ = nullptr; 57 | }; 58 | 59 | } // namespace facebook::torchcodec 60 | -------------------------------------------------------------------------------- /src/torchcodec/_core/Frame.cpp: -------------------------------------------------------------------------------- 1 | // Copyright (c) Meta Platforms, Inc. and affiliates. 2 | // All rights reserved. 3 | // 4 | // This source code is licensed under the BSD-style license found in the 5 | // LICENSE file in the root directory of this source tree. 
6 | 7 | #include "Frame.h" 8 | 9 | namespace facebook::torchcodec { 10 | 11 | FrameDims::FrameDims(int height, int width) : height(height), width(width) { 12 | TORCH_CHECK(height > 0, "FrameDims.height must be > 0, got: ", height); 13 | TORCH_CHECK(width > 0, "FrameDims.width must be > 0, got: ", width); 14 | } 15 | 16 | FrameBatchOutput::FrameBatchOutput( 17 | int64_t numFrames, 18 | const FrameDims& outputDims, 19 | const torch::Device& device) 20 | : ptsSeconds(torch::empty({numFrames}, {torch::kFloat64})), 21 | durationSeconds(torch::empty({numFrames}, {torch::kFloat64})) { 22 | data = allocateEmptyHWCTensor(outputDims, device, numFrames); 23 | } 24 | 25 | torch::Tensor allocateEmptyHWCTensor( 26 | const FrameDims& frameDims, 27 | const torch::Device& device, 28 | std::optional numFrames) { 29 | auto tensorOptions = torch::TensorOptions() 30 | .dtype(torch::kUInt8) 31 | .layout(torch::kStrided) 32 | .device(device); 33 | TORCH_CHECK( 34 | frameDims.height > 0, "height must be > 0, got: ", frameDims.height); 35 | TORCH_CHECK(frameDims.width > 0, "width must be > 0, got: ", frameDims.width); 36 | if (numFrames.has_value()) { 37 | auto numFramesValue = numFrames.value(); 38 | TORCH_CHECK( 39 | numFramesValue >= 0, "numFrames must be >= 0, got: ", numFramesValue); 40 | return torch::empty( 41 | {numFramesValue, frameDims.height, frameDims.width, 3}, tensorOptions); 42 | } else { 43 | return torch::empty({frameDims.height, frameDims.width, 3}, tensorOptions); 44 | } 45 | } 46 | 47 | } // namespace facebook::torchcodec 48 | -------------------------------------------------------------------------------- /test/test_video_clip_sampler.py: -------------------------------------------------------------------------------- 1 | # (c) Meta Platforms, Inc. and affiliates. Confidential and proprietary. 2 | 3 | 4 | import pytest 5 | import torch 6 | from torchcodec._samplers import ( 7 | DEPRECATED_VideoClipSampler, 8 | IndexBasedSamplerArgs, 9 | TimeBasedSamplerArgs, 10 | VideoArgs, 11 | ) 12 | 13 | from .utils import NASA_VIDEO 14 | 15 | 16 | @pytest.mark.parametrize( 17 | ("sampler_args"), 18 | [ 19 | TimeBasedSamplerArgs( 20 | sampler_type="random", clips_per_video=2, frames_per_clip=4 21 | ), 22 | IndexBasedSamplerArgs( 23 | sampler_type="random", clips_per_video=2, frames_per_clip=4 24 | ), 25 | TimeBasedSamplerArgs( 26 | sampler_type="uniform", clips_per_video=3, frames_per_clip=4 27 | ), 28 | IndexBasedSamplerArgs( 29 | sampler_type="uniform", clips_per_video=3, frames_per_clip=4 30 | ), 31 | ], 32 | ) 33 | def test_sampler(sampler_args): 34 | torch.manual_seed(0) 35 | desired_width, desired_height = 320, 240 36 | video_args = VideoArgs(desired_width=desired_width, desired_height=desired_height) 37 | sampler = DEPRECATED_VideoClipSampler(video_args, sampler_args) 38 | clips = sampler(NASA_VIDEO.to_tensor()) 39 | assert len(clips) == sampler_args.clips_per_video 40 | clip = clips[0] 41 | if isinstance(sampler_args, TimeBasedSamplerArgs): 42 | # Note: Looks like we have an API inconsistency. 43 | # With time-based sampler, `clip` is a tensor but with index-based 44 | # samplers `clip` is a list. 45 | # Below manually convert that list to a tensor for the `.shape` check to 46 | # be unified, but this block should be removed eventually. 
47 | clip = torch.stack(clip) 48 | assert clip.shape == ( 49 | sampler_args.frames_per_clip, 50 | 3, 51 | desired_height, 52 | desired_width, 53 | ) 54 | 55 | 56 | if __name__ == "__main__": 57 | pytest.main() 58 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/bug-report.yml: -------------------------------------------------------------------------------- 1 | name: 🐛 Bug Report 2 | description: Create a report to help us reproduce and fix the bug 3 | 4 | body: 5 | - type: markdown 6 | attributes: 7 | value: > 8 | #### Your bug may have already been reported! Please check [the existing and past issues](https://github.com/pytorch/torchcodec/issues?q=sort%3Aupdated-desc+is%3Aissue). 9 | - type: textarea 10 | attributes: 11 | label: 🐛 Describe the bug 12 | description: | 13 | What broke? What behavior did you see versus what did you expect? Please provide any relevant code, error messages and exception tracebacks. 14 | 15 | A minimal code example will help us help you faster! The ideal code example is a small chunk of code that we can copy-paste to see the same error you see. For example: 16 | 17 | ```python 18 | # All necessary imports at the beginning 19 | import torch 20 | import torchcodec 21 | from torchcodec.decoders import VideoDecoder 22 | 23 | # A succinct reproducing example trimmed down to the essential parts: 24 | decoder = VideoDecoder("path/to/video.mp4") # Help! This fails! 25 | # ... 26 | ``` 27 | 28 | If the code is long, put it in a public gist and link it in the issue: https://gist.github.com. Please also paste any error messages and full exception tracebacks in ```` ```triple quotes blocks``` ````. 29 | validations: 30 | required: true 31 | - type: textarea 32 | attributes: 33 | label: Versions 34 | description: | 35 | We support a wide variety of platforms and versions, and many bugs are verison-dependent. Knowing your setup will help us help you faster! Please run the following and paste the output below. 36 | ```sh 37 | wget https://raw.githubusercontent.com/pytorch/pytorch/main/torch/utils/collect_env.py 38 | # For security purposes, please check the contents of collect_env.py before running it. 39 | python collect_env.py 40 | ``` 41 | validations: 42 | required: true 43 | - type: markdown 44 | attributes: 45 | value: > 46 | Thanks for contributing 🎉! 47 | -------------------------------------------------------------------------------- /src/torchcodec/_core/pybind_ops.cpp: -------------------------------------------------------------------------------- 1 | // Copyright (c) Meta Platforms, Inc. and affiliates. 2 | // All rights reserved. 3 | // 4 | // This source code is licensed under the BSD-style license found in the 5 | // LICENSE file in the root directory of this source tree. 6 | 7 | #include 8 | #include 9 | #include 10 | 11 | #include "AVIOFileLikeContext.h" 12 | 13 | namespace py = pybind11; 14 | 15 | namespace facebook::torchcodec { 16 | 17 | // Note: It's not immediately obvous why we need both custom_ops.cpp and 18 | // pybind_ops.cpp. We do all other Python to C++ bridging in 19 | // custom_ops.cpp, and that even depends on pybind11, so why have an 20 | // explicit pybind-only file? 21 | // 22 | // The reason is that we want to accept OWNERSHIP of a file-like object 23 | // from the Python side. In order to do that, we need a proper 24 | // py::object. 
For raw bytes, we can launder that through a tensor on the 25 | // custom_ops.cpp side, but we can't launder a proper Python object 26 | // through a tensor. Custom ops can't accept a proper Python object 27 | // through py::object, so we have to do direct pybind11 here. 28 | // 29 | // TODO: Investigate if we can do something better here. See: 30 | // https://github.com/pytorch/torchcodec/issues/896 31 | // Short version is that we're laundering a pointer through an int, the 32 | // Python side forwards that to decoder creation functions in 33 | // custom_ops.cpp and we do another cast on that side to get a pointer 34 | // again. We want to investigate if we can do something cleaner by 35 | // defining proper pybind objects. 36 | int64_t create_file_like_context(py::object file_like, bool is_for_writing) { 37 | AVIOFileLikeContext* context = 38 | new AVIOFileLikeContext(file_like, is_for_writing); 39 | return reinterpret_cast(context); 40 | } 41 | 42 | #ifndef PYBIND_OPS_MODULE_NAME 43 | #error PYBIND_OPS_MODULE_NAME must be defined! 44 | #endif 45 | 46 | PYBIND11_MODULE(PYBIND_OPS_MODULE_NAME, m) { 47 | m.def("create_file_like_context", &create_file_like_context); 48 | } 49 | 50 | } // namespace facebook::torchcodec 51 | -------------------------------------------------------------------------------- /src/torchcodec/_core/AVIOFileLikeContext.h: -------------------------------------------------------------------------------- 1 | // Copyright (c) Meta Platforms, Inc. and affiliates. 2 | // All rights reserved. 3 | // 4 | // This source code is licensed under the BSD-style license found in the 5 | // LICENSE file in the root directory of this source tree. 6 | 7 | #pragma once 8 | 9 | #include 10 | #include 11 | 12 | #include "AVIOContextHolder.h" 13 | 14 | namespace py = pybind11; 15 | 16 | namespace facebook::torchcodec { 17 | 18 | // Enables uers to pass in a Python file-like object. We then forward all read 19 | // and seek calls back up to the methods on the Python object. 20 | class AVIOFileLikeContext : public AVIOContextHolder { 21 | public: 22 | explicit AVIOFileLikeContext(const py::object& fileLike, bool isForWriting); 23 | 24 | private: 25 | static int read(void* opaque, uint8_t* buf, int buf_size); 26 | static int64_t seek(void* opaque, int64_t offset, int whence); 27 | static int write(void* opaque, const uint8_t* buf, int buf_size); 28 | 29 | // Note that we dynamically allocate the Python object because we need to 30 | // strictly control when its destructor is called. We must hold the GIL 31 | // when its destructor gets called, as it needs to update the reference 32 | // count. It's easiest to control that when it's dynamic memory. Otherwise, 33 | // we'd have to ensure whatever enclosing scope holds the object has the GIL, 34 | // and that's, at least, hard. For all of the common pitfalls, see: 35 | // 36 | // https://pybind11.readthedocs.io/en/stable/advanced/misc.html#common-sources-of-global-interpreter-lock-errors 37 | // 38 | // We maintain a reference to the file-like object because the file-like 39 | // object that was created on the Python side must live as long as our 40 | // potential use. That is, even if there are no more references to the object 41 | // on the Python side, we require that the object is still live. 
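// A hedged sketch of what the Python side can hand to this class: any object
// exposing read() and seek() (plus write() when isForWriting is true), such as
// an open binary file or an io.BytesIO. Roughly (exact Python signatures are
// not shown in this header):
//
//   f = open("video.mp4", "rb")              # or io.BytesIO(raw_bytes)
//   decoder = create_from_file_like(f, ...)  # op re-exported by _core/__init__.py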
42 | struct PyObjectDeleter { 43 | inline void operator()(py::object* obj) const { 44 | if (obj) { 45 | py::gil_scoped_acquire gil; 46 | delete obj; 47 | } 48 | } 49 | }; 50 | 51 | using UniquePyObject = std::unique_ptr; 52 | UniquePyObject fileLike_; 53 | }; 54 | 55 | } // namespace facebook::torchcodec 56 | -------------------------------------------------------------------------------- /src/torchcodec/_core/Frame.h: -------------------------------------------------------------------------------- 1 | // Copyright (c) Meta Platforms, Inc. and affiliates. 2 | // All rights reserved. 3 | // 4 | // This source code is licensed under the BSD-style license found in the 5 | // LICENSE file in the root directory of this source tree. 6 | 7 | #pragma once 8 | 9 | #include 10 | #include "FFMPEGCommon.h" 11 | #include "Metadata.h" 12 | #include "StreamOptions.h" 13 | 14 | namespace facebook::torchcodec { 15 | 16 | struct FrameDims { 17 | int height = 0; 18 | int width = 0; 19 | 20 | FrameDims() = default; 21 | 22 | FrameDims(int h, int w); 23 | }; 24 | 25 | // All public video decoding entry points return either a FrameOutput or a 26 | // FrameBatchOutput. 27 | // They are the equivalent of the user-facing Frame and FrameBatch classes in 28 | // Python. They contain RGB decoded frames along with some associated data 29 | // like PTS and duration. 30 | // FrameOutput is also relevant for audio decoding, typically as the output of 31 | // getNextFrame(), or as a temporary output variable. 32 | struct FrameOutput { 33 | // data shape is: 34 | // - 3D (C, H, W) or (H, W, C) for videos 35 | // - 2D (numChannels, numSamples) for audio 36 | torch::Tensor data; 37 | double ptsSeconds; 38 | double durationSeconds; 39 | }; 40 | 41 | struct FrameBatchOutput { 42 | torch::Tensor data; // 4D: of shape NCHW or NHWC. 43 | torch::Tensor ptsSeconds; // 1D of shape (N,) 44 | torch::Tensor durationSeconds; // 1D of shape (N,) 45 | 46 | FrameBatchOutput( 47 | int64_t numFrames, 48 | const FrameDims& outputDims, 49 | const torch::Device& device); 50 | }; 51 | 52 | struct AudioFramesOutput { 53 | torch::Tensor data; // shape is (numChannels, numSamples) 54 | double ptsSeconds; 55 | }; 56 | 57 | // -------------------------------------------------------------------------- 58 | // FRAME TENSOR ALLOCATION APIs 59 | // -------------------------------------------------------------------------- 60 | 61 | // Note [Frame Tensor allocation] 62 | // 63 | // We always allocate [N]HWC tensors. The low-level decoding functions all 64 | // assume HWC tensors, since this is what FFmpeg natively handles. It's up to 65 | // the high-level decoding entry-points to permute that back to CHW, by calling 66 | // maybePermuteHWC2CHW(). 67 | torch::Tensor allocateEmptyHWCTensor( 68 | const FrameDims& frameDims, 69 | const torch::Device& device, 70 | std::optional numFrames = std::nullopt); 71 | 72 | } // namespace facebook::torchcodec 73 | -------------------------------------------------------------------------------- /src/torchcodec/_core/StreamOptions.h: -------------------------------------------------------------------------------- 1 | // Copyright (c) Meta Platforms, Inc. and affiliates. 2 | // All rights reserved. 3 | // 4 | // This source code is licensed under the BSD-style license found in the 5 | // LICENSE file in the root directory of this source tree. 
6 | 7 | #pragma once 8 | 9 | #include 10 | #include 11 | #include 12 | #include 13 | #include 14 | 15 | namespace facebook::torchcodec { 16 | 17 | enum ColorConversionLibrary { 18 | // Use the libavfilter library for color conversion. 19 | FILTERGRAPH, 20 | // Use the libswscale library for color conversion. 21 | SWSCALE 22 | }; 23 | 24 | struct VideoStreamOptions { 25 | VideoStreamOptions() {} 26 | 27 | // Number of threads we pass to FFMPEG for decoding. 28 | // 0 means FFMPEG will choose the number of threads automatically to fully 29 | // utilize all cores. If not set, it will be the default FFMPEG behavior for 30 | // the given codec. 31 | std::optional ffmpegThreadCount; 32 | 33 | // Currently the dimension order can be either NHWC or NCHW. 34 | // H=height, W=width, C=channel. 35 | std::string dimensionOrder = "NCHW"; 36 | 37 | // By default we have to use filtergraph, as it is more general. We can only 38 | // use swscale when we have met strict requirements. See 39 | // CpuDeviceInterface::initialze() for the logic. 40 | ColorConversionLibrary colorConversionLibrary = 41 | ColorConversionLibrary::FILTERGRAPH; 42 | 43 | // By default we use CPU for decoding for both C++ and python users. 44 | // Note: This is not used for video encoding, because device is determined by 45 | // the device of the input frame tensor. 46 | torch::Device device = torch::kCPU; 47 | // Device variant (e.g., "ffmpeg", "beta", etc.) 48 | std::string_view deviceVariant = "ffmpeg"; 49 | 50 | // Encoding options 51 | std::optional codec; 52 | // Optional pixel format for video encoding (e.g., "yuv420p", "yuv444p") 53 | // If not specified, uses codec's default format. 54 | std::optional pixelFormat; 55 | std::optional crf; 56 | std::optional preset; 57 | std::optional> extraOptions; 58 | }; 59 | 60 | struct AudioStreamOptions { 61 | AudioStreamOptions() {} 62 | 63 | // Encoding only 64 | std::optional bitRate; 65 | // Decoding and encoding: 66 | std::optional numChannels; 67 | std::optional sampleRate; 68 | }; 69 | 70 | } // namespace facebook::torchcodec 71 | -------------------------------------------------------------------------------- /src/torchcodec/_internally_replaced_utils.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | # All rights reserved. 3 | # 4 | # This source code is licensed under the BSD-style license found in the 5 | # LICENSE file in the root directory of this source tree. 
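# Hedged sketch of how the helpers below fit together; the real call sites
# (presumably in _core/ops.py) are not shown here, and the library name is
# illustrative only:
#
#   lib_path = _get_extension_path("libtorchcodec_core7")
#   pybind_ops = _load_pybind11_module(
#       _get_pybind_ops_module_name(ffmpeg_major_version=7), lib_path
#   )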
6 | 7 | import importlib 8 | import importlib.util 9 | import sys 10 | from pathlib import Path 11 | from types import ModuleType 12 | 13 | 14 | # Copy pasted from torchvision 15 | # https://github.com/pytorch/vision/blob/947ae1dc71867f28021d5bc0ff3a19c249236e2a/torchvision/_internally_replaced_utils.py#L25 16 | def _get_extension_path(lib_name: str) -> str: 17 | extension_suffixes = [] 18 | if sys.platform == "linux": 19 | extension_suffixes = importlib.machinery.EXTENSION_SUFFIXES 20 | elif sys.platform == "darwin": 21 | extension_suffixes = importlib.machinery.EXTENSION_SUFFIXES + [".dylib"] 22 | elif sys.platform in ("win32", "cygwin"): 23 | extension_suffixes = importlib.machinery.EXTENSION_SUFFIXES + [".dll", ".pyd"] 24 | else: 25 | raise NotImplementedError(f"{sys.platform = } is not not supported") 26 | loader_details = ( 27 | importlib.machinery.ExtensionFileLoader, 28 | extension_suffixes, 29 | ) 30 | 31 | extfinder = importlib.machinery.FileFinder( 32 | str(Path(__file__).parent), loader_details 33 | ) 34 | ext_specs = extfinder.find_spec(lib_name) 35 | if ext_specs is None: 36 | raise ImportError(f"No spec found for {lib_name}") 37 | 38 | if ext_specs.origin is None: 39 | raise ImportError(f"Existing spec found for {lib_name} does not have an origin") 40 | 41 | return ext_specs.origin 42 | 43 | 44 | def _load_pybind11_module(module_name: str, library_path: str) -> ModuleType: 45 | spec = importlib.util.spec_from_file_location( 46 | module_name, 47 | library_path, 48 | ) 49 | if spec is None or spec.loader is None: 50 | raise ImportError( 51 | f"Unable to load spec or spec.loader for module {module_name} from path {library_path}" 52 | ) 53 | 54 | mod = importlib.util.module_from_spec(spec) 55 | spec.loader.exec_module(mod) 56 | 57 | return mod 58 | 59 | 60 | # Note that the return value from this function must match the value used as 61 | # PYBIND_OPS_MODULE_NAME when we compile _core/pybind_ops.cpp. If the values 62 | # do not match, we will not be able to import the C++ shared library as a 63 | # Python module at runtime. 64 | # 65 | # The parameter ffmpeg_major_version is unused externally, but used 66 | # internally. 67 | def _get_pybind_ops_module_name(ffmpeg_major_version: int) -> str: 68 | return "core_pybind_ops" 69 | -------------------------------------------------------------------------------- /src/torchcodec/_core/CudaDeviceInterface.h: -------------------------------------------------------------------------------- 1 | // Copyright (c) Meta Platforms, Inc. and affiliates. 2 | // All rights reserved. 3 | // 4 | // This source code is licensed under the BSD-style license found in the 5 | // LICENSE file in the root directory of this source tree. 
6 | 7 | #pragma once 8 | 9 | #include "CUDACommon.h" 10 | #include "DeviceInterface.h" 11 | #include "FilterGraph.h" 12 | 13 | namespace facebook::torchcodec { 14 | 15 | class CudaDeviceInterface : public DeviceInterface { 16 | public: 17 | CudaDeviceInterface(const torch::Device& device); 18 | 19 | virtual ~CudaDeviceInterface(); 20 | 21 | std::optional findCodec( 22 | const AVCodecID& codecId, 23 | bool isDecoder = true) override; 24 | 25 | void initialize( 26 | const AVStream* avStream, 27 | const UniqueDecodingAVFormatContext& avFormatCtx, 28 | const SharedAVCodecContext& codecContext) override; 29 | 30 | void initializeVideo( 31 | const VideoStreamOptions& videoStreamOptions, 32 | [[maybe_unused]] const std::vector>& 33 | transforms, 34 | [[maybe_unused]] const std::optional& resizedOutputDims) 35 | override; 36 | 37 | void registerHardwareDeviceWithCodec(AVCodecContext* codecContext) override; 38 | 39 | void convertAVFrameToFrameOutput( 40 | UniqueAVFrame& avFrame, 41 | FrameOutput& frameOutput, 42 | std::optional preAllocatedOutputTensor) override; 43 | 44 | std::string getDetails() override; 45 | 46 | UniqueAVFrame convertCUDATensorToAVFrameForEncoding( 47 | const torch::Tensor& tensor, 48 | int frameIndex, 49 | AVCodecContext* codecContext) override; 50 | 51 | void setupHardwareFrameContextForEncoding( 52 | AVCodecContext* codecContext) override; 53 | 54 | private: 55 | // Our CUDA decoding code assumes NV12 format. In order to handle other 56 | // kinds of input, we need to convert them to NV12. Our current implementation 57 | // does this using filtergraph. 58 | UniqueAVFrame maybeConvertAVFrameToNV12OrRGB24(UniqueAVFrame& avFrame); 59 | 60 | // We sometimes encounter frames that cannot be decoded on the CUDA device. 61 | // Rather than erroring out, we decode them on the CPU. 62 | std::unique_ptr cpuInterface_; 63 | 64 | VideoStreamOptions videoStreamOptions_; 65 | AVRational timeBase_; 66 | 67 | UniqueAVBufferRef hardwareDeviceCtx_; 68 | UniqueNppContext nppCtx_; 69 | 70 | // This filtergraph instance is only used for NV12 format conversion in 71 | // maybeConvertAVFrameToNV12(). 72 | std::unique_ptr nv12ConversionContext_; 73 | std::unique_ptr nv12Conversion_; 74 | 75 | bool usingCPUFallback_ = false; 76 | bool hasDecodedFrame_ = false; 77 | }; 78 | 79 | } // namespace facebook::torchcodec 80 | -------------------------------------------------------------------------------- /src/torchcodec/_core/Transform.h: -------------------------------------------------------------------------------- 1 | // Copyright (c) Meta Platforms, Inc. and affiliates. 2 | // All rights reserved. 3 | // 4 | // This source code is licensed under the BSD-style license found in the 5 | // LICENSE file in the root directory of this source tree. 6 | 7 | #pragma once 8 | 9 | #include 10 | #include 11 | #include "Frame.h" 12 | #include "Metadata.h" 13 | 14 | namespace facebook::torchcodec { 15 | 16 | class Transform { 17 | public: 18 | virtual std::string getFilterGraphCpu() const = 0; 19 | virtual ~Transform() = default; 20 | 21 | // If the transformation does not change the output frame dimensions, then 22 | // there is no need to override this member function. The default 23 | // implementation returns an empty optional, indicating that the output frame 24 | // has the same dimensions as the input frame. 25 | // 26 | // If the transformation does change the output frame dimensions, then it 27 | // must override this member function and return the output frame dimensions. 
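  // For example, ResizeTransform below overrides this and returns the target
  // dimensions it was constructed with. (Its getFilterGraphCpu() presumably
  // emits a scale filter along the lines of "scale=240:135:flags=bilinear",
  // judging from test resource names like
  // nasa_13013.mp4.scale_240_135_flags_bilinear -- the exact string is an
  // assumption, not something this header specifies.)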
28 | virtual std::optional getOutputFrameDims() const { 29 | return std::nullopt; 30 | } 31 | 32 | // The validity of some transforms depends on the characteristics of the 33 | // AVStream they're being applied to. For example, some transforms will 34 | // specify coordinates inside a frame, we need to validate that those are 35 | // within the frame's bounds. 36 | // 37 | // Note that the validation function does not return anything. We expect 38 | // invalid configurations to throw an exception. 39 | virtual void validate([[maybe_unused]] const FrameDims& inputDims) const {} 40 | }; 41 | 42 | class ResizeTransform : public Transform { 43 | public: 44 | enum class InterpolationMode { BILINEAR }; 45 | 46 | explicit ResizeTransform(const FrameDims& dims) 47 | : outputDims_(dims), interpolationMode_(InterpolationMode::BILINEAR) {} 48 | 49 | ResizeTransform(const FrameDims& dims, InterpolationMode interpolationMode) 50 | : outputDims_(dims), interpolationMode_(interpolationMode) {} 51 | 52 | std::string getFilterGraphCpu() const override; 53 | std::optional getOutputFrameDims() const override; 54 | 55 | private: 56 | FrameDims outputDims_; 57 | InterpolationMode interpolationMode_; 58 | }; 59 | 60 | class CropTransform : public Transform { 61 | public: 62 | CropTransform(const FrameDims& dims, int x, int y); 63 | 64 | // Becomes a center crop if x and y are not specified. 65 | explicit CropTransform(const FrameDims& dims); 66 | 67 | std::string getFilterGraphCpu() const override; 68 | std::optional getOutputFrameDims() const override; 69 | void validate(const FrameDims& inputDims) const override; 70 | 71 | private: 72 | FrameDims outputDims_; 73 | std::optional x_; 74 | std::optional y_; 75 | }; 76 | 77 | } // namespace facebook::torchcodec 78 | -------------------------------------------------------------------------------- /src/torchcodec/_core/AVIOContextHolder.h: -------------------------------------------------------------------------------- 1 | // Copyright (c) Meta Platforms, Inc. and affiliates. 2 | // All rights reserved. 3 | // 4 | // This source code is licensed under the BSD-style license found in the 5 | // LICENSE file in the root directory of this source tree. 6 | 7 | #pragma once 8 | 9 | #include "FFMPEGCommon.h" 10 | 11 | namespace facebook::torchcodec { 12 | 13 | // The AVIOContextHolder serves several purposes: 14 | // 15 | // 1. It is a smart pointer for the AVIOContext. It has the logic to create 16 | // a new AVIOContext and will appropriately free the AVIOContext when it 17 | // goes out of scope. Note that this requires more than just having a 18 | // UniqueAVIOContext, as the AVIOContext points to a buffer which must be 19 | // freed. 20 | // 2. It is a base class for AVIOContext specializations. When specializing a 21 | // AVIOContext, we need to provide four things: 22 | // 1. A read callback function, for decoding. 23 | // 2. A seek callback function, for decoding and encoding. 24 | // 3. A write callback function, for encoding. 25 | // 4. A pointer to some context object that has the same lifetime as the 26 | // AVIOContext itself. This context object holds the custom state that 27 | // tracks the custom behavior of reading, seeking and writing. It is 28 | // provided upon AVIOContext creation and to the read, seek and 29 | // write callback functions. 30 | // The callback functions do not need to be members of the derived class, 31 | // but the derived class must have access to them. The context object must 32 | // be a member of the derived class. 
Derived classes need to call 33 | // createAVIOContext(), ideally in their constructor. 34 | // 3. A generic handle for those that just need to manage having access to an 35 | // AVIOContext, but aren't necessarily concerned with how it was customized: 36 | // typically, the SingleStreamDecoder. 37 | class AVIOContextHolder { 38 | public: 39 | virtual ~AVIOContextHolder(); 40 | AVIOContext* getAVIOContext(); 41 | 42 | protected: 43 | // Make constructor protected to prevent anyone from constructing 44 | // an AVIOContextHolder without deriving it. (Ordinarily this would be 45 | // enforced by having a pure virtual methods, but we don't have any.) 46 | AVIOContextHolder() = default; 47 | 48 | // Deriving classes should call this function in their constructor. 49 | void createAVIOContext( 50 | AVIOReadFunction read, 51 | AVIOWriteFunction write, 52 | AVIOSeekFunction seek, 53 | void* heldData, 54 | bool isForWriting, 55 | int bufferSize = defaultBufferSize); 56 | 57 | private: 58 | UniqueAVIOContext avioContext_; 59 | 60 | // Defaults to 64 KB 61 | static const int defaultBufferSize = 64 * 1024; 62 | }; 63 | 64 | } // namespace facebook::torchcodec 65 | -------------------------------------------------------------------------------- /test/resources/sine_mono_s16.wav.stream0.all_frames_info.json: -------------------------------------------------------------------------------- 1 | [ 2 | { 3 | "duration_time": "0.128000", 4 | "pts_time": "0.000000" 5 | }, 6 | { 7 | "duration_time": "0.128000", 8 | "pts_time": "0.128000" 9 | }, 10 | { 11 | "duration_time": "0.128000", 12 | "pts_time": "0.256000" 13 | }, 14 | { 15 | "duration_time": "0.128000", 16 | "pts_time": "0.384000" 17 | }, 18 | { 19 | "duration_time": "0.128000", 20 | "pts_time": "0.512000" 21 | }, 22 | { 23 | "duration_time": "0.128000", 24 | "pts_time": "0.640000" 25 | }, 26 | { 27 | "duration_time": "0.128000", 28 | "pts_time": "0.768000" 29 | }, 30 | { 31 | "duration_time": "0.128000", 32 | "pts_time": "0.896000" 33 | }, 34 | { 35 | "duration_time": "0.128000", 36 | "pts_time": "1.024000" 37 | }, 38 | { 39 | "duration_time": "0.128000", 40 | "pts_time": "1.152000" 41 | }, 42 | { 43 | "duration_time": "0.128000", 44 | "pts_time": "1.280000" 45 | }, 46 | { 47 | "duration_time": "0.128000", 48 | "pts_time": "1.408000" 49 | }, 50 | { 51 | "duration_time": "0.128000", 52 | "pts_time": "1.536000" 53 | }, 54 | { 55 | "duration_time": "0.128000", 56 | "pts_time": "1.664000" 57 | }, 58 | { 59 | "duration_time": "0.128000", 60 | "pts_time": "1.792000" 61 | }, 62 | { 63 | "duration_time": "0.128000", 64 | "pts_time": "1.920000" 65 | }, 66 | { 67 | "duration_time": "0.128000", 68 | "pts_time": "2.048000" 69 | }, 70 | { 71 | "duration_time": "0.128000", 72 | "pts_time": "2.176000" 73 | }, 74 | { 75 | "duration_time": "0.128000", 76 | "pts_time": "2.304000" 77 | }, 78 | { 79 | "duration_time": "0.128000", 80 | "pts_time": "2.432000" 81 | }, 82 | { 83 | "duration_time": "0.128000", 84 | "pts_time": "2.560000" 85 | }, 86 | { 87 | "duration_time": "0.128000", 88 | "pts_time": "2.688000" 89 | }, 90 | { 91 | "duration_time": "0.128000", 92 | "pts_time": "2.816000" 93 | }, 94 | { 95 | "duration_time": "0.128000", 96 | "pts_time": "2.944000" 97 | }, 98 | { 99 | "duration_time": "0.128000", 100 | "pts_time": "3.072000" 101 | }, 102 | { 103 | "duration_time": "0.128000", 104 | "pts_time": "3.200000" 105 | }, 106 | { 107 | "duration_time": "0.128000", 108 | "pts_time": "3.328000" 109 | }, 110 | { 111 | "duration_time": "0.128000", 112 | "pts_time": "3.456000" 
113 | }, 114 | { 115 | "duration_time": "0.128000", 116 | "pts_time": "3.584000" 117 | }, 118 | { 119 | "duration_time": "0.128000", 120 | "pts_time": "3.712000" 121 | }, 122 | { 123 | "duration_time": "0.128000", 124 | "pts_time": "3.840000" 125 | }, 126 | { 127 | "duration_time": "0.032000", 128 | "pts_time": "3.968000" 129 | } 130 | ] 131 | -------------------------------------------------------------------------------- /test/resources/sine_mono_s32_8000.wav.stream0.all_frames_info.json: -------------------------------------------------------------------------------- 1 | [ 2 | { 3 | "duration_time": "0.128000", 4 | "pts_time": "0.000000" 5 | }, 6 | { 7 | "duration_time": "0.128000", 8 | "pts_time": "0.128000" 9 | }, 10 | { 11 | "duration_time": "0.128000", 12 | "pts_time": "0.256000" 13 | }, 14 | { 15 | "duration_time": "0.128000", 16 | "pts_time": "0.384000" 17 | }, 18 | { 19 | "duration_time": "0.128000", 20 | "pts_time": "0.512000" 21 | }, 22 | { 23 | "duration_time": "0.128000", 24 | "pts_time": "0.640000" 25 | }, 26 | { 27 | "duration_time": "0.128000", 28 | "pts_time": "0.768000" 29 | }, 30 | { 31 | "duration_time": "0.128000", 32 | "pts_time": "0.896000" 33 | }, 34 | { 35 | "duration_time": "0.128000", 36 | "pts_time": "1.024000" 37 | }, 38 | { 39 | "duration_time": "0.128000", 40 | "pts_time": "1.152000" 41 | }, 42 | { 43 | "duration_time": "0.128000", 44 | "pts_time": "1.280000" 45 | }, 46 | { 47 | "duration_time": "0.128000", 48 | "pts_time": "1.408000" 49 | }, 50 | { 51 | "duration_time": "0.128000", 52 | "pts_time": "1.536000" 53 | }, 54 | { 55 | "duration_time": "0.128000", 56 | "pts_time": "1.664000" 57 | }, 58 | { 59 | "duration_time": "0.128000", 60 | "pts_time": "1.792000" 61 | }, 62 | { 63 | "duration_time": "0.128000", 64 | "pts_time": "1.920000" 65 | }, 66 | { 67 | "duration_time": "0.128000", 68 | "pts_time": "2.048000" 69 | }, 70 | { 71 | "duration_time": "0.128000", 72 | "pts_time": "2.176000" 73 | }, 74 | { 75 | "duration_time": "0.128000", 76 | "pts_time": "2.304000" 77 | }, 78 | { 79 | "duration_time": "0.128000", 80 | "pts_time": "2.432000" 81 | }, 82 | { 83 | "duration_time": "0.128000", 84 | "pts_time": "2.560000" 85 | }, 86 | { 87 | "duration_time": "0.128000", 88 | "pts_time": "2.688000" 89 | }, 90 | { 91 | "duration_time": "0.128000", 92 | "pts_time": "2.816000" 93 | }, 94 | { 95 | "duration_time": "0.128000", 96 | "pts_time": "2.944000" 97 | }, 98 | { 99 | "duration_time": "0.128000", 100 | "pts_time": "3.072000" 101 | }, 102 | { 103 | "duration_time": "0.128000", 104 | "pts_time": "3.200000" 105 | }, 106 | { 107 | "duration_time": "0.128000", 108 | "pts_time": "3.328000" 109 | }, 110 | { 111 | "duration_time": "0.128000", 112 | "pts_time": "3.456000" 113 | }, 114 | { 115 | "duration_time": "0.128000", 116 | "pts_time": "3.584000" 117 | }, 118 | { 119 | "duration_time": "0.128000", 120 | "pts_time": "3.712000" 121 | }, 122 | { 123 | "duration_time": "0.128000", 124 | "pts_time": "3.840000" 125 | }, 126 | { 127 | "duration_time": "0.032000", 128 | "pts_time": "3.968000" 129 | } 130 | ] 131 | -------------------------------------------------------------------------------- /.github/workflows/lint.yaml: -------------------------------------------------------------------------------- 1 | name: Lint 2 | 3 | on: 4 | push: 5 | branches: [ main ] 6 | pull_request: 7 | 8 | concurrency: 9 | group: unit-test${{ github.workflow }}-${{ github.ref == 'refs/heads/main' && github.run_number || github.ref }} 10 | cancel-in-progress: true 11 | 12 | defaults: 13 | run: 14 | 
shell: bash -l -eo pipefail {0} 15 | 16 | jobs: 17 | pre-commit-checks: 18 | runs-on: ubuntu-latest 19 | strategy: 20 | fail-fast: false 21 | matrix: 22 | python-version: ['3.12'] 23 | steps: 24 | - name: Check out repo 25 | uses: actions/checkout@v3 26 | - name: Setup conda env 27 | uses: conda-incubator/setup-miniconda@v2 28 | with: 29 | auto-update-conda: true 30 | miniconda-version: "latest" 31 | activate-environment: test 32 | python-version: ${{ matrix.python-version }} 33 | - name: Update pip 34 | run: python -m pip install --upgrade pip 35 | - name: Install pre-commit 36 | run: | 37 | python -m pip install pre-commit 38 | - name: Run pre-commit checks 39 | run: | 40 | pre-commit run --all-files 41 | - name: Check to see what files pre-commit modified 42 | run: | 43 | git diff 44 | 45 | mypy: 46 | runs-on: ubuntu-latest 47 | strategy: 48 | fail-fast: false 49 | matrix: 50 | python-version: ['3.12'] 51 | steps: 52 | - name: Check out repo 53 | uses: actions/checkout@v3 54 | - name: Setup conda env 55 | uses: conda-incubator/setup-miniconda@v2 56 | with: 57 | auto-update-conda: true 58 | miniconda-version: "latest" 59 | activate-environment: test 60 | python-version: ${{ matrix.python-version }} 61 | - name: Update pip 62 | run: python -m pip install --upgrade pip 63 | - name: Install dependencies and FFmpeg 64 | run: | 65 | # If we're in a release branch or in a PR against a release branch, 66 | # we install the PyTorch RCs from the test channel. Otherwise, e.g. in 67 | # `main` or in PRs against `main`, we install the nightly builds. 68 | # Note that the `test` RCs are 69 | if [[ (${GITHUB_EVENT_NAME} = 'pull_request' && (${GITHUB_BASE_REF} = 'release'*)) || (${GITHUB_REF} = 'refs/heads/release'*) ]]; then 70 | CHANNEL=test 71 | else 72 | CHANNEL=nightly 73 | fi 74 | python -m pip install --pre torch torchvision --index-url https://download.pytorch.org/whl/${CHANNEL}/cpu 75 | conda install "ffmpeg=7.0.1" pkg-config pybind11 -c conda-forge 76 | ffmpeg -version 77 | - name: Build and install torchcodec 78 | run: | 79 | python -m pip install -e ".[dev]" --no-build-isolation -vvv 80 | - name: Install mypy 81 | run: | 82 | python -m pip install mypy 83 | - name: Run mypy 84 | run: | 85 | mypy --install-types --non-interactive --config-file mypy.ini 86 | -------------------------------------------------------------------------------- /src/torchcodec/samplers/_common.py: -------------------------------------------------------------------------------- 1 | from collections.abc import Callable 2 | 3 | from torchcodec import FrameBatch 4 | 5 | _LIST_OF_INT_OR_FLOAT = list[int] | list[float] 6 | 7 | 8 | def _repeat_last_policy( 9 | values: _LIST_OF_INT_OR_FLOAT, desired_len: int 10 | ) -> _LIST_OF_INT_OR_FLOAT: 11 | # values = [1, 2, 3], desired_len = 5 12 | # output = [1, 2, 3, 3, 3] 13 | values += [values[-1]] * (desired_len - len(values)) 14 | return values 15 | 16 | 17 | def _wrap_policy( 18 | values: _LIST_OF_INT_OR_FLOAT, desired_len: int 19 | ) -> _LIST_OF_INT_OR_FLOAT: 20 | # values = [1, 2, 3], desired_len = 5 21 | # output = [1, 2, 3, 1, 2] 22 | return (values * (desired_len // len(values) + 1))[:desired_len] 23 | 24 | 25 | def _error_policy( 26 | frames_indices: _LIST_OF_INT_OR_FLOAT, desired_len: int 27 | ) -> _LIST_OF_INT_OR_FLOAT: 28 | raise ValueError( 29 | "You set the 'error' policy, and the sampler tried to decode a frame " 30 | "that is beyond the number of frames in the video. " 31 | "Try to leave sampling_range_end to its default value?" 
32 | ) 33 | 34 | 35 | _POLICY_FUNCTION_TYPE = Callable[[_LIST_OF_INT_OR_FLOAT, int], _LIST_OF_INT_OR_FLOAT] 36 | 37 | _POLICY_FUNCTIONS: dict[str, _POLICY_FUNCTION_TYPE] = { 38 | "repeat_last": _repeat_last_policy, 39 | "wrap": _wrap_policy, 40 | "error": _error_policy, 41 | } 42 | 43 | 44 | def _validate_common_params(*, decoder, num_frames_per_clip, policy): 45 | if len(decoder) < 1: 46 | raise ValueError( 47 | f"Decoder must have at least one frame, found {len(decoder)} frames." 48 | ) 49 | 50 | if num_frames_per_clip <= 0: 51 | raise ValueError( 52 | f"num_frames_per_clip ({num_frames_per_clip}) must be strictly positive" 53 | ) 54 | if policy not in _POLICY_FUNCTIONS.keys(): 55 | raise ValueError( 56 | f"Invalid policy ({policy}). Supported values are {_POLICY_FUNCTIONS.keys()}." 57 | ) 58 | 59 | 60 | def _reshape_4d_framebatch_into_5d( 61 | *, 62 | frames: FrameBatch, 63 | num_clips: int, 64 | num_frames_per_clip: int, 65 | ) -> FrameBatch: 66 | last_3_dims = frames.data.shape[-3:] 67 | return FrameBatch( 68 | data=frames.data.view(num_clips, num_frames_per_clip, *last_3_dims), 69 | pts_seconds=frames.pts_seconds.view(num_clips, num_frames_per_clip), 70 | duration_seconds=frames.duration_seconds.view(num_clips, num_frames_per_clip), 71 | ) 72 | 73 | 74 | _FRAMEBATCH_RETURN_DOCS = """ 75 | Returns: 76 | FrameBatch: 77 | The sampled :term:`clips`, as a 5D :class:`~torchcodec.FrameBatch`. 78 | The shape of the ``data`` field is (``num_clips``, 79 | ``num_frames_per_clips``, ...) where ... is (H, W, C) or (C, H, W) 80 | depending on the ``dimension_order`` parameter of 81 | :class:`~torchcodec.decoders.VideoDecoder`. The shape of the 82 | ``pts_seconds`` and ``duration_seconds`` fields is (``num_clips``, 83 | ``num_frames_per_clips``). 84 | """ 85 | -------------------------------------------------------------------------------- /src/torchcodec/share/cmake/TorchCodec/TorchCodecConfig.cmake: -------------------------------------------------------------------------------- 1 | # FindTorchCodec 2 | # -------------- 3 | # 4 | # Finds the TorchCodec library 5 | # 6 | # This will define the following variables: 7 | # 8 | # TORCHCODEC_FOUND: True if the system has the TorchCodec library 9 | # TORCHCODEC_VARIANTS: list of TorchCodec variants. A variant is a supported 10 | # FFmpeg major version. 11 | # 12 | # and the following imported targets: 13 | # 14 | # torchcodec::ffmpeg${N} 15 | # torchcodec::core${N} 16 | # 17 | # where N is a TorchCodec variant (FFmpeg major version) from 18 | # TORCHCODEC_VARIANTS list. 19 | 20 | include(FindPackageHandleStandardArgs) 21 | include("${CMAKE_CURRENT_LIST_DIR}/ffmpeg_versions.cmake") 22 | 23 | # Assume we are in /share/cmake/TorchCodec/TorchCodecConfig.cmake 24 | get_filename_component(CMAKE_CURRENT_LIST_DIR "${CMAKE_CURRENT_LIST_FILE}" PATH) 25 | get_filename_component(TORCHCODEC_INSTALL_PREFIX "${CMAKE_CURRENT_LIST_DIR}/../../../" ABSOLUTE) 26 | 27 | # Include directories. 
28 | set(TORCHCODEC_INCLUDE_DIRS ${TORCHCODEC_INSTALL_PREFIX}/_core) 29 | set(TORCHCODEC_VARIANTS "") 30 | 31 | function(add_torchcodec_target ffmpeg_major_version) 32 | set(target torchcodec::core${ffmpeg_major_version}) 33 | 34 | if (NOT TARGET torchcodec::ffmpeg${ffmpeg_major_version}) 35 | message(FATAL_ERROR "torchcodec::ffmpeg${ffmpeg_major_version} target is not defined") 36 | endif() 37 | 38 | find_library(lib_path torchcodec_core${ffmpeg_major_version} 39 | PATHS "${TORCHCODEC_INSTALL_PREFIX}" NO_CACHE NO_DEFAULT_PATH) 40 | if (NOT lib_path) 41 | message(FATAL_ERROR "torchcodec_core${ffmpeg_major_version} shared library is missing") 42 | endif() 43 | 44 | message("Adding ${target} target") 45 | add_library(${target} SHARED IMPORTED) 46 | add_dependencies(${target} torchcodec::ffmpeg${ffmpeg_major_version}) 47 | set_target_properties(${target} PROPERTIES 48 | INTERFACE_INCLUDE_DIRECTORIES ${TORCHCODEC_INCLUDE_DIRS} 49 | IMPORTED_LOCATION ${lib_path} 50 | ) 51 | 52 | list(APPEND TORCHCODEC_VARIANTS "${ffmpeg_major_version}") 53 | set(TORCHCODEC_VARIANTS "${TORCHCODEC_VARIANTS}" PARENT_SCOPE) 54 | endfunction() 55 | 56 | # If any of the TORCHCODEC_FFMPEG${N}_INSTALL_PREFIX environment variables 57 | # are defined, use them to locate the corresponding FFmpeg and TorchCodec targets. 58 | # Otherwise, fall back to pkg-config to find FFmpeg. 59 | set(use_pkg_config TRUE) 60 | foreach(ffmpeg_major_version IN LISTS TORCHCODEC_SUPPORTED_FFMPEG_VERSIONS) 61 | if (DEFINED ENV{TORCHCODEC_FFMPEG${ffmpeg_major_version}_INSTALL_PREFIX}) 62 | add_ffmpeg_target( 63 | "${ffmpeg_major_version}" 64 | "$ENV{TORCHCODEC_FFMPEG${ffmpeg_major_version}_INSTALL_PREFIX}" 65 | ) 66 | add_torchcodec_target(${ffmpeg_major_version}) 67 | set(use_pkg_config FALSE) 68 | endif() 69 | endforeach() 70 | 71 | if (use_pkg_config) 72 | add_ffmpeg_target_with_pkg_config(ffmpeg_major_version) 73 | add_torchcodec_target(${ffmpeg_major_version}) 74 | endif() 75 | 76 | find_package_handle_standard_args(TorchCodec DEFAULT_MSG TORCHCODEC_VARIANTS) 77 | -------------------------------------------------------------------------------- /packaging/check_glibcxx.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | # All rights reserved. 3 | # 4 | # This source code is licensed under the BSD-style license found in the 5 | # LICENSE file in the root directory of this source tree. 6 | 7 | """ 8 | The goal of this script is to ensure that the .so files we ship do not contain 9 | symbol versions from libstdc++ that are too recent. This is a very manual way of 10 | doing the checks that `auditwheel repair` would do (but using auditwheel isn't 11 | necessarily easy either). 12 | 13 | Why this is needed: during development, we observed the following general 14 | scenario in various local development setups: 15 | - torchcodec is compiled with a given (somewhat recent) c++ toolchain (say 16 | gcc11) 17 | - because the toolchain is recent, some recent symbol versions from libstdc++ 18 | are added as dependencies in the torchcodec?.so files, e.g. GLIBCXX_3.4.29 19 | (this is normal) 20 | - at runtime, for whatever reason, the libstdc++.so that gets loaded is *not* 21 | the one that was used when building. The libstdc++.so that is loaded can be 22 | older than the toolchain one, and it doesn't contain the more recent symbols 23 | that torchcodec?.so depends on, which leads to a runtime error. 
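(Illustrative only, and not part of the packaging flow — the library name below is a placeholder: the GLIBCXX versions that a built extension actually references can be listed with something like `objdump -T libtorchcodec_core7.so | grep -o 'GLIBCXX_[0-9.]*' | sort -u`. Output of that kind, one match per line, is what this script expects as its single argument.)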
24 | 25 | The reasons why a different libstdc++.so is loaded at runtime can be multiple 26 | (and mysterious! https://hackmd.io/@_NznxihTSmC-IgW4cgnlyQ/HJXc4BEHR). 27 | 28 | This script doesn't try to prevent *that* (it's impossible anyway, as we don't 29 | control users' environments). Instead, it prevents the dependency of torchcodec 30 | on recent symbol versions, which ensures that torchcodec can run on both recent 31 | *and* older runtimes. 32 | The most recent symbol on the manylinux torch 2.3.1 wheel is 33 | GLIBCXX_3.4.19, so as long as torchcodec doesn't ship a symbol that is higher 34 | than that, torchcodec should be fine. 35 | 36 | The easiest way to avoid recent symbols is simply to use an old-enough 37 | toolchain. Relying on the test-infra runners should be enough. 38 | """ 39 | 40 | import re 41 | import sys 42 | 43 | if len(sys.argv) != 2: 44 | raise ValueError("Wrong usage: python check_glibcxx.py .") 45 | 46 | MAX_ALLOWED = (3, 4, 24) 47 | 48 | symbol_matches = sys.argv[1].split("\n") 49 | all_symbols = set() 50 | for line in symbol_matches: 51 | # We search for GLIBCXX_major.minor.micro 52 | if match := re.search(r"GLIBCXX_\d+\.\d+\.\d+", line): 53 | all_symbols.add(match.group(0)) 54 | 55 | if not all_symbols: 56 | raise ValueError( 57 | f"No GLIBCXX symbols found in {symbol_matches}. Something is wrong." 58 | ) 59 | 60 | all_versions = (symbol.split("_")[1].split(".") for symbol in all_symbols) 61 | all_versions = (tuple(int(v) for v in version) for version in all_versions) 62 | max_version = max(all_versions) 63 | 64 | print(f"Found the following GLIBCXX symbol versions: {all_symbols}.") 65 | print(f"The max version is {max_version}. Max allowed is {MAX_ALLOWED}.") 66 | 67 | if max_version > MAX_ALLOWED: 68 | raise AssertionError( 69 | "The max version is greater than the max allowed! " 70 | "That may lead to compatibility issues. " 71 | "Was the wheel compiled with an old-enough toolchain?"
72 | ) 73 | 74 | print("All good.") 75 | -------------------------------------------------------------------------------- /benchmarks/decoders/benchmark_audio_decoders.py: -------------------------------------------------------------------------------- 1 | from argparse import ArgumentParser 2 | from datetime import timedelta 3 | from pathlib import Path 4 | from time import perf_counter_ns 5 | 6 | import torch 7 | import torchaudio 8 | from torch import Tensor 9 | from torchaudio.io import StreamReader 10 | from torchcodec.decoders._audio_decoder import AudioDecoder 11 | 12 | DEFAULT_NUM_EXP = 30 13 | 14 | 15 | def bench(f, *args, num_exp=DEFAULT_NUM_EXP, warmup=1, **kwargs) -> Tensor: 16 | 17 | for _ in range(warmup): 18 | f(*args, **kwargs) 19 | 20 | times = [] 21 | for _ in range(num_exp): 22 | start = perf_counter_ns() 23 | f(*args, **kwargs) 24 | end = perf_counter_ns() 25 | times.append(end - start) 26 | return torch.tensor(times).float() 27 | 28 | 29 | def report_stats(times: Tensor, unit: str = "ms", prefix: str = "") -> float: 30 | mul = { 31 | "ns": 1, 32 | "µs": 1e-3, 33 | "ms": 1e-6, 34 | "s": 1e-9, 35 | }[unit] 36 | times = times * mul 37 | std = times.std().item() 38 | med = times.median().item() 39 | mean = times.mean().item() 40 | min = times.min().item() 41 | max = times.max().item() 42 | print( 43 | f"{prefix:<40} {med = :.2f}, {mean = :.2f} +- {std:.2f}, {min = :.2f}, {max = :.2f} - in {unit}" 44 | ) 45 | 46 | 47 | def decode_with_torchcodec(path: Path) -> None: 48 | AudioDecoder(path).get_all_samples() 49 | 50 | 51 | def decode_with_torchaudio_StreamReader(path: Path) -> None: 52 | reader = StreamReader(path) 53 | reader.add_audio_stream(frames_per_chunk=1024) 54 | for _ in reader.stream(): 55 | pass 56 | 57 | 58 | def decode_with_torchaudio_load(path: Path, backend: str) -> None: 59 | torchaudio.load(str(path), backend=backend) 60 | 61 | 62 | parser = ArgumentParser() 63 | parser.add_argument("--path", type=str, help="path to file", required=True) 64 | parser.add_argument( 65 | "--num-exp", 66 | type=int, 67 | default=DEFAULT_NUM_EXP, 68 | help="number of runs to average over", 69 | ) 70 | 71 | args = parser.parse_args() 72 | path = Path(args.path) 73 | 74 | metadata = AudioDecoder(path).metadata 75 | duration = str(timedelta(seconds=metadata.duration_seconds_from_header)).split(".")[0] 76 | 77 | print( 78 | f"Benchmarking {path.name}, duration: {duration}, codec: {metadata.codec}, format: {metadata.sample_format}, averaging over {args.num_exp} runs:" 79 | ) 80 | 81 | for decode_f, kwargs, prefix in ( 82 | (decode_with_torchcodec, {}, "torchcodec.AudioDecoder"), 83 | ( 84 | decode_with_torchaudio_load, 85 | {"backend": "ffmpeg"}, 86 | "torchaudio.load(backend='ffmpeg')", 87 | ), 88 | (decode_with_torchaudio_load, {"backend": "sox"}, "torchaudio.load(backend='sox')"), 89 | ( 90 | decode_with_torchaudio_load, 91 | {"backend": "soundfile"}, 92 | "torchaudio.load(backend='soundfile')", 93 | ), 94 | (decode_with_torchaudio_StreamReader, {}, "torchaudio.StreamReader"), 95 | ): 96 | 97 | try: 98 | times = bench(decode_f, path, **kwargs, num_exp=args.num_exp) 99 | report_stats(times, prefix=prefix) 100 | except RuntimeError: 101 | print(f"{prefix:<40} Not supported") 102 | -------------------------------------------------------------------------------- /test/conftest.py: -------------------------------------------------------------------------------- 1 | import os 2 | import random 3 | 4 | import pytest 5 | import torch 6 | 7 | from .utils import in_fbcode 8 | 9 | 10 | def 
pytest_configure(config): 11 | # register an additional marker (see pytest_collection_modifyitems) 12 | config.addinivalue_line( 13 | "markers", "needs_cuda: mark for tests that rely on a CUDA device" 14 | ) 15 | config.addinivalue_line( 16 | "markers", "needs_ffmpeg_cli: mark for tests that rely on ffmpeg" 17 | ) 18 | 19 | 20 | def pytest_collection_modifyitems(items): 21 | # This hook is called by pytest after it has collected the tests (google its 22 | # name to check out its doc!). We can ignore some tests as we see fit here, 23 | # or add marks, such as a skip mark. 24 | 25 | out_items = [] 26 | for item in items: 27 | # The needs_cuda mark will exist if the test was explicitly decorated 28 | # with the @needs_cuda decorator. It will also exist if it was 29 | # parametrized with a parameter that has the mark: for example if a test 30 | # is parametrized with 31 | # @pytest.mark.parametrize('device', all_supported_devices()) 32 | # the "instances" of the tests where device == 'cuda' will have the 33 | # 'needs_cuda' mark, and the ones with device == 'cpu' won't have the 34 | # mark. 35 | needs_cuda = item.get_closest_marker("needs_cuda") is not None 36 | needs_ffmpeg_cli = item.get_closest_marker("needs_ffmpeg_cli") is not None 37 | has_skip_marker = item.get_closest_marker("skip") is not None 38 | has_skipif_marker = item.get_closest_marker("skipif") is not None 39 | 40 | if in_fbcode(): 41 | # fbcode doesn't like skipping tests, so instead we just don't collect the test 42 | # so that they don't even "exist", hence the continue statements. 43 | if needs_ffmpeg_cli or has_skip_marker or has_skipif_marker: 44 | continue 45 | 46 | if ( 47 | needs_cuda 48 | and not torch.cuda.is_available() 49 | and os.environ.get("FAIL_WITHOUT_CUDA") is None 50 | ): 51 | # We skip CUDA tests on non-CUDA machines, but only if the 52 | # FAIL_WITHOUT_CUDA env var wasn't set. If it's set, the test will 53 | # typically fail with a "Unsupported device: cuda" error. This is 54 | # normal and desirable: this env var is set on CI jobs that are 55 | # supposed to run the CUDA tests, so if CUDA isn't available on 56 | # those for whatever reason, we need to know. 57 | item.add_marker(pytest.mark.skip(reason="CUDA not available.")) 58 | 59 | out_items.append(item) 60 | 61 | items[:] = out_items 62 | 63 | 64 | @pytest.fixture(autouse=True) 65 | def prevent_leaking_rng(): 66 | # Prevent each test from leaking the rng to all other test when they call 67 | # torch.manual_seed() or random.seed(). 68 | 69 | torch_rng_state = torch.get_rng_state() 70 | builtin_rng_state = random.getstate() 71 | if torch.cuda.is_available(): 72 | cuda_rng_state = torch.cuda.get_rng_state() 73 | 74 | yield 75 | 76 | torch.set_rng_state(torch_rng_state) 77 | random.setstate(builtin_rng_state) 78 | if torch.cuda.is_available(): 79 | torch.cuda.set_rng_state(cuda_rng_state) 80 | -------------------------------------------------------------------------------- /src/torchcodec/_core/Metadata.h: -------------------------------------------------------------------------------- 1 | // Copyright (c) Meta Platforms, Inc. and affiliates. 2 | // All rights reserved. 3 | // 4 | // This source code is licensed under the BSD-style license found in the 5 | // LICENSE file in the root directory of this source tree. 
6 | 7 | #pragma once 8 | 9 | #include 10 | #include 11 | #include 12 | 13 | extern "C" { 14 | #include 15 | #include 16 | #include 17 | } 18 | 19 | namespace facebook::torchcodec { 20 | 21 | enum class SeekMode { exact, approximate, custom_frame_mappings }; 22 | 23 | struct StreamMetadata { 24 | // Common (video and audio) fields derived from the AVStream. 25 | int streamIndex = -1; 26 | 27 | // See this link for what various values are available: 28 | // https://ffmpeg.org/doxygen/trunk/group__lavu__misc.html#ga9a84bba4713dfced21a1a56163be1f48 29 | AVMediaType mediaType = AVMEDIA_TYPE_UNKNOWN; 30 | 31 | std::optional codecId; 32 | std::optional codecName; 33 | std::optional durationSecondsFromHeader; 34 | std::optional beginStreamSecondsFromHeader; 35 | std::optional numFramesFromHeader; 36 | std::optional numKeyFrames; 37 | std::optional averageFpsFromHeader; 38 | std::optional bitRate; 39 | 40 | // Used as fallback in approximate mode when stream duration is unavailable. 41 | std::optional durationSecondsFromContainer; 42 | 43 | // More accurate duration, obtained by scanning the file. 44 | // These presentation timestamps are in time base. 45 | std::optional beginStreamPtsFromContent; 46 | std::optional endStreamPtsFromContent; 47 | 48 | // These presentation timestamps are in seconds. 49 | std::optional beginStreamPtsSecondsFromContent; 50 | std::optional endStreamPtsSecondsFromContent; 51 | 52 | // This can be useful for index-based seeking. 53 | std::optional numFramesFromContent; 54 | 55 | // Video-only fields 56 | std::optional width; 57 | std::optional height; 58 | std::optional sampleAspectRatio; 59 | 60 | // Audio-only fields 61 | std::optional sampleRate; 62 | std::optional numChannels; 63 | std::optional sampleFormat; 64 | 65 | // Computed methods with fallback logic 66 | std::optional getDurationSeconds(SeekMode seekMode) const; 67 | double getBeginStreamSeconds(SeekMode seekMode) const; 68 | std::optional getEndStreamSeconds(SeekMode seekMode) const; 69 | std::optional getNumFrames(SeekMode seekMode) const; 70 | std::optional getAverageFps(SeekMode seekMode) const; 71 | }; 72 | 73 | struct ContainerMetadata { 74 | std::vector allStreamMetadata; 75 | int numAudioStreams = 0; 76 | int numVideoStreams = 0; 77 | 78 | // Note that this is the container-level duration, which is usually the max 79 | // of all stream durations available in the container. 80 | std::optional durationSecondsFromHeader; 81 | 82 | // Total BitRate level information at the container level in bit/s 83 | std::optional bitRate; 84 | 85 | // If set, this is the index to the default audio stream. 86 | std::optional bestAudioStreamIndex; 87 | 88 | // If set, this is the index to the default video stream. 
89 | std::optional bestVideoStreamIndex; 90 | }; 91 | 92 | } // namespace facebook::torchcodec 93 | -------------------------------------------------------------------------------- /.github/workflows/reference_resources.yaml: -------------------------------------------------------------------------------- 1 | name: Reference resource generation tests 2 | 3 | on: 4 | workflow_dispatch: 5 | pull_request: 6 | paths: 7 | - test/generate_reference_resources.py 8 | - .github/workflows/reference_resources.yaml # self reference 9 | schedule: 10 | - cron: '0 0 * * 0' # on sunday 11 | 12 | defaults: 13 | run: 14 | shell: bash -l -eo pipefail {0} 15 | 16 | jobs: 17 | generate-matrix: 18 | uses: pytorch/test-infra/.github/workflows/generate_binary_build_matrix.yml@main 19 | with: 20 | package-type: wheel 21 | os: linux 22 | test-infra-repository: pytorch/test-infra 23 | test-infra-ref: main 24 | with-xpu: disable 25 | with-rocm: disable 26 | with-cuda: disable 27 | build-python-only: "disable" 28 | 29 | build: 30 | needs: generate-matrix 31 | strategy: 32 | fail-fast: false 33 | name: Build and Upload Linux wheel 34 | uses: pytorch/test-infra/.github/workflows/build_wheels_linux.yml@main 35 | with: 36 | repository: meta-pytorch/torchcodec 37 | ref: "" 38 | test-infra-repository: pytorch/test-infra 39 | test-infra-ref: main 40 | build-matrix: ${{ needs.generate-matrix.outputs.matrix }} 41 | pre-script: packaging/pre_build_script.sh 42 | post-script: packaging/post_build_script.sh 43 | smoke-test-script: packaging/fake_smoke_test.py 44 | package-name: torchcodec 45 | trigger-event: ${{ github.event_name }} 46 | build-platform: "python-build-package" 47 | build-command: "BUILD_AGAINST_ALL_FFMPEG_FROM_S3=1 python -m build --wheel -vvv --no-isolation" 48 | 49 | test-reference-resource-generation: 50 | needs: build 51 | runs-on: ubuntu-latest 52 | strategy: 53 | fail-fast: false 54 | matrix: 55 | python-version: ['3.10'] 56 | ffmpeg-version-for-tests: ['4.4.2', '5.1.2', '6.1.1', '7.0.1'] 57 | steps: 58 | - uses: actions/download-artifact@v4 59 | with: 60 | name: meta-pytorch_torchcodec__${{ matrix.python-version }}_cpu_x86_64 61 | path: pytorch/torchcodec/dist/ 62 | - name: Setup conda env 63 | uses: conda-incubator/setup-miniconda@v2 64 | with: 65 | auto-update-conda: true 66 | miniconda-version: "latest" 67 | activate-environment: test 68 | python-version: ${{ matrix.python-version }} 69 | 70 | - name: Install ffmpeg 71 | run: | 72 | conda install "ffmpeg=${{ matrix.ffmpeg-version-for-tests }}" -c conda-forge 73 | ffmpeg -version 74 | 75 | - name: Update pip 76 | run: python -m pip install --upgrade pip 77 | 78 | - name: Install generation dependencies 79 | run: | 80 | # Note that we're installing stable - this is for running a script where we're a normal PyTorch 81 | # user, not for building TorchCodec.
82 | python -m pip install torch --index-url https://download.pytorch.org/whl/cpu 83 | python -m pip install numpy pillow pytest 84 | 85 | - name: Install torchcodec from the wheel 86 | run: | 87 | wheel_path=`find pytorch/torchcodec/dist -type f -name "*.whl"` 88 | echo Installing $wheel_path 89 | python -m pip install $wheel_path -vvv 90 | - name: Check out repo 91 | uses: actions/checkout@v3 92 | 93 | - name: Run generation reference resources 94 | run: | 95 | python -m test.generate_reference_resources 96 | -------------------------------------------------------------------------------- /src/torchcodec/_core/NVDECCache.h: -------------------------------------------------------------------------------- 1 | // Copyright (c) Meta Platforms, Inc. and affiliates. 2 | // All rights reserved. 3 | // 4 | // This source code is licensed under the BSD-style license found in the 5 | // LICENSE file in the root directory of this source tree. 6 | 7 | #pragma once 8 | 9 | #include 10 | #include 11 | #include 12 | 13 | #include 14 | #include 15 | 16 | #include "NVCUVIDRuntimeLoader.h" 17 | #include "nvcuvid_include/cuviddec.h" 18 | #include "nvcuvid_include/nvcuvid.h" 19 | 20 | namespace facebook::torchcodec { 21 | 22 | // This file implements a cache for NVDEC decoders. 23 | // TODONVDEC P3: Consider merging this with Cache.h. The main difference is that 24 | // this NVDEC Cache involves a cache key (the decoder parameters). 25 | 26 | struct CUvideoDecoderDeleter { 27 | void operator()(CUvideodecoder* decoderPtr) const { 28 | if (decoderPtr && *decoderPtr) { 29 | cuvidDestroyDecoder(*decoderPtr); 30 | delete decoderPtr; 31 | } 32 | } 33 | }; 34 | 35 | using UniqueCUvideodecoder = 36 | std::unique_ptr; 37 | 38 | // A per-device cache for NVDEC decoders. There is one instance of this class 39 | // per GPU device, and it is accessed through the static getCache() method. 40 | class NVDECCache { 41 | public: 42 | static NVDECCache& getCache(const torch::Device& device); 43 | 44 | // Get decoder from cache - returns nullptr if none available 45 | UniqueCUvideodecoder getDecoder(CUVIDEOFORMAT* videoFormat); 46 | 47 | // Return decoder to cache - returns true if added to cache 48 | bool returnDecoder(CUVIDEOFORMAT* videoFormat, UniqueCUvideodecoder decoder); 49 | 50 | private: 51 | // Cache key struct: a decoder can be reused and taken from the cache only if 52 | // all these parameters match. 
53 | struct CacheKey { 54 | cudaVideoCodec codecType; 55 | uint32_t width; 56 | uint32_t height; 57 | cudaVideoChromaFormat chromaFormat; 58 | uint32_t bitDepthLumaMinus8; 59 | uint8_t numDecodeSurfaces; 60 | 61 | CacheKey() = delete; 62 | 63 | explicit CacheKey(CUVIDEOFORMAT* videoFormat) 64 | : codecType(videoFormat->codec), 65 | width(videoFormat->coded_width), 66 | height(videoFormat->coded_height), 67 | chromaFormat(videoFormat->chroma_format), 68 | bitDepthLumaMinus8(videoFormat->bit_depth_luma_minus8), 69 | numDecodeSurfaces(videoFormat->min_num_decode_surfaces) {} 70 | 71 | CacheKey(const CacheKey&) = default; 72 | CacheKey& operator=(const CacheKey&) = default; 73 | 74 | bool operator<(const CacheKey& other) const { 75 | return std::tie( 76 | codecType, 77 | width, 78 | height, 79 | chromaFormat, 80 | bitDepthLumaMinus8, 81 | numDecodeSurfaces) < 82 | std::tie( 83 | other.codecType, 84 | other.width, 85 | other.height, 86 | other.chromaFormat, 87 | other.bitDepthLumaMinus8, 88 | other.numDecodeSurfaces); 89 | } 90 | }; 91 | 92 | NVDECCache() = default; 93 | ~NVDECCache() = default; 94 | 95 | std::map cache_; 96 | std::mutex cacheLock_; 97 | 98 | // Max number of cached decoders, per device 99 | static constexpr int MAX_CACHE_SIZE = 20; 100 | }; 101 | 102 | } // namespace facebook::torchcodec 103 | -------------------------------------------------------------------------------- /CODE_OF_CONDUCT.md: -------------------------------------------------------------------------------- 1 | # Code of Conduct 2 | 3 | ## Our Pledge 4 | 5 | In the interest of fostering an open and welcoming environment, we as 6 | contributors and maintainers pledge to make participation in our project and 7 | our community a harassment-free experience for everyone, regardless of age, body 8 | size, disability, ethnicity, sex characteristics, gender identity and expression, 9 | level of experience, education, socio-economic status, nationality, personal 10 | appearance, race, religion, or sexual identity and orientation. 11 | 12 | ## Our Standards 13 | 14 | Examples of behavior that contributes to creating a positive environment 15 | include: 16 | 17 | * Using welcoming and inclusive language 18 | * Being respectful of differing viewpoints and experiences 19 | * Gracefully accepting constructive criticism 20 | * Focusing on what is best for the community 21 | * Showing empathy towards other community members 22 | 23 | Examples of unacceptable behavior by participants include: 24 | 25 | * The use of sexualized language or imagery and unwelcome sexual attention or 26 | advances 27 | * Trolling, insulting/derogatory comments, and personal or political attacks 28 | * Public or private harassment 29 | * Publishing others' private information, such as a physical or electronic 30 | address, without explicit permission 31 | * Other conduct which could reasonably be considered inappropriate in a 32 | professional setting 33 | 34 | ## Our Responsibilities 35 | 36 | Project maintainers are responsible for clarifying the standards of acceptable 37 | behavior and are expected to take appropriate and fair corrective action in 38 | response to any instances of unacceptable behavior. 
39 | 40 | Project maintainers have the right and responsibility to remove, edit, or 41 | reject comments, commits, code, wiki edits, issues, and other contributions 42 | that are not aligned to this Code of Conduct, or to ban temporarily or 43 | permanently any contributor for other behaviors that they deem inappropriate, 44 | threatening, offensive, or harmful. 45 | 46 | ## Scope 47 | 48 | This Code of Conduct applies within all project spaces, and it also applies when 49 | an individual is representing the project or its community in public spaces. 50 | Examples of representing a project or community include using an official 51 | project e-mail address, posting via an official social media account, or acting 52 | as an appointed representative at an online or offline event. Representation of 53 | a project may be further defined and clarified by project maintainers. 54 | 55 | ## Enforcement 56 | 57 | Instances of abusive, harassing, or otherwise unacceptable behavior may be 58 | reported by contacting the project team at . All 59 | complaints will be reviewed and investigated and will result in a response that 60 | is deemed necessary and appropriate to the circumstances. The project team is 61 | obligated to maintain confidentiality with regard to the reporter of an incident. 62 | Further details of specific enforcement policies may be posted separately. 63 | 64 | Project maintainers who do not follow or enforce the Code of Conduct in good 65 | faith may face temporary or permanent repercussions as determined by other 66 | members of the project's leadership. 67 | 68 | ## Attribution 69 | 70 | This Code of Conduct is adapted from the [Contributor Covenant][homepage], version 1.4, 71 | available at https://www.contributor-covenant.org/version/1/4/code-of-conduct.html 72 | 73 | [homepage]: https://www.contributor-covenant.org 74 | 75 | For answers to common questions about this code of conduct, see 76 | https://www.contributor-covenant.org/faq 77 | -------------------------------------------------------------------------------- /src/torchcodec/_core/AVIOFileLikeContext.cpp: -------------------------------------------------------------------------------- 1 | // Copyright (c) Meta Platforms, Inc. and affiliates. 2 | // All rights reserved. 3 | // 4 | // This source code is licensed under the BSD-style license found in the 5 | // LICENSE file in the root directory of this source tree. 6 | 7 | #include "AVIOFileLikeContext.h" 8 | #include 9 | 10 | namespace facebook::torchcodec { 11 | 12 | AVIOFileLikeContext::AVIOFileLikeContext( 13 | const py::object& fileLike, 14 | bool isForWriting) 15 | : fileLike_{UniquePyObject(new py::object(fileLike))} { 16 | { 17 | // TODO: Is it necessary to acquire the GIL here? Is it maybe even 18 | // harmful? At the moment, this is only called from within a pybind 19 | // function, and pybind guarantees we have the GIL. 
20 | py::gil_scoped_acquire gil; 21 | 22 | if (isForWriting) { 23 | TORCH_CHECK( 24 | py::hasattr(fileLike, "write"), 25 | "File like object must implement a write method for writing."); 26 | } else { 27 | TORCH_CHECK( 28 | py::hasattr(fileLike, "read"), 29 | "File like object must implement a read method for reading."); 30 | } 31 | 32 | TORCH_CHECK( 33 | py::hasattr(fileLike, "seek"), 34 | "File like object must implement a seek method."); 35 | } 36 | createAVIOContext(&read, &write, &seek, &fileLike_, isForWriting); 37 | } 38 | 39 | int AVIOFileLikeContext::read(void* opaque, uint8_t* buf, int buf_size) { 40 | auto fileLike = static_cast(opaque); 41 | 42 | // Note that we acquire the GIL outside of the loop. This is likely more 43 | // efficient than releasing and acquiring it each loop iteration. 44 | py::gil_scoped_acquire gil; 45 | 46 | int totalNumRead = 0; 47 | while (totalNumRead < buf_size) { 48 | int request = buf_size - totalNumRead; 49 | 50 | // The Python method returns the actual bytes, which we access through the 51 | // py::bytes wrapper. That wrapper, however, does not provide us access to 52 | // the underlying data pointer, which we need for the memcpy below. So we 53 | // convert the bytes to a string_view to get access to the data pointer. 54 | // Because it's a view and not a copy, it should be cheap. 55 | auto bytesRead = static_cast((*fileLike)->attr("read")(request)); 56 | auto bytesView = static_cast(bytesRead); 57 | 58 | int numBytesRead = static_cast(bytesView.size()); 59 | if (numBytesRead == 0) { 60 | break; 61 | } 62 | 63 | TORCH_CHECK( 64 | numBytesRead <= request, 65 | "Requested up to ", 66 | request, 67 | " bytes, but received ", 68 | numBytesRead, 69 | " bytes. The given object does not conform to the read protocol of a file object."); 70 | 71 | std::memcpy(buf, bytesView.data(), numBytesRead); 72 | buf += numBytesRead; 73 | totalNumRead += numBytesRead; 74 | } 75 | 76 | return totalNumRead == 0 ? AVERROR_EOF : totalNumRead; 77 | } 78 | 79 | int64_t AVIOFileLikeContext::seek(void* opaque, int64_t offset, int whence) { 80 | // We do not know the file size. 81 | if (whence == AVSEEK_SIZE) { 82 | return AVERROR(EIO); 83 | } 84 | 85 | auto fileLike = static_cast(opaque); 86 | py::gil_scoped_acquire gil; 87 | return py::cast((*fileLike)->attr("seek")(offset, whence)); 88 | } 89 | 90 | int AVIOFileLikeContext::write(void* opaque, const uint8_t* buf, int buf_size) { 91 | auto fileLike = static_cast(opaque); 92 | py::gil_scoped_acquire gil; 93 | py::bytes bytes_obj(reinterpret_cast(buf), buf_size); 94 | 95 | return py::cast((*fileLike)->attr("write")(bytes_obj)); 96 | } 97 | 98 | } // namespace facebook::torchcodec 99 | -------------------------------------------------------------------------------- /examples/encoding/audio_encoding.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | # All rights reserved. 3 | # 4 | # This source code is licensed under the BSD-style license found in the 5 | # LICENSE file in the root directory of this source tree. 6 | 7 | """ 8 | ======================================== 9 | Encoding audio samples with AudioEncoder 10 | ======================================== 11 | 12 | In this example, we'll learn how to encode audio samples to a file or to raw 13 | bytes using the :class:`~torchcodec.encoders.AudioEncoder` class. 14 | """ 15 | 16 | # %% 17 | # Let's first generate some samples to be encoded.
The data to be encoded could 18 | # also just come from an :class:`~torchcodec.decoders.AudioDecoder`! 19 | import torch 20 | from IPython.display import Audio as play_audio 21 | 22 | 23 | def make_sinewave() -> tuple[torch.Tensor, int]: 24 | freq_A = 440 # Hz 25 | sample_rate = 16000 # Hz 26 | duration_seconds = 3 # seconds 27 | t = torch.linspace(0, duration_seconds, int(sample_rate * duration_seconds), dtype=torch.float32) 28 | return torch.sin(2 * torch.pi * freq_A * t), sample_rate 29 | 30 | 31 | samples, sample_rate = make_sinewave() 32 | 33 | print(f"Encoding samples with {samples.shape = } and {sample_rate = }") 34 | play_audio(samples, rate=sample_rate) 35 | 36 | # %% 37 | # We first instantiate an :class:`~torchcodec.encoders.AudioEncoder`. We pass it 38 | # the samples to be encoded. The samples must be a 2D tensors of shape 39 | # ``(num_channels, num_samples)``, or in this case, a 1D tensor where 40 | # ``num_channels`` is assumed to be 1. The values must be float values 41 | # normalized in ``[-1, 1]``: this is also what the 42 | # :class:`~torchcodec.decoders.AudioDecoder` would return. 43 | # 44 | # .. note:: 45 | # 46 | # The ``sample_rate`` parameter corresponds to the sample rate of the 47 | # *input*, not the desired encoded sample rate. 48 | from torchcodec.encoders import AudioEncoder 49 | 50 | encoder = AudioEncoder(samples=samples, sample_rate=sample_rate) 51 | 52 | 53 | # %% 54 | # :class:`~torchcodec.encoders.AudioEncoder` supports encoding samples into a 55 | # file via the :meth:`~torchcodec.encoders.AudioEncoder.to_file` method, or to 56 | # raw bytes via :meth:`~torchcodec.encoders.AudioEncoder.to_tensor`. For the 57 | # purpose of this tutorial we'll use 58 | # :meth:`~torchcodec.encoders.AudioEncoder.to_tensor`, so that we can easily 59 | # re-decode the encoded samples and check their properies. The 60 | # :meth:`~torchcodec.encoders.AudioEncoder.to_file` method works very similarly. 61 | 62 | encoded_samples = encoder.to_tensor(format="mp3") 63 | print(f"{encoded_samples.shape = }, {encoded_samples.dtype = }") 64 | 65 | 66 | # %% 67 | # That's it! 68 | # 69 | # Now that we have our encoded data, we can decode it back, to make sure it 70 | # looks and sounds as expected: 71 | from torchcodec.decoders import AudioDecoder 72 | 73 | samples_back = AudioDecoder(encoded_samples).get_all_samples() 74 | 75 | print(samples_back) 76 | play_audio(samples_back.data, rate=samples_back.sample_rate) 77 | 78 | # %% 79 | # The encoder supports some encoding options that allow you to change how to 80 | # data is encoded. For example, we can decide to encode our mono data (1 81 | # channel) into stereo data (2 channels), and to specify an output sample rate: 82 | 83 | desired_sample_rate = 32000 84 | encoded_samples = encoder.to_tensor(format="wav", num_channels=2, sample_rate=desired_sample_rate) 85 | 86 | stereo_samples_back = AudioDecoder(encoded_samples).get_all_samples() 87 | 88 | print(stereo_samples_back) 89 | play_audio(stereo_samples_back.data, rate=desired_sample_rate) 90 | 91 | # %% 92 | # Check the docstring of the encoding methods to learn about the different 93 | # encoding options. 94 | -------------------------------------------------------------------------------- /examples/decoding/audio_decoding.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | # All rights reserved. 
3 | # 4 | # This source code is licensed under the BSD-style license found in the 5 | # LICENSE file in the root directory of this source tree. 6 | 7 | """ 8 | ======================================== 9 | Decoding audio streams with AudioDecoder 10 | ======================================== 11 | 12 | In this example, we'll learn how to decode an audio file using the 13 | :class:`~torchcodec.decoders.AudioDecoder` class. 14 | """ 15 | 16 | # %% 17 | # First, a bit of boilerplate: we'll download an audio file from the web and 18 | # define an audio playing utility. You can ignore that part and jump right 19 | # below to :ref:`creating_decoder_audio`. 20 | import requests 21 | from IPython.display import Audio 22 | 23 | 24 | def play_audio(samples): 25 | return Audio(samples.data, rate=samples.sample_rate) 26 | 27 | 28 | # Audio source is CC0: https://opengameart.org/content/town-theme-rpg 29 | # Attribution: cynicmusic.com pixelsphere.org 30 | url = "https://opengameart.org/sites/default/files/TownTheme.mp3" 31 | response = requests.get(url, headers={"User-Agent": ""}) 32 | if response.status_code != 200: 33 | raise RuntimeError(f"Failed to download audio. {response.status_code = }.") 34 | 35 | raw_audio_bytes = response.content 36 | 37 | # %% 38 | # .. _creating_decoder_audio: 39 | # 40 | # Creating a decoder 41 | # ------------------ 42 | # 43 | # We can now create a decoder from the raw (encoded) audio bytes. You can of 44 | # course use a local audio file and pass the path as input. You can also decode 45 | # audio streams from videos! 46 | 47 | from torchcodec.decoders import AudioDecoder 48 | 49 | decoder = AudioDecoder(raw_audio_bytes) 50 | 51 | # %% 52 | # The audio has not yet been decoded by the decoder, but we already have access to 53 | # some metadata via the ``metadata`` attribute, which is an 54 | # :class:`~torchcodec.decoders.AudioStreamMetadata` object. 55 | print(decoder.metadata) 56 | 57 | # %% 58 | # Decoding samples 59 | # ---------------- 60 | # 61 | # To get decoded samples, we just need to call the 62 | # :meth:`~torchcodec.decoders.AudioDecoder.get_all_samples` method, 63 | # which returns an :class:`~torchcodec.AudioSamples` object: 64 | 65 | samples = decoder.get_all_samples() 66 | 67 | print(samples) 68 | play_audio(samples) 69 | 70 | # %% 71 | # The ``.data`` field is a tensor of shape ``(num_channels, num_samples)`` and 72 | # of float dtype with values in [-1, 1]. 73 | # 74 | # The ``.pts_seconds`` field indicates the starting time of the output samples. 75 | # Here it's 0.025 seconds, even though we asked for samples starting from 0. Not 76 | # all streams start exactly at 0! This is not a bug in TorchCodec, this is a 77 | # property of the file that was defined when it was encoded. 78 | # 79 | # Specifying a range 80 | # ------------------ 81 | # 82 | # If we don't need all the samples, we can use 83 | # :meth:`~torchcodec.decoders.AudioDecoder.get_samples_played_in_range` to 84 | # decode the samples within a custom range: 85 | 86 | samples = decoder.get_samples_played_in_range(start_seconds=10, stop_seconds=70) 87 | 88 | print(samples) 89 | play_audio(samples) 90 | 91 | # %% 92 | # Custom sample rate 93 | # ------------------ 94 | # 95 | # We can also decode the samples into a desired sample rate using the 96 | # ``sample_rate`` parameter of :class:`~torchcodec.decoders.AudioDecoder`.
The 97 | # ouput will sound similar, but note that the number of samples greatly 98 | # decreased: 99 | 100 | decoder = AudioDecoder(raw_audio_bytes, sample_rate=16_000) 101 | samples = decoder.get_all_samples() 102 | 103 | print(samples) 104 | play_audio(samples) 105 | -------------------------------------------------------------------------------- /.clang-format: -------------------------------------------------------------------------------- 1 | --- 2 | AccessModifierOffset: -1 3 | AlignAfterOpenBracket: AlwaysBreak 4 | AlignConsecutiveMacros: false 5 | AlignConsecutiveAssignments: false 6 | AlignConsecutiveBitFields: false 7 | AlignConsecutiveDeclarations: false 8 | AlignEscapedNewlines: Left 9 | AlignOperands: DontAlign 10 | AlignTrailingComments: false 11 | AllowAllArgumentsOnNextLine: true 12 | AllowAllConstructorInitializersOnNextLine: true 13 | AllowAllParametersOfDeclarationOnNextLine: false 14 | AllowShortEnumsOnASingleLine: true 15 | AllowShortBlocksOnASingleLine: Never 16 | AllowShortCaseLabelsOnASingleLine: false 17 | AllowShortFunctionsOnASingleLine: Empty 18 | AllowShortLambdasOnASingleLine: All 19 | AllowShortIfStatementsOnASingleLine: Never 20 | AllowShortLoopsOnASingleLine: false 21 | AlwaysBreakAfterReturnType: None 22 | AlwaysBreakBeforeMultilineStrings: true 23 | AlwaysBreakTemplateDeclarations: Yes 24 | BinPackArguments: false 25 | BinPackParameters: false 26 | BreakBeforeBinaryOperators: None 27 | BreakBeforeBraces: Attach 28 | BreakInheritanceList: BeforeColon 29 | BreakBeforeTernaryOperators: true 30 | BreakConstructorInitializers: BeforeColon 31 | BreakAfterJavaFieldAnnotations: false 32 | BreakStringLiterals: false 33 | ColumnLimit: 80 34 | CommentPragmas: '^ IWYU pragma:' 35 | CompactNamespaces: false 36 | ConstructorInitializerAllOnOneLineOrOnePerLine: true 37 | ConstructorInitializerIndentWidth: 4 38 | ContinuationIndentWidth: 4 39 | Cpp11BracedListStyle: true 40 | DeriveLineEnding: true 41 | DerivePointerAlignment: false 42 | DisableFormat: false 43 | FixNamespaceComments: true 44 | ForEachMacros: 45 | - FOR_EACH 46 | - FOR_EACH_R 47 | - FOR_EACH_RANGE 48 | IncludeBlocks: Preserve 49 | IncludeCategories: 50 | - Regex: '^<.*\.h(pp)?>' 51 | Priority: 1 52 | - Regex: '^<.*' 53 | Priority: 2 54 | - Regex: '.*' 55 | Priority: 3 56 | IndentCaseLabels: true 57 | IndentCaseBlocks: false 58 | IndentGotoLabels: true 59 | IndentPPDirectives: None 60 | IndentExternBlock: AfterExternBlock 61 | IndentWidth: 2 62 | IndentWrappedFunctionNames: false 63 | InsertTrailingCommas: None 64 | JavaScriptQuotes: Leave 65 | JavaScriptWrapImports: true 66 | KeepEmptyLinesAtTheStartOfBlocks: false 67 | MacroBlockBegin: '' 68 | MacroBlockEnd: '' 69 | MaxEmptyLinesToKeep: 1 70 | NamespaceIndentation: None 71 | ObjCBinPackProtocolList: Auto 72 | ObjCBlockIndentWidth: 2 73 | ObjCBreakBeforeNestedBlockParam: true 74 | ObjCSpaceAfterProperty: false 75 | ObjCSpaceBeforeProtocolList: false 76 | PenaltyBreakAssignment: 2 77 | PenaltyBreakBeforeFirstCallParameter: 1 78 | PenaltyBreakComment: 300 79 | PenaltyBreakFirstLessLess: 120 80 | PenaltyBreakString: 1000 81 | PenaltyBreakTemplateDeclaration: 10 82 | PenaltyExcessCharacter: 1000000 83 | PenaltyReturnTypeOnItsOwnLine: 200 84 | PointerAlignment: Left 85 | ReflowComments: true 86 | SeparateDefinitionBlocks: Always 87 | SortIncludes: true 88 | SortUsingDeclarations: true 89 | SpaceAfterCStyleCast: false 90 | SpaceAfterLogicalNot: false 91 | SpaceAfterTemplateKeyword: true 92 | SpaceBeforeAssignmentOperators: true 93 | 
SpaceBeforeCpp11BracedList: false 94 | SpaceBeforeCtorInitializerColon: true 95 | SpaceBeforeInheritanceColon: true 96 | SpaceBeforeParens: ControlStatements 97 | SpaceBeforeRangeBasedForLoopColon: true 98 | SpaceInEmptyBlock: false 99 | SpaceInEmptyParentheses: false 100 | SpacesBeforeTrailingComments: 1 101 | SpacesInAngles: false 102 | SpacesInConditionalStatement: false 103 | SpacesInContainerLiterals: true 104 | SpacesInCStyleCastParentheses: false 105 | SpacesInParentheses: false 106 | SpacesInSquareBrackets: false 107 | SpaceBeforeSquareBrackets: false 108 | Standard: Latest 109 | TabWidth: 8 110 | UseCRLF: false 111 | UseTab: Never 112 | ... 113 | -------------------------------------------------------------------------------- /src/torchcodec/_core/DeviceInterface.cpp: -------------------------------------------------------------------------------- 1 | // Copyright (c) Meta Platforms, Inc. and affiliates. 2 | // All rights reserved. 3 | // 4 | // This source code is licensed under the BSD-style license found in the 5 | // LICENSE file in the root directory of this source tree. 6 | 7 | #include "DeviceInterface.h" 8 | #include 9 | #include 10 | 11 | namespace facebook::torchcodec { 12 | 13 | namespace { 14 | using DeviceInterfaceMap = 15 | std::map; 16 | static std::mutex g_interface_mutex; 17 | 18 | DeviceInterfaceMap& getDeviceMap() { 19 | static DeviceInterfaceMap deviceMap; 20 | return deviceMap; 21 | } 22 | 23 | std::string getDeviceType(const std::string& device) { 24 | size_t pos = device.find(':'); 25 | if (pos == std::string::npos) { 26 | return device; 27 | } 28 | return device.substr(0, pos); 29 | } 30 | 31 | } // namespace 32 | 33 | bool registerDeviceInterface( 34 | const DeviceInterfaceKey& key, 35 | CreateDeviceInterfaceFn createInterface) { 36 | std::scoped_lock lock(g_interface_mutex); 37 | DeviceInterfaceMap& deviceMap = getDeviceMap(); 38 | 39 | TORCH_CHECK( 40 | deviceMap.find(key) == deviceMap.end(), 41 | "Device interface already registered for device type ", 42 | key.deviceType, 43 | " variant '", 44 | key.variant, 45 | "'"); 46 | deviceMap.insert({key, createInterface}); 47 | 48 | return true; 49 | } 50 | 51 | void validateDeviceInterface( 52 | const std::string device, 53 | const std::string variant) { 54 | std::scoped_lock lock(g_interface_mutex); 55 | std::string deviceType = getDeviceType(device); 56 | 57 | DeviceInterfaceMap& deviceMap = getDeviceMap(); 58 | 59 | // Find device interface that matches device type and variant 60 | torch::DeviceType deviceTypeEnum = torch::Device(deviceType).type(); 61 | 62 | auto deviceInterface = std::find_if( 63 | deviceMap.begin(), 64 | deviceMap.end(), 65 | [&](const std::pair& arg) { 66 | return arg.first.deviceType == deviceTypeEnum && 67 | arg.first.variant == variant; 68 | }); 69 | 70 | TORCH_CHECK( 71 | deviceInterface != deviceMap.end(), 72 | "Unsupported device: ", 73 | device, 74 | " (device type: ", 75 | deviceType, 76 | ", variant: ", 77 | variant, 78 | ")"); 79 | } 80 | 81 | std::unique_ptr createDeviceInterface( 82 | const torch::Device& device, 83 | const std::string_view variant) { 84 | DeviceInterfaceKey key(device.type(), variant); 85 | std::scoped_lock lock(g_interface_mutex); 86 | DeviceInterfaceMap& deviceMap = getDeviceMap(); 87 | 88 | auto it = deviceMap.find(key); 89 | if (it != deviceMap.end()) { 90 | return std::unique_ptr(it->second(device)); 91 | } 92 | 93 | TORCH_CHECK( 94 | false, 95 | "No device interface found for device type: ", 96 | device.type(), 97 | " variant: '", 98 | variant, 99 | 
"'"); 100 | } 101 | 102 | torch::Tensor rgbAVFrameToTensor(const UniqueAVFrame& avFrame) { 103 | TORCH_CHECK_EQ(avFrame->format, AV_PIX_FMT_RGB24); 104 | 105 | int height = avFrame->height; 106 | int width = avFrame->width; 107 | std::vector shape = {height, width, 3}; 108 | std::vector strides = {avFrame->linesize[0], 3, 1}; 109 | AVFrame* avFrameClone = av_frame_clone(avFrame.get()); 110 | auto deleter = [avFrameClone](void*) { 111 | UniqueAVFrame avFrameToDelete(avFrameClone); 112 | }; 113 | return torch::from_blob( 114 | avFrameClone->data[0], shape, strides, deleter, {torch::kUInt8}); 115 | } 116 | 117 | } // namespace facebook::torchcodec 118 | -------------------------------------------------------------------------------- /src/torchcodec/_core/Transform.cpp: -------------------------------------------------------------------------------- 1 | // Copyright (c) Meta Platforms, Inc. and affiliates. 2 | // All rights reserved. 3 | // 4 | // This source code is licensed under the BSD-style license found in the 5 | // LICENSE file in the root directory of this source tree. 6 | 7 | #include "Transform.h" 8 | #include 9 | #include "FFMPEGCommon.h" 10 | 11 | namespace facebook::torchcodec { 12 | 13 | namespace { 14 | 15 | std::string toFilterGraphInterpolation( 16 | ResizeTransform::InterpolationMode mode) { 17 | switch (mode) { 18 | case ResizeTransform::InterpolationMode::BILINEAR: 19 | return "bilinear"; 20 | default: 21 | TORCH_CHECK( 22 | false, 23 | "Unknown interpolation mode: " + 24 | std::to_string(static_cast(mode))); 25 | } 26 | } 27 | 28 | } // namespace 29 | 30 | std::string ResizeTransform::getFilterGraphCpu() const { 31 | return "scale=" + std::to_string(outputDims_.width) + ":" + 32 | std::to_string(outputDims_.height) + 33 | ":flags=" + toFilterGraphInterpolation(interpolationMode_); 34 | } 35 | 36 | std::optional ResizeTransform::getOutputFrameDims() const { 37 | return outputDims_; 38 | } 39 | 40 | CropTransform::CropTransform(const FrameDims& dims) : outputDims_(dims) {} 41 | 42 | CropTransform::CropTransform(const FrameDims& dims, int x, int y) 43 | : outputDims_(dims), x_(x), y_(y) { 44 | TORCH_CHECK(x_ >= 0, "Crop x position must be >= 0, got: ", x_); 45 | TORCH_CHECK(y_ >= 0, "Crop y position must be >= 0, got: ", y_); 46 | } 47 | 48 | std::string CropTransform::getFilterGraphCpu() const { 49 | // For the FFmpeg filter crop, if the x and y coordinates are left 50 | // unspecified, it defaults to a center crop. 51 | std::string coordinates = x_.has_value() 52 | ? 
(":" + std::to_string(x_.value()) + ":" + std::to_string(y_.value())) 53 | : ""; 54 | return "crop=" + std::to_string(outputDims_.width) + ":" + 55 | std::to_string(outputDims_.height) + coordinates + ":exact=1"; 56 | } 57 | 58 | std::optional CropTransform::getOutputFrameDims() const { 59 | return outputDims_; 60 | } 61 | 62 | void CropTransform::validate(const FrameDims& inputDims) const { 63 | TORCH_CHECK( 64 | outputDims_.height <= inputDims.height, 65 | "Crop output height (", 66 | outputDims_.height, 67 | ") is greater than input height (", 68 | inputDims.height, 69 | ")"); 70 | TORCH_CHECK( 71 | outputDims_.width <= inputDims.width, 72 | "Crop output width (", 73 | outputDims_.width, 74 | ") is greater than input width (", 75 | inputDims.width, 76 | ")"); 77 | TORCH_CHECK( 78 | x_.has_value() == y_.has_value(), 79 | "Crop x and y values must be both set or both unset"); 80 | if (x_.has_value()) { 81 | TORCH_CHECK( 82 | x_.value() <= inputDims.width, 83 | "Crop x start position, ", 84 | x_.value(), 85 | ", out of bounds of input width, ", 86 | inputDims.width); 87 | TORCH_CHECK( 88 | x_.value() + outputDims_.width <= inputDims.width, 89 | "Crop x end position, ", 90 | x_.value() + outputDims_.width, 91 | ", out of bounds of input width ", 92 | inputDims.width); 93 | TORCH_CHECK( 94 | y_.value() <= inputDims.height, 95 | "Crop y start position, ", 96 | y_.value(), 97 | ", out of bounds of input height, ", 98 | inputDims.height); 99 | TORCH_CHECK( 100 | y_.value() + outputDims_.height <= inputDims.height, 101 | "Crop y end position, ", 102 | y_.value() + outputDims_.height, 103 | ", out of bounds of input height ", 104 | inputDims.height); 105 | } 106 | } 107 | 108 | } // namespace facebook::torchcodec 109 | -------------------------------------------------------------------------------- /.github/workflows/cpp_tests.yaml: -------------------------------------------------------------------------------- 1 | name: CPP tests 2 | 3 | on: 4 | push: 5 | branches: [ main ] 6 | pull_request: 7 | 8 | concurrency: 9 | group: unit-test${{ github.workflow }}-${{ github.ref == 'refs/heads/main' && github.run_number || github.ref }} 10 | cancel-in-progress: true 11 | 12 | defaults: 13 | run: 14 | shell: bash -l -eo pipefail {0} 15 | 16 | jobs: 17 | Cpp-tests: 18 | runs-on: ubuntu-latest 19 | strategy: 20 | fail-fast: false 21 | matrix: 22 | ffmpeg-version-for-tests: ['4.4.2', '5.1.2', '6.1.1', '7.0.1'] 23 | steps: 24 | - name: Check out repo 25 | uses: actions/checkout@v3 26 | - name: Setup conda env 27 | uses: conda-incubator/setup-miniconda@v3 28 | with: 29 | auto-update-conda: true 30 | # Using miniforge instead of miniconda ensures that the default 31 | # conda channel is conda-forge instead of main/default. This ensures 32 | # ABI consistency between dependencies: 33 | # https://conda-forge.org/docs/user/transitioning_from_defaults/ 34 | miniforge-version: latest 35 | activate-environment: test 36 | python-version: '3.12' 37 | - name: Update pip 38 | run: python -m pip install --upgrade pip 39 | - name: Install torch dependencies 40 | run: | 41 | # If we're in a release branch or in a PR against a release branch, 42 | # we install the PyTorch RCs from the test channel. Otherwise, e.g. in 43 | # `main` or in PRs against `main`, we install the nightly builds. 
44 | # Note that the `test` RCs are 45 | if [[ (${GITHUB_EVENT_NAME} = 'pull_request' && (${GITHUB_BASE_REF} = 'release'*)) || (${GITHUB_REF} = 'refs/heads/release'*) ]]; then 46 | CHANNEL=test 47 | else 48 | CHANNEL=nightly 49 | fi 50 | python -m pip install --pre torch --index-url https://download.pytorch.org/whl/${CHANNEL}/cpu 51 | - name: Install ffmpeg, pkg-config and pybind11 52 | run: | 53 | conda install "ffmpeg=${{ matrix.ffmpeg-version-for-tests }}" pkg-config pybind11 -c conda-forge 54 | ffmpeg -version 55 | - name: Build and run C++ tests 56 | run: | 57 | # Note: we're not setting BUILD_AGAINST_ALL_FFMPEG_FROM_S3 here, so 58 | # we're building libtorchcodec against the installed FFmpeg version 59 | # (from conda-forge) instead of building against our pre-built non-GPL 60 | # FFmpeg libraries. 61 | # The reason we need this is because the C++ tests decode x264 files. 62 | # x264 support is not LGPL, os it is not supported by our 63 | # pre-built non-GPL FFmpeg libraries. And if we were to build against 64 | # those, this is also what the tests would be loading at run time, 65 | # then failing when we try to decode x264. 66 | # To remediate that, we build against the FFmpeg that we installed 67 | # from conda-forge (which is able to decode x264), and that's also 68 | # what gets loaded at run time. 69 | # The Python tests are also decoding x264 files, and are built against 70 | # our non-GPL FFmpeg. And yet they pass. This is because in Python 71 | # we're able to distinguish between build-time (non-GPL FFmpeg) and 72 | # run time (conda-forge FFmpeg). 73 | 74 | build_tests_dir="${PWD}/build_tests" 75 | mkdir $build_tests_dir 76 | pushd $build_tests_dir 77 | TORCH_PATH=$(python -c "import pathlib, torch; print(pathlib.Path(torch.__path__[0]))") 78 | Torch_DIR="${TORCH_PATH}/share/cmake/Torch" 79 | cmake .. -DTorch_DIR=$Torch_DIR -DCMAKE_BUILD_TYPE=Debug -DBUILD_TESTS=ON -DCMAKE_VERBOSE_MAKEFILE=ON 80 | cmake --build . 81 | ctest --output-on-failure 82 | popd 83 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # Contributing to TorchCodec 2 | 3 | You can contribute to this project by writing code, fixing issues or simply by 4 | using the library and reporting your feedback. 5 | 6 | Below are instructions to build TorchCodec from source, as well as the usual 7 | contribution guidelines (code formatting, testing, etc). To submit a PR, please 8 | follow the [official GitHub 9 | guidelines](https://docs.github.com/en/pull-requests/collaborating-with-pull-requests/proposing-changes-to-your-work-with-pull-requests/creating-a-pull-request-from-a-fork). 10 | 11 | ## Building TorchCodec from source 12 | 13 | ### Installing dependencies 14 | 15 | The instructions below assume you are using a conda environment, but the steps 16 | are easily adaptable to other kind of virtual environments. To build, run and 17 | test locally you will need the following dependencies: 18 | 19 | - A C++ compiler+linker. This is typically available on a baseline Linux 20 | installation already. 21 | - cmake 22 | - pkg-config 23 | - pybind11 24 | - FFmpeg 25 | - PyTorch nightly 26 | 27 | Start by installing the **nightly** build of PyTorch following the 28 | [official instructions](https://pytorch.org/get-started/locally/). 
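For example, a CPU-only nightly build can typically be installed with a command along these lines (check the official instructions for the exact variant matching your platform and CUDA setup):

```bash
pip install --pre torch --index-url https://download.pytorch.org/whl/nightly/cpu
```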
29 | 30 | Then, the easiest way to install the rest of the dependencies is to run: 31 | 32 | ```bash 33 | conda install cmake pkg-config pybind11 "ffmpeg" -c conda-forge 34 | ``` 35 | 36 | ### Clone and build 37 | 38 | To clone and install the repo, run: 39 | 40 | ```bash 41 | git clone git@github.com:pytorch/torchcodec.git 42 | # Or, using https instead of ssh: git clone https://github.com/pytorch/torchcodec.git 43 | cd torchcodec 44 | 45 | # Optional, but recommended: define a persistent build directory which speeds-up 46 | # subsequent builds. 47 | export TORCHCODEC_CMAKE_BUILD_DIR="${PWD}/build" 48 | 49 | pip install -e ".[dev]" --no-build-isolation -vv 50 | # Or, for cuda support: ENABLE_CUDA=1 pip install -e ".[dev]" --no-build-isolation -vv 51 | ``` 52 | 53 | ### Running unit tests 54 | 55 | To run python tests run: 56 | 57 | ```bash 58 | pytest 59 | ``` 60 | 61 | Some tests are marked as 'slow' and aren't run by default. You can use `pytest 62 | -m slow` to run those, or `pytest -m ""` to run all tests, slow or not. 63 | 64 | To run the C++ tests run: 65 | 66 | ```bash 67 | mkdir build 68 | cd build 69 | cmake -DCMAKE_BUILD_TYPE=Debug -DBUILD_TESTS=1 -DCMAKE_PREFIX_PATH=$(python3 -c 'import torch;print(torch.utils.cmake_prefix_path)') .. 70 | cmake --build . -- VERBOSE=1 71 | ctest --rerun-failed --output-on-failure 72 | ``` 73 | 74 | ### Code formatting and type checking 75 | 76 | We use `pre-commit` to enforce code formatting and `mypy` for type checking. 77 | Install both with 78 | 79 | ```bash 80 | pip install pre-commit mypy 81 | ``` 82 | 83 | To run pre-commit hooks before each commit, run `pre-commit install`. You may 84 | prefer to run these checks manually, in which case you can just use `pre-commit 85 | run --all-files`. 86 | 87 | For `mypy` we recommend the following command: 88 | 89 | ```bash 90 | mypy --install-types --non-interactive --config-file mypy.ini 91 | ``` 92 | 93 | ### Building the docs 94 | 95 | First install from source, then install the doc dependencies: 96 | 97 | ```bash 98 | cd docs 99 | pip install -r requirements.txt 100 | ``` 101 | 102 | Then, still from within the `docs` directory: 103 | 104 | ```bash 105 | make html 106 | ``` 107 | 108 | The built docs will be in `build/html`. Open in your browser to view them. 109 | 110 | To avoid building the examples (which execute python code and can take time) you 111 | can use `make html-noplot`. To build a subset of specific examples instead of 112 | all of them, you can use a regex like 113 | `EXAMPLES_PATTERN="plot_the_best_example*" make html`. 114 | 115 | Run `make clean` from time to time if you encounter issues. 116 | 117 | ## License 118 | 119 | By contributing to TorchCodec, you agree that your contributions will be 120 | licensed under the LICENSE file in the root directory of this source tree. 121 | 122 | Contributors are also required to 123 | [sign our Contributor License Agreement](https://code.facebook.com/cla). 124 | -------------------------------------------------------------------------------- /benchmarks/decoders/generate_readme_data.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | # All rights reserved. 3 | # 4 | # This source code is licensed under the BSD-style license found in the 5 | # LICENSE file in the root directory of this source tree. 
6 | 7 | import json 8 | import os 9 | import platform 10 | import shutil 11 | from pathlib import Path 12 | 13 | import torch 14 | 15 | from benchmark_decoders_library import ( 16 | BatchParameters, 17 | DataLoaderInspiredWorkloadParameters, 18 | generate_videos, 19 | retrieve_videos, 20 | run_benchmarks, 21 | TorchAudioDecoder, 22 | TorchCodecPublic, 23 | TorchVision, 24 | ) 25 | 26 | NASA_URL = "https://download.pytorch.org/torchaudio/tutorial-assets/stream-api/NASAs_Most_Scientifically_Complex_Space_Observatory_Requires_Precision-MP4_small.mp4" 27 | 28 | 29 | def main() -> None: 30 | """Benchmarks the performance of a few video decoders on synthetic videos""" 31 | 32 | videos_dir_path = "/tmp/torchcodec_benchmarking_videos" 33 | if not os.path.exists(videos_dir_path): 34 | shutil.rmtree(videos_dir_path, ignore_errors=True) 35 | os.makedirs(videos_dir_path) 36 | 37 | resolutions = ["1920x1080"] 38 | encodings = ["libx264"] 39 | patterns = ["mandelbrot"] 40 | fpses = [60] 41 | gop_sizes = [600] 42 | durations = [10, 120] 43 | pix_fmts = ["yuv420p"] 44 | ffmpeg_path = "ffmpeg" 45 | generate_videos( 46 | resolutions, 47 | encodings, 48 | patterns, 49 | fpses, 50 | gop_sizes, 51 | durations, 52 | pix_fmts, 53 | ffmpeg_path, 54 | videos_dir_path, 55 | ) 56 | 57 | urls_and_dest_paths = [ 58 | (NASA_URL, f"{videos_dir_path}/nasa_960x540_206s_30fps_yuv420p.mp4") 59 | ] 60 | retrieve_videos(urls_and_dest_paths) 61 | 62 | decoder_dict = {} 63 | decoder_dict["torchcodec"] = TorchCodecPublic() 64 | decoder_dict["torchcodec[approx]"] = TorchCodecPublic(seek_mode="approximate") 65 | if torch.cuda.is_available(): 66 | decoder_dict["torchcodec[cuda]"] = TorchCodecPublic(device="cuda") 67 | decoder_dict["torchcodec[cuda,approx]"] = TorchCodecPublic( 68 | device="cuda", seek_mode="approximate" 69 | ) 70 | decoder_dict["torchvision[video_reader]"] = TorchVision("video_reader") 71 | decoder_dict["torchaudio"] = TorchAudioDecoder() 72 | 73 | # These are the number of uniform seeks we do in the seek+decode benchmark. 
74 | num_samples = 10 75 | video_files_paths = list(Path(videos_dir_path).glob("*.mp4")) 76 | assert len(video_files_paths) == 3, "Expected exactly 3 videos" 77 | results = run_benchmarks( 78 | decoder_dict, 79 | video_files_paths, 80 | num_samples, 81 | num_sequential_frames_from_start=[100], 82 | min_runtime_seconds=30, 83 | benchmark_video_creation=False, 84 | dataloader_parameters=DataLoaderInspiredWorkloadParameters( 85 | batch_parameters=BatchParameters(batch_size=50, num_threads=10), 86 | resize_height=256, 87 | resize_width=256, 88 | resize_device="cuda" if torch.cuda.is_available() else "cpu", 89 | ), 90 | ) 91 | data_for_writing = { 92 | "experiments": results, 93 | "system_metadata": { 94 | "cpu_count": os.cpu_count(), 95 | "system": platform.system(), 96 | "machine": platform.machine(), 97 | "python_version": str(platform.python_version()), 98 | "cuda": ( 99 | torch.cuda.get_device_properties(0).name 100 | if torch.cuda.is_available() 101 | else "not available" 102 | ), 103 | }, 104 | } 105 | 106 | data_json = Path(__file__).parent / "benchmark_readme_data.json" 107 | with open(data_json, "w") as write_file: 108 | json.dump(data_for_writing, write_file, sort_keys=True, indent=4) 109 | 110 | 111 | if __name__ == "__main__": 112 | main() 113 | -------------------------------------------------------------------------------- /benchmarks/samplers/benchmark_samplers.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | from pathlib import Path 3 | from time import perf_counter_ns 4 | 5 | import torch 6 | from torchcodec.decoders import VideoDecoder 7 | from torchcodec.samplers import ( 8 | clips_at_random_indices, 9 | clips_at_random_timestamps, 10 | clips_at_regular_indices, 11 | clips_at_regular_timestamps, 12 | ) 13 | 14 | DEFAULT_VIDEO_PATH = Path(__file__).parent / "../../test/resources/nasa_13013.mp4" 15 | DEFAULT_NUM_EXP = 30 16 | 17 | 18 | def bench(f, *args, num_exp, warmup=0, seed, **kwargs): 19 | 20 | for _ in range(warmup): 21 | f(*args, **kwargs) 22 | 23 | num_frames = None 24 | times = [] 25 | for _ in range(num_exp): 26 | if seed is not None: 27 | torch.manual_seed(seed) 28 | start = perf_counter_ns() 29 | clips = f(*args, **kwargs) 30 | end = perf_counter_ns() 31 | times.append(end - start) 32 | num_frames = ( 33 | clips.data.shape[0] * clips.data.shape[1] 34 | ) # should be constant across calls 35 | return torch.tensor(times).float(), num_frames 36 | 37 | 38 | def report_stats(times, num_frames, unit="ms"): 39 | fps = num_frames * 1e9 / torch.median(times) 40 | 41 | mul = { 42 | "ns": 1, 43 | "µs": 1e-3, 44 | "ms": 1e-6, 45 | "s": 1e-9, 46 | }[unit] 47 | times = times * mul 48 | std = times.std().item() 49 | med = times.median().item() 50 | print(f"{med = :.2f}{unit} +- {std:.2f} med fps = {fps:.1f}") 51 | return med, fps 52 | 53 | 54 | def sample(decoder, sampler, **kwargs): 55 | return sampler( 56 | decoder, 57 | num_frames_per_clip=10, 58 | **kwargs, 59 | ) 60 | 61 | 62 | def run_sampler_benchmarks(device, video, num_experiments, torch_seed): 63 | 64 | for num_clips in (1, 50): 65 | print("-" * 10) 66 | print(f"{num_clips = }") 67 | 68 | print("clips_at_random_indices ", end="") 69 | decoder = VideoDecoder(video, device=device) 70 | times, num_frames = bench( 71 | sample, 72 | decoder, 73 | clips_at_random_indices, 74 | num_clips=num_clips, 75 | num_exp=num_experiments, 76 | warmup=2, 77 | seed=torch_seed, 78 | ) 79 | report_stats(times, num_frames, unit="ms") 80 | 81 | print("clips_at_regular_indices ", 
end="")
82 | times, num_frames = bench(
83 | sample,
84 | decoder,
85 | clips_at_regular_indices,
86 | num_clips=num_clips,
87 | num_exp=num_experiments,
88 | warmup=2,
89 | seed=torch_seed,
90 | )
91 | report_stats(times, num_frames, unit="ms")
92 | 
93 | print("clips_at_random_timestamps ", end="")
94 | times, num_frames = bench(
95 | sample,
96 | decoder,
97 | clips_at_random_timestamps,
98 | num_clips=num_clips,
99 | num_exp=num_experiments,
100 | warmup=2,
101 | seed=torch_seed,
102 | )
103 | report_stats(times, num_frames, unit="ms")
104 | 
105 | print("clips_at_regular_timestamps ", end="")
106 | seconds_between_clip_starts = 13 / num_clips  # approximate. video is 13s long
107 | times, num_frames = bench(
108 | sample,
109 | decoder,
110 | clips_at_regular_timestamps,
111 | seconds_between_clip_starts=seconds_between_clip_starts,
112 | num_exp=num_experiments,
113 | warmup=2,
114 | seed=torch_seed,
115 | )
116 | report_stats(times, num_frames, unit="ms")
117 | 
118 | 
119 | def main():
120 | parser = argparse.ArgumentParser()
121 | parser.add_argument("--device", type=str, default="cpu")
122 | parser.add_argument("--video", type=str, default=str(DEFAULT_VIDEO_PATH))
123 | parser.add_argument("--num_experiments", type=int, default=DEFAULT_NUM_EXP)
124 | parser.add_argument("--torch_seed", type=int)
125 | args = parser.parse_args()
126 | run_sampler_benchmarks(
127 | args.device, args.video, args.num_experiments, args.torch_seed
128 | )
129 | 
130 | 
131 | if __name__ == "__main__":
132 | main()
133 | 
--------------------------------------------------------------------------------
/.github/workflows/build_ffmpeg.yaml:
--------------------------------------------------------------------------------
1 | # Taken and adapted from torchaudio.
2 | # Ref: https://github.com/pytorch/audio/blob/main/.github/workflows/ffmpeg.yml
3 | # This job is not directly related to the regular CI pipeline.
4 | # It is intended to create the FFmpeg binaries that we upload to S3,
5 | # which are then used during the build process, both in CI and locally.
6 | #
7 | # This job does not include the uploading part.
8 | # Upload needs to be done manually, and it should be done only once
9 | # per new major release of FFmpeg.
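# The uploaded archives are the ones that
# src/torchcodec/_core/fetch_and_expose_non_gpl_ffmpeg_libs.cmake later downloads
# from the torchcodec S3 bucket when building against the pre-built non-GPL
# FFmpeg libraries, so the FFmpeg versions listed in the matrix below should stay
# in sync with the versions referenced in that file.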
10 | name: Build non-GPL FFmpeg from source 11 | 12 | on: 13 | workflow_dispatch: 14 | pull_request: 15 | paths: 16 | - packaging/build_ffmpeg.sh 17 | - .github/workflows/build_ffmpeg.yaml # self reference 18 | schedule: 19 | - cron: '0 0 * * 0' # on sunday 20 | 21 | defaults: 22 | run: 23 | shell: bash -l -eo pipefail {0} 24 | 25 | jobs: 26 | LGPL-Linux-x86_64: 27 | strategy: 28 | fail-fast: false 29 | matrix: 30 | ffmpeg-version: ["4.4.4", "5.1.4", "6.1.1", "7.0.1", "8.0"] 31 | uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main 32 | permissions: 33 | id-token: write 34 | contents: read 35 | with: 36 | job-name: Build 37 | upload-artifact: ffmpeg-lgpl-linux_x86_64-${{ matrix.ffmpeg-version }} 38 | repository: meta-pytorch/torchcodec 39 | script: | 40 | export FFMPEG_VERSION="${{ matrix.ffmpeg-version }}" 41 | export FFMPEG_ROOT="${PWD}/ffmpeg" 42 | 43 | packaging/build_ffmpeg.sh 44 | 45 | tar -cf ffmpeg.tar.gz ffmpeg/include ffmpeg/lib 46 | 47 | artifact_dir="${RUNNER_ARTIFACT_DIR}/$(date +%Y-%m-%d)/linux_x86_64" 48 | mkdir -p "${artifact_dir}" 49 | mv ffmpeg.tar.gz "${artifact_dir}/${FFMPEG_VERSION}.tar.gz" 50 | 51 | LGPL-Linux-aarch64: 52 | strategy: 53 | fail-fast: false 54 | matrix: 55 | ffmpeg-version: ["4.4.4", "5.1.4", "6.1.1", "7.0.1", "8.0"] 56 | uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main 57 | permissions: 58 | id-token: write 59 | contents: read 60 | with: 61 | job-name: Build 62 | upload-artifact: ffmpeg-lgpl-linux_aarch64-${{ matrix.ffmpeg-version }} 63 | repository: meta-pytorch/torchcodec 64 | runner: linux.arm64.2xlarge 65 | docker-image: pytorch/manylinux2_28_aarch64-builder:cpu-aarch64 66 | script: | 67 | export FFMPEG_VERSION="${{ matrix.ffmpeg-version }}" 68 | export FFMPEG_ROOT="${PWD}/ffmpeg" 69 | 70 | packaging/build_ffmpeg.sh 71 | 72 | tar -cf ffmpeg.tar.gz ffmpeg/include ffmpeg/lib 73 | 74 | artifact_dir="${RUNNER_ARTIFACT_DIR}/$(date +%Y-%m-%d)/linux_aarch64" 75 | mkdir -p "${artifact_dir}" 76 | mv ffmpeg.tar.gz "${artifact_dir}/${FFMPEG_VERSION}.tar.gz" 77 | 78 | LGPL-macOS: 79 | strategy: 80 | fail-fast: false 81 | matrix: 82 | ffmpeg-version: ["4.4.4", "5.1.4", "6.1.1", "7.0.1", "8.0"] 83 | uses: pytorch/test-infra/.github/workflows/macos_job.yml@main 84 | with: 85 | job-name: Build 86 | upload-artifact: ffmpeg-lgpl-macos-${{ matrix.ffmpeg-version }} 87 | repository: meta-pytorch/torchcodec 88 | runner: macos-14-xlarge 89 | script: | 90 | export FFMPEG_VERSION="${{ matrix.ffmpeg-version }}" 91 | export FFMPEG_ROOT="${PWD}/ffmpeg" 92 | 93 | packaging/build_ffmpeg.sh 94 | 95 | tar -cf ffmpeg.tar.gz ffmpeg/include ffmpeg/lib 96 | 97 | artifact_dir="${RUNNER_ARTIFACT_DIR}/$(date +%Y-%m-%d)/macos_$(uname -m)" 98 | mkdir -p "${artifact_dir}" 99 | mv ffmpeg.tar.gz "${artifact_dir}/${FFMPEG_VERSION}.tar.gz" 100 | 101 | LGPL-Windows: 102 | strategy: 103 | fail-fast: false 104 | matrix: 105 | ffmpeg-version: ["4.4.4", "5.1.4", "6.1.1", "7.0.1", "8.0"] 106 | uses: pytorch/test-infra/.github/workflows/windows_job.yml@main 107 | with: 108 | job-name: Build 109 | upload-artifact: ffmpeg-lgpl-windows_x86_64-${{ matrix.ffmpeg-version }} 110 | repository: meta-pytorch/torchcodec 111 | script: | 112 | export FFMPEG_VERSION="${{ matrix.ffmpeg-version }}" 113 | export FFMPEG_ROOT="${PWD}/ffmpeg" 114 | 115 | packaging/build_ffmpeg.bat 116 | 117 | tar -cf ffmpeg.tar.gz ffmpeg/include ffmpeg/bin 118 | 119 | artifact_dir="${RUNNER_ARTIFACT_DIR}/$(date +%Y-%m-%d)/windows_$(uname -m)" 120 | mkdir -p "${artifact_dir}" 121 | mv ffmpeg.tar.gz 
"${artifact_dir}/${FFMPEG_VERSION}.tar.gz" 122 | -------------------------------------------------------------------------------- /src/torchcodec/_core/Metadata.cpp: -------------------------------------------------------------------------------- 1 | // Copyright (c) Meta Platforms, Inc. and affiliates. 2 | // All rights reserved. 3 | // 4 | // This source code is licensed under the BSD-style license found in the 5 | // LICENSE file in the root directory of this source tree. 6 | 7 | #include "Metadata.h" 8 | #include "torch/types.h" 9 | 10 | namespace facebook::torchcodec { 11 | 12 | std::optional StreamMetadata::getDurationSeconds( 13 | SeekMode seekMode) const { 14 | switch (seekMode) { 15 | case SeekMode::custom_frame_mappings: 16 | case SeekMode::exact: 17 | TORCH_CHECK( 18 | endStreamPtsSecondsFromContent.has_value() && 19 | beginStreamPtsSecondsFromContent.has_value(), 20 | "Missing beginStreamPtsSecondsFromContent or endStreamPtsSecondsFromContent"); 21 | return endStreamPtsSecondsFromContent.value() - 22 | beginStreamPtsSecondsFromContent.value(); 23 | case SeekMode::approximate: 24 | if (durationSecondsFromHeader.has_value()) { 25 | return durationSecondsFromHeader.value(); 26 | } 27 | if (numFramesFromHeader.has_value() && averageFpsFromHeader.has_value() && 28 | averageFpsFromHeader.value() != 0.0) { 29 | return static_cast(numFramesFromHeader.value()) / 30 | averageFpsFromHeader.value(); 31 | } 32 | if (durationSecondsFromContainer.has_value()) { 33 | return durationSecondsFromContainer.value(); 34 | } 35 | return std::nullopt; 36 | default: 37 | TORCH_CHECK(false, "Unknown SeekMode"); 38 | } 39 | } 40 | 41 | double StreamMetadata::getBeginStreamSeconds(SeekMode seekMode) const { 42 | switch (seekMode) { 43 | case SeekMode::custom_frame_mappings: 44 | case SeekMode::exact: 45 | TORCH_CHECK( 46 | beginStreamPtsSecondsFromContent.has_value(), 47 | "Missing beginStreamPtsSecondsFromContent"); 48 | return beginStreamPtsSecondsFromContent.value(); 49 | case SeekMode::approximate: 50 | if (beginStreamPtsSecondsFromContent.has_value()) { 51 | return beginStreamPtsSecondsFromContent.value(); 52 | } 53 | return 0.0; 54 | default: 55 | TORCH_CHECK(false, "Unknown SeekMode"); 56 | } 57 | } 58 | 59 | std::optional StreamMetadata::getEndStreamSeconds( 60 | SeekMode seekMode) const { 61 | switch (seekMode) { 62 | case SeekMode::custom_frame_mappings: 63 | case SeekMode::exact: 64 | TORCH_CHECK( 65 | endStreamPtsSecondsFromContent.has_value(), 66 | "Missing endStreamPtsSecondsFromContent"); 67 | return endStreamPtsSecondsFromContent.value(); 68 | case SeekMode::approximate: 69 | if (endStreamPtsSecondsFromContent.has_value()) { 70 | return endStreamPtsSecondsFromContent.value(); 71 | } 72 | return getDurationSeconds(seekMode); 73 | default: 74 | TORCH_CHECK(false, "Unknown SeekMode"); 75 | } 76 | } 77 | 78 | std::optional StreamMetadata::getNumFrames(SeekMode seekMode) const { 79 | switch (seekMode) { 80 | case SeekMode::custom_frame_mappings: 81 | case SeekMode::exact: 82 | TORCH_CHECK( 83 | numFramesFromContent.has_value(), "Missing numFramesFromContent"); 84 | return numFramesFromContent.value(); 85 | case SeekMode::approximate: { 86 | auto durationSeconds = getDurationSeconds(seekMode); 87 | if (numFramesFromHeader.has_value()) { 88 | return numFramesFromHeader.value(); 89 | } 90 | if (averageFpsFromHeader.has_value() && durationSeconds.has_value()) { 91 | return static_cast( 92 | averageFpsFromHeader.value() * durationSeconds.value()); 93 | } 94 | return std::nullopt; 95 | } 96 | 
default: 97 | TORCH_CHECK(false, "Unknown SeekMode"); 98 | } 99 | } 100 | 101 | std::optional StreamMetadata::getAverageFps(SeekMode seekMode) const { 102 | switch (seekMode) { 103 | case SeekMode::custom_frame_mappings: 104 | case SeekMode::exact: { 105 | auto numFrames = getNumFrames(seekMode); 106 | if (numFrames.has_value() && 107 | beginStreamPtsSecondsFromContent.has_value() && 108 | endStreamPtsSecondsFromContent.has_value()) { 109 | double duration = endStreamPtsSecondsFromContent.value() - 110 | beginStreamPtsSecondsFromContent.value(); 111 | if (duration != 0.0) { 112 | return static_cast(numFrames.value()) / duration; 113 | } 114 | } 115 | return averageFpsFromHeader; 116 | } 117 | case SeekMode::approximate: 118 | return averageFpsFromHeader; 119 | default: 120 | TORCH_CHECK(false, "Unknown SeekMode"); 121 | } 122 | } 123 | 124 | } // namespace facebook::torchcodec 125 | -------------------------------------------------------------------------------- /src/torchcodec/decoders/_decoder_utils.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | # All rights reserved. 3 | # 4 | # This source code is licensed under the BSD-style license found in the 5 | # LICENSE file in the root directory of this source tree. 6 | 7 | 8 | import contextvars 9 | import io 10 | 11 | from collections.abc import Generator 12 | from contextlib import contextmanager 13 | from pathlib import Path 14 | 15 | from torch import Tensor 16 | from torchcodec import _core as core 17 | 18 | ERROR_REPORTING_INSTRUCTIONS = """ 19 | This should never happen. Please report an issue following the steps in 20 | https://github.com/pytorch/torchcodec/issues/new?assignees=&labels=&projects=&template=bug-report.yml. 21 | """ 22 | 23 | 24 | def create_decoder( 25 | *, 26 | source: str | Path | io.RawIOBase | io.BufferedReader | bytes | Tensor, 27 | seek_mode: str, 28 | ) -> Tensor: 29 | if isinstance(source, str): 30 | return core.create_from_file(source, seek_mode) 31 | elif isinstance(source, Path): 32 | return core.create_from_file(str(source), seek_mode) 33 | elif isinstance(source, io.RawIOBase) or isinstance(source, io.BufferedReader): 34 | return core.create_from_file_like(source, seek_mode) 35 | elif isinstance(source, bytes): 36 | return core.create_from_bytes(source, seek_mode) 37 | elif isinstance(source, Tensor): 38 | return core.create_from_tensor(source, seek_mode) 39 | elif isinstance(source, io.TextIOBase): 40 | raise TypeError( 41 | "source is for reading text, likely from open(..., 'r'). Try with 'rb' for binary reading?" 42 | ) 43 | elif hasattr(source, "read") and hasattr(source, "seek"): 44 | # This check must be after checking for text-based reading. Also placing 45 | # it last in general to be defensive: hasattr is a blunt instrument. We 46 | # could use the inspect module to check for methods with the right 47 | # signature. 48 | return core.create_from_file_like(source, seek_mode) 49 | 50 | raise TypeError( 51 | f"Unknown source type: {type(source)}. " 52 | "Supported types are str, Path, bytes, Tensor and file-like objects with " 53 | "read(self, size: int) -> bytes and " 54 | "seek(self, offset: int, whence: int) -> int methods." 
55 | )
56 | 
57 | 
58 | # Thread-local and async-safe storage for the current CUDA backend
59 | _CUDA_BACKEND: contextvars.ContextVar[str] = contextvars.ContextVar(
60 | "_CUDA_BACKEND", default="ffmpeg"
61 | )
62 | 
63 | 
64 | @contextmanager
65 | def set_cuda_backend(backend: str) -> Generator[None, None, None]:
66 | """Context manager to set the CUDA backend for :class:`~torchcodec.decoders.VideoDecoder`.
67 | 
68 | This context manager allows you to specify which CUDA backend implementation
69 | to use when creating :class:`~torchcodec.decoders.VideoDecoder` instances
70 | with CUDA devices.
71 | 
72 | .. note::
73 | **We recommend trying the "beta" backend instead of the default "ffmpeg"
74 | backend!** The beta backend is faster, and will eventually become the
75 | default in future versions. It may have rough edges that we'll polish
76 | over time, but it's already quite stable and ready for adoption. Let us
77 | know what you think!
78 | 
79 | Only the creation of the decoder needs to be inside the context manager; the
80 | decoding methods can be called outside of it. You still need to pass
81 | ``device="cuda"`` when creating the
82 | :class:`~torchcodec.decoders.VideoDecoder` instance. If a CUDA device isn't
83 | specified, this context manager will have no effect. See example below.
84 | 
85 | This is thread-safe and async-safe.
86 | 
87 | Args:
88 | backend (str): The CUDA backend to use. Can be "ffmpeg" (default) or
89 | "beta". We recommend trying "beta" as it's faster!
90 | 
91 | Example:
92 | >>> with set_cuda_backend("beta"):
93 | ... decoder = VideoDecoder("video.mp4", device="cuda")
94 | ...
95 | ... # Only the decoder creation needs to be part of the context manager.
96 | ... # The decoder will now use the beta CUDA implementation:
97 | ... decoder.get_frame_at(0)
98 | """
99 | backend = backend.lower()
100 | if backend not in ("ffmpeg", "beta"):
101 | raise ValueError(
102 | f"Invalid CUDA backend ({backend}). Supported values are 'ffmpeg' and 'beta'."
103 | )
104 | 
105 | previous_state = _CUDA_BACKEND.set(backend)
106 | try:
107 | yield
108 | finally:
109 | _CUDA_BACKEND.reset(previous_state)
110 | 
111 | 
112 | def _get_cuda_backend() -> str:
113 | return _CUDA_BACKEND.get()
114 | 
--------------------------------------------------------------------------------
/src/torchcodec/_core/Cache.h:
--------------------------------------------------------------------------------
1 | // Copyright (c) Meta Platforms, Inc. and affiliates.
2 | // All rights reserved.
3 | //
4 | // This source code is licensed under the BSD-style license found in the
5 | // LICENSE file in the root directory of this source tree.
6 | 
7 | #pragma once
8 | 
9 | #include
10 | #include
11 | #include
12 | 
13 | namespace facebook::torchcodec {
14 | 
15 | // This header defines simple cache class primitives to store reusable objects
16 | // across TorchCodec stream instances. The intended usage is to store hardware
17 | // contexts whose creation is expensive. The cache mechanism is as follows:
18 | // 1. 'PerGpuCache' provides a dynamic cache with the specified maximum capacity
19 | // for the given number of GPUs.
20 | // 2. When a stream object (e.g. SingleStreamDecoder) is destroyed, the cachable
21 | // object must be released to the cache. The cache will accept the object if
22 | // it is not full.
23 | // 3. When a stream object (e.g. SingleStreamDecoder) is created, the cachable
24 | // object must first be queried from the cache. If the cache is empty then a
25 | // new object must be created.
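//
// A minimal usage sketch of the mechanism described above (the context type,
// cache instance and helper below are made up for illustration only):
//
//   static PerGpuCache<MyHwContext> gHwContextCache(MAX_GPUS, CAPACITY_PER_GPU);
//
//   // On stream creation (step 3): try to reuse a cached context first.
//   auto ctx = gHwContextCache.get(device);
//   if (!ctx) {
//     ctx = createHwContext(device);  // expensive; only when the cache is empty
//   }
//
//   // On stream destruction (step 2): hand the context back if there is room.
//   gHwContextCache.addIfCacheHasCapacity(device, std::move(ctx));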
26 | 27 | template > 28 | class Cache { 29 | public: 30 | using element_type = std::unique_ptr; 31 | 32 | explicit Cache(int capacity) : capacity_(capacity) {} 33 | 34 | // Adds an object to the cache if the cache has capacity. Returns true 35 | // if object was added and false otherwise. 36 | bool addIfCacheHasCapacity(element_type&& obj); 37 | 38 | // Returns an object from the cache. Cache does not hold a reference 39 | // to the object after this call. 40 | element_type get(); 41 | 42 | private: 43 | int capacity_; 44 | std::mutex mutex_; 45 | std::vector cache_; 46 | }; 47 | 48 | template 49 | bool Cache::addIfCacheHasCapacity(element_type&& obj) { 50 | std::scoped_lock lock(mutex_); 51 | if (capacity_ >= 0 && cache_.size() >= static_cast(capacity_)) { 52 | return false; 53 | } 54 | cache_.push_back(std::move(obj)); 55 | return true; 56 | } 57 | 58 | template 59 | typename Cache::element_type Cache::get() { 60 | std::scoped_lock lock(mutex_); 61 | if (cache_.empty()) { 62 | return nullptr; 63 | } 64 | 65 | element_type obj = std::move(cache_.back()); 66 | cache_.pop_back(); 67 | return obj; 68 | } 69 | 70 | template > 71 | class PerGpuCache { 72 | public: 73 | using element_type = typename Cache::element_type; 74 | 75 | // Initializes 'maxGpus' number of caches. Each cache can hold no 76 | // more than 'capacity' items. If 'capacity' <0 cache size is unlimited. 77 | PerGpuCache(int maxGpus, int capacity) { 78 | TORCH_CHECK(maxGpus > 0, "maxGpus for PerGpuCache must be >0"); 79 | for (int i = 0; i < maxGpus; ++i) { 80 | cache_.emplace_back(std::make_unique>(capacity)); 81 | } 82 | } 83 | 84 | // Adds an object to the specified device cache if the cache has 85 | // capacity. Returns true if object was added and false otherwise. 86 | bool addIfCacheHasCapacity(const torch::Device& device, element_type&& obj); 87 | 88 | // Returns an object from the cache of the specified device. Cache 89 | // does not hold a reference to the object after this call. 90 | element_type get(const torch::Device& device); 91 | 92 | private: 93 | // 'Cache' class implementation contains mutex which makes it non-movable 94 | // and non-copyable, so we need to wrap it in std::unique_ptr. 95 | std::vector>> cache_; 96 | }; 97 | 98 | // Forward declaration of getDeviceIndex which exists in CUDACommon.h 99 | // This avoids circular dependency between Cache.h and CUDACommon.cpp which also 100 | // needs to include Cache.h 101 | int getDeviceIndex(const torch::Device& device); 102 | 103 | template 104 | bool PerGpuCache::addIfCacheHasCapacity( 105 | const torch::Device& device, 106 | element_type&& obj) { 107 | int deviceIndex = getDeviceIndex(device); 108 | TORCH_CHECK( 109 | static_cast(deviceIndex) < cache_.size(), 110 | "Device index out of range"); 111 | return cache_[deviceIndex]->addIfCacheHasCapacity(std::move(obj)); 112 | } 113 | 114 | template 115 | typename PerGpuCache::element_type PerGpuCache::get( 116 | const torch::Device& device) { 117 | int deviceIndex = getDeviceIndex(device); 118 | TORCH_CHECK( 119 | static_cast(deviceIndex) < cache_.size(), 120 | "Device index out of range"); 121 | return cache_[deviceIndex]->get(); 122 | } 123 | 124 | } // namespace facebook::torchcodec 125 | -------------------------------------------------------------------------------- /src/torchcodec/_core/AVIOTensorContext.cpp: -------------------------------------------------------------------------------- 1 | // Copyright (c) Meta Platforms, Inc. and affiliates. 2 | // All rights reserved. 
3 | // 4 | // This source code is licensed under the BSD-style license found in the 5 | // LICENSE file in the root directory of this source tree. 6 | 7 | #include "AVIOTensorContext.h" 8 | #include 9 | 10 | namespace facebook::torchcodec { 11 | 12 | namespace { 13 | 14 | constexpr int64_t INITIAL_TENSOR_SIZE = 10'000'000; // 10 MB 15 | constexpr int64_t MAX_TENSOR_SIZE = 320'000'000; // 320 MB 16 | 17 | // The signature of this function is defined by FFMPEG. 18 | int read(void* opaque, uint8_t* buf, int buf_size) { 19 | auto tensorContext = static_cast(opaque); 20 | TORCH_CHECK( 21 | tensorContext->current_pos <= tensorContext->data.numel(), 22 | "Tried to read outside of the buffer: current_pos=", 23 | tensorContext->current_pos, 24 | ", size=", 25 | tensorContext->data.numel()); 26 | 27 | int64_t numBytesRead = std::min( 28 | static_cast(buf_size), 29 | tensorContext->data.numel() - tensorContext->current_pos); 30 | 31 | TORCH_CHECK( 32 | numBytesRead >= 0, 33 | "Tried to read negative bytes: numBytesRead=", 34 | numBytesRead, 35 | ", size=", 36 | tensorContext->data.numel(), 37 | ", current_pos=", 38 | tensorContext->current_pos); 39 | 40 | if (numBytesRead == 0) { 41 | return AVERROR_EOF; 42 | } 43 | 44 | std::memcpy( 45 | buf, 46 | tensorContext->data.data_ptr() + tensorContext->current_pos, 47 | numBytesRead); 48 | tensorContext->current_pos += numBytesRead; 49 | return numBytesRead; 50 | } 51 | 52 | // The signature of this function is defined by FFMPEG. 53 | int write(void* opaque, const uint8_t* buf, int buf_size) { 54 | auto tensorContext = static_cast(opaque); 55 | 56 | int64_t bufSize = static_cast(buf_size); 57 | if (tensorContext->current_pos + bufSize > tensorContext->data.numel()) { 58 | TORCH_CHECK( 59 | tensorContext->data.numel() * 2 <= MAX_TENSOR_SIZE, 60 | "We tried to allocate an output encoded tensor larger than ", 61 | MAX_TENSOR_SIZE, 62 | " bytes. If you think this should be supported, please report."); 63 | 64 | // We double the size of the outpout tensor. Calling cat() may not be the 65 | // most efficient, but it's simple. 66 | tensorContext->data = 67 | torch::cat({tensorContext->data, tensorContext->data}); 68 | } 69 | 70 | TORCH_CHECK( 71 | tensorContext->current_pos + bufSize <= tensorContext->data.numel(), 72 | "Re-allocation of the output tensor didn't work. ", 73 | "This should not happen, please report on TorchCodec bug tracker"); 74 | 75 | uint8_t* outputTensorData = tensorContext->data.data_ptr(); 76 | std::memcpy(outputTensorData + tensorContext->current_pos, buf, bufSize); 77 | tensorContext->current_pos += bufSize; 78 | // Track the maximum position written so getOutputTensor's narrow() does not 79 | // truncate the file if final seek was backwards 80 | tensorContext->max_pos = 81 | std::max(tensorContext->current_pos, tensorContext->max_pos); 82 | return buf_size; 83 | } 84 | 85 | // The signature of this function is defined by FFMPEG. 
86 | int64_t seek(void* opaque, int64_t offset, int whence) { 87 | auto tensorContext = static_cast(opaque); 88 | int64_t ret = -1; 89 | 90 | switch (whence) { 91 | case AVSEEK_SIZE: 92 | ret = tensorContext->data.numel(); 93 | break; 94 | case SEEK_SET: 95 | tensorContext->current_pos = offset; 96 | ret = offset; 97 | break; 98 | default: 99 | break; 100 | } 101 | 102 | return ret; 103 | } 104 | 105 | } // namespace 106 | 107 | AVIOFromTensorContext::AVIOFromTensorContext(torch::Tensor data) 108 | : tensorContext_{data, 0, 0} { 109 | TORCH_CHECK(data.numel() > 0, "data must not be empty"); 110 | TORCH_CHECK(data.is_contiguous(), "data must be contiguous"); 111 | TORCH_CHECK(data.scalar_type() == torch::kUInt8, "data must be kUInt8"); 112 | createAVIOContext( 113 | &read, nullptr, &seek, &tensorContext_, /*isForWriting=*/false); 114 | } 115 | 116 | AVIOToTensorContext::AVIOToTensorContext() 117 | : tensorContext_{ 118 | torch::empty({INITIAL_TENSOR_SIZE}, {torch::kUInt8}), 119 | 0, 120 | 0} { 121 | createAVIOContext( 122 | nullptr, &write, &seek, &tensorContext_, /*isForWriting=*/true); 123 | } 124 | 125 | torch::Tensor AVIOToTensorContext::getOutputTensor() { 126 | return tensorContext_.data.narrow( 127 | /*dim=*/0, /*start=*/0, /*length=*/tensorContext_.max_pos); 128 | } 129 | 130 | } // namespace facebook::torchcodec 131 | -------------------------------------------------------------------------------- /docs/source/_static/css/custom_torchcodec.css: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (c) Meta Platforms, Inc. and affiliates. 3 | * All rights reserved. 4 | * 5 | * This source code is licensed under the BSD-style license found in the 6 | * LICENSE file in the root directory of this source tree. 
7 | */ 8 | 9 | /* sphinx-design styles for cards/tabs */ 10 | 11 | 12 | :root { 13 | --sd-color-info: #ee4c2c; 14 | --sd-color-primary: #6c6c6d; 15 | --sd-color-primary-highlight: #f3f4f7; 16 | --sd-color-card-border-hover: #ee4c2c; 17 | --sd-color-card-border: #f3f4f7; 18 | --sd-color-card-background: #fff; 19 | --sd-color-card-text: inherit; 20 | --sd-color-card-header: transparent; 21 | --sd-color-card-footer: transparent; 22 | --sd-color-tabs-label-active: #ee4c2c; 23 | --sd-color-tabs-label-hover: #ee4c2c; 24 | --sd-color-tabs-label-inactive: #6c6c6d; 25 | --sd-color-tabs-underline-active: #ee4c2c; 26 | --sd-color-tabs-underline-hover: #fabdbd; 27 | --sd-color-tabs-underline-inactive: transparent; 28 | --sd-color-tabs-overline: rgb(222, 222, 222); 29 | --sd-color-tabs-underline: rgb(222, 222, 222); 30 | } 31 | 32 | .sd-text-info { 33 | color: #ee4c2c; 34 | } 35 | 36 | .sd-card-img-top { 37 | background: #ee4c2c; 38 | height: 5px !important; 39 | } 40 | 41 | .sd-card { 42 | position: relative; 43 | background-color: #fff; 44 | opacity: 1.0; 45 | border-radius: 0px; 46 | width: 30%; 47 | border: none; 48 | padding-bottom: 0px; 49 | } 50 | 51 | 52 | .sd-card-img:hover { 53 | opacity: 1.0; 54 | background-color: #f3f4f7; 55 | } 56 | 57 | 58 | .sd-card:after { 59 | display: block; 60 | opacity: 1; 61 | content: ''; 62 | border-bottom: solid 1px #ee4c2c; 63 | background-color: #fff; 64 | transform: scaleX(0); 65 | transition: transform .250s ease-in-out; 66 | transform-origin: 0% 50%; 67 | } 68 | 69 | .sd-card:hover { 70 | background-color: #fff; 71 | opacity: 1; 72 | border-top: 1px solid #f3f4f7; 73 | border-left: 1px solid #f3f4f7; 74 | border-right: 1px solid #f3f4f7; 75 | } 76 | 77 | .sd-card:hover:after { 78 | transform: scaleX(1); 79 | } 80 | 81 | .card-prerequisites:hover { 82 | transition: none; 83 | border: none; 84 | } 85 | 86 | .card-prerequisites:hover:after { 87 | transition: none; 88 | transform: none; 89 | } 90 | 91 | .card-prerequisites:after { 92 | display: block; 93 | content: ''; 94 | border-bottom: none; 95 | background-color: #fff; 96 | transform: none; 97 | transition: none; 98 | transform-origin: none; 99 | } 100 | 101 | 102 | details.sd-dropdown { 103 | font-weight: 300; 104 | width: auto; 105 | } 106 | 107 | details.sd-dropdown:after { 108 | border: none; 109 | transition: none; 110 | } 111 | 112 | details.sd-dropdown:hover { 113 | border: none; 114 | transition: none; 115 | } 116 | 117 | details.sd-dropdown .sd-summary-content { 118 | font-weight: 300; 119 | } 120 | 121 | details.sd-dropdown .highlight .n { 122 | font-weight: normal; 123 | } 124 | 125 | .et-page-column1 { 126 | float: left; 127 | width: 70%; 128 | font-size: 1rem; 129 | } 130 | 131 | .et-page-column2 { 132 | float: right; 133 | padding-top: 40px; 134 | padding-left: 60px; 135 | padding-right: 60px; 136 | padding-bottom: 60px; 137 | width: 30%; 138 | } 139 | 140 | .et-page-column-row:after { 141 | content: ""; 142 | display: table; 143 | clear: both; 144 | } 145 | 146 | /* For screens smaller than 768px (typical mobile devices) */ 147 | @media screen and (max-width: 768px) { 148 | .et-page-column1, .et-page-column2 { 149 | float: none; /* Remove floats */ 150 | width: 100%; /* Full width for both columns */ 151 | padding: 0; 152 | font-size: 1rem; 153 | } 154 | 155 | .et-page-column2 img { 156 | display: none; 157 | } 158 | .et-page-column-row:after { 159 | content: ""; 160 | display: table; 161 | clear: both; 162 | } 163 | } 164 | 165 | article.pytorch-article .class .method dt { 166 | border-top: 
none; 167 | } 168 | 169 | article.pytorch-article .class .simple dt { 170 | border-top: none; 171 | } 172 | 173 | article.pytorch-article .function dt.sig { 174 | border-top: none; 175 | } 176 | 177 | /* Fix for Sphinx gallery thumbnails. 178 | See https://github.com/sphinx-gallery/sphinx-gallery/issues/990 179 | */ 180 | article.pytorch-article .sphx-glr-thumbnails .sphx-glr-thumbcontainer { 181 | width: unset; 182 | margin-right: 0; 183 | margin-left: 0; 184 | } 185 | article.pytorch-article div.section div.wy-table-responsive tbody td { 186 | width: 50%; 187 | } 188 | 189 | article.pytorch-article section#glossary dl.simple.glossary dt { 190 | font-weight: bold; 191 | font-size: x-large; 192 | } 193 | -------------------------------------------------------------------------------- /docs/source/index.rst: -------------------------------------------------------------------------------- 1 | Welcome to the TorchCodec documentation! 2 | ======================================== 3 | 4 | TorchCodec is a Python library for decoding video and audio data into PyTorch 5 | tensors, on CPU and CUDA GPU. It also supports audio and video encoding! 6 | It aims to be fast, easy to use, and well integrated into the PyTorch ecosystem. 7 | If you want to use PyTorch to train ML models on videos and audio, TorchCodec is 8 | how you turn these into data. 9 | 10 | We achieve these capabilities through: 11 | 12 | * Pythonic APIs that mirror Python and PyTorch conventions. 13 | * Relying on `FFmpeg `_ to do the decoding / encoding. 14 | TorchCodec uses the version of FFmpeg you already have installed. FFmpeg is a 15 | mature library with broad coverage available on most systems. It is, however, 16 | not easy to use. TorchCodec abstracts FFmpeg's complexity to ensure it is 17 | used correctly and efficiently. 18 | * Returning data as PyTorch tensors, ready to be fed into PyTorch transforms 19 | or used directly to train models. 20 | 21 | Installation instructions 22 | ^^^^^^^^^^^^^^^^^^^^^^^^^ 23 | 24 | .. grid:: 3 25 | 26 | .. grid-item-card:: :octicon:`file-code;1em` 27 | Installation instructions 28 | :img-top: _static/img/card-background.svg 29 | :link: https://github.com/pytorch/torchcodec?tab=readme-ov-file#installing-torchcodec 30 | :link-type: url 31 | 32 | How to install TorchCodec 33 | 34 | Decoding 35 | ^^^^^^^^ 36 | 37 | .. grid:: 3 38 | 39 | .. grid-item-card:: :octicon:`file-code;1em` 40 | Getting Started with TorchCodec 41 | :img-top: _static/img/card-background.svg 42 | :link: generated_examples/decoding/basic_example.html 43 | :link-type: url 44 | 45 | A simple video decoding example 46 | 47 | .. grid-item-card:: :octicon:`file-code;1em` 48 | Audio Decoding 49 | :img-top: _static/img/card-background.svg 50 | :link: generated_examples/decoding/audio_decoding.html 51 | :link-type: url 52 | 53 | A simple audio decoding example 54 | 55 | .. grid-item-card:: :octicon:`file-code;1em` 56 | GPU decoding 57 | :img-top: _static/img/card-background.svg 58 | :link: generated_examples/decoding/basic_cuda_example.html 59 | :link-type: url 60 | 61 | A simple example demonstrating CUDA GPU decoding 62 | 63 | .. grid-item-card:: :octicon:`file-code;1em` 64 | Streaming video 65 | :img-top: _static/img/card-background.svg 66 | :link: generated_examples/decoding/file_like.html 67 | :link-type: url 68 | 69 | How to efficiently decode videos from the cloud 70 | 71 | .. 
grid-item-card:: :octicon:`file-code;1em`
72 | Parallel decoding
73 | :img-top: _static/img/card-background.svg
74 | :link: generated_examples/decoding/parallel_decoding.html
75 | :link-type: url
76 | 
77 | How to decode a video with multiple processes or threads.
78 | 
79 | .. grid-item-card:: :octicon:`file-code;1em`
80 | Clip sampling
81 | :img-top: _static/img/card-background.svg
82 | :link: generated_examples/decoding/sampling.html
83 | :link-type: url
84 | 
85 | How to sample regular and random clips from a video
86 | 
87 | .. grid-item-card:: :octicon:`file-code;1em`
88 | Performance Tips
89 | :img-top: _static/img/card-background.svg
90 | :link: generated_examples/decoding/performance_tips.html
91 | :link-type: url
92 | 
93 | Tips for optimizing video decoding performance
94 | 
95 | 
96 | Encoding
97 | ^^^^^^^^
98 | 
99 | .. grid:: 3
100 | 
101 | .. grid-item-card:: :octicon:`file-code;1em`
102 | Audio Encoding
103 | :img-top: _static/img/card-background.svg
104 | :link: generated_examples/encoding/audio_encoding.html
105 | :link-type: url
106 | 
107 | How to encode audio samples
108 | 
109 | .. grid-item-card:: :octicon:`file-code;1em`
110 | Video Encoding
111 | :img-top: _static/img/card-background.svg
112 | :link: generated_examples/encoding/video_encoding.html
113 | :link-type: url
114 | 
115 | How to encode video frames
116 | 
117 | .. toctree::
118 | :maxdepth: 1
119 | :caption: TorchCodec documentation
120 | :hidden:
121 | 
122 | Home
123 | glossary
124 | 
125 | .. toctree::
126 | :maxdepth: 1
127 | :caption: Examples and tutorials
128 | :hidden:
129 | 
130 | Installation instructions
131 | generated_examples/index
132 | 
133 | 
134 | .. toctree::
135 | :glob:
136 | :maxdepth: 1
137 | :caption: API Reference
138 | :hidden:
139 | 
140 | api_ref_torchcodec
141 | api_ref_decoders
142 | api_ref_encoders
143 | api_ref_samplers
144 | api_ref_transforms
145 | 
--------------------------------------------------------------------------------
/src/torchcodec/_core/fetch_and_expose_non_gpl_ffmpeg_libs.cmake:
--------------------------------------------------------------------------------
1 | # This file fetches the non-GPL ffmpeg libraries from the torchcodec S3 bucket,
2 | # and exposes them as CMake targets so we can dynamically link against them.
3 | # These libraries were built on the CI via the build_ffmpeg.yaml workflow.
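# The layout on S3 is one dated folder per build, containing one tar.gz archive
# per FFmpeg major version and platform (see base_url and platform_url below).
# For each version we declare a FetchContent download with its expected SHA256
# checksum and then expose the extracted libraries through add_ffmpeg_target(),
# so the rest of the build can link against FFmpeg 4 through 8.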
4 | 5 | # Avoid warning: see https://cmake.org/cmake/help/latest/policy/CMP0135.html 6 | if (CMAKE_VERSION VERSION_GREATER_EQUAL "3.24.0") 7 | cmake_policy(SET CMP0135 NEW) 8 | endif() 9 | 10 | include(FetchContent) 11 | 12 | set( 13 | base_url 14 | https://pytorch.s3.amazonaws.com/torchcodec/ffmpeg/2025-03-14 15 | ) 16 | 17 | if (LINUX) 18 | if (CMAKE_SYSTEM_PROCESSOR MATCHES "aarch64|arm64|ARM64") 19 | set( 20 | platform_url 21 | ${base_url}/linux_aarch64 22 | ) 23 | 24 | set( 25 | f4_sha256 26 | a310a2ed9ffe555fd3278dae15065541098dd35e124564671dcda6a6620ac842 27 | ) 28 | set( 29 | f5_sha256 30 | 89ca7996bccbc2db49adaa401d20fdbabffe0e1b4e07a0f81d6b143e858b7c8d 31 | ) 32 | set( 33 | f6_sha256 34 | ae44c67b4587d061b8e9cc8990ca891ee013fe52ad79e5016ba29871562621da 35 | ) 36 | set( 37 | f7_sha256 38 | 948e2cac66ca6f68ff526d5e84138e94bce0f1a7c83f502d15d85d0bd3ddc112 39 | ) 40 | set( 41 | f8_sha256 42 | b9cfd99ae75a14e58300854967d4dc49de0b3daa551df51ea1f52a3f08d2c8af 43 | ) 44 | elseif (LINUX) # assume x86_64 45 | set( 46 | platform_url 47 | ${base_url}/linux_x86_64 48 | ) 49 | 50 | set( 51 | f4_sha256 52 | 1a083f1922443bedb5243d04896383b8c606778a7ddb9d886c8303e55339fe0c 53 | ) 54 | set( 55 | f5_sha256 56 | 65d6ad54082d94dcb3f801d73df2265e0e1bb303c7afbce7723e3b77ccd0e207 57 | ) 58 | set( 59 | f6_sha256 60 | 8bd5939c2f4a4b072e837e7870c13fe7d13824e5ff087ab534e4db4e90b7be9c 61 | ) 62 | set( 63 | f7_sha256 64 | 1cb946d8b7c6393c2c3ebe1f900b8de7a2885fe614c45d4ec32c9833084f2f26 65 | ) 66 | set( 67 | f8_sha256 68 | c55b3c1a4b5e4d5fdd7c632bea3ab6f45b4e37cc8e0999dda3f84a8ed8defad8 69 | ) 70 | endif() 71 | elseif (APPLE) 72 | set( 73 | platform_url 74 | ${base_url}/macos_arm64 75 | ) 76 | set( 77 | f4_sha256 78 | f0335434529d9e19359eae0fe912dd9e747667534a1c92e662f5219a55dfad8c 79 | ) 80 | set( 81 | f5_sha256 82 | cfc3449c9af6863731a431ce89e32c08c5f8ece94b306fb6b695828502a76166 83 | ) 84 | set( 85 | f6_sha256 86 | ec47b4783c342038e720e33b2fdfa55a9a490afb1cf37a26467733983688647e 87 | ) 88 | set( 89 | f7_sha256 90 | 48a4fc8ce098305cfd4a58f40889249c523ca3c285f66ba704b5bad0e3ada53a 91 | ) 92 | set( 93 | f8_sha256 94 | beb936b76f25d2621228a12cdb67c9ae3d1eff7aa713ef8d1167ebf0c25bd5ec 95 | ) 96 | elseif (WIN32) 97 | set( 98 | platform_url 99 | ${base_url}/windows_x86_64 100 | ) 101 | set( 102 | f4_sha256 103 | 270a1aa8892225267e68a7eb87c417931da30dccbf08ee2bde8833e659cab5cb 104 | ) 105 | set( 106 | f5_sha256 107 | b8b2a349a847e56a6da875b066dff1cae53cb8ee7cf5ba9321ec1243dea0cde0 108 | ) 109 | set( 110 | f6_sha256 111 | 5d9f8c76dc55f790fa31d825985e9270bf9e498b8bfec21a0ad3a1feb1fa053a 112 | ) 113 | set( 114 | f7_sha256 115 | ae391ace382330e912793b70b68529ee7c91026d2869b4df7e7c3e7d3656bdd5 116 | ) 117 | set( 118 | f8_sha256 119 | bac845ac79876b104959cb0e7b9dec772a261116344dd17d2f97e7ddfac4a73f 120 | ) 121 | else() 122 | message( 123 | FATAL_ERROR 124 | "Unsupported operating system: ${CMAKE_SYSTEM_NAME}" 125 | ) 126 | endif() 127 | 128 | FetchContent_Declare( 129 | f4 130 | URL ${platform_url}/4.4.4.tar.gz 131 | URL_HASH 132 | SHA256=${f4_sha256} 133 | ) 134 | FetchContent_Declare( 135 | f5 136 | URL ${platform_url}/5.1.4.tar.gz 137 | URL_HASH 138 | SHA256=${f5_sha256} 139 | ) 140 | FetchContent_Declare( 141 | f6 142 | URL ${platform_url}/6.1.1.tar.gz 143 | URL_HASH 144 | SHA256=${f6_sha256} 145 | ) 146 | FetchContent_Declare( 147 | f7 148 | URL ${platform_url}/7.0.1.tar.gz 149 | URL_HASH 150 | SHA256=${f7_sha256} 151 | ) 152 | FetchContent_Declare( 153 | f8 154 | URL ${platform_url}/8.0.tar.gz 155 | URL_HASH 156 | 
SHA256=${f8_sha256} 157 | ) 158 | 159 | FetchContent_MakeAvailable(f4 f5 f6 f7 f8) 160 | 161 | # makes add_ffmpeg_target available 162 | include("${CMAKE_CURRENT_SOURCE_DIR}/../share/cmake/TorchCodec/ffmpeg_versions.cmake") 163 | 164 | # Note: the f?_SOURCE_DIR variables were set by FetchContent_MakeAvailable 165 | add_ffmpeg_target(4 "${f4_SOURCE_DIR}") 166 | add_ffmpeg_target(5 "${f5_SOURCE_DIR}") 167 | add_ffmpeg_target(6 "${f6_SOURCE_DIR}") 168 | add_ffmpeg_target(7 "${f7_SOURCE_DIR}") 169 | add_ffmpeg_target(8 "${f8_SOURCE_DIR}") 170 | -------------------------------------------------------------------------------- /.github/workflows/macos_wheel.yaml: -------------------------------------------------------------------------------- 1 | name: Build and test MacOS wheel 2 | 3 | on: 4 | pull_request: 5 | push: 6 | branches: 7 | - nightly 8 | - main 9 | - release/* 10 | tags: 11 | - v[0-9]+.[0-9]+.[0-9]+-rc[0-9]+ 12 | workflow_dispatch: 13 | 14 | concurrency: 15 | group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref_name }}-${{ github.ref_type == 'branch' && github.sha }}-${{ github.event_name == 'workflow_dispatch' }} 16 | cancel-in-progress: true 17 | 18 | permissions: 19 | id-token: write 20 | contents: write 21 | 22 | defaults: 23 | run: 24 | shell: bash -l -eo pipefail {0} 25 | 26 | jobs: 27 | 28 | generate-matrix: 29 | uses: pytorch/test-infra/.github/workflows/generate_binary_build_matrix.yml@main 30 | with: 31 | package-type: wheel 32 | os: macos-arm64 33 | test-infra-repository: pytorch/test-infra 34 | test-infra-ref: main 35 | with-xpu: disable 36 | with-rocm: disable 37 | with-cuda: disable 38 | build-python-only: "disable" 39 | 40 | build: 41 | needs: generate-matrix 42 | strategy: 43 | fail-fast: false 44 | name: Build and Upload Mac wheel 45 | uses: pytorch/test-infra/.github/workflows/build_wheels_macos.yml@main 46 | with: 47 | repository: meta-pytorch/torchcodec 48 | ref: "" 49 | test-infra-repository: pytorch/test-infra 50 | test-infra-ref: main 51 | build-matrix: ${{ needs.generate-matrix.outputs.matrix }} 52 | pre-script: packaging/pre_build_script.sh 53 | post-script: packaging/post_build_script.sh 54 | smoke-test-script: packaging/fake_smoke_test.py 55 | runner-type: macos-14 56 | setup-miniconda: true 57 | package-name: torchcodec 58 | trigger-event: ${{ github.event_name }} 59 | build-platform: "python-build-package" 60 | build-command: "BUILD_AGAINST_ALL_FFMPEG_FROM_S3=1 python -m build --wheel -vvv --no-isolation" 61 | 62 | install-and-test: 63 | runs-on: macos-14-xlarge 64 | strategy: 65 | fail-fast: false 66 | matrix: 67 | python-version: ['3.10'] 68 | ffmpeg-version-for-tests: ['4.4.2', '5.1.2', '6.1.1', '7.0.1', '8.0'] 69 | needs: build 70 | steps: 71 | - name: Download wheel 72 | uses: actions/download-artifact@v4 73 | with: 74 | name: meta-pytorch_torchcodec__${{ matrix.python-version }}_cpu_ 75 | path: pytorch/torchcodec/dist/ 76 | 77 | - name: Setup conda env 78 | uses: conda-incubator/setup-miniconda@v3 79 | with: 80 | auto-update-conda: true 81 | miniconda-version: "latest" 82 | activate-environment: test 83 | python-version: ${{ matrix.python-version }} 84 | - name: Update pip 85 | run: python -m pip install --upgrade pip 86 | 87 | - name: Install PyTorch 88 | run: | 89 | # If we're in a release branch or in a PR against a release branch, 90 | # we install the PyTorch RCs from the test channel. Otherwise, e.g. in 91 | # `main` or in PRs against `main`, we install the nightly builds. 
92 | # Note that the `test` RCs are 93 | if [[ (${GITHUB_EVENT_NAME} = 'pull_request' && (${GITHUB_BASE_REF} = 'release'*)) || (${GITHUB_REF} = 'refs/heads/release'*) ]]; then 94 | CHANNEL=test 95 | else 96 | CHANNEL=nightly 97 | fi 98 | python -m pip install --pre torch torchvision --index-url https://download.pytorch.org/whl/${CHANNEL}/cpu 99 | 100 | - name: Install torchcodec from the wheel 101 | run: | 102 | wheel_path=`find pytorch/torchcodec/dist -type f -name "*.whl"` 103 | echo Installing $wheel_path 104 | python -m pip install $wheel_path -vvv 105 | 106 | - name: Check out torchcodec repo 107 | uses: actions/checkout@v3 108 | 109 | - name: Install ffmpeg 110 | run: | 111 | conda install "ffmpeg=${{ matrix.ffmpeg-version-for-tests }}" -c conda-forge 112 | ffmpeg -version 113 | 114 | - name: Install test dependencies 115 | run: | 116 | python -m pip install numpy pytest pillow 117 | 118 | - name: Delete the src/ folder just for fun 119 | run: | 120 | # The only reason we checked-out the repo is to get access to the 121 | # tests. We don't care about the rest. Out of precaution, we delete 122 | # the src/ folder to be extra sure that we're running the code from 123 | # the installed wheel rather than from the source. 124 | # This is just to be extra cautious and very overkill because a) 125 | # there's no way the `torchcodec` package from src/ can be found from 126 | # the PythonPath: the main point of `src/` is precisely to protect 127 | # against that and b) if we ever were to execute code from 128 | # `src/torchcodec`, it would fail loudly because the built .so files 129 | # aren't present there. 130 | rm -r src/ 131 | ls -lh 132 | 133 | - name: Run Python tests 134 | run: | 135 | pytest --override-ini="addopts=-v" test 136 | -------------------------------------------------------------------------------- /src/torchcodec/_core/CpuDeviceInterface.h: -------------------------------------------------------------------------------- 1 | // Copyright (c) Meta Platforms, Inc. and affiliates. 2 | // All rights reserved. 3 | // 4 | // This source code is licensed under the BSD-style license found in the 5 | // LICENSE file in the root directory of this source tree. 
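//
// CPU implementation of DeviceInterface. Decoded AVFrames are converted into
// output tensors on the CPU: color conversion of video frames goes through
// either swscale or FFmpeg's filtergraph API (see getColorConversionLibrary()
// below), and audio resampling is handled with swresample. User-supplied
// transforms are translated into a filtergraph filter string during
// initialization.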
6 | 7 | #pragma once 8 | 9 | #include "DeviceInterface.h" 10 | #include "FFMPEGCommon.h" 11 | #include "FilterGraph.h" 12 | 13 | namespace facebook::torchcodec { 14 | 15 | class CpuDeviceInterface : public DeviceInterface { 16 | public: 17 | CpuDeviceInterface(const torch::Device& device); 18 | 19 | virtual ~CpuDeviceInterface() {} 20 | 21 | std::optional findCodec( 22 | [[maybe_unused]] const AVCodecID& codecId, 23 | [[maybe_unused]] bool isDecoder = true) override { 24 | return std::nullopt; 25 | } 26 | 27 | virtual void initialize( 28 | const AVStream* avStream, 29 | const UniqueDecodingAVFormatContext& avFormatCtx, 30 | const SharedAVCodecContext& codecContext) override; 31 | 32 | virtual void initializeVideo( 33 | const VideoStreamOptions& videoStreamOptions, 34 | const std::vector>& transforms, 35 | const std::optional& resizedOutputDims) override; 36 | 37 | virtual void initializeAudio( 38 | const AudioStreamOptions& audioStreamOptions) override; 39 | 40 | virtual std::optional maybeFlushAudioBuffers() override; 41 | 42 | void convertAVFrameToFrameOutput( 43 | UniqueAVFrame& avFrame, 44 | FrameOutput& frameOutput, 45 | std::optional preAllocatedOutputTensor) override; 46 | 47 | std::string getDetails() override; 48 | 49 | private: 50 | void convertAudioAVFrameToFrameOutput( 51 | UniqueAVFrame& srcAVFrame, 52 | FrameOutput& frameOutput); 53 | 54 | void convertVideoAVFrameToFrameOutput( 55 | UniqueAVFrame& avFrame, 56 | FrameOutput& frameOutput, 57 | std::optional preAllocatedOutputTensor); 58 | 59 | int convertAVFrameToTensorUsingSwScale( 60 | const UniqueAVFrame& avFrame, 61 | torch::Tensor& outputTensor, 62 | const FrameDims& outputDims); 63 | 64 | torch::Tensor convertAVFrameToTensorUsingFilterGraph( 65 | const UniqueAVFrame& avFrame, 66 | const FrameDims& outputDims); 67 | 68 | ColorConversionLibrary getColorConversionLibrary( 69 | const FrameDims& inputFrameDims) const; 70 | 71 | VideoStreamOptions videoStreamOptions_; 72 | AVRational timeBase_; 73 | 74 | // If the resized output dimensions are present, then we always use those as 75 | // the output frame's dimensions. If they are not present, then we use the 76 | // dimensions of the raw decoded frame. Note that we do not know the 77 | // dimensions of the raw decoded frame until very late; we learn it in 78 | // convertAVFrameToFrameOutput(). Deciding the final output frame's actual 79 | // dimensions late allows us to handle video streams with variable 80 | // resolutions. 81 | std::optional resizedOutputDims_; 82 | 83 | // Color-conversion objects. Only one of filterGraph_ and swsContext_ should 84 | // be non-null. Which one we use is determined dynamically in 85 | // getColorConversionLibrary() each time we decode a frame. 86 | // 87 | // Creating both filterGraph_ and swsContext_ is relatively expensive, so we 88 | // reuse them across frames. However, it is possbile that subsequent frames 89 | // are different enough (change in dimensions) that we can't reuse the color 90 | // conversion object. We store the relevant frame context from the frame used 91 | // to create the object last time. We always compare the current frame's info 92 | // against the previous one to determine if we need to recreate the color 93 | // conversion object. 94 | // 95 | // TODO: The names of these fields is confusing, as the actual color 96 | // conversion object for Sws has "context" in the name, and we use 97 | // "context" for the structs we store to know if we need to recreate a 98 | // color conversion object. We should clean that up. 
99 | std::unique_ptr<FilterGraph> filterGraph_;
100 | FiltersContext prevFiltersContext_;
101 | UniqueSwsContext swsContext_;
102 | SwsFrameContext prevSwsFrameContext_;
103 | 
104 | // We pass these filters to FFmpeg's filtergraph API. It is a simple pipeline
105 | // of what FFmpeg calls "filters" to apply to decoded frames before returning
106 | // them. In the PyTorch ecosystem, we call these "transforms". During
107 | // initialization, we convert the user-supplied transforms into this string of
108 | // filters.
109 | //
110 | // Note that if there are no user-supplied transforms, then the default filter
111 | // we use is the copy filter, which is just an identity: it emits the output
112 | // frame unchanged. We supply such a filter because we can't supply just the
113 | // empty string; we must supply SOME filter.
114 | //
115 | // See also [Transform and Format Conversion Order] for more on filters.
116 | std::string filters_ = "copy";
117 | 
118 | // Values set during initialization and referred to in
119 | // getColorConversionLibrary().
120 | bool areTransformsSwScaleCompatible_;
121 | bool userRequestedSwScale_;
122 | 
123 | bool initialized_ = false;
124 | 
125 | // Audio-specific members
126 | AudioStreamOptions audioStreamOptions_;
127 | UniqueSwrContext swrContext_;
128 | };
129 | 
130 | } // namespace facebook::torchcodec
131 | 
--------------------------------------------------------------------------------