├── .circleci └── config.yml ├── .flake8 ├── .github ├── CODE_OF_CONDUCT.md ├── CONTRIBUTING.md ├── ISSUE_TEMPLATE │ ├── bugs.md │ ├── config.yml │ ├── feature_request.md │ └── questions-help.md ├── PULL_REQUEST_TEMPLATE.md └── media │ ├── ava_slowfast.gif │ └── logo_horizontal_color.png ├── .gitignore ├── .readthedocs.yml ├── CONTRIBUTING.md ├── INSTALL.md ├── LICENSE ├── MANIFEST.in ├── README.md ├── dev ├── README.md └── linter.sh ├── docs ├── Makefile ├── README.md ├── make.bat ├── requirements.txt └── source │ ├── accelerator.md │ ├── api │ ├── data │ │ ├── data.rst │ │ └── index.rst │ ├── index.rst │ ├── layers │ │ ├── index.rst │ │ └── layers.rst │ ├── models │ │ ├── byol.rst │ │ ├── csn.rst │ │ ├── head.rst │ │ ├── index.rst │ │ ├── masked_multistream.rst │ │ ├── memory_bank.rst │ │ ├── net.rst │ │ ├── r2plus1d.rst │ │ ├── resnet.rst │ │ ├── simclr.rst │ │ ├── slowfast.rst │ │ ├── stem.rst │ │ └── x3d.rst │ └── transforms │ │ ├── index.rst │ │ └── transforms.rst │ ├── conf.py │ ├── data.md │ ├── data_preparation.md │ ├── index.rst │ ├── layers.md │ ├── model_zoo.md │ ├── models.md │ └── transforms.md ├── hubconf.py ├── projects └── video_nerf │ ├── README.md │ ├── dataset.py │ ├── dataset_utils.py │ ├── download_objectron_data.py │ └── objectron.yaml ├── pytorchvideo ├── __init__.py ├── accelerator │ ├── __init__.py │ ├── deployment │ │ ├── __init__.py │ │ ├── common │ │ │ ├── __init__.py │ │ │ └── model_transmuter.py │ │ └── mobile_cpu │ │ │ ├── __init__.py │ │ │ ├── transmuter │ │ │ ├── __init__.py │ │ │ └── transmuter_mobile_cpu.py │ │ │ └── utils │ │ │ ├── __init__.py │ │ │ └── model_conversion.py │ └── efficient_blocks │ │ ├── __init__.py │ │ ├── efficient_block_base.py │ │ └── no_op_convert_block.py ├── data │ ├── __init__.py │ ├── ava.py │ ├── charades.py │ ├── clip_sampling.py │ ├── dataset_manifest_utils.py │ ├── decoder.py │ ├── domsev.py │ ├── ego4d │ │ ├── __init__.py │ │ ├── ego4d_dataset.py │ │ └── utils.py │ ├── encoded_video.py │ ├── encoded_video_decord.py │ ├── encoded_video_pyav.py │ ├── encoded_video_torchvision.py │ ├── epic_kitchen │ │ ├── __init__.py │ │ ├── epic_kitchen_dataset.py │ │ └── utils.py │ ├── epic_kitchen_forecasting.py │ ├── epic_kitchen_recognition.py │ ├── frame_video.py │ ├── hmdb51.py │ ├── json_dataset.py │ ├── kinetics.py │ ├── labeled_video_dataset.py │ ├── labeled_video_paths.py │ ├── ssv2.py │ ├── ucf101.py │ ├── utils.py │ └── video.py ├── layers │ ├── __init__.py │ ├── accelerator │ │ ├── __init__.py │ │ └── mobile_cpu │ │ │ ├── __init__.py │ │ │ ├── activation_functions.py │ │ │ ├── attention.py │ │ │ ├── conv_helper.py │ │ │ ├── convolutions.py │ │ │ ├── fully_connected.py │ │ │ └── pool.py │ ├── attention.py │ ├── attention_torchscript.py │ ├── batch_norm.py │ ├── convolutions.py │ ├── distributed.py │ ├── drop_path.py │ ├── fusion.py │ ├── mlp.py │ ├── nonlocal_net.py │ ├── positional_encoding.py │ ├── positional_encoding_torchscript.py │ ├── squeeze_excitation.py │ ├── swish.py │ └── utils.py ├── losses │ ├── __init__.py │ └── soft_target_cross_entropy.py ├── models │ ├── __init__.py │ ├── accelerator │ │ ├── __init__.py │ │ └── mobile_cpu │ │ │ ├── __init__.py │ │ │ ├── efficient_x3d.py │ │ │ └── residual_blocks.py │ ├── audio_visual_slowfast.py │ ├── byol.py │ ├── csn.py │ ├── head.py │ ├── hub │ │ ├── README.md │ │ ├── __init__.py │ │ ├── csn.py │ │ ├── efficient_x3d_mobile_cpu.py │ │ ├── r2plus1d.py │ │ ├── resnet.py │ │ ├── slowfast.py │ │ ├── utils.py │ │ ├── vision_transformers.py │ │ └── x3d.py │ ├── masked_multistream.py 
│ ├── memory_bank.py │ ├── net.py │ ├── r2plus1d.py │ ├── resnet.py │ ├── simclr.py │ ├── slowfast.py │ ├── stem.py │ ├── vision_transformers.py │ ├── weight_init.py │ └── x3d.py ├── neural_engine │ ├── detection_hook.py │ ├── engine.py │ └── hook.py └── transforms │ ├── __init__.py │ ├── augmentations.py │ ├── augmix.py │ ├── functional.py │ ├── mix.py │ ├── rand_augment.py │ ├── transforms.py │ └── transforms_factory.py ├── pytorchvideo_trainer ├── README.md ├── pytorchvideo_trainer │ ├── __init__.py │ ├── callbacks │ │ ├── __init__.py │ │ └── precise_batchnorm.py │ ├── conf │ │ ├── __init__.py │ │ ├── byol_train_app_conf.yaml │ │ ├── callbacks │ │ │ └── precise_bn.yaml │ │ ├── classification_mvit_16x4.yaml │ │ ├── classification_slow_8x8_r50.yaml │ │ ├── classification_slowfast_8x8_r50.yaml │ │ ├── classification_x3d_xs.yaml │ │ ├── datamodule │ │ │ ├── dataloader │ │ │ │ ├── kinetics_classification.yaml │ │ │ │ └── kinetics_contrastive.yaml │ │ │ └── transforms │ │ │ │ ├── kinetics_classification_mvit_16x4.yaml │ │ │ │ ├── kinetics_classification_slow.yaml │ │ │ │ ├── kinetics_classification_slowfast.yaml │ │ │ │ ├── kinetics_classification_x3d_xs.yaml │ │ │ │ ├── kinetics_contrastive.yaml │ │ │ │ └── kinetics_moco_v2.yaml │ │ ├── logger │ │ │ └── ptl.yaml │ │ ├── moco_v2_train_app_conf.yaml │ │ ├── module │ │ │ ├── knn_memory │ │ │ │ └── kinetics_k400.yaml │ │ │ ├── loss │ │ │ │ ├── contrastive.yaml │ │ │ │ ├── cross_entropy.yaml │ │ │ │ ├── nt_xent.yaml │ │ │ │ ├── similarity.yaml │ │ │ │ └── soft_cross_entropy.yaml │ │ │ ├── lr_scheduler │ │ │ │ └── cosine_with_warmup.yaml │ │ │ ├── metrics │ │ │ │ ├── accuracy.yaml │ │ │ │ └── average_precision.yaml │ │ │ ├── model │ │ │ │ ├── from_lightning_checkpoint.yaml │ │ │ │ ├── from_model_zoo_checkpoint.yaml │ │ │ │ ├── from_ssl_checkpoint.yaml │ │ │ │ ├── mvit_base_16x4.yaml │ │ │ │ ├── slow_r50.yaml │ │ │ │ ├── slow_r50_byol.yaml │ │ │ │ ├── slow_r50_moco_v2.yaml │ │ │ │ ├── slow_r50_simclr.yaml │ │ │ │ ├── slowfast_r50.yaml │ │ │ │ └── x3d_xs.yaml │ │ │ └── optim │ │ │ │ ├── adam.yaml │ │ │ │ ├── adamw.yaml │ │ │ │ ├── sgd.yaml │ │ │ │ └── sgd_ssl.yaml │ │ ├── simclr_train_app_conf.yaml │ │ ├── submitit_conf │ │ │ └── fair_cluster.yaml │ │ └── trainer │ │ │ ├── cpu.yaml │ │ │ ├── multi_gpu.yaml │ │ │ └── single_gpu.yaml │ ├── datamodule │ │ ├── __init__.py │ │ ├── collators.py │ │ ├── datamodule.py │ │ ├── rand_erase_transform.py │ │ └── transforms.py │ ├── module │ │ ├── __init__.py │ │ ├── byol.py │ │ ├── distributed_utils.py │ │ ├── losses.py │ │ ├── lr_policy.py │ │ ├── moco_v2.py │ │ ├── optimizer.py │ │ ├── simclr.py │ │ ├── ssl_helper.py │ │ └── video_classification.py │ └── train_app.py ├── setup.py └── tests │ ├── __init__.py │ ├── test_conf_datamodule.py │ ├── test_conf_module.py │ ├── test_task_byol.py │ ├── test_task_moco_v2.py │ ├── test_task_module_all.py │ ├── test_task_simclr.py │ ├── test_task_video_classification.py │ └── util.py ├── setup.cfg ├── setup.py ├── tests ├── README.md ├── __init__.py ├── benchmark_accelerator_efficient_blocks.py ├── benchmark_transforms.py ├── test_accelerator_deployment_mobile_cpu_model_conversion.py ├── test_accelerator_deployment_model_transmuter.py ├── test_accelerator_efficient_blocks_mobile_cpu_activation_attention.py ├── test_accelerator_efficient_blocks_mobile_cpu_conv3d.py ├── test_accelerator_efficient_blocks_mobile_cpu_head_layer.py ├── test_accelerator_efficient_blocks_mobile_cpu_residual_block.py ├── test_accelerator_models_efficient_x3d.py ├── test_data_ava_dataset.py ├── 
test_data_charades_dataset.py ├── test_data_dataset_manifest_utils.py ├── test_data_domsev_dataset.py ├── test_data_encoded_video.py ├── test_data_epic_kitchen_dataset.py ├── test_data_epic_kitchen_forecasting.py ├── test_data_epic_kitchen_recognition.py ├── test_data_epic_kitchen_utils.py ├── test_data_frame_video.py ├── test_data_json_dataset.py ├── test_data_labeled_video_dataset.py ├── test_data_ssv2_dataset.py ├── test_data_utils.py ├── test_fuse_bn.py ├── test_layers_attention.py ├── test_layers_convolutions.py ├── test_layers_drop_path.py ├── test_layers_fusion.py ├── test_layers_mlp.py ├── test_layers_nonlocal_net.py ├── test_layers_positional_encoding.py ├── test_layers_squeeze_excitation.py ├── test_losses_soft_target_cross_entropy.py ├── test_models_audio_visual_slowfast.py ├── test_models_byol.py ├── test_models_csn.py ├── test_models_head.py ├── test_models_hub_vision_transformers.py ├── test_models_masked_multistream.py ├── test_models_memory_bank.py ├── test_models_r2plus1d.py ├── test_models_resnet.py ├── test_models_slowfast.py ├── test_models_stem.py ├── test_models_vision_transformers.py ├── test_models_x3d.py ├── test_simclr.py ├── test_transforms.py ├── test_uniform_clip_sampler.py └── utils.py ├── tutorials ├── accelerator │ ├── Build_your_model_with_PytorchVideo_Accelerator.ipynb │ ├── Use_Model_Transmuter.ipynb │ └── Use_PytorchVideo_Accelerator_Model_Zoo.ipynb ├── torchhub_inference_tutorial.ipynb ├── video_classification_example │ ├── environment.yml │ ├── slurm.py │ └── train.py └── video_detection_example │ ├── video_detection_inference_tutorial.ipynb │ └── visualization.py └── website ├── .dockerignore ├── .gitignore ├── docs ├── tutorial_accelerator_build_your_model.md ├── tutorial_accelerator_use_accelerator_model_zoo.md ├── tutorial_accelerator_use_model_transmuter.md ├── tutorial_classification.md ├── tutorial_overview.md ├── tutorial_torchhub_detection_inference.md └── tutorial_torchhub_inference.md └── website ├── README.md ├── core └── Footer.js ├── package.json ├── pages └── en │ └── index.js ├── sidebars.json ├── siteConfig.js └── static ├── CNAME ├── css └── custom.css └── img ├── efficient.svg ├── favicon.png ├── logo.svg ├── logo_no_text.svg ├── logo_white.svg ├── modelzoo.svg ├── oss_logo.png ├── pytorch.svg └── reproducible.svg /.flake8: -------------------------------------------------------------------------------- 1 | [flake8] 2 | ignore = E203, E266, E501, W503, E221 3 | max-line-length = 88 4 | max-complexity = 18 5 | select = B,C,E,F,W,T4,B9 6 | exclude = build,__init__.py 7 | -------------------------------------------------------------------------------- /.github/CODE_OF_CONDUCT.md: -------------------------------------------------------------------------------- 1 | # Code of Conduct 2 | 3 | ## Our Pledge 4 | 5 | In the interest of fostering an open and welcoming environment, we as 6 | contributors and maintainers pledge to make participation in our project and 7 | our community a harassment-free experience for everyone, regardless of age, body 8 | size, disability, ethnicity, sex characteristics, gender identity and expression, 9 | level of experience, education, socio-economic status, nationality, personal 10 | appearance, race, religion, or sexual identity and orientation. 
11 | 12 | ## Our Standards 13 | 14 | Examples of behavior that contributes to creating a positive environment 15 | include: 16 | 17 | * Using welcoming and inclusive language 18 | * Being respectful of differing viewpoints and experiences 19 | * Gracefully accepting constructive criticism 20 | * Focusing on what is best for the community 21 | * Showing empathy towards other community members 22 | 23 | Examples of unacceptable behavior by participants include: 24 | 25 | * The use of sexualized language or imagery and unwelcome sexual attention or 26 | advances 27 | * Trolling, insulting/derogatory comments, and personal or political attacks 28 | * Public or private harassment 29 | * Publishing others' private information, such as a physical or electronic 30 | address, without explicit permission 31 | * Other conduct which could reasonably be considered inappropriate in a 32 | professional setting 33 | 34 | ## Our Responsibilities 35 | 36 | Project maintainers are responsible for clarifying the standards of acceptable 37 | behavior and are expected to take appropriate and fair corrective action in 38 | response to any instances of unacceptable behavior. 39 | 40 | Project maintainers have the right and responsibility to remove, edit, or 41 | reject comments, commits, code, wiki edits, issues, and other contributions 42 | that are not aligned to this Code of Conduct, or to ban temporarily or 43 | permanently any contributor for other behaviors that they deem inappropriate, 44 | threatening, offensive, or harmful. 45 | 46 | ## Scope 47 | 48 | This Code of Conduct applies within all project spaces, and it also applies when 49 | an individual is representing the project or its community in public spaces. 50 | Examples of representing a project or community include using an official 51 | project e-mail address, posting via an official social media account, or acting 52 | as an appointed representative at an online or offline event. Representation of 53 | a project may be further defined and clarified by project maintainers. 54 | 55 | ## Enforcement 56 | 57 | Instances of abusive, harassing, or otherwise unacceptable behavior may be 58 | reported by contacting the project team at . All 59 | complaints will be reviewed and investigated and will result in a response that 60 | is deemed necessary and appropriate to the circumstances. The project team is 61 | obligated to maintain confidentiality with regard to the reporter of an incident. 62 | Further details of specific enforcement policies may be posted separately. 63 | 64 | Project maintainers who do not follow or enforce the Code of Conduct in good 65 | faith may face temporary or permanent repercussions as determined by other 66 | members of the project's leadership. 67 | 68 | ## Attribution 69 | 70 | This Code of Conduct is adapted from the [Contributor Covenant][homepage], version 1.4, 71 | available at https://www.contributor-covenant.org/version/1/4/code-of-conduct.html 72 | 73 | [homepage]: https://www.contributor-covenant.org 74 | 75 | For answers to common questions about this code of conduct, see 76 | https://www.contributor-covenant.org/faq 77 | -------------------------------------------------------------------------------- /.github/CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # Contributing to PyTorchVIdeo 2 | We want to make contributing to this project as easy and transparent as 3 | possible. 4 | 5 | ## Pull Requests 6 | We actively welcome your pull requests. 
7 | 8 | However, if you're adding any significant features, please make sure to have a corresponding issue to outline your proposal and motivation and allow time for us to give feedback, *before* you send a PR. 9 | We do not always accept new features, and we take the following factors into consideration: 10 | 11 | - Whether the same feature can be achieved without modifying PyTorchVideo directly. If any aspect of the API is not extensible, please highlight this in an issue so we can work on making this more extensible. 12 | - Whether the feature is potentially useful to a large audience, or only to a small portion of users. 13 | - Whether the proposed solution has a good design and interface. 14 | - Whether the proposed solution adds extra mental/practical overhead to users who don't need such feature. 15 | - Whether the proposed solution breaks existing APIs. 16 | 17 | When sending a PR, please ensure you complete the following steps: 18 | 19 | 1. Fork the repo and create your branch from `main`. Follow the instructions 20 | in [INSTALL.md](../INSTALL.md) to build the repo. 21 | 2. If you've added code that should be tested, add tests. 22 | 3. If you've changed any APIs, please update the documentation. 23 | 4. Ensure the test suite passes: 24 | ``` 25 | cd pytorchvideo/tests 26 | python -m unittest -v 27 | ``` 28 | 5. Make sure your code lints by running `dev/linter.sh` from the project root. 29 | 6. If a PR contains multiple orthogonal changes, split it into multiple separate PRs. 30 | 7. If you haven't already, complete the Contributor License Agreement ("CLA"). 31 | 32 | ## Contributor License Agreement ("CLA") 33 | In order to accept your pull request, we need you to submit a CLA. You only need 34 | to do this once to work on any of Facebook's open source projects. 35 | 36 | Complete your CLA here: 37 | 38 | ## Issues 39 | We use GitHub issues to track public bugs. Please ensure your description is 40 | clear and has sufficient instructions to be able to reproduce the issue. 41 | 42 | Facebook has a [bounty program](https://www.facebook.com/whitehat/) for the safe 43 | disclosure of security bugs. In those cases, please go through the process 44 | outlined on that page and do not file a public issue. 45 | 46 | ## Coding Style 47 | We follow these [python](http://google.github.io/styleguide/pyguide.html) and [C++](https://google.github.io/styleguide/cppguide.html) style guides. 48 | 49 | For the linter to work, you will need to install `black`, `flake`, `isort` and `clang-format`, and 50 | they need to be fairly up to date. 51 | 52 | ## License 53 | By contributing to PyTorchVideo, you agree that your contributions will be licensed 54 | under the LICENSE file in the root directory of this source tree. 55 | 56 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/bugs.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: "🐛 Bugs / Unexpected behaviors" 3 | about: Please report unexpected behaviors or bugs in PyTorchVideo. 4 | 5 | --- 6 | 7 | If you do not know the root cause of the problem / bug, and wish someone to help you, please 8 | post according to this template: 9 | 10 | ## 🐛 Bugs / Unexpected behaviors 11 | 12 | 13 | NOTE: Please look at the existing list of Issues tagged with the label ['bug`](https://github.com/facebookresearch/pytorchvideo/issues?q=label%3Abug). **Only open a new issue if this bug has not already been reported. 
If an issue already exists, please comment there instead.**. 14 | 15 | ## Instructions To Reproduce the Issue: 16 | 17 | Please include the following (depending on what the issue is): 18 | 19 | 1. Any changes you made (`git diff`) or code you wrote 20 | ``` 21 | 22 | ``` 23 | 2. The exact command(s) you ran: 24 | 3. What you observed (including the full logs): 25 | ``` 26 | 27 | ``` 28 | 29 | Please also simplify the steps as much as possible so they do not require additional resources to 30 | run, such as a private dataset, models, etc. 31 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/config.yml: -------------------------------------------------------------------------------- 1 | blank_issues_enabled: false 2 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/feature_request.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: "\U0001F680 Feature Request" 3 | about: Submit a proposal/request for a new PyTorchVideo feature 4 | 5 | --- 6 | 7 | ## 🚀 Feature 8 | 9 | 10 | NOTE: Please look at the existing list of Issues tagged with the label ['enhancement`](https://github.com/facebookresearch/pytorchvideo/issues?q=label%3Aenhancement). **Only open a new issue if you do not see your feature request there**. 11 | 12 | ## Motivation 13 | 14 | 16 | 17 | ## Pitch 18 | 19 | 20 | 21 | NOTE: we only consider adding new features if they are useful for many users. 22 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/questions-help.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: "❓ Questions" 3 | about: How do I do X with PyTorchVideo? How does PyTorchVideo do X? 4 | 5 | --- 6 | 7 | ## ❓ Questions on how to use PyTorchVideo 8 | 9 | 10 | 11 | 12 | NOTE: Please look at the existing list of Issues tagged with the label ['question`](https://github.com/facebookresearch/pytorchvideo/issues?q=label%3Aquestion) or ['how-to`](https://github.com/facebookresearch/pytorchvideo/issues?q=label%3A%22how+to%22). **Only open a new issue if you cannot find an answer there**. 13 | 14 | Also note the following: 15 | 16 | 1. If you encountered any errors or unexpected issues while using PyTorchVideo and need help resolving them, 17 | please use the "Bugs / Unexpected behaviors" issue template. 18 | 19 | 2. We do not answer general machine learning / computer vision questions that are not specific to 20 | PyTorchVideo, such as how a model works or what algorithm/methods can be 21 | used to achieve X. 22 | -------------------------------------------------------------------------------- /.github/PULL_REQUEST_TEMPLATE.md: -------------------------------------------------------------------------------- 1 | ## Motivation and Context 2 | 3 | 4 | 5 | 6 | 7 | ## How Has This Been Tested 8 | 9 | 10 | 11 | ## Types of changes 12 | 13 | 14 | - [ ] Docs change / refactoring / dependency upgrade 15 | - [ ] Bug fix (non-breaking change which fixes an issue) 16 | - [ ] New feature (non-breaking change which adds functionality) 17 | - [ ] Breaking change (fix or feature that would cause existing functionality to change) 18 | 19 | ## Checklist 20 | 21 | 22 | 23 | - [ ] My code follows the code style of this project. 24 | - [ ] My change requires a change to the documentation. 25 | - [ ] I have updated the documentation accordingly. 
26 | - [ ] I have read the **CONTRIBUTING** document. 27 | - [ ] I have completed my CLA (see **CONTRIBUTING**) 28 | - [ ] I have added tests to cover my changes. 29 | - [ ] All new and existing tests passed. 30 | 31 | -------------------------------------------------------------------------------- /.github/media/ava_slowfast.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookresearch/pytorchvideo/6cdc929315aab1b5674b6dcf73b16ec99147735f/.github/media/ava_slowfast.gif -------------------------------------------------------------------------------- /.github/media/logo_horizontal_color.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookresearch/pytorchvideo/6cdc929315aab1b5674b6dcf73b16ec99147735f/.github/media/logo_horizontal_color.png -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *.DS_Store 2 | 3 | build/ 4 | _ext 5 | *.pyc 6 | *.pyd 7 | *.so 8 | *.dll 9 | *.egg-info/ 10 | **/__pycache__/ 11 | *-checkpoint.ipynb 12 | **/.ipynb_checkpoints 13 | **/.ipynb_checkpoints/** 14 | 15 | 16 | # Docusaurus site 17 | website/yarn.lock 18 | website/build/ 19 | website/i18n/ 20 | website/node_modules/* 21 | website/npm-debug.log 22 | 23 | ## Generated for tutorials 24 | website/_tutorials/ 25 | website/static/files/ 26 | website/pages/tutorials/* 27 | !website/pages/tutorials/index.js 28 | 29 | 30 | ## Conda and pip builds 31 | packaging/out/ 32 | packaging/output_files/ 33 | dist/ 34 | wheels/ 35 | -------------------------------------------------------------------------------- /.readthedocs.yml: -------------------------------------------------------------------------------- 1 | # .readthedocs.yml 2 | # Read the Docs configuration file 3 | # See https://docs.readthedocs.io/en/stable/config-file/v2.html for details 4 | 5 | # Required 6 | version: 2 7 | 8 | # Build documentation in the docs/ directory with Sphinx 9 | sphinx: 10 | builder: html 11 | configuration: docs/source/conf.py 12 | 13 | # Build documentation with MkDocs 14 | #mkdocs: 15 | # configuration: mkdocs.yml 16 | 17 | # Optionally build your docs in additional formats such as PDF and ePub 18 | formats: all 19 | 20 | # Optionally set the version of Python and requirements required to build your docs 21 | python: 22 | version: 3.7 23 | system_packages: true 24 | install: 25 | - requirements: docs/requirements.txt 26 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # Contributing to fvcore 2 | We want to make contributing to this project as easy and transparent as 3 | possible. 4 | 5 | ## Pull Requests 6 | We actively welcome your pull requests. 7 | 8 | 1. Fork the repo and create your branch from `main`. 9 | 2. If you've added code that should be tested, add tests. 10 | 3. If you've changed APIs, update the documentation. 11 | 4. Ensure the test suite passes. 12 | 5. Make sure your code lints. 13 | 6. If you haven't already, complete the Contributor License Agreement ("CLA"). 14 | 15 | ## Testing 16 | 17 | Please follow the instructions mentioned in [test-README](https://github.com/facebookresearch/pytorchvideo/blob/main/tests/README.md) to run the existing and your newly added tests. 
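For quick reference, the full suite can usually be run with Python's built-in unittest runner from the repository root (the test README linked above has the authoritative, up-to-date commands):

```
cd tests
python -m unittest -v

# Or run a single test module, e.g. the ResNet model tests
python -m unittest test_models_resnet -v
```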
18 | 19 | ## Linting 20 | 21 | We provide a linting script to correctly format your code changes. 22 | Please follow the instructions mentioned in [dev-README](https://github.com/facebookresearch/pytorchvideo/blob/main/dev/README.md) to run the linter. 23 | 24 | 25 | ## Contributor License Agreement ("CLA") 26 | In order to accept your pull request, we need you to submit a CLA. You only need 27 | to do this once to work on any of Facebook's open source projects. 28 | 29 | Complete your CLA here: 30 | 31 | ## Issues 32 | We use GitHub issues to track public bugs. Please ensure your description is 33 | clear and has sufficient instructions to be able to reproduce the issue. 34 | 35 | Facebook has a [bounty program](https://www.facebook.com/whitehat/) for the safe 36 | disclosure of security bugs. In those cases, please go through the process 37 | outlined on that page and do not file a public issue. 38 | 39 | ## License 40 | By contributing to fvcore, you agree that your contributions will be licensed 41 | under the LICENSE file in the root directory of this source tree. 42 | -------------------------------------------------------------------------------- /INSTALL.md: -------------------------------------------------------------------------------- 1 | # Installation 2 | 3 | ## Installing PyTorchVideo 4 | 5 | 6 | ### 1. Install from PyPI 7 | For stable release, 8 | ``` 9 | pip install pytorchvideo 15 | ``` 16 | 17 | For nightly builds, 18 | ``` 19 | pip install pytorchvideo-nightly 20 | ``` 21 | 22 | ### 2. Install from GitHub using pip 23 | ``` 24 | pip install "git+https://github.com/facebookresearch/pytorchvideo.git" 25 | ``` 26 | To install from the code of the latest released version instead of the main branch, use the following: 27 | ``` 28 | pip install "git+https://github.com/facebookresearch/pytorchvideo.git@stable" 29 | ``` 30 | 31 | ### 3. Install from a local clone 32 | ``` 33 | git clone https://github.com/facebookresearch/pytorchvideo.git 34 | cd pytorchvideo 35 | pip install -e . 36 | 37 | # For developing and testing 38 | pip install -e ".[test,dev]" 39 | ``` 40 | 41 | 42 | ## Requirements 43 | 44 | ### Core library 45 | 46 | - Python 3.7 or 3.8 47 | - PyTorch 1.8.0 or higher. 48 | - torchvision that matches the PyTorch installation. You can install them together as explained at pytorch.org to make sure of this. 49 | - [fvcore](https://github.com/facebookresearch/fvcore) version 0.1.4 or higher 50 | - [ioPath](https://github.com/facebookresearch/iopath) 51 | - If CUDA is to be used, use a CUDA version that is supported by the corresponding PyTorch version (10.2 or higher). 52 | 53 | We recommend setting up a conda environment with PyTorch and torchvision before installing PyTorchVideo. 54 | For instance, follow the instructions below to set up the conda environment: 55 | ``` 56 | conda create -n pytorchvideo python=3.7 57 | conda activate pytorchvideo 58 | conda install -c pytorch pytorch=1.8.0 torchvision cudatoolkit=10.2 59 | ``` 60 | 61 | ## Testing 62 | 63 | Please follow the instructions mentioned in [test-README](https://github.com/facebookresearch/pytorchvideo/blob/main/tests/README.md) to run the provided tests. 64 | 65 | ## Linting 66 | 67 | We also provide a linting script to correctly format your code edits.
68 | Please follow the instructions mentioned in [dev-README](https://github.com/facebookresearch/pytorchvideo/blob/main/dev/README.md) to run the linter. 69 | -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | include LICENSE 2 | include CONTRIBUTING.md 3 | include requirements.txt -------------------------------------------------------------------------------- /dev/README.md: -------------------------------------------------------------------------------- 1 | ## Running Linter 2 | 3 | 4 | Before running the linter, please ensure that you installed the necessary additional linter dependencies. 5 | If not installed, check the [install-README](https://github.com/facebookresearch/pytorchvideo/blob/main/INSTALL.md) on how to do it. 6 | 7 | Post that, you can run the linter from the project root using, 8 | 9 | ``` 10 | ./dev/linter.sh 11 | ``` 12 | -------------------------------------------------------------------------------- /dev/linter.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash -ev 2 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved 3 | 4 | # Run this script at project root with "./dev/linter.sh" before you commit. 5 | 6 | echo "Running autoflake..." 7 | python -m autoflake --remove-all-unused-imports -i . 8 | 9 | echo "Running isort..." 10 | isort -y -sp . 11 | 12 | echo "Running black..." 13 | black . 14 | 15 | echo "Running flake8..." 16 | if [ -x "$(command -v flake8)" ]; then 17 | flake8 . 18 | else 19 | python3 -m flake8 . 20 | fi 21 | 22 | command -v arc > /dev/null && { 23 | echo "Running arc lint ..." 24 | arc lint 25 | } 26 | -------------------------------------------------------------------------------- /docs/Makefile: -------------------------------------------------------------------------------- 1 | # Minimal makefile for Sphinx documentation 2 | # 3 | 4 | # You can set these variables from the command line, and also 5 | # from the environment for the first two. 6 | SPHINXOPTS ?= 7 | SPHINXBUILD ?= sphinx-build 8 | SOURCEDIR = source 9 | BUILDDIR = build 10 | 11 | # Put it first so that "make" without argument is like "make help". 12 | help: 13 | @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 14 | 15 | .PHONY: help Makefile 16 | 17 | # Catch-all target: route all unknown targets to Sphinx using the new 18 | # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). 19 | %: Makefile 20 | @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 21 | -------------------------------------------------------------------------------- /docs/README.md: -------------------------------------------------------------------------------- 1 | 2 | ## Setup 3 | 4 | ### Install dependencies 5 | 6 | ``` 7 | pip install -U recommonmark mock sphinx sphinx_rtd_theme sphinx_markdown_tables 8 | ``` 9 | 10 | ### Add symlink to the root README.md 11 | 12 | We want to include the root readme as an overview. Before generating the docs create a symlink to the root readme. 13 | 14 | ``` 15 | cd /docs 16 | ln -s ../README.md overview.md 17 | ``` 18 | 19 | In `conf.py` for deployment this is done using `subprocess.call`. 20 | 21 | ### Add a new file 22 | 23 | Add a new `.md` or `.rst` file and add the name to the doc tree in `index.rst` e.g 24 | 25 | ``` 26 | .. 
toctree:: 27 | :maxdepth: 1 28 | :caption: Intro Documentation 29 | 30 | overview 31 | ``` 32 | 33 | ### Build 34 | 35 | From `pytorchvideo/docs` run: 36 | 37 | ``` 38 | > make html 39 | ``` 40 | 41 | The website is generated in `build/html`. 42 | 43 | ### Common Issues 44 | 45 | Sphinx can be fussy, and sometimes about things you weren’t expecting. For example, you might encounter something like: 46 | 47 | WARNING: toctree contains reference to nonexisting document u'overview' 48 | ... 49 | checking consistency... 50 | /docs/overview.rst:: 51 | WARNING: document isn't included in any toctree 52 | 53 | You might have indented overview in the .. toctree:: in index.rst with four spaces, when Sphinx is expecting three. 54 | 55 | 56 | ### View 57 | 58 | Start a python simple server: 59 | 60 | ``` 61 | > python -m http.server 62 | ``` 63 | 64 | Navigate to: `http://0.0.0.0:8000/` 65 | 66 | -------------------------------------------------------------------------------- /docs/make.bat: -------------------------------------------------------------------------------- 1 | @ECHO OFF 2 | 3 | pushd %~dp0 4 | 5 | REM Command file for Sphinx documentation 6 | 7 | if "%SPHINXBUILD%" == "" ( 8 | set SPHINXBUILD=sphinx-build 9 | ) 10 | set SOURCEDIR=source 11 | set BUILDDIR=build 12 | 13 | if "%1" == "" goto help 14 | 15 | %SPHINXBUILD% >NUL 2>NUL 16 | if errorlevel 9009 ( 17 | echo. 18 | echo.The 'sphinx-build' command was not found. Make sure you have Sphinx 19 | echo.installed, then set the SPHINXBUILD environment variable to point 20 | echo.to the full path of the 'sphinx-build' executable. Alternatively you 21 | echo.may add the Sphinx directory to PATH. 22 | echo. 23 | echo.If you don't have Sphinx installed, grab it from 24 | echo.http://sphinx-doc.org/ 25 | exit /b 1 26 | ) 27 | 28 | %SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% 29 | goto end 30 | 31 | :help 32 | %SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% 33 | 34 | :end 35 | popd 36 | -------------------------------------------------------------------------------- /docs/requirements.txt: -------------------------------------------------------------------------------- 1 | docutils==0.16 2 | # https://github.com/sphinx-doc/sphinx/commit/7acd3ada3f38076af7b2b5c9f3b60bb9c2587a3d 3 | sphinx==3.2.0 4 | recommonmark==0.6.0 5 | sphinx_markdown_tables 6 | mock 7 | numpy 8 | av 9 | torch 10 | torchvision 11 | opencv-python 12 | parameterized 13 | git+git://github.com/facebookresearch/fvcore.git 14 | git+git://github.com/facebookresearch/iopath.git 15 | git+git://github.com/kalyanvasudev/pytorch_sphinx_theme.git 16 | -------------------------------------------------------------------------------- /docs/source/api/data/data.rst: -------------------------------------------------------------------------------- 1 | pytorchvideo.data 2 | ================= 3 | 4 | .. automodule:: pytorchvideo.data 5 | :imported-members: 6 | :members: 7 | :undoc-members: 8 | :show-inheritance: 9 | -------------------------------------------------------------------------------- /docs/source/api/data/index.rst: -------------------------------------------------------------------------------- 1 | Data API 2 | ================== 3 | 4 | .. toctree:: 5 | 6 | data 7 | 8 | -------------------------------------------------------------------------------- /docs/source/api/index.rst: -------------------------------------------------------------------------------- 1 | API Documentation 2 | ================== 3 | 4 | .. 
toctree:: 5 | 6 | models/index 7 | data/index 8 | layers/index 9 | transforms/index -------------------------------------------------------------------------------- /docs/source/api/layers/index.rst: -------------------------------------------------------------------------------- 1 | Layers API 2 | ================== 3 | 4 | .. toctree:: 5 | 6 | layers -------------------------------------------------------------------------------- /docs/source/api/layers/layers.rst: -------------------------------------------------------------------------------- 1 | pytorchvideo.layers.batch_norm 2 | ================================= 3 | 4 | 5 | .. automodule:: pytorchvideo.layers.batch_norm 6 | :members: 7 | 8 | 9 | pytorchvideo.layers.convolutions 10 | ================================= 11 | 12 | 13 | .. automodule:: pytorchvideo.layers.convolutions 14 | :members: 15 | 16 | pytorchvideo.layers.fusion 17 | ================================= 18 | 19 | 20 | .. automodule:: pytorchvideo.layers.fusion 21 | :members: 22 | 23 | pytorchvideo.layers.mlp 24 | ================================= 25 | 26 | 27 | .. automodule:: pytorchvideo.layers.mlp 28 | :members: 29 | 30 | pytorchvideo.layers.nonlocal_net 31 | ================================= 32 | 33 | 34 | .. automodule:: pytorchvideo.layers.nonlocal_net 35 | :members: 36 | 37 | pytorchvideo.layers.positional_encoding 38 | ================================= 39 | 40 | 41 | .. automodule:: pytorchvideo.layers.positional_encoding 42 | :members: 43 | 44 | pytorchvideo.layers.swish 45 | ================================= 46 | 47 | 48 | .. automodule:: pytorchvideo.layers.swish 49 | :members: 50 | 51 | pytorchvideo.layers.squeeze_excitation 52 | ================================= 53 | 54 | 55 | .. automodule:: pytorchvideo.layers.squeeze_excitation 56 | :members: -------------------------------------------------------------------------------- /docs/source/api/models/byol.rst: -------------------------------------------------------------------------------- 1 | pytorchvideo.models.byol 2 | ================================= 3 | 4 | 5 | .. automodule:: pytorchvideo.models.byol 6 | :members: -------------------------------------------------------------------------------- /docs/source/api/models/csn.rst: -------------------------------------------------------------------------------- 1 | pytorchvideo.models.csn 2 | ================================= 3 | 4 | 5 | .. automodule:: pytorchvideo.models.csn 6 | :members: -------------------------------------------------------------------------------- /docs/source/api/models/head.rst: -------------------------------------------------------------------------------- 1 | pytorchvideo.models.head 2 | ================================= 3 | 4 | 5 | .. automodule:: pytorchvideo.models.head 6 | :members: -------------------------------------------------------------------------------- /docs/source/api/models/index.rst: -------------------------------------------------------------------------------- 1 | Models API 2 | ================== 3 | 4 | .. toctree:: 5 | 6 | resnet 7 | net 8 | head 9 | stem 10 | csn 11 | x3d 12 | slowfast 13 | r2plus1d 14 | simclr 15 | byol 16 | memory_bank 17 | masked_multistream -------------------------------------------------------------------------------- /docs/source/api/models/masked_multistream.rst: -------------------------------------------------------------------------------- 1 | pytorchvideo.models.masked_multistream 2 | ================================= 3 | 4 | 5 | .. 
automodule:: pytorchvideo.models.masked_multistream 6 | :members: -------------------------------------------------------------------------------- /docs/source/api/models/memory_bank.rst: -------------------------------------------------------------------------------- 1 | pytorchvideo.models.memory_bank 2 | ================================= 3 | 4 | 5 | .. automodule:: pytorchvideo.models.memory_bank 6 | :members: -------------------------------------------------------------------------------- /docs/source/api/models/net.rst: -------------------------------------------------------------------------------- 1 | pytorchvideo.models.net 2 | ================================= 3 | 4 | 5 | .. automodule:: pytorchvideo.models.net 6 | :members: -------------------------------------------------------------------------------- /docs/source/api/models/r2plus1d.rst: -------------------------------------------------------------------------------- 1 | pytorchvideo.models.r2plus1d 2 | ================================= 3 | 4 | 5 | .. automodule:: pytorchvideo.models.r2plus1d 6 | :members: -------------------------------------------------------------------------------- /docs/source/api/models/resnet.rst: -------------------------------------------------------------------------------- 1 | pytorchvideo.models.resnet 2 | ================================= 3 | 4 | Building blocks for Resnet and resnet-like models 5 | 6 | .. automodule:: pytorchvideo.models.resnet 7 | :members: -------------------------------------------------------------------------------- /docs/source/api/models/simclr.rst: -------------------------------------------------------------------------------- 1 | pytorchvideo.models.simclr 2 | ================================= 3 | 4 | 5 | .. automodule:: pytorchvideo.models.simclr 6 | :members: -------------------------------------------------------------------------------- /docs/source/api/models/slowfast.rst: -------------------------------------------------------------------------------- 1 | pytorchvideo.models.slowfast 2 | ================================= 3 | 4 | 5 | .. automodule:: pytorchvideo.models.slowfast 6 | :members: -------------------------------------------------------------------------------- /docs/source/api/models/stem.rst: -------------------------------------------------------------------------------- 1 | pytorchvideo.models.stem 2 | ================================= 3 | 4 | 5 | .. automodule:: pytorchvideo.models.stem 6 | :members: -------------------------------------------------------------------------------- /docs/source/api/models/x3d.rst: -------------------------------------------------------------------------------- 1 | pytorchvideo.models.x3d 2 | ================================= 3 | 4 | 5 | .. automodule:: pytorchvideo.models.x3d 6 | :members: -------------------------------------------------------------------------------- /docs/source/api/transforms/index.rst: -------------------------------------------------------------------------------- 1 | Transforms API 2 | ================== 3 | 4 | .. toctree:: 5 | 6 | transforms -------------------------------------------------------------------------------- /docs/source/api/transforms/transforms.rst: -------------------------------------------------------------------------------- 1 | pytorchvideo.transforms 2 | ================================== 3 | 4 | 5 | .. 
automodule:: pytorchvideo.transforms 6 | :imported-members: 7 | :members: 8 | :undoc-members: 9 | :show-inheritance: 10 | 11 | 12 | pytorchvideo.transforms.functional 13 | ================================== 14 | 15 | 16 | .. automodule:: pytorchvideo.transforms.functional 17 | :imported-members: 18 | :members: 19 | :undoc-members: 20 | :show-inheritance: 21 | -------------------------------------------------------------------------------- /docs/source/index.rst: -------------------------------------------------------------------------------- 1 | .. pytorchvideo documentation master file, created by 2 | sphinx-quickstart on Tue Feb 23 17:19:36 2021. 3 | You can adapt this file completely to your liking, but it should at least 4 | contain the root `toctree` directive. 5 | 6 | :github_url: https://github.com/facebookresearch/pytorchvideo/ 7 | 8 | 9 | PyTorchVideo Documentation 10 | ======================================== 11 | 12 | .. toctree:: 13 | :maxdepth: 1 14 | :caption: Models 15 | 16 | models 17 | model_zoo 18 | api/models/index 19 | 20 | .. toctree:: 21 | :maxdepth: 1 22 | :caption: Data 23 | 24 | data 25 | data_preparation 26 | api/data/index 27 | 28 | .. toctree:: 29 | :maxdepth: 1 30 | :caption: Transforms 31 | 32 | transforms 33 | api/transforms/index 34 | 35 | .. toctree:: 36 | :maxdepth: 1 37 | :caption: Layers 38 | 39 | layers 40 | api/layers/index 41 | 42 | .. toctree:: 43 | :maxdepth: 1 44 | :caption: Accelerator 45 | 46 | accelerator 47 | 48 | -------------------------------------------------------------------------------- /docs/source/layers.md: -------------------------------------------------------------------------------- 1 | # Overview 2 | 3 | 4 | PyTorchVideo is an open-source video understanding library that provides up-to-date builders for state-of-the-art video understanding backbones, layers, heads, and losses, addressing different tasks including acoustic event detection, action recognition (video classification), action detection (video detection), multimodal understanding (acoustic-visual classification), and self-supervised learning. 5 | 6 | The layers subpackage contains definitions for the following layers and activations: 7 | 8 | 9 | * Layer 10 | * [BatchNorm](https://arxiv.org/abs/1502.03167) 11 | * [2+1 Conv](https://arxiv.org/abs/1711.11248) 12 | * ConCat 13 | * MLP 14 | * [Nonlocal Net](https://arxiv.org/abs/1711.07971) 15 | * Positional Encoding 16 | * [Squeeze and Excitation](https://arxiv.org/abs/1709.01507) 17 | * [Swish](https://arxiv.org/abs/1710.05941) 18 | 19 | ## Build standard models 20 | 21 | PyTorchVideo provides default builders to construct state-of-the-art video understanding layers and activations.
22 | 23 | 24 | ### Layers 25 | 26 | You can construct a layer with random weights by calling its constructor: 27 | 28 | ``` 29 | import pytorchvideo.layers as layers 30 | 31 | nonlocal_layer = layers.create_nonlocal(dim_in=256, dim_inner=128) 32 | swish = layers.Swish() 33 | conv_2plus1d = layers.create_conv_2plus1d(in_channels=256, out_channels=512) 34 | ``` 35 | 36 | You can verify whether you have built the layer successfully by: 37 | 38 | ``` 39 | import torch 40 | import pytorchvideo.layers as layers 41 | nonlocal_layer = layers.create_nonlocal(dim_in=256, dim_inner=128) 42 | B, C, T, H, W = 2, 256, 4, 14, 14 43 | input_tensor = torch.zeros(B, C, T, H, W) 44 | output = nonlocal_layer(input_tensor) 45 | 46 | swish = layers.Swish() 47 | B, C, T, H, W = 2, 256, 4, 14, 14 48 | input_tensor = torch.zeros(B, C, T, H, W) 49 | output = swish(input_tensor) 50 | 51 | conv_2plus1d = layers.create_conv_2plus1d(in_channels=256, out_channels=512) 52 | B, C, T, H, W = 2, 256, 4, 14, 14 53 | input_tensor = torch.zeros(B, C, T, H, W) 54 | output = conv_2plus1d(input_tensor) 55 | ``` 56 | -------------------------------------------------------------------------------- /docs/source/transforms.md: -------------------------------------------------------------------------------- 1 | # Overview 2 | 3 | The PyTorchVideo transforms package contains common video algorithms used for preprocessing and/or augmenting video data. The package also contains helper dictionary transforms that are useful for interoperability between PyTorchVideo [dataset's clip outputs](https://pytorchvideo.readthedocs.io/en/latest/data.html) and domain-specific transforms. For example, here is a standard transform pipeline for a video model that could be used with a PyTorchVideo dataset: 4 | 5 | ```python 6 | transform = torchvision.transforms.Compose([ 7 | pytorchvideo.transforms.ApplyTransformToKey( 8 | key="video", 9 | transform=torchvision.transforms.Compose([ 10 | pytorchvideo.transforms.UniformTemporalSubsample(8), 11 | pytorchvideo.transforms.Normalize((0.45, 0.45, 0.45), (0.225, 0.225, 0.225)), 12 | pytorchvideo.transforms.RandomShortSideScale(min_size=256, max_size=320), 13 | torchvision.transforms.RandomCrop(244), 14 | torchvision.transforms.RandomHorizontalFlip(p=0.5), 15 | ]) 16 | ) 17 | ]) 18 | dataset = pytorchvideo.data.Kinetics( 19 | data_path="path/to/kinetics_root/train.csv", 20 | clip_sampler=pytorchvideo.data.make_clip_sampler("random", duration=2), 21 | transform=transform 22 | ) 23 | ``` 24 | 25 | Notice how the example also includes transforms from TorchVision? PyTorchVideo uses the same canonical tensor shape as TorchVision for video and TorchAudio for audio. This allows the frameworks to be used together freely. 26 | 27 | ## Transform vs Functional interface 28 | 29 | The example above demonstrates the [```pytorchvideo.transforms```](https://pytorchvideo.readthedocs.io/en/latest/api/transforms/transforms.html) interface. These transforms are [```torch.nn.Module```](https://pytorch.org/docs/stable/generated/torch.nn.Module.html) callable classes that can be strung together in a declarative way. PyTorchVideo also provides a [```pytorchvideo.transforms.functional```](https://pytorchvideo.readthedocs.io/en/latest/api/transforms/transforms.html#pytorchvideo-transforms-functional) interface, which exposes the functions that the transform API uses. These allow more fine-grained control over the transformations and may be more suitable for use outside the dataset preprocessing use case.
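To make the contrast concrete, here is a minimal sketch of the functional interface. It assumes the `uniform_temporal_subsample` and `short_side_scale` functions exported by `pytorchvideo.transforms.functional`, and it uses a dummy clip tensor rather than a real decoded video; exact signatures may differ slightly between releases.

```python
import torch
from pytorchvideo.transforms.functional import (
    short_side_scale,
    uniform_temporal_subsample,
)

# A dummy clip in the canonical (C, T, H, W) layout shared with TorchVision.
video = torch.rand(3, 16, 256, 320)

# Call the functions directly instead of composing nn.Module transforms,
# which makes it easy to mix in custom logic between steps.
video = uniform_temporal_subsample(video, num_samples=8)
video = short_side_scale(video, size=256)
```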
30 | 31 | ## Scriptable transforms 32 | 33 | All non-OpenCV transforms are TorchScriptable, as described in the [TorchVision docs](https://pytorch.org/vision/stable/transforms.html#scriptable-transforms). To script the transforms together, please use [```torch.nn.Sequential```](https://pytorch.org/docs/stable/generated/torch.nn.Sequential.html) instead of [```torchvision.transforms.Compose```](https://pytorch.org/vision/stable/transforms.html#torchvision.transforms.Compose). 34 | -------------------------------------------------------------------------------- /hubconf.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | 3 | dependencies = ["torch"] 4 | from pytorchvideo.models.hub import ( # noqa: F401, E402 5 | c2d_r50, 6 | csn_r101, 7 | efficient_x3d_s, 8 | efficient_x3d_xs, 9 | i3d_r50, 10 | mvit_base_16, 11 | mvit_base_16x4, 12 | mvit_base_32x3, 13 | r2plus1d_r50, 14 | slow_r50, 15 | slow_r50_detection, 16 | slowfast_16x8_r101_50_50, 17 | slowfast_r101, 18 | slowfast_r50, 19 | slowfast_r50_detection, 20 | x3d_l, 21 | x3d_m, 22 | x3d_s, 23 | x3d_xs, 24 | ) 25 | -------------------------------------------------------------------------------- /projects/video_nerf/download_objectron_data.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | 3 | import os 4 | 5 | import requests 6 | 7 | 8 | # URLs for downloading the Objectron dataset 9 | public_url = "https://storage.googleapis.com/objectron" 10 | blob_path = public_url + "/v1/index/chair_annotations_train" 11 | video_ids = requests.get(blob_path).text 12 | video_ids = video_ids.split("\n") 13 | 14 | DATA_PATH = "./nerf/data/objectron" 15 | 16 | os.makedirs(DATA_PATH, exist_ok=True) 17 | 18 | # Download a video of a chair. 19 | for i in range(3, 4): 20 | video_filename = public_url + "/videos/" + video_ids[i] + "/video.MOV" 21 | metadata_filename = public_url + "/videos/" + video_ids[i] + "/geometry.pbdata" 22 | annotation_filename = public_url + "/annotations/" + video_ids[i] + ".pbdata" 23 | 24 | # This file contains the bundle adjusted cameras 25 | sfm_filename = public_url + "/videos/" + video_ids[i] + "/sfm_arframe.pbdata" 26 | 27 | # video.content contains the video file. 28 | video = requests.get(video_filename) 29 | metadata = requests.get(metadata_filename) 30 | 31 | # Please refer to the Parse Annotation tutorial to see how to parse the annotation files.
32 | annotation = requests.get(annotation_filename) 33 | 34 | sfm = requests.get(sfm_filename) 35 | 36 | video_path = os.path.join(DATA_PATH, "video.MOV") 37 | print("Writing video to %s" % video_path) 38 | file = open(video_path, "wb") 39 | file.write(video.content) 40 | file.close() 41 | 42 | geometry_path = os.path.join(DATA_PATH, "geometry.pbdata") 43 | print("Writing geometry data to %s" % geometry_path) 44 | file = open(geometry_path, "wb") 45 | file.write(metadata.content) 46 | file.close() 47 | 48 | annotation_path = os.path.join(DATA_PATH, "annotation.pbdata") 49 | print("Writing annotation data to %s" % annotation_path) 50 | file = open(annotation_path, "wb") 51 | file.write(annotation.content) 52 | file.close() 53 | 54 | sfm_arframe_path = os.path.join(DATA_PATH, "sfm_arframe.pbdata") 55 | print("Writing bundle adjusted camera data to %s" % sfm_arframe_path) 56 | file = open(sfm_arframe_path, "wb") 57 | file.write(sfm.content) 58 | file.close() 59 | -------------------------------------------------------------------------------- /projects/video_nerf/objectron.yaml: -------------------------------------------------------------------------------- 1 | seed: 3 2 | resume: True 3 | stats_print_interval: 10 4 | validation_epoch_interval: 5 5 | checkpoint_epoch_interval: 30 6 | checkpoint_path: 'checkpoints/objectron.pth' 7 | data: 8 | dataset_name: 'objectron' 9 | image_size: [1440, 1920] # [height, width] 10 | precache_rays: True 11 | test: 12 | mode: 'evaluation' 13 | trajectory_type: 'circular' 14 | up: [0.0, 1.0, 0.0] 15 | scene_center: [-0.5365, -1.05, 7.6191] 16 | n_frames: 50 17 | fps: 1 18 | trajectory_scale: 0.2 19 | optimizer: 20 | max_epochs: 20000 21 | lr: 0.0005 22 | lr_scheduler_step_size: 5000 23 | lr_scheduler_gamma: 0.1 24 | visualization: 25 | history_size: 10 26 | visdom: True 27 | visdom_server: 'localhost' 28 | visdom_port: 8097 29 | visdom_env: 'objectron' 30 | raysampler: 31 | n_pts_per_ray: 64 32 | n_pts_per_ray_fine: 64 33 | n_rays_per_image: 1024 34 | min_depth: 0.1 35 | max_depth: 100.0 36 | stratified: True 37 | stratified_test: False 38 | chunk_size_test: 6000 39 | implicit_function: 40 | n_harmonic_functions_xyz: 10 41 | n_harmonic_functions_dir: 4 42 | n_hidden_neurons_xyz: 256 43 | n_hidden_neurons_dir: 128 44 | density_noise_std: 0.0 45 | n_layers_xyz: 8 46 | -------------------------------------------------------------------------------- /pytorchvideo/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | 3 | __version__ = "0.1.5" 4 | -------------------------------------------------------------------------------- /pytorchvideo/accelerator/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | -------------------------------------------------------------------------------- /pytorchvideo/accelerator/deployment/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | -------------------------------------------------------------------------------- /pytorchvideo/accelerator/deployment/common/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 
2 | -------------------------------------------------------------------------------- /pytorchvideo/accelerator/deployment/mobile_cpu/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | -------------------------------------------------------------------------------- /pytorchvideo/accelerator/deployment/mobile_cpu/transmuter/__init__.py: -------------------------------------------------------------------------------- 1 | from pytorchvideo.accelerator.deployment.common.model_transmuter import ( 2 | EFFICIENT_BLOCK_TRANSMUTER_REGISTRY, 3 | ) 4 | 5 | from .transmuter_mobile_cpu import EFFICIENT_BLOCK_TRANSMUTER_MOBILE_CPU 6 | 7 | 8 | EFFICIENT_BLOCK_TRANSMUTER_REGISTRY["mobile_cpu"] = ( 9 | EFFICIENT_BLOCK_TRANSMUTER_MOBILE_CPU 10 | ) 11 | -------------------------------------------------------------------------------- /pytorchvideo/accelerator/deployment/mobile_cpu/utils/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | -------------------------------------------------------------------------------- /pytorchvideo/accelerator/efficient_blocks/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | -------------------------------------------------------------------------------- /pytorchvideo/accelerator/efficient_blocks/efficient_block_base.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | 3 | from abc import abstractmethod 4 | 5 | import torch.nn as nn 6 | 7 | 8 | class EfficientBlockBase(nn.Module): 9 | """ 10 | PyTorchVideo/accelerator provides a set of efficient blocks 11 | that have optimal efficiency for each target hardware device. 12 | 13 | Each efficient block has two forms: 14 | - original form: this form is for training. When efficient block is instantiated, 15 | it is in this original form. 16 | - deployable form: this form is for deployment. Once the network is ready for 17 | deploy, it can be converted into deployable form for efficient execution 18 | on target hardware. One block is transformed into deployable form by calling 19 | convert() method. By conversion to deployable form, 20 | various optimization (operator fuse, kernel optimization, etc.) are applied. 21 | 22 | EfficientBlockBase is the base class for efficient blocks. 23 | All efficient blocks should inherit this base class 24 | and implement following methods: 25 | - forward(): same as required by nn.Module 26 | - convert(): called to convert block into deployable form 27 | """ 28 | 29 | @abstractmethod 30 | def convert(self): 31 | pass 32 | 33 | @abstractmethod 34 | def forward(self): 35 | pass 36 | -------------------------------------------------------------------------------- /pytorchvideo/accelerator/efficient_blocks/no_op_convert_block.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 
2 | 3 | import torch.nn as nn 4 | 5 | from .efficient_block_base import EfficientBlockBase 6 | 7 | 8 | class NoOpConvertBlock(EfficientBlockBase): 9 | """ 10 | This class provides an interface with EfficientBlockBase for modules that do not 11 | need convert. 12 | Args: 13 | model (nn.Module): NoOpConvertBlock takes model as input and generate a wrapper 14 | instance of EfficientBlockBase with same functionality as model, with no change 15 | applied when convert() is called. 16 | """ 17 | 18 | def __init__(self, model: nn.Module): 19 | super().__init__() 20 | self.model = model 21 | 22 | def convert(self, *args, **kwargs): 23 | pass 24 | 25 | def forward(self, x): 26 | return self.model(x) 27 | -------------------------------------------------------------------------------- /pytorchvideo/data/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | 3 | from .ava import Ava # noqa 4 | from .charades import Charades # noqa 5 | from .clip_sampling import ( # noqa; noqa 6 | ClipSampler, 7 | make_clip_sampler, 8 | RandomClipSampler, 9 | UniformClipSampler, 10 | ) 11 | from .domsev import DomsevFrameDataset, DomsevVideoDataset # noqa 12 | from .epic_kitchen_forecasting import EpicKitchenForecasting # noqa 13 | from .epic_kitchen_recognition import EpicKitchenRecognition # noqa 14 | from .hmdb51 import Hmdb51 # noqa 15 | from .kinetics import Kinetics # noqa 16 | from .labeled_video_dataset import labeled_video_dataset, LabeledVideoDataset # noqa 17 | from .ssv2 import SSv2 18 | from .ucf101 import Ucf101 # noqa 19 | -------------------------------------------------------------------------------- /pytorchvideo/data/decoder.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | from enum import Enum 3 | 4 | 5 | class DecoderType(Enum): 6 | PYAV = "pyav" 7 | TORCHVISION = "torchvision" 8 | DECORD = "decord" 9 | -------------------------------------------------------------------------------- /pytorchvideo/data/ego4d/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | 3 | from .ego4d_dataset import Ego4dMomentsDataset 4 | -------------------------------------------------------------------------------- /pytorchvideo/data/encoded_video.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | 3 | import io 4 | import logging 5 | import pathlib 6 | from typing import Any, Dict 7 | 8 | from iopath.common.file_io import g_pathmgr 9 | from pytorchvideo.data.decoder import DecoderType 10 | 11 | from .video import Video 12 | 13 | 14 | logger = logging.getLogger(__name__) 15 | 16 | 17 | def select_video_class(decoder: str) -> Video: 18 | """ 19 | Select the class for accessing clips based on provided decoder string 20 | 21 | Args: 22 | decoder (str): Defines what type of decoder used to decode a video. 
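        Only the module for the requested decoder is imported, so optional backends
        (e.g. decord) are needed only when actually selected.

        Example (illustrative)::

            video_cls = select_video_class("pyav")         # -> EncodedVideoPyAV
            video_cls = select_video_class("torchvision")  # -> EncodedVideoTorchVision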
23 | """ 24 | if DecoderType(decoder) == DecoderType.PYAV: 25 | from .encoded_video_pyav import EncodedVideoPyAV 26 | 27 | video_cls = EncodedVideoPyAV 28 | elif DecoderType(decoder) == DecoderType.TORCHVISION: 29 | from .encoded_video_torchvision import EncodedVideoTorchVision 30 | 31 | video_cls = EncodedVideoTorchVision 32 | elif DecoderType(decoder) == DecoderType.DECORD: 33 | from .encoded_video_decord import EncodedVideoDecord 34 | 35 | video_cls = EncodedVideoDecord 36 | else: 37 | raise NotImplementedError(f"Unknown decoder type {decoder}") 38 | 39 | return video_cls 40 | 41 | 42 | class EncodedVideo(Video): 43 | """ 44 | EncodedVideo is an abstraction for accessing clips from an encoded video. 45 | It supports selective decoding when header information is available. 46 | """ 47 | 48 | @classmethod 49 | def from_path( 50 | cls, 51 | file_path: str, 52 | decode_video: bool = True, 53 | decode_audio: bool = True, 54 | decoder: str = "pyav", 55 | **other_args: Dict[str, Any], 56 | ): 57 | """ 58 | Fetches the given video path using PathManager (allowing remote uris to be 59 | fetched) and constructs the EncodedVideo object. 60 | 61 | Args: 62 | file_path (str): a PathManager file-path. 63 | """ 64 | # We read the file with PathManager so that we can read from remote uris. 65 | with g_pathmgr.open(file_path, "rb") as fh: 66 | video_file = io.BytesIO(fh.read()) 67 | 68 | video_cls = select_video_class(decoder) 69 | return video_cls( 70 | file=video_file, 71 | video_name=pathlib.Path(file_path).name, 72 | decode_video=decode_video, 73 | decode_audio=decode_audio, 74 | **other_args, 75 | ) 76 | -------------------------------------------------------------------------------- /pytorchvideo/data/epic_kitchen/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | 3 | from .epic_kitchen_dataset import ActionData, EpicKitchenDataset 4 | -------------------------------------------------------------------------------- /pytorchvideo/data/kinetics.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | 3 | from typing import Any, Callable, Dict, Optional, Type 4 | 5 | import torch 6 | from pytorchvideo.data.clip_sampling import ClipSampler 7 | 8 | from .labeled_video_dataset import labeled_video_dataset, LabeledVideoDataset 9 | 10 | 11 | """ 12 | Action recognition video dataset for Kinetics-{400,600,700} 13 | 14 | """ 15 | 16 | 17 | def Kinetics( 18 | data_path: str, 19 | clip_sampler: ClipSampler, 20 | video_sampler: Type[torch.utils.data.Sampler] = torch.utils.data.RandomSampler, 21 | transform: Optional[Callable[[Dict[str, Any]], Dict[str, Any]]] = None, 22 | video_path_prefix: str = "", 23 | decode_audio: bool = True, 24 | decoder: str = "pyav", 25 | ) -> LabeledVideoDataset: 26 | """ 27 | A helper function to create ``LabeledVideoDataset`` object for the Kinetics dataset. 28 | 29 | Args: 30 | data_path (str): Path to the data. The path type defines how the data 31 | should be read: 32 | 33 | * For a file path, the file is read and each line is parsed into a 34 | video path and label. 35 | * For a directory, the directory structure defines the classes 36 | (i.e. each subdirectory is a class). 37 | 38 | clip_sampler (ClipSampler): Defines how clips should be sampled from each 39 | video. See the clip sampling documentation for more information. 
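            For example (illustrative), ``pytorchvideo.data.make_clip_sampler("random", 2.0)``
            builds a sampler that draws one random 2-second clip per video.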
40 | 41 | video_sampler (Type[torch.utils.data.Sampler]): Sampler for the internal 42 | video container. This defines the order videos are decoded and, 43 | if necessary, the distributed split. 44 | 45 | transform (Callable): This callable is evaluated on the clip output before 46 | the clip is returned. It can be used for user defined preprocessing and 47 | augmentations to the clips. See the ``LabeledVideoDataset`` class for clip 48 | output format. 49 | 50 | video_path_prefix (str): Path to root directory with the videos that are 51 | loaded in ``LabeledVideoDataset``. All the video paths before loading 52 | are prefixed with this path. 53 | 54 | decode_audio (bool): If True, also decode audio from video. 55 | 56 | decoder (str): Defines what type of decoder used to decode a video. 57 | 58 | """ 59 | 60 | torch._C._log_api_usage_once("PYTORCHVIDEO.dataset.Kinetics") 61 | 62 | return labeled_video_dataset( 63 | data_path, 64 | clip_sampler, 65 | video_sampler, 66 | transform, 67 | video_path_prefix, 68 | decode_audio, 69 | decoder, 70 | ) 71 | -------------------------------------------------------------------------------- /pytorchvideo/data/ucf101.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | 3 | from typing import Any, Callable, Dict, Optional, Type 4 | 5 | import torch 6 | from pytorchvideo.data.clip_sampling import ClipSampler 7 | 8 | from .labeled_video_dataset import labeled_video_dataset, LabeledVideoDataset 9 | 10 | 11 | """ 12 | Action recognition video dataset for UCF101 13 | 14 | """ 15 | 16 | 17 | def Ucf101( 18 | data_path: str, 19 | clip_sampler: ClipSampler, 20 | video_sampler: Type[torch.utils.data.Sampler] = torch.utils.data.RandomSampler, 21 | transform: Optional[Callable[[Dict[str, Any]], Dict[str, Any]]] = None, 22 | video_path_prefix: str = "", 23 | decode_audio: bool = True, 24 | decoder: str = "pyav", 25 | ) -> LabeledVideoDataset: 26 | """ 27 | A helper function to create ``LabeledVideoDataset`` object for the Ucf101 dataset. 28 | 29 | Args: 30 | data_path (str): Path to the data. The path type defines how the data 31 | should be read: 32 | 33 | * For a file path, the file is read and each line is parsed into a 34 | video path and label. 35 | * For a directory, the directory structure defines the classes 36 | (i.e. each subdirectory is a class). 37 | 38 | clip_sampler (ClipSampler): Defines how clips should be sampled from each 39 | video. See the clip sampling documentation for more information. 40 | 41 | video_sampler (Type[torch.utils.data.Sampler]): Sampler for the internal 42 | video container. This defines the order videos are decoded and, 43 | if necessary, the distributed split. 44 | 45 | transform (Callable): This callable is evaluated on the clip output before 46 | the clip is returned. It can be used for user defined preprocessing and 47 | augmentations to the clips. See the ``LabeledVideoDataset`` class for clip 48 | output format. 49 | 50 | video_path_prefix (str): Path to root directory with the videos that are 51 | loaded in ``LabeledVideoDataset``. All the video paths before loading 52 | are prefixed with this path. 53 | 54 | decode_audio (bool): If True, also decode audio from video. 55 | 56 | decoder (str): Defines what type of decoder used to decode a video. 
57 | 58 | """ 59 | 60 | torch._C._log_api_usage_once("PYTORCHVIDEO.dataset.Ucf101") 61 | 62 | return labeled_video_dataset( 63 | data_path, 64 | clip_sampler, 65 | video_sampler, 66 | transform, 67 | video_path_prefix, 68 | decode_audio, 69 | decoder, 70 | ) 71 | -------------------------------------------------------------------------------- /pytorchvideo/data/video.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | 3 | from abc import ABC, abstractmethod 4 | from typing import BinaryIO, Dict, Optional 5 | 6 | import torch 7 | from iopath.common.file_io import g_pathmgr 8 | 9 | 10 | class VideoPathHandler: 11 | """ 12 | Utility class that handles all deciphering and caching of video paths for 13 | encoded and frame videos. 14 | """ 15 | 16 | def __init__(self) -> None: 17 | # Pathmanager isn't guaranteed to be in correct order, 18 | # sorting is expensive, so we cache paths in case of frame video and reuse. 19 | self.path_order_cache = {} 20 | 21 | def video_from_path( 22 | self, filepath, decode_video=True, decode_audio=False, decoder="pyav", fps=30 23 | ): 24 | try: 25 | is_file = g_pathmgr.isfile(filepath) 26 | is_dir = g_pathmgr.isdir(filepath) 27 | except NotImplementedError: 28 | # Not all PathManager handlers support is{file,dir} functions, when this is the 29 | # case, we default to assuming the path is a file. 30 | is_file = True 31 | is_dir = False 32 | 33 | if is_file: 34 | from pytorchvideo.data.encoded_video import EncodedVideo 35 | 36 | return EncodedVideo.from_path( 37 | filepath, 38 | decode_video=decode_video, 39 | decode_audio=decode_audio, 40 | decoder=decoder, 41 | ) 42 | elif is_dir: 43 | from pytorchvideo.data.frame_video import FrameVideo 44 | 45 | assert not decode_audio, "decode_audio must be False when using FrameVideo" 46 | return FrameVideo.from_directory( 47 | filepath, fps, path_order_cache=self.path_order_cache 48 | ) 49 | else: 50 | raise FileNotFoundError(f"{filepath} not found.") 51 | 52 | 53 | class Video(ABC): 54 | """ 55 | Video provides an interface to access clips from a video container. 56 | """ 57 | 58 | @abstractmethod 59 | def __init__( 60 | self, 61 | file: BinaryIO, 62 | video_name: Optional[str] = None, 63 | decode_audio: bool = True, 64 | ) -> None: 65 | """ 66 | Args: 67 | file (BinaryIO): a file-like object (e.g. io.BytesIO or io.StringIO) that 68 | contains the encoded video. 69 | """ 70 | pass 71 | 72 | @property 73 | @abstractmethod 74 | def duration(self) -> float: 75 | """ 76 | Returns: 77 | duration of the video in seconds 78 | """ 79 | pass 80 | 81 | @abstractmethod 82 | def get_clip( 83 | self, start_sec: float, end_sec: float 84 | ) -> Dict[str, Optional[torch.Tensor]]: 85 | """ 86 | Retrieves frames from the internal video at the specified start and end times 87 | in seconds (the video always starts at 0 seconds). 88 | 89 | Args: 90 | start_sec (float): the clip start time in seconds 91 | end_sec (float): the clip end time in seconds 92 | Returns: 93 | video_data_dictonary: A dictionary mapping strings to tensor of the clip's 94 | underlying data. 95 | 96 | """ 97 | pass 98 | 99 | def close(self): 100 | pass 101 | -------------------------------------------------------------------------------- /pytorchvideo/layers/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 
All Rights Reserved 2 | 3 | from .attention import Mlp, MultiScaleAttention, MultiScaleBlock 4 | from .attention_torchscript import ScriptableMultiScaleBlock 5 | from .drop_path import DropPath 6 | from .fusion import ConcatFusion, make_fusion_layer, ReduceFusion 7 | from .mlp import make_multilayer_perceptron 8 | from .positional_encoding import PositionalEncoding, SpatioTemporalClsPositionalEncoding 9 | from .positional_encoding_torchscript import ( 10 | ScriptableSpatioTemporalClsPositionalEncoding, 11 | ) 12 | -------------------------------------------------------------------------------- /pytorchvideo/layers/accelerator/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | -------------------------------------------------------------------------------- /pytorchvideo/layers/accelerator/mobile_cpu/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | -------------------------------------------------------------------------------- /pytorchvideo/layers/accelerator/mobile_cpu/activation_functions.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | 3 | """ 4 | This file contains supported activation functions in efficient block and helper code. 5 | All supported activation functions are child class of EfficientBlockBase, and included 6 | in supported_act_functions. 7 | """ 8 | 9 | import torch 10 | import torch.nn as nn 11 | from pytorchvideo.accelerator.efficient_blocks.efficient_block_base import ( 12 | EfficientBlockBase, 13 | ) 14 | from pytorchvideo.layers.swish import Swish as SwishCustomOp 15 | 16 | 17 | class _NaiveSwish(nn.Module): 18 | """ 19 | Helper class to implement naive swish for deploy. It is not intended to be used to 20 | build network. 21 | """ 22 | 23 | def __init__(self): 24 | super().__init__() 25 | self.mul_func = nn.quantized.FloatFunctional() 26 | 27 | def forward(self, x): 28 | return self.mul_func.mul(x, torch.sigmoid(x)) 29 | 30 | 31 | class Swish(EfficientBlockBase): 32 | """ 33 | Swish activation function for efficient block. When in original form for training, 34 | using custom op version of swish for better training memory efficiency. When in 35 | deployable form, use naive swish as custom op is not supported to run on Pytorch 36 | Mobile. For better latency on mobile CPU, use HardSwish instead. 37 | """ 38 | 39 | def __init__(self): 40 | super().__init__() 41 | self.act = SwishCustomOp() 42 | 43 | def forward(self, x): 44 | return self.act(x) 45 | 46 | def convert(self, *args, **kwarg): 47 | self.act = _NaiveSwish() 48 | 49 | 50 | class HardSwish(EfficientBlockBase): 51 | """ 52 | Hardswish activation function. It is natively supported by Pytorch Mobile, and has 53 | better latency than Swish in int8 mode. 54 | """ 55 | 56 | def __init__(self): 57 | super().__init__() 58 | self.act = nn.Hardswish() 59 | 60 | def forward(self, x): 61 | return self.act(x) 62 | 63 | def convert(self, *args, **kwarg): 64 | pass 65 | 66 | 67 | class ReLU(EfficientBlockBase): 68 | """ 69 | ReLU activation function for EfficientBlockBase. 
70 | """ 71 | 72 | def __init__(self): 73 | super().__init__() 74 | self.act = nn.ReLU(inplace=True) 75 | 76 | def forward(self, x): 77 | return self.act(x) 78 | 79 | def convert(self, *args, **kwarg): 80 | pass 81 | 82 | 83 | class Identity(EfficientBlockBase): 84 | """ 85 | Identity operation for EfficientBlockBase. 86 | """ 87 | 88 | def __init__(self): 89 | super().__init__() 90 | self.act = nn.Identity() 91 | 92 | def forward(self, x): 93 | return self.act(x) 94 | 95 | def convert(self, *args, **kwarg): 96 | pass 97 | 98 | 99 | supported_act_functions = { 100 | "relu": ReLU, 101 | "swish": Swish, 102 | "hswish": HardSwish, 103 | "identity": Identity, 104 | } 105 | -------------------------------------------------------------------------------- /pytorchvideo/layers/accelerator/mobile_cpu/fully_connected.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | 3 | import torch.nn as nn 4 | from pytorchvideo.accelerator.efficient_blocks.no_op_convert_block import ( 5 | NoOpConvertBlock, 6 | ) 7 | 8 | 9 | class FullyConnected(NoOpConvertBlock): 10 | """ 11 | Implements fully connected layer. This operator is natively supported by QNNPACK for 12 | mobile CPU with good efficiency, and no change is made upon convert(). 13 | Args: 14 | in_features (int): input channels for FC layer. 15 | out_features (int): output channels for FC layer. 16 | bias (bool): if True, bias is applied 17 | """ 18 | 19 | def __init__( 20 | self, 21 | in_features: int, 22 | out_features: int, 23 | bias: bool = True, 24 | ): 25 | super().__init__(model=nn.Linear(in_features, out_features, bias=bias)) 26 | -------------------------------------------------------------------------------- /pytorchvideo/layers/drop_path.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | 3 | import torch 4 | import torch.nn as nn 5 | 6 | 7 | def drop_path( 8 | x: torch.Tensor, drop_prob: float = 0.0, training: bool = False 9 | ) -> torch.Tensor: 10 | """ 11 | Stochastic Depth per sample. 12 | 13 | Args: 14 | x (tensor): Input tensor. 15 | drop_prob (float): Probability to apply drop path. 16 | training (bool): If True, apply drop path to input. Otherwise (tesing), return input. 17 | """ 18 | if drop_prob == 0.0 or not training: 19 | return x 20 | keep_prob = 1 - drop_prob 21 | shape = (x.shape[0],) + (1,) * ( 22 | x.ndim - 1 23 | ) # work with diff dim tensors, not just 2D ConvNets 24 | mask = keep_prob + torch.rand(shape, dtype=x.dtype, device=x.device) 25 | mask.floor_() # binarize 26 | output = x.div(keep_prob) * mask 27 | return output 28 | 29 | 30 | class DropPath(nn.Module): 31 | """ 32 | Drop paths (Stochastic Depth) per sample. 33 | """ 34 | 35 | def __init__(self, drop_prob: float = 0.0) -> None: 36 | """ 37 | Args: 38 | drop_prob (float): Probability to apply drop path. 39 | """ 40 | super(DropPath, self).__init__() 41 | self.drop_prob = drop_prob 42 | 43 | def forward(self, x: torch.Tensor) -> torch.Tensor: 44 | """ 45 | Args: 46 | x (tensor): Input tensor. 47 | """ 48 | return drop_path(x, self.drop_prob, self.training) 49 | -------------------------------------------------------------------------------- /pytorchvideo/layers/mlp.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 
All Rights Reserved 2 | 3 | from typing import Callable, List, Optional, Tuple 4 | 5 | from torch import nn 6 | 7 | 8 | def make_multilayer_perceptron( 9 | fully_connected_dims: List[int], 10 | norm: Optional[Callable] = None, 11 | mid_activation: Callable = nn.ReLU, 12 | final_activation: Optional[Callable] = nn.ReLU, 13 | dropout_rate: float = 0.0, 14 | ) -> Tuple[nn.Module, int]: 15 | """ 16 | Factory function for Multi-Layer Perceptron. These are constructed as repeated 17 | blocks of the following format where each fc represents the blocks output/input dimension. 18 | 19 | :: 20 | 21 | Linear (in=fc[i-1], out=fc[i]) 22 | ↓ 23 | Normalization (norm) 24 | ↓ 25 | Activation (mid_activation) 26 | ↓ 27 | After the repeated Perceptron blocks, 28 | a final dropout and activation layer is applied: 29 | ↓ 30 | Dropout (p=dropout_rate) 31 | ↓ 32 | Activation (final_activation) 33 | 34 | """ 35 | assert isinstance(fully_connected_dims, list) 36 | assert len(fully_connected_dims) > 1 37 | assert all(_is_pos_int(x) for x in fully_connected_dims) 38 | 39 | layers = [] 40 | cur_dim = fully_connected_dims[0] 41 | for dim in fully_connected_dims[1:-1]: 42 | layers.append(nn.Linear(cur_dim, dim)) 43 | if norm is not None: 44 | layers.append(norm(dim)) 45 | layers.append(mid_activation()) 46 | cur_dim = dim 47 | layers.append(nn.Linear(cur_dim, fully_connected_dims[-1])) 48 | if dropout_rate > 0: 49 | layers.append(nn.Dropout(p=dropout_rate)) 50 | if final_activation is not None: 51 | layers.append(final_activation()) 52 | 53 | mlp = nn.Sequential(*layers) 54 | output_dim = fully_connected_dims[-1] 55 | return mlp, output_dim 56 | 57 | 58 | def _is_pos_int(number: int) -> bool: 59 | """ 60 | Returns True if a number is a positive integer. 61 | """ 62 | return type(number) == int and number >= 0 63 | -------------------------------------------------------------------------------- /pytorchvideo/layers/positional_encoding_torchscript.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved 2 | 3 | from typing import Tuple 4 | 5 | import torch 6 | from torch import nn 7 | 8 | 9 | class ScriptableSpatioTemporalClsPositionalEncoding(nn.Module): 10 | """ 11 | Add a cls token and apply a spatiotemporal encoding to a tensor. 12 | """ 13 | 14 | def __init__( 15 | self, 16 | embed_dim: int, 17 | patch_embed_shape: Tuple[int, int, int], 18 | sep_pos_embed: bool = False, 19 | has_cls: bool = True, 20 | ) -> None: 21 | """ 22 | Args: 23 | embed_dim (int): Embedding dimension for input sequence. 24 | patch_embed_shape (Tuple): The number of patches in each dimension 25 | (T, H, W) after patch embedding. 26 | sep_pos_embed (bool): If set to true, one positional encoding is used for 27 | spatial patches and another positional encoding is used for temporal 28 | sequence. Otherwise, only one positional encoding is used for all the 29 | patches. 30 | has_cls (bool): If set to true, a cls token is added in the beginning of each 31 | input sequence. 32 | """ 33 | super().__init__() 34 | assert ( 35 | len(patch_embed_shape) == 3 36 | ), "Patch_embed_shape should be in the form of (T, H, W)." 
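        # This TorchScript-friendly variant currently supports only separate
        # spatial/temporal embeddings and no cls token; the assert below and the
        # one in forward() enforce that configuration.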
37 | assert not has_cls 38 | self.sep_pos_embed = sep_pos_embed 39 | self._patch_embed_shape = patch_embed_shape 40 | self.num_spatial_patch = patch_embed_shape[1] * patch_embed_shape[2] 41 | self.num_temporal_patch = patch_embed_shape[0] 42 | 43 | self.pos_embed_spatial = nn.Parameter( 44 | torch.zeros(1, self.num_spatial_patch, embed_dim) 45 | ) 46 | self.pos_embed_temporal = nn.Parameter( 47 | torch.zeros(1, self.num_temporal_patch, embed_dim) 48 | ) 49 | 50 | @property 51 | def patch_embed_shape(self): 52 | return self._patch_embed_shape 53 | 54 | def forward(self, x: torch.Tensor) -> torch.Tensor: 55 | """ 56 | Args: 57 | x (torch.Tensor): Input tensor. 58 | """ 59 | B, N, C = x.shape 60 | 61 | assert self.sep_pos_embed 62 | pos_embed = self.pos_embed_spatial.repeat( 63 | 1, self.num_temporal_patch, 1 64 | ) + torch.repeat_interleave( 65 | self.pos_embed_temporal, 66 | self.num_spatial_patch, 67 | dim=1, 68 | ) 69 | x = x + pos_embed 70 | return x 71 | -------------------------------------------------------------------------------- /pytorchvideo/layers/swish.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | 3 | import torch 4 | import torch.nn as nn 5 | 6 | 7 | class Swish(nn.Module): 8 | """ 9 | Wrapper for the Swish activation function. 10 | """ 11 | 12 | def forward(self, x): 13 | return SwishFunction.apply(x) 14 | 15 | 16 | class SwishFunction(torch.autograd.Function): 17 | """ 18 | Implementation of the Swish activation function: x * sigmoid(x). 19 | 20 | Searching for activation functions. Ramachandran, Prajit and Zoph, Barret 21 | and Le, Quoc V. 2017 22 | """ 23 | 24 | @staticmethod 25 | def forward(ctx, x): 26 | result = x * torch.sigmoid(x) 27 | ctx.save_for_backward(x) 28 | return result 29 | 30 | @staticmethod 31 | def backward(ctx, grad_output): 32 | (x,) = ctx.saved_tensors 33 | sigmoid_x = torch.sigmoid(x) 34 | return grad_output * (sigmoid_x * (1 + x * (1 - sigmoid_x))) 35 | -------------------------------------------------------------------------------- /pytorchvideo/layers/utils.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | 3 | import math 4 | from typing import List 5 | 6 | 7 | def set_attributes(self, params: List[object] = None) -> None: 8 | """ 9 | An utility function used in classes to set attributes from the input list of parameters. 10 | Args: 11 | params (list): list of parameters. 12 | """ 13 | if params: 14 | for k, v in params.items(): 15 | if k != "self": 16 | setattr(self, k, v) 17 | 18 | 19 | def round_width(width, multiplier, min_width=8, divisor=8, ceil=False): 20 | """ 21 | Round width of filters based on width multiplier 22 | Args: 23 | width (int): the channel dimensions of the input. 24 | multiplier (float): the multiplication factor. 25 | min_width (int): the minimum width after multiplication. 26 | divisor (int): the new width should be dividable by divisor. 27 | ceil (bool): If True, use ceiling as the rounding method. 
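        Example (illustrative)::

            round_width(48, 1.5)   # -> 72 (48 * 1.5 is already a multiple of 8)
            round_width(64, 0.75)  # -> 48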
28 | """ 29 | if not multiplier: 30 | return width 31 | 32 | width *= multiplier 33 | min_width = min_width or divisor 34 | if ceil: 35 | width_out = max(min_width, int(math.ceil(width / divisor)) * divisor) 36 | else: 37 | width_out = max(min_width, int(width + divisor / 2) // divisor * divisor) 38 | if width_out < 0.9 * width: 39 | width_out += divisor 40 | return int(width_out) 41 | 42 | 43 | def round_repeats(repeats, multiplier): 44 | """ 45 | Round number of layers based on depth multiplier. 46 | """ 47 | if not multiplier: 48 | return repeats 49 | return int(math.ceil(multiplier * repeats)) 50 | -------------------------------------------------------------------------------- /pytorchvideo/losses/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookresearch/pytorchvideo/6cdc929315aab1b5674b6dcf73b16ec99147735f/pytorchvideo/losses/__init__.py -------------------------------------------------------------------------------- /pytorchvideo/losses/soft_target_cross_entropy.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | 3 | import torch 4 | import torch.nn as nn 5 | import torch.nn.functional as F 6 | from pytorchvideo.layers.utils import set_attributes 7 | from pytorchvideo.transforms.functional import convert_to_one_hot 8 | 9 | 10 | class SoftTargetCrossEntropyLoss(nn.Module): 11 | """ 12 | Adapted from Classy Vision: ./classy_vision/losses/soft_target_cross_entropy_loss.py. 13 | This allows the targets for the cross entropy loss to be multi-label. 14 | """ 15 | 16 | def __init__( 17 | self, 18 | ignore_index: int = -100, 19 | reduction: str = "mean", 20 | normalize_targets: bool = True, 21 | ) -> None: 22 | """ 23 | Args: 24 | ignore_index (int): sample should be ignored for loss if the class is this value. 25 | reduction (str): specifies reduction to apply to the output. 26 | normalize_targets (bool): whether the targets should be normalized to a sum of 1 27 | based on the total count of positive targets for a given sample. 28 | """ 29 | super().__init__() 30 | set_attributes(self, locals()) 31 | assert isinstance(self.normalize_targets, bool) 32 | if self.reduction not in ["mean", "none"]: 33 | raise NotImplementedError( 34 | 'reduction type "{}" not implemented'.format(self.reduction) 35 | ) 36 | self.eps = torch.finfo(torch.float32).eps 37 | 38 | def forward(self, input: torch.Tensor, target: torch.Tensor) -> torch.Tensor: 39 | """ 40 | Args: 41 | input (torch.Tensor): the shape of the tensor is N x C, where N is the number of 42 | samples and C is the number of classes. The tensor is raw input without 43 | softmax/sigmoid. 44 | target (torch.Tensor): the shape of the tensor is N x C or N. If the shape is N, we 45 | will convert the target to one hot vectors. 46 | """ 47 | # Check if targets are inputted as class integers 48 | if target.ndim == 1: 49 | assert ( 50 | input.shape[0] == target.shape[0] 51 | ), "SoftTargetCrossEntropyLoss requires input and target to have same batch size!" 
52 | target = convert_to_one_hot(target.view(-1, 1), input.shape[1]) 53 | 54 | assert input.shape == target.shape, ( 55 | "SoftTargetCrossEntropyLoss requires input and target to be same " 56 | f"shape: {input.shape} != {target.shape}" 57 | ) 58 | 59 | # Samples where the targets are ignore_index do not contribute to the loss 60 | N, C = target.shape 61 | valid_mask = torch.ones((N, 1), dtype=torch.float).to(input.device) 62 | if 0 <= self.ignore_index <= C - 1: 63 | drop_idx = target[:, self.ignore_index] > 0 64 | valid_mask[drop_idx] = 0 65 | 66 | valid_targets = target.float() * valid_mask 67 | if self.normalize_targets: 68 | valid_targets /= self.eps + valid_targets.sum(dim=1, keepdim=True) 69 | per_sample_per_target_loss = -valid_targets * F.log_softmax(input, -1) 70 | 71 | per_sample_loss = torch.sum(per_sample_per_target_loss, -1) 72 | # Perform reduction 73 | if self.reduction == "mean": 74 | # Normalize based on the number of samples with > 0 non-ignored targets 75 | loss = per_sample_loss.sum() / torch.sum( 76 | (torch.sum(valid_mask, -1) > 0) 77 | ).clamp(min=1) 78 | elif self.reduction == "none": 79 | loss = per_sample_loss 80 | 81 | return loss 82 | -------------------------------------------------------------------------------- /pytorchvideo/models/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | 3 | from .csn import create_csn 4 | from .head import create_res_basic_head, ResNetBasicHead 5 | from .masked_multistream import ( 6 | LearnMaskedDefault, 7 | LSTM, 8 | MaskedMultiPathWay, 9 | MaskedSequential, 10 | MaskedTemporalPooling, 11 | TransposeMultiheadAttention, 12 | TransposeTransformerEncoder, 13 | ) 14 | from .net import MultiPathWayWithFuse, Net 15 | from .resnet import BottleneckBlock, create_bottleneck_block, create_resnet 16 | from .slowfast import create_slowfast 17 | from .stem import create_conv_patch_embed, create_res_basic_stem, ResNetBasicStem 18 | from .vision_transformers import create_multiscale_vision_transformers 19 | from .weight_init import init_net_weights 20 | -------------------------------------------------------------------------------- /pytorchvideo/models/accelerator/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | -------------------------------------------------------------------------------- /pytorchvideo/models/accelerator/mobile_cpu/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | -------------------------------------------------------------------------------- /pytorchvideo/models/hub/README.md: -------------------------------------------------------------------------------- 1 | ## TorchHub Models 2 | 3 | PyTorchVideo provides a large set of state-of-the-art models with pre-trained weights through [TorchHub](https://pytorch.org/hub/). Check the tables below for the torchhub names and corresponding models. 4 | 5 | 6 | ### Kinetics-400 7 | 8 | Models are trained on Kinetics-400.
For more benchmarking and model details, please check the [PyTorchVideo Model Zoo](https://github.com/facebookresearch/pytorchvideo/blob/main/docs/source/model_zoo.md) 9 | 10 | torchhub name | arch | depth | frame length x sample rate | top 1 | top 5 | 11 | ------------------------ | -------- | ----- | -------------------------- | ----- | ----- | 12 | c2d_r50 | C2D | R50 | 8x8 | 71.46 | 89.68 | 13 | i3d_r50 | I3D | R50 | 8x8 | 73.27 | 90.70 | 14 | slow_r50 | Slow | R50 | 8x8 | 74.58 | 91.63 | 15 | slowfast_r50 | SlowFast | R50 | 8x8 | 76.94 | 92.69 | 16 | slowfast_r101 | SlowFast | R101 | 8x8 | 77.90 | 93.27 | 17 | slowfast_16x8_r101_50_50 | SlowFast | R101 | 16x8 | 78.70 | 93.61 | 18 | csn_r101 | CSN | R101 | 32x2 | 77.00 | 92.90 | 19 | r2plus1d_r50 | R(2+1)D | R50 | 16x4 | 76.01 | 92.23 | 20 | x3d_xs | X3D | XS | 4x12 | 69.12 | 88.63 | 21 | x3d_s | X3D | S | 13x6 | 73.33 | 91.27 | 22 | x3d_m | X3D | M | 16x5 | 75.94 | 92.72 | 23 | x3d_l | X3D | L | 16x5 | 77.44 | 93.31 | 24 | 25 | ### PytorchVideo Accelerator Models 26 | 27 | **Efficient Models for mobile CPU** 28 | Models are trained on Kinetics-400. Latency is benchmarked on Samsung S8 phone with 1s input clip length. 29 | 30 | torchhub name | model | top 1 | top 5 | latency (ms) | 31 | ---------------- |--------|-------|-------|--------------| 32 | efficient_x3d_xs | X3D_XS | 68.5 | 88.0 | 233 | 33 | efficient_x3d_s | X3D_S | 73.0 | 90.6 | 764 | 34 | 35 | 36 | 37 | ### Using PyTorchVideo torchhub models 38 | The models have been integrated into TorchHub, so could be loaded with TorchHub with or without pre-trained models. You can specify the torchhub name for the model to construct the model with pre-trained weights: 39 | 40 | ```Python 41 | # Pick a pretrained model 42 | model_name = "slowfast_r50" 43 | model = torch.hub.load("facebookresearch/pytorchvideo:main", model=model_name, pretrained=True) 44 | ``` 45 | 46 | Notes: 47 | * Please check [torchhub inference tutorial](https://pytorchvideo.org/docs/tutorial_torchhub_inference) for more details about how to load models from TorchHub and perform inference. 48 | * Check [Model Zoo](https://github.com/facebookresearch/pytorchvideo/blob/main/docs/source/model_zoo.md) for the full set of supported PytorchVideo model zoo and more details about how the model zoo is prepared. 49 | -------------------------------------------------------------------------------- /pytorchvideo/models/hub/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | 3 | from .csn import csn_r101 4 | from .efficient_x3d_mobile_cpu import efficient_x3d_s, efficient_x3d_xs 5 | from .r2plus1d import r2plus1d_r50 6 | from .resnet import c2d_r50, i3d_r50, slow_r50, slow_r50_detection 7 | from .slowfast import ( 8 | slowfast_16x8_r101_50_50, 9 | slowfast_r101, 10 | slowfast_r50, 11 | slowfast_r50_detection, 12 | ) 13 | from .vision_transformers import mvit_base_16, mvit_base_16x4, mvit_base_32x3 14 | from .x3d import x3d_l, x3d_m, x3d_s, x3d_xs 15 | -------------------------------------------------------------------------------- /pytorchvideo/models/hub/csn.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 
2 | 3 | from typing import Any 4 | 5 | import torch.nn as nn 6 | from pytorchvideo.models.csn import create_csn 7 | from torch.hub import load_state_dict_from_url 8 | 9 | 10 | """ 11 | Channel-Separated Convolutional Network models for video recognition. 12 | """ 13 | 14 | root_dir = "https://dl.fbaipublicfiles.com/pytorchvideo/model_zoo/kinetics" 15 | checkpoint_paths = { 16 | "csn_r101": f"{root_dir}/CSN_32x2_R101.pyth", 17 | } 18 | 19 | 20 | def csn_r101( 21 | pretrained: bool = False, progress: bool = True, **kwargs: Any 22 | ) -> nn.Module: 23 | r""" 24 | Channel-Separated Convolutional Networks (CSN) R101 model architecture [1] 25 | with pretrained weights based on 32x2 setting on the Kinetics dataset. 26 | Model with pretrained weights has top1 accuracy of 77.0 (trained on 16x8 GPUs). 27 | 28 | [1] "Video classification with channel-separated convolutional networks" 29 | Du Tran, Heng Wang, Lorenzo Torresani, Matt Feiszli. ICCV 2019. 30 | https://arxiv.org/abs/1904.02811 31 | 32 | Args: 33 | pretrained (bool): If True, returns a model pre-trained on the Kinetics dataset 34 | progress (bool): If True, displays a progress bar of the download to stderr 35 | kwargs: use these to modify any of the other model settings. All the 36 | options are defined in pytorchvideo/models/resnet.py 37 | 38 | NOTE: to use the pretrained model, do not modify the model configuration 39 | via the kwargs. Only modify settings via kwargs to initialize a new model 40 | without pretrained weights. 41 | """ 42 | model = create_csn( 43 | model_depth=101, 44 | stem_pool=nn.MaxPool3d, 45 | head_pool_kernel_size=(4, 7, 7), 46 | **kwargs, 47 | ) 48 | 49 | if pretrained: 50 | path = checkpoint_paths["csn_r101"] 51 | # All models are loaded onto CPU by default 52 | checkpoint = load_state_dict_from_url( 53 | path, progress=progress, map_location="cpu" 54 | ) 55 | state_dict = checkpoint["model_state"] 56 | model.load_state_dict(state_dict) 57 | 58 | return model 59 | -------------------------------------------------------------------------------- /pytorchvideo/models/hub/efficient_x3d_mobile_cpu.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 
2 | 3 | from typing import Any, Optional 4 | 5 | import torch.nn as nn 6 | from pytorchvideo.models.accelerator.mobile_cpu.efficient_x3d import create_x3d 7 | from torch.hub import load_state_dict_from_url 8 | 9 | 10 | _root_dir = "https://dl.fbaipublicfiles.com/pytorchvideo/model_zoo/kinetics" 11 | _checkpoint_paths = { 12 | "efficient_x3d_xs": f"{_root_dir}/efficient_x3d_xs_original_form.pyth", 13 | "efficient_x3d_s": f"{_root_dir}/efficient_x3d_s_original_form.pyth", 14 | } 15 | 16 | 17 | def _efficient_x3d( 18 | pretrained: bool = False, 19 | progress: bool = True, 20 | checkpoint_path: Optional[str] = None, 21 | # Model params 22 | expansion: str = "XS", 23 | **kwargs: Any, 24 | ) -> nn.Module: 25 | model = create_x3d( 26 | expansion=expansion, 27 | **kwargs, 28 | ) 29 | 30 | if pretrained and checkpoint_path is not None: 31 | # All models are loaded onto CPU by default 32 | state_dict = load_state_dict_from_url( 33 | checkpoint_path, progress=progress, map_location="cpu" 34 | ) 35 | model.load_state_dict(state_dict) 36 | 37 | return model 38 | 39 | 40 | def efficient_x3d_xs(pretrained: bool = False, progress: bool = True, **kwargs): 41 | r""" 42 | X3D-XS model architectures [1] with pretrained weights trained 43 | on the Kinetics dataset with efficient implementation for mobile cpu. 44 | 45 | [1] Christoph Feichtenhofer, "X3D: Expanding Architectures for 46 | Efficient Video Recognition." https://arxiv.org/abs/2004.04730 47 | 48 | Args: 49 | pretrained (bool): If True, returns a model pre-trained on Kinetcis-400 dataset 50 | progress (bool): If True, displays a progress bar of the download to stderr 51 | To modify any other model settings, specify them in the kwargs. 52 | All the args are defined in pytorchvideo/models/x3d.py 53 | """ 54 | return _efficient_x3d( 55 | pretrained=pretrained, 56 | progress=progress, 57 | checkpoint_path=_checkpoint_paths["efficient_x3d_xs"], 58 | expansion="XS", 59 | **kwargs, 60 | ) 61 | 62 | 63 | def efficient_x3d_s(pretrained: bool = False, progress: bool = True, **kwargs): 64 | r""" 65 | X3D-S model architectures [1] with pretrained weights trained 66 | on the Kinetics dataset with efficient implementation for mobile cpu. 67 | 68 | [1] Christoph Feichtenhofer, "X3D: Expanding Architectures for 69 | Efficient Video Recognition." https://arxiv.org/abs/2004.04730 70 | 71 | Args: 72 | pretrained (bool): If True, returns a model pre-trained on Kinetcis-400 dataset 73 | progress (bool): If True, displays a progress bar of the download to stderr 74 | To modify any other model settings, specify them in the kwargs. 75 | All the args are defined in pytorchvideo/models/x3d.py 76 | """ 77 | return _efficient_x3d( 78 | pretrained=pretrained, 79 | progress=progress, 80 | checkpoint_path=_checkpoint_paths["efficient_x3d_s"], 81 | expansion="S", 82 | **kwargs, 83 | ) 84 | -------------------------------------------------------------------------------- /pytorchvideo/models/hub/r2plus1d.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | 3 | from typing import Any 4 | 5 | import torch.nn as nn 6 | from pytorchvideo.models.r2plus1d import create_r2plus1d 7 | from torch.hub import load_state_dict_from_url 8 | 9 | 10 | """ 11 | R(2+1)D style models for video recognition. 
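Example (illustrative)::

    from pytorchvideo.models.hub import r2plus1d_r50

    model = r2plus1d_r50(pretrained=True)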
12 | """ 13 | 14 | root_dir = "https://dl.fbaipublicfiles.com/pytorchvideo/model_zoo/kinetics" 15 | checkpoint_paths = { 16 | "r2plus1d_r50": f"{root_dir}/R2PLUS1D_16x4_R50.pyth", 17 | } 18 | 19 | 20 | def r2plus1d_r50( 21 | pretrained: bool = False, progress: bool = True, **kwargs: Any 22 | ) -> nn.Module: 23 | r""" 24 | 25 | R(2+1)D model architecture from [1] with pretrained weights based on 16x4 setting 26 | on the Kinetics dataset. Model with pretrained weights has top1 accuracy of 76.01. 27 | (trained on 8*8 GPUs) 28 | 29 | [1] "A closer look at spatiotemporal convolutions for action recognition" 30 | Du Tran, Heng Wang, Lorenzo Torresani, Jamie Ray, Yann LeCun, Manohar Paluri. CVPR 2018. 31 | https://arxiv.org/abs/1711.11248 32 | 33 | Args: 34 | pretrained (bool): If True, returns a model pre-trained on the Kinetics dataset 35 | progress (bool): If True, displays a progress bar of the download to stderr 36 | kwargs: use these to modify any of the other model settings. All the 37 | options are defined in pytorchvideo/models/resnet.py 38 | 39 | NOTE: to use the pretrained model, do not modify the model configuration 40 | via the kwargs. Only modify settings via kwargs to initialize a new model 41 | without pretrained weights. 42 | """ 43 | model = create_r2plus1d(dropout_rate=0.5, **kwargs) 44 | 45 | if pretrained: 46 | path = checkpoint_paths["r2plus1d_r50"] 47 | # All models are loaded onto CPU by default 48 | checkpoint = load_state_dict_from_url( 49 | path, progress=progress, map_location="cpu" 50 | ) 51 | state_dict = checkpoint["model_state"] 52 | model.load_state_dict(state_dict) 53 | 54 | return model 55 | -------------------------------------------------------------------------------- /pytorchvideo/models/hub/utils.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | 3 | from typing import Any, Callable, Dict, Optional 4 | 5 | import torch.nn as nn 6 | from torch.hub import load_state_dict_from_url 7 | 8 | 9 | MODEL_ZOO_ROOT_DIR = "https://dl.fbaipublicfiles.com/pytorchvideo/model_zoo" 10 | 11 | 12 | def hub_model_builder( 13 | model_builder_func: Callable, 14 | pretrained: bool = False, 15 | progress: bool = True, 16 | checkpoint_path: str = "", 17 | default_config: Optional[Dict[Any, Any]] = None, 18 | **kwargs: Any, 19 | ) -> nn.Module: 20 | """ 21 | model_builder_func (Callable): Model builder function. 22 | pretrained (bool): Whether to load a pretrained model or not. Default: False. 23 | progress (bool): Whether or not to display a progress bar to stderr. Default: True. 24 | checkpoint_path (str): URL of the model weight to download. 25 | default_config (Dict): Default model configs that is passed to the model builder. 26 | **kwargs: (Any): Additional model configs. Do not modify the model configuration 27 | via the kwargs for pretrained model. 28 | """ 29 | if pretrained: 30 | assert len(kwargs) == 0, "Do not change kwargs for pretrained model." 
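    # Any config key the caller did not supply is filled in from default_config
    # before the model builder is invoked.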
31 | 32 | if default_config is not None: 33 | for argument, value in default_config.items(): 34 | if kwargs.get(argument) is None: 35 | kwargs[argument] = value 36 | 37 | model = model_builder_func(**kwargs) 38 | if pretrained: 39 | # All models are loaded onto CPU by default 40 | checkpoint = load_state_dict_from_url( 41 | checkpoint_path, progress=progress, map_location="cpu" 42 | ) 43 | state_dict = checkpoint["model_state"] 44 | model.load_state_dict(state_dict) 45 | return model 46 | -------------------------------------------------------------------------------- /pytorchvideo/models/simclr.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | 3 | from typing import Optional 4 | 5 | import torch 6 | import torch.distributed as dist 7 | import torch.nn as nn 8 | import torch.nn.functional as F 9 | from fvcore.nn.distributed import differentiable_all_gather 10 | from pytorchvideo.layers.utils import set_attributes 11 | 12 | 13 | class SimCLR(nn.Module): 14 | """ 15 | A Simple Framework for Contrastive Learning of Visual Representations 16 | Details can be found from: 17 | https://arxiv.org/abs/2002.05709 18 | """ 19 | 20 | def __init__( 21 | self, 22 | mlp: nn.Module, 23 | backbone: Optional[nn.Module] = None, 24 | temperature: float = 0.07, 25 | ) -> None: 26 | super().__init__() 27 | 28 | torch._C._log_api_usage_once("PYTORCHVIDEO.model.SimCLR.__init__") 29 | 30 | set_attributes(self, locals()) 31 | 32 | def forward(self, x1: torch.Tensor, x2: torch.Tensor) -> torch.Tensor: 33 | """ 34 | Args: 35 | x1 (torch.tensor): a batch of images with one set of augmentations applied. 36 | The input tensor shape should be compatible with the backbone. 37 | x2 (torch.tensor): the same batch of images with a different set of augmentations 38 | applied. The input tensor shape should be compatible with the backbone. 39 | """ 40 | if self.backbone is not None: 41 | x1 = self.backbone(x1) 42 | x1 = self.mlp(x1) 43 | x1 = F.normalize(x1, p=2, dim=1) 44 | 45 | if self.backbone is not None: 46 | x2 = self.backbone(x2) 47 | x2 = self.mlp(x2) 48 | x2 = F.normalize(x2, p=2, dim=1) 49 | x2 = torch.cat(differentiable_all_gather(x2), dim=0) 50 | 51 | prod = torch.einsum("nc,kc->nk", [x1, x2]) 52 | prod = prod.div(self.temperature) 53 | batch_size = x1.size(0) 54 | if dist.is_available() and dist.is_initialized(): 55 | device_ind = dist.get_rank() 56 | else: 57 | device_ind = 0 58 | gt = ( 59 | torch.tensor( 60 | list(range(device_ind * batch_size, (device_ind + 1) * batch_size)) 61 | ) 62 | .long() 63 | .to(x1.device) 64 | ) 65 | loss = torch.nn.functional.cross_entropy(prod, gt) 66 | return loss 67 | -------------------------------------------------------------------------------- /pytorchvideo/neural_engine/engine.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | 3 | import itertools 4 | from collections import OrderedDict 5 | from typing import List, Union 6 | 7 | import networkx as nx 8 | from pytorchvideo.neural_engine import HookBase 9 | 10 | 11 | class NeuralEngine: 12 | """ 13 | NeuralEngine takes a list of hooks and executes them in their topological order. The 14 | topological order of the hooks is determined by their required inputs and outputs.
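    Example (illustrative, assuming ``hooks`` is a list of HookBase objects whose
    declared inputs and outputs form a DAG)::

        engine = NeuralEngine(hooks)
        status = engine("path/to/video.mp4")  # equivalent to engine.run({"path": ...})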
15 | """ 16 | 17 | def __init__(self, hooks: List[HookBase]) -> None: 18 | self.hooks = hooks 19 | self.execution_order_func = NeuralEngine.topological_sort 20 | 21 | def get_execution_order(self, status): 22 | return self.execution_order_func(status, self.hooks) 23 | 24 | def set_execution_order_func(self, func): 25 | self.execution_order_func = func 26 | 27 | @staticmethod 28 | def topological_sort(status, hooks): 29 | # Get DAG 30 | graph = nx.DiGraph() 31 | edges = [] 32 | pending_outputs = [] 33 | output_to_hook = {} 34 | for hook in hooks: 35 | for pair in itertools.product(hook.get_inputs(), hook.get_outputs()): 36 | edges.append(pair) 37 | for output in hook.get_outputs(): 38 | assert output not in pending_outputs 39 | output_to_hook[output] = hook 40 | pending_outputs.append(output) 41 | graph.add_edges_from(edges) 42 | for _current in nx.topological_sort(graph): 43 | if _current in pending_outputs: 44 | _hook = output_to_hook[_current] 45 | yield _hook 46 | for _hook_out in _hook.get_outputs(): 47 | pending_outputs.remove(_hook_out) 48 | else: 49 | assert _current in status 50 | assert len(pending_outputs) == 0 51 | 52 | def run(self, status: OrderedDict): 53 | for hook in self.get_execution_order(status): 54 | status.update(hook.run(status)) 55 | return status 56 | 57 | def __enter__( 58 | self, 59 | ): 60 | return self 61 | 62 | def __exit__( 63 | self, 64 | type, 65 | value, 66 | traceback, 67 | ): 68 | pass 69 | 70 | def __call__( 71 | self, 72 | status: Union[OrderedDict, str], 73 | ): 74 | # If not specified, the default input should be the path to video. 75 | if type(status) == str: 76 | status = {"path": status} 77 | return self.run(status) 78 | -------------------------------------------------------------------------------- /pytorchvideo/transforms/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | 3 | from .augmix import AugMix # noqa 4 | from .mix import CutMix, MixUp, MixVideo # noqa 5 | from .rand_augment import RandAugment # noqa 6 | from .transforms import * # noqa 7 | from .transforms_factory import create_video_transform # noqa 8 | -------------------------------------------------------------------------------- /pytorchvideo_trainer/README.md: -------------------------------------------------------------------------------- 1 | ## PyTorchVideo Trainer 2 | 3 | A [PyTorch-Lightning](https://www.pytorchlightning.ai/) based trainer supporting PyTorchVideo models and dataloaders for various video understanding tasks. 4 | 5 | Currently supported tasks include: 6 | 7 | - Video Action Recognition: ResNets, SlowFast models, X3D models and MViT 8 | - Video Self-Supervised Learning: SimCLR, BYOL, MoCo 9 | - (Planned) Video Action Detection 10 | 11 | ## Installation 12 | 13 | These instructions assume that both pytorch and torchvision are already installed 14 | using the instructions in [INSTALL.md](https://github.com/facebookresearch/pytorchvideo/blob/main/INSTALL.md#requirements). 15 | 16 | Install the required additional dependency `recipes` by running the following command, 17 | ``` 18 | pip install "git+https://github.com/facebookresearch/recipes.git" 19 | ``` 20 | 21 | Then install PyTorchVideo Trainer by running, 22 | ``` 23 | git clone https://github.com/facebookresearch/pytorchvideo.git 24 | cd pytorchvideo/pytorchvideo_trainer 25 | pip install -e . 26 | 27 | # For developing and testing 28 | pip install -e ".[test,dev]"
29 | ``` 30 | 31 | ## Testing 32 | 33 | Before running the tests, please ensure that you have installed the necessary additional test dependencies. 34 | 35 | Use the following command to run the tests: 36 | ``` 37 | # From the current directory 38 | python -m unittest discover -v -s ./tests 39 | ``` 40 | -------------------------------------------------------------------------------- /pytorchvideo_trainer/pytorchvideo_trainer/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | 3 | 4 | def register_components() -> None: 5 | """ 6 | Calls register_components() for all subfolders so we can register 7 | subcomponents to Hydra's ConfigStore. 8 | """ 9 | import pytorchvideo_trainer.datamodule.datamodule # noqa 10 | import pytorchvideo_trainer.module.byol # noqa 11 | import pytorchvideo_trainer.module.lr_policy # noqa 12 | import pytorchvideo_trainer.module.moco_v2 # noqa 13 | import pytorchvideo_trainer.module.optimizer # noqa 14 | import pytorchvideo_trainer.module.simclr # noqa 15 | import pytorchvideo_trainer.module.video_classification # noqa 16 | import pytorchvideo_trainer.train_app # noqa 17 | -------------------------------------------------------------------------------- /pytorchvideo_trainer/pytorchvideo_trainer/callbacks/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | 3 | from .precise_batchnorm import PreciseBn # noqa 4 | 5 | 6 | __all__ = [ 7 | "PreciseBn", 8 | ] 9 | -------------------------------------------------------------------------------- /pytorchvideo_trainer/pytorchvideo_trainer/callbacks/precise_batchnorm.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | 3 | from typing import Generator 4 | 5 | import torch 6 | from fvcore.nn.precise_bn import update_bn_stats 7 | from pytorch_lightning.callbacks import Callback 8 | from pytorch_lightning.core.lightning import LightningModule 9 | from pytorch_lightning.trainer.trainer import Trainer 10 | from torch.utils.data import DataLoader 11 | 12 | 13 | class PreciseBn(Callback): 14 | """ 15 | Recompute and update the batch norm stats to make them more precise. During 16 | training, both the BN stats and the model weights change after every iteration, so 17 | the running average cannot precisely reflect the actual stats of the 18 | current model. 19 | In this callback, the BN stats are recomputed with fixed weights, to make 20 | the running average more precise during the training phase. Specifically, it 21 | computes the true average of per-batch mean/variance instead of the 22 | running average. See Sec. 3 of the paper "Rethinking Batch in BatchNorm" 23 | for details. 24 | """ 25 | 26 | def __init__(self, num_batches: int) -> None: 27 | """ 28 | Args: 29 | num_batches (int): Number of steps / mini-batches to 30 | perform to sample for updating the precise batchnorm 31 | stats.
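                Typically set through the Hydra configs rather than constructed
                directly; for example (illustrative), the classification configs in
                ``pytorchvideo_trainer/conf`` select ``callbacks: precise_bn`` and
                set ``num_batches: 200``.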
32 | """ 33 | self.num_batches = num_batches 34 | 35 | def _get_precise_bn_loader( 36 | self, data_loader: DataLoader, pl_module: LightningModule 37 | ) -> Generator[torch.Tensor, None, None]: 38 | for batch in data_loader: 39 | inputs = batch[pl_module.modality_key] 40 | if isinstance(inputs, list): 41 | inputs = [x.to(pl_module.device) for x in inputs] 42 | else: 43 | inputs = inputs.to(pl_module.device) 44 | yield inputs 45 | 46 | def on_train_epoch_end( 47 | self, 48 | trainer: Trainer, 49 | pl_module: LightningModule, 50 | ) -> None: 51 | """ 52 | Called at the end of every epoch only during the training 53 | phase. 54 | 55 | Args: 56 | trainer (Trainer): A PyTorch-Lightning trainer object. 57 | pl_module (LightningModule): A PyTorch-Lightning module. 58 | Typically supported modules include - 59 | pytorchvideo_trainer.module.VideoClassificationModule, etc. 60 | """ 61 | # pyre-ignore[16] 62 | dataloader = trainer.datamodule.train_dataloader() 63 | precise_bn_loader = self._get_precise_bn_loader( 64 | data_loader=dataloader, pl_module=pl_module 65 | ) 66 | update_bn_stats( 67 | model=pl_module.model, # pyre-ignore[6] 68 | data_loader=precise_bn_loader, 69 | num_iters=self.num_batches, 70 | ) 71 | -------------------------------------------------------------------------------- /pytorchvideo_trainer/pytorchvideo_trainer/conf/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | 3 | import torchrecipes.core.conf # noqa 4 | 5 | # Components to register with this config 6 | from pytorchvideo_trainer import register_components 7 | 8 | 9 | register_components() 10 | -------------------------------------------------------------------------------- /pytorchvideo_trainer/pytorchvideo_trainer/conf/byol_train_app_conf.yaml: -------------------------------------------------------------------------------- 1 | _target_: pytorchvideo_trainer.train_app.VideoClassificationTrainApp 2 | 3 | defaults: 4 | - schema/module: byol_module_conf 5 | - schema/module/optim: optim_conf 6 | - schema/datamodule: ptv_video_classification_data_module_conf 7 | - datamodule/dataloader: kinetics_contrastive 8 | - logger: ptl 9 | - datamodule/transforms: kinetics_contrastive 10 | - module/knn_memory: kinetics_k400 11 | - module/model: slow_r50_byol 12 | - module/loss: similarity 13 | - module/optim: sgd_ssl 14 | - module/metrics: accuracy 15 | - schema/trainer: trainer 16 | - trainer: cpu 17 | - callbacks: null 18 | - _self_ 19 | trainer: 20 | sync_batchnorm: false # set this to true for training 21 | 22 | module: 23 | momentum_anneal_cosine: true 24 | 25 | hydra: 26 | searchpath: 27 | - pkg://pytorchvideo_trainer.conf 28 | - pkg://torchrecipes.core.conf 29 | -------------------------------------------------------------------------------- /pytorchvideo_trainer/pytorchvideo_trainer/conf/callbacks/precise_bn.yaml: -------------------------------------------------------------------------------- 1 | precise_bn: 2 | _target_: pytorchvideo_trainer.callbacks.precise_batchnorm.PreciseBn 3 | num_batches: null 4 | -------------------------------------------------------------------------------- /pytorchvideo_trainer/pytorchvideo_trainer/conf/classification_mvit_16x4.yaml: -------------------------------------------------------------------------------- 1 | _target_: pytorchvideo_trainer.train_app.VideoClassificationTrainApp 2 | 3 | defaults: 4 | - schema/module: video_classification_module_conf_vision_transformer 5 | 
- schema/module/optim: optim_conf 6 | - schema/datamodule: ptv_video_classification_data_module_conf 7 | - datamodule/dataloader: kinetics_classification 8 | - logger: ptl 9 | - datamodule/transforms: kinetics_classification_mvit_16x4 10 | - module/model: mvit_base_16x4 11 | - module/loss: soft_cross_entropy 12 | - module/optim: adamw 13 | - module/metrics: accuracy 14 | - module/lr_scheduler: cosine_with_warmup 15 | - schema/trainer: trainer 16 | - trainer: multi_gpu 17 | - _self_ 18 | 19 | module: 20 | clip_gradient_norm: 1.0 21 | ensemble_method: "sum" 22 | lr_scheduler: 23 | max_iters: 200 24 | warmup_start_lr: 1.6e-05 25 | warmup_iters: 30 26 | cosine_after_warmup: true 27 | cosine_end_lr: 1.6e-05 28 | optim: 29 | lr: 0.0016 30 | weight_decay: 0.05 31 | method: adamw 32 | zero_weight_decay_1d_param: true 33 | batch_transform: 34 | _target_: pytorchvideo_trainer.datamodule.transforms.MixVideoBatchWrapper 35 | mixup_alpha: 0.8 36 | cutmix_prob: 0.5 37 | cutmix_alpha: 1.0 38 | label_smoothing: 0.1 39 | 40 | datamodule: 41 | dataloader: 42 | train: 43 | batch_size: 2 44 | dataset: 45 | clip_sampler: 46 | clip_duration: 2.13 47 | collate_fn: 48 | _target_: pytorchvideo_trainer.datamodule.collators.build_collator_from_name 49 | name: multiple_samples_collate 50 | val: 51 | batch_size: 8 52 | dataset: 53 | clip_sampler: 54 | clip_duration: 2.13 55 | test: 56 | batch_size: 8 57 | dataset: 58 | clip_sampler: 59 | clip_duration: 2.13 60 | 61 | trainer: 62 | num_nodes: 16 63 | gpus: 8 64 | max_epochs: 200 65 | sync_batchnorm: False 66 | replace_sampler_ddp: False 67 | 68 | hydra: 69 | searchpath: 70 | - pkg://pytorchvideo_trainer.conf 71 | - pkg://torchrecipes.core.conf 72 | -------------------------------------------------------------------------------- /pytorchvideo_trainer/pytorchvideo_trainer/conf/classification_slow_8x8_r50.yaml: -------------------------------------------------------------------------------- 1 | _target_: pytorchvideo_trainer.train_app.VideoClassificationTrainApp 2 | 3 | defaults: 4 | - schema/module: video_classification_module_conf 5 | - schema/module/optim: optim_conf 6 | - schema/datamodule: ptv_video_classification_data_module_conf 7 | - datamodule/dataloader: kinetics_classification 8 | - logger: ptl 9 | - datamodule/transforms: kinetics_classification_slow 10 | - module/model: slow_r50 11 | - module/loss: cross_entropy 12 | - module/optim: sgd 13 | - module/metrics: accuracy 14 | - module/lr_scheduler: cosine_with_warmup 15 | - schema/trainer: trainer 16 | - trainer: multi_gpu 17 | - callbacks: precise_bn 18 | - _self_ 19 | 20 | module: 21 | ensemble_method: "sum" 22 | lr_scheduler: 23 | max_iters: 196 24 | warmup_start_lr: 0.01 25 | warmup_iters: 34 26 | optim: 27 | lr: 0.8 28 | nesterov: true 29 | 30 | callbacks: 31 | precise_bn: 32 | num_batches: 200 33 | 34 | trainer: 35 | num_nodes: 8 36 | gpus: 8 37 | max_epochs: 196 38 | sync_batchnorm: False 39 | replace_sampler_ddp: False 40 | 41 | 42 | hydra: 43 | searchpath: 44 | - pkg://pytorchvideo_trainer.conf 45 | - pkg://torchrecipes.core.conf 46 | -------------------------------------------------------------------------------- /pytorchvideo_trainer/pytorchvideo_trainer/conf/classification_slowfast_8x8_r50.yaml: -------------------------------------------------------------------------------- 1 | _target_: pytorchvideo_trainer.train_app.VideoClassificationTrainApp 2 | 3 | defaults: 4 | - schema/module: video_classification_module_conf 5 | - schema/module/optim: optim_conf 6 | - schema/datamodule: 
ptv_video_classification_data_module_conf 7 | - datamodule/dataloader: kinetics_classification 8 | - logger: ptl 9 | - datamodule/transforms: kinetics_classification_slowfast 10 | - module/model: slowfast_r50 11 | - module/loss: cross_entropy 12 | - module/optim: sgd 13 | - module/metrics: accuracy 14 | - module/lr_scheduler: cosine_with_warmup 15 | - schema/trainer: trainer 16 | - trainer: multi_gpu 17 | - callbacks: precise_bn 18 | - _self_ 19 | 20 | module: 21 | ensemble_method: "sum" 22 | lr_scheduler: 23 | max_iters: 196 24 | warmup_start_lr: 0.01 25 | warmup_iters: 34 26 | optim: 27 | lr: 0.8 28 | nesterov: true 29 | 30 | callbacks: 31 | precise_bn: 32 | num_batches: 200 33 | 34 | trainer: 35 | num_nodes: 8 36 | gpus: 8 37 | max_epochs: 196 38 | sync_batchnorm: False 39 | replace_sampler_ddp: False 40 | 41 | 42 | hydra: 43 | searchpath: 44 | - pkg://pytorchvideo_trainer.conf 45 | - pkg://torchrecipes.core.conf 46 | -------------------------------------------------------------------------------- /pytorchvideo_trainer/pytorchvideo_trainer/conf/classification_x3d_xs.yaml: -------------------------------------------------------------------------------- 1 | _target_: pytorchvideo_trainer.train_app.VideoClassificationTrainApp 2 | 3 | defaults: 4 | - schema/module: video_classification_module_conf 5 | - schema/module/optim: optim_conf 6 | - schema/datamodule: ptv_video_classification_data_module_conf 7 | - datamodule/dataloader: kinetics_classification 8 | - logger: ptl 9 | - datamodule/transforms: kinetics_classification_x3d_xs 10 | - module/model: x3d_xs 11 | - module/loss: cross_entropy 12 | - module/optim: sgd 13 | - module/metrics: accuracy 14 | - module/lr_scheduler: cosine_with_warmup 15 | - schema/trainer: trainer 16 | - trainer: multi_gpu 17 | - callbacks: precise_bn 18 | - _self_ 19 | 20 | module: 21 | ensemble_method: "sum" 22 | lr_scheduler: 23 | max_iters: 300 24 | warmup_start_lr: 0.01 25 | warmup_iters: 35 26 | optim: 27 | lr: 0.8 28 | nesterov: true 29 | weight_decay: 5e-5 30 | 31 | datamodule: 32 | dataloader: 33 | train: 34 | batch_size: 16 35 | dataset: 36 | clip_sampler: 37 | clip_duration: 1.6 38 | val: 39 | batch_size: 16 40 | dataset: 41 | clip_sampler: 42 | clip_duration: 1.6 43 | test: 44 | batch_size: 16 45 | dataset: 46 | clip_sampler: 47 | clip_duration: 1.6 48 | 49 | callbacks: 50 | precise_bn: 51 | num_batches: 200 52 | 53 | trainer: 54 | num_nodes: 8 55 | gpus: 8 56 | max_epochs: 300 57 | sync_batchnorm: False 58 | replace_sampler_ddp: False 59 | 60 | 61 | hydra: 62 | searchpath: 63 | - pkg://pytorchvideo_trainer.conf 64 | - pkg://torchrecipes.core.conf 65 | -------------------------------------------------------------------------------- /pytorchvideo_trainer/pytorchvideo_trainer/conf/datamodule/dataloader/kinetics_classification.yaml: -------------------------------------------------------------------------------- 1 | train: 2 | dataset: 3 | _target_: pytorchvideo.data.Kinetics 4 | data_path: ??? 5 | video_path_prefix: ??? 6 | clip_sampler: 7 | _target_: pytorchvideo.data.clip_sampling.RandomClipSampler 8 | clip_duration: 2.13 9 | 10 | shuffle: True 11 | batch_size: 8 12 | num_workers: 8 13 | pin_memory: True 14 | 15 | val: 16 | dataset: 17 | _target_: pytorchvideo.data.Kinetics 18 | data_path: ??? 19 | video_path_prefix: ??? 
20 | clip_sampler: 21 | _target_: pytorchvideo.data.clip_sampling.UniformClipSampler 22 | clip_duration: 2.13 23 | 24 | shuffle: False 25 | batch_size: 8 26 | num_workers: 8 27 | pin_memory: True 28 | 29 | test: 30 | dataset: 31 | _target_: pytorchvideo.data.Kinetics 32 | data_path: ??? 33 | video_path_prefix: ??? 34 | clip_sampler: 35 | _target_: pytorchvideo.data.clip_sampling.ConstantClipsPerVideoSampler 36 | clip_duration: 2.13 37 | clips_per_video: 10 #num_ensemble_views 38 | augs_per_clip: 3 # num_spatial_crops 39 | 40 | shuffle: False 41 | batch_size: 8 42 | num_workers: 8 43 | pin_memory: True 44 | -------------------------------------------------------------------------------- /pytorchvideo_trainer/pytorchvideo_trainer/conf/datamodule/dataloader/kinetics_contrastive.yaml: -------------------------------------------------------------------------------- 1 | train: 2 | dataset: 3 | _target_: pytorchvideo.data.Kinetics 4 | data_path: ??? 5 | video_path_prefix: ??? 6 | clip_sampler: 7 | _target_: pytorchvideo.data.clip_sampling.RandomMultiClipSampler 8 | clip_duration: 2.0 9 | num_clips: 2 10 | 11 | shuffle: True 12 | batch_size: 8 13 | num_workers: 8 14 | 15 | val: 16 | dataset: 17 | _target_: pytorchvideo.data.Kinetics 18 | data_path: ??? 19 | video_path_prefix: ??? 20 | clip_sampler: 21 | _target_: pytorchvideo.data.clip_sampling.UniformClipSampler 22 | clip_duration: 2.0 23 | 24 | shuffle: False 25 | batch_size: 8 26 | num_workers: 8 27 | 28 | test: 29 | dataset: 30 | _target_: pytorchvideo.data.Kinetics 31 | data_path: ??? 32 | video_path_prefix: ??? 33 | clip_sampler: 34 | _target_: pytorchvideo.data.clip_sampling.ConstantClipsPerVideoSampler 35 | clip_duration: 2.0 36 | clips_per_video: 10 #num_ensemble_views 37 | augs_per_clip: 3 # num_spatial_crops 38 | 39 | shuffle: False 40 | batch_size: 8 41 | num_workers: 8 42 | -------------------------------------------------------------------------------- /pytorchvideo_trainer/pytorchvideo_trainer/conf/datamodule/transforms/kinetics_classification_mvit_16x4.yaml: -------------------------------------------------------------------------------- 1 | train: 2 | - _target_: pytorchvideo_trainer.datamodule.transforms.RepeatandConverttoList 3 | repeat_num: 2 4 | - _target_: pytorchvideo_trainer.datamodule.transforms.ApplyTransformToKeyOnList 5 | transform: 6 | - _target_: pytorchvideo.transforms.UniformTemporalSubsample 7 | num_samples: 16 8 | - _target_: pytorchvideo.transforms.Div255 9 | - _target_: pytorchvideo.transforms.Permute 10 | dims: [1,0,2,3] 11 | - _target_: pytorchvideo.transforms.rand_augment.RandAugment 12 | magnitude: 7 13 | num_layers: 4 14 | - _target_: pytorchvideo.transforms.Permute 15 | dims: [1,0,2,3] 16 | - _target_: pytorchvideo.transforms.Normalize 17 | mean: [0.45, 0.45, 0.45] 18 | std: [0.225, 0.225, 0.225] 19 | - _target_: pytorchvideo.transforms.RandomResizedCrop 20 | target_height: 224 21 | target_width: 224 22 | scale: [0.08, 1.0] 23 | aspect_ratio: [0.75, 1.3333] 24 | - _target_: torchvision.transforms.RandomHorizontalFlip 25 | p: 0.5 26 | - _target_: pytorchvideo.transforms.Permute 27 | dims: [1,0,2,3] 28 | - _target_: pytorchvideo_trainer.datamodule.rand_erase_transform.RandomErasing 29 | probability: 0.25 30 | mode: "pixel" 31 | max_count: 1 32 | num_splits: 1 33 | device: "cpu" 34 | - _target_: pytorchvideo.transforms.Permute 35 | dims: [1,0,2,3] 36 | key: video 37 | - _target_: pytorchvideo.transforms.RemoveKey 38 | key: audio 39 | val: 40 | - _target_: pytorchvideo.transforms.ApplyTransformToKey 41 | 
transform: 42 | - _target_: pytorchvideo.transforms.UniformTemporalSubsample 43 | num_samples: 16 44 | - _target_: pytorchvideo.transforms.Div255 45 | - _target_: pytorchvideo.transforms.Normalize 46 | mean: [0.45, 0.45, 0.45] 47 | std: [0.225, 0.225, 0.225] 48 | - _target_: pytorchvideo.transforms.ShortSideScale 49 | size: 224 50 | - _target_: torchvision.transforms.CenterCrop 51 | size: 224 52 | key: video 53 | - _target_: pytorchvideo.transforms.RemoveKey 54 | key: audio 55 | test: 56 | - _target_: pytorchvideo.transforms.ApplyTransformToKey 57 | transform: 58 | - _target_: pytorchvideo.transforms.UniformTemporalSubsample 59 | num_samples: 16 60 | - _target_: pytorchvideo.transforms.Div255 61 | - _target_: pytorchvideo.transforms.Normalize 62 | mean: [0.45, 0.45, 0.45] 63 | std: [0.225, 0.225, 0.225] 64 | - _target_: pytorchvideo.transforms.ShortSideScale 65 | size: 224 66 | key: video 67 | - _target_: pytorchvideo.transforms.UniformCropVideo 68 | size: 224 69 | - _target_: pytorchvideo.transforms.RemoveKey 70 | key: audio 71 | -------------------------------------------------------------------------------- /pytorchvideo_trainer/pytorchvideo_trainer/conf/datamodule/transforms/kinetics_classification_slow.yaml: -------------------------------------------------------------------------------- 1 | train: 2 | - _target_: pytorchvideo.transforms.ApplyTransformToKey 3 | transform: 4 | - _target_: pytorchvideo.transforms.UniformTemporalSubsample 5 | num_samples: 8 6 | - _target_: pytorchvideo.transforms.Div255 7 | - _target_: pytorchvideo.transforms.Normalize 8 | mean: [0.45, 0.45, 0.45] 9 | std: [0.225, 0.225, 0.225] 10 | - _target_: pytorchvideo.transforms.RandomShortSideScale 11 | min_size: 256 12 | max_size: 320 13 | - _target_: torchvision.transforms.RandomCrop 14 | size: 224 15 | - _target_: torchvision.transforms.RandomHorizontalFlip 16 | p: 0.5 17 | key: video 18 | - _target_: pytorchvideo.transforms.RemoveKey 19 | key: audio 20 | val: 21 | - _target_: pytorchvideo.transforms.ApplyTransformToKey 22 | transform: 23 | - _target_: pytorchvideo.transforms.UniformTemporalSubsample 24 | num_samples: 8 25 | - _target_: pytorchvideo.transforms.Div255 26 | - _target_: pytorchvideo.transforms.Normalize 27 | mean: [0.45, 0.45, 0.45] 28 | std: [0.225, 0.225, 0.225] 29 | - _target_: pytorchvideo.transforms.ShortSideScale 30 | size: 256 31 | - _target_: torchvision.transforms.CenterCrop 32 | size: 256 33 | key: video 34 | - _target_: pytorchvideo.transforms.RemoveKey 35 | key: audio 36 | test: 37 | - _target_: pytorchvideo.transforms.ApplyTransformToKey 38 | transform: 39 | - _target_: pytorchvideo.transforms.UniformTemporalSubsample 40 | num_samples: 8 41 | - _target_: pytorchvideo.transforms.Div255 42 | - _target_: pytorchvideo.transforms.Normalize 43 | mean: [0.45, 0.45, 0.45] 44 | std: [0.225, 0.225, 0.225] 45 | - _target_: pytorchvideo.transforms.ShortSideScale 46 | size: 256 47 | key: video 48 | - _target_: pytorchvideo.transforms.UniformCropVideo 49 | size: 256 50 | - _target_: pytorchvideo.transforms.RemoveKey 51 | key: audio 52 | -------------------------------------------------------------------------------- /pytorchvideo_trainer/pytorchvideo_trainer/conf/datamodule/transforms/kinetics_classification_slowfast.yaml: -------------------------------------------------------------------------------- 1 | train: 2 | - _target_: pytorchvideo.transforms.ApplyTransformToKey 3 | transform: 4 | - _target_: pytorchvideo.transforms.UniformTemporalSubsample 5 | num_samples: 32 6 | - _target_: 
pytorchvideo.transforms.Div255 7 | - _target_: pytorchvideo.transforms.Normalize 8 | mean: [0.45, 0.45, 0.45] 9 | std: [0.225, 0.225, 0.225] 10 | - _target_: pytorchvideo.transforms.RandomShortSideScale 11 | min_size: 256 12 | max_size: 320 13 | - _target_: torchvision.transforms.RandomCrop 14 | size: 224 15 | - _target_: torchvision.transforms.RandomHorizontalFlip 16 | p: 0.5 17 | - _target_: pytorchvideo_trainer.datamodule.transforms.SlowFastPackPathway 18 | alpha: 4 19 | key: video 20 | - _target_: pytorchvideo.transforms.RemoveKey 21 | key: audio 22 | val: 23 | - _target_: pytorchvideo.transforms.ApplyTransformToKey 24 | transform: 25 | - _target_: pytorchvideo.transforms.UniformTemporalSubsample 26 | num_samples: 32 27 | - _target_: pytorchvideo.transforms.Div255 28 | - _target_: pytorchvideo.transforms.Normalize 29 | mean: [0.45, 0.45, 0.45] 30 | std: [0.225, 0.225, 0.225] 31 | - _target_: pytorchvideo.transforms.ShortSideScale 32 | size: 256 33 | - _target_: torchvision.transforms.CenterCrop 34 | size: 256 35 | - _target_: pytorchvideo_trainer.datamodule.transforms.SlowFastPackPathway 36 | alpha: 4 37 | key: video 38 | - _target_: pytorchvideo.transforms.RemoveKey 39 | key: audio 40 | test: 41 | - _target_: pytorchvideo.transforms.ApplyTransformToKey 42 | transform: 43 | - _target_: pytorchvideo.transforms.UniformTemporalSubsample 44 | num_samples: 32 45 | - _target_: pytorchvideo.transforms.Div255 46 | - _target_: pytorchvideo.transforms.Normalize 47 | mean: [0.45, 0.45, 0.45] 48 | std: [0.225, 0.225, 0.225] 49 | - _target_: pytorchvideo.transforms.ShortSideScale 50 | size: 256 51 | key: video 52 | - _target_: pytorchvideo.transforms.UniformCropVideo 53 | size: 256 54 | - _target_: pytorchvideo.transforms.ApplyTransformToKey 55 | transform: 56 | - _target_: pytorchvideo_trainer.datamodule.transforms.SlowFastPackPathway 57 | alpha: 4 58 | key: video 59 | - _target_: pytorchvideo.transforms.RemoveKey 60 | key: audio 61 | -------------------------------------------------------------------------------- /pytorchvideo_trainer/pytorchvideo_trainer/conf/datamodule/transforms/kinetics_classification_x3d_xs.yaml: -------------------------------------------------------------------------------- 1 | train: 2 | - _target_: pytorchvideo.transforms.ApplyTransformToKey 3 | transform: 4 | - _target_: pytorchvideo.transforms.UniformTemporalSubsample 5 | num_samples: 4 6 | - _target_: pytorchvideo.transforms.Div255 7 | - _target_: pytorchvideo.transforms.Normalize 8 | mean: [0.45, 0.45, 0.45] 9 | std: [0.225, 0.225, 0.225] 10 | - _target_: pytorchvideo.transforms.RandomShortSideScale 11 | min_size: 182 12 | max_size: 228 13 | - _target_: torchvision.transforms.RandomCrop 14 | size: 160 15 | - _target_: torchvision.transforms.RandomHorizontalFlip 16 | p: 0.5 17 | key: video 18 | - _target_: pytorchvideo.transforms.RemoveKey 19 | key: audio 20 | val: 21 | - _target_: pytorchvideo.transforms.ApplyTransformToKey 22 | transform: 23 | - _target_: pytorchvideo.transforms.UniformTemporalSubsample 24 | num_samples: 4 25 | - _target_: pytorchvideo.transforms.Div255 26 | - _target_: pytorchvideo.transforms.Normalize 27 | mean: [0.45, 0.45, 0.45] 28 | std: [0.225, 0.225, 0.225] 29 | - _target_: pytorchvideo.transforms.ShortSideScale 30 | size: 182 31 | - _target_: torchvision.transforms.CenterCrop 32 | size: 182 33 | key: video 34 | - _target_: pytorchvideo.transforms.RemoveKey 35 | key: audio 36 | test: 37 | - _target_: pytorchvideo.transforms.ApplyTransformToKey 38 | transform: 39 | - _target_: 
pytorchvideo.transforms.UniformTemporalSubsample 40 | num_samples: 4 41 | - _target_: pytorchvideo.transforms.Div255 42 | - _target_: pytorchvideo.transforms.Normalize 43 | mean: [0.45, 0.45, 0.45] 44 | std: [0.225, 0.225, 0.225] 45 | - _target_: pytorchvideo.transforms.ShortSideScale 46 | size: 182 47 | key: video 48 | - _target_: pytorchvideo.transforms.UniformCropVideo 49 | size: 182 50 | - _target_: pytorchvideo.transforms.RemoveKey 51 | key: audio 52 | -------------------------------------------------------------------------------- /pytorchvideo_trainer/pytorchvideo_trainer/conf/datamodule/transforms/kinetics_contrastive.yaml: -------------------------------------------------------------------------------- 1 | train: 2 | - _target_: pytorchvideo_trainer.datamodule.transforms.ApplyTransformToKeyOnList 3 | transform: 4 | - _target_: pytorchvideo.transforms.UniformTemporalSubsample 5 | num_samples: 8 6 | - _target_: pytorchvideo.transforms.Div255 7 | - _target_: pytorchvideo_trainer.datamodule.transforms.ColorJitterVideoSSl 8 | bri_con_sat: [0.6, 0.6, 0.6] 9 | hue: 0.15 10 | p_color_jitter: 0.8 11 | p_convert_gray: 0.2 12 | - _target_: pytorchvideo.transforms.Normalize 13 | mean: [0.45, 0.45, 0.45] 14 | std: [0.225, 0.225, 0.225] 15 | - _target_: pytorchvideo.transforms.RandomResizedCrop 16 | target_height: 224 17 | target_width: 224 18 | scale: [0.2, 0.766] 19 | aspect_ratio: [0.75, 1.3333] 20 | - _target_: torchvision.transforms.RandomHorizontalFlip 21 | p: 0.5 22 | key: video 23 | - _target_: pytorchvideo.transforms.RemoveKey 24 | key: audio 25 | val: 26 | - _target_: pytorchvideo.transforms.ApplyTransformToKey 27 | transform: 28 | - _target_: pytorchvideo.transforms.UniformTemporalSubsample 29 | num_samples: 8 30 | - _target_: pytorchvideo.transforms.Div255 31 | - _target_: pytorchvideo.transforms.Normalize 32 | mean: [0.45, 0.45, 0.45] 33 | std: [0.225, 0.225, 0.225] 34 | - _target_: pytorchvideo.transforms.ShortSideScale 35 | size: 256 36 | - _target_: torchvision.transforms.CenterCrop 37 | size: 256 38 | key: video 39 | - _target_: pytorchvideo.transforms.RemoveKey 40 | key: audio 41 | test: 42 | - _target_: pytorchvideo.transforms.ApplyTransformToKey 43 | transform: 44 | - _target_: pytorchvideo.transforms.UniformTemporalSubsample 45 | num_samples: 8 46 | - _target_: pytorchvideo.transforms.Div255 47 | - _target_: pytorchvideo.transforms.Normalize 48 | mean: [0.45, 0.45, 0.45] 49 | std: [0.225, 0.225, 0.225] 50 | - _target_: pytorchvideo.transforms.ShortSideScale 51 | size: 256 52 | key: video 53 | - _target_: pytorchvideo.transforms.UniformCropVideo 54 | size: 256 55 | - _target_: pytorchvideo.transforms.RemoveKey 56 | key: audio 57 | -------------------------------------------------------------------------------- /pytorchvideo_trainer/pytorchvideo_trainer/conf/datamodule/transforms/kinetics_moco_v2.yaml: -------------------------------------------------------------------------------- 1 | train: 2 | - _target_: pytorchvideo_trainer.datamodule.transforms.ApplyTransformToKeyOnList 3 | transform: 4 | - _target_: pytorchvideo.transforms.UniformTemporalSubsample 5 | num_samples: 8 6 | - _target_: pytorchvideo.transforms.Div255 7 | - _target_: pytorchvideo_trainer.datamodule.transforms.ColorJitterVideoSSl 8 | bri_con_sat: [0.4, 0.4, 0.4] 9 | hue: 0.4 10 | p_color_jitter: 0.8 11 | p_convert_gray: 0.2 12 | - _target_: pytorchvideo.transforms.Normalize 13 | mean: [0.45, 0.45, 0.45] 14 | std: [0.225, 0.225, 0.225] 15 | - _target_: pytorchvideo.transforms.RandomResizedCrop 16 | 
target_height: 224 17 | target_width: 224 18 | scale: [0.2, 0.766] 19 | aspect_ratio: [0.75, 1.3333] 20 | - _target_: torchvision.transforms.RandomHorizontalFlip 21 | p: 0.5 22 | key: video 23 | - _target_: pytorchvideo.transforms.RemoveKey 24 | key: audio 25 | val: 26 | - _target_: pytorchvideo.transforms.ApplyTransformToKey 27 | transform: 28 | - _target_: pytorchvideo.transforms.UniformTemporalSubsample 29 | num_samples: 8 30 | - _target_: pytorchvideo.transforms.Div255 31 | - _target_: pytorchvideo.transforms.Normalize 32 | mean: [0.45, 0.45, 0.45] 33 | std: [0.225, 0.225, 0.225] 34 | - _target_: pytorchvideo.transforms.ShortSideScale 35 | size: 256 36 | - _target_: torchvision.transforms.CenterCrop 37 | size: 256 38 | key: video 39 | - _target_: pytorchvideo.transforms.RemoveKey 40 | key: audio 41 | test: 42 | - _target_: pytorchvideo.transforms.ApplyTransformToKey 43 | transform: 44 | - _target_: pytorchvideo.transforms.UniformTemporalSubsample 45 | num_samples: 8 46 | - _target_: pytorchvideo.transforms.Div255 47 | - _target_: pytorchvideo.transforms.Normalize 48 | mean: [0.45, 0.45, 0.45] 49 | std: [0.225, 0.225, 0.225] 50 | - _target_: pytorchvideo.transforms.ShortSideScale 51 | size: 256 52 | key: video 53 | - _target_: pytorchvideo.transforms.UniformCropVideo 54 | size: 256 55 | - _target_: pytorchvideo.transforms.RemoveKey 56 | key: audio 57 | -------------------------------------------------------------------------------- /pytorchvideo_trainer/pytorchvideo_trainer/conf/logger/ptl.yaml: -------------------------------------------------------------------------------- 1 | _target_: pytorch_lightning.loggers.TensorBoardLogger 2 | save_dir: ??? 3 | name: default 4 | version: null 5 | -------------------------------------------------------------------------------- /pytorchvideo_trainer/pytorchvideo_trainer/conf/moco_v2_train_app_conf.yaml: -------------------------------------------------------------------------------- 1 | _target_: pytorchvideo_trainer.train_app.VideoClassificationTrainApp 2 | 3 | defaults: 4 | - schema/module: moco_v2_module_conf 5 | - schema/module/optim: optim_conf 6 | - schema/datamodule: ptv_video_classification_data_module_conf 7 | - datamodule/dataloader: kinetics_contrastive 8 | - logger: ptl 9 | - datamodule/transforms: kinetics_moco_v2 10 | - module/knn_memory: kinetics_k400 11 | - module/model: slow_r50_moco_v2 12 | - module/loss: contrastive 13 | - module/optim: sgd_ssl 14 | - module/metrics: accuracy 15 | - schema/trainer: trainer 16 | - trainer: cpu 17 | - callbacks: null 18 | - _self_ 19 | trainer: 20 | sync_batchnorm: false # set this to true for training 21 | 22 | module: 23 | dim: ${module.model.backbone_embed_dim} 24 | k: 65536 25 | batch_shuffle: true 26 | local_shuffle_bn: true 27 | 28 | hydra: 29 | searchpath: 30 | - pkg://pytorchvideo_trainer.conf 31 | - pkg://torchrecipes.core.conf 32 | -------------------------------------------------------------------------------- /pytorchvideo_trainer/pytorchvideo_trainer/conf/module/knn_memory/kinetics_k400.yaml: -------------------------------------------------------------------------------- 1 | _target_: pytorchvideo_trainer.module.ssl_helper.KnnMemory 2 | temperature: ${module.loss.temperature} 3 | dim: ${module.model.backbone_embed_dim} 4 | length: 239975 5 | downstream_classes: 400 6 | knn_k: 200 7 | momentum: 1.0 8 | -------------------------------------------------------------------------------- /pytorchvideo_trainer/pytorchvideo_trainer/conf/module/loss/contrastive.yaml: 
-------------------------------------------------------------------------------- 1 | _target_: pytorchvideo_trainer.module.losses.ContrastiveLoss 2 | temperature: 0.1 3 | -------------------------------------------------------------------------------- /pytorchvideo_trainer/pytorchvideo_trainer/conf/module/loss/cross_entropy.yaml: -------------------------------------------------------------------------------- 1 | # @package _group_ 2 | _target_: torch.nn.CrossEntropyLoss 3 | -------------------------------------------------------------------------------- /pytorchvideo_trainer/pytorchvideo_trainer/conf/module/loss/nt_xent.yaml: -------------------------------------------------------------------------------- 1 | # @package _group_ 2 | _target_: pytorchvideo_trainer.module.losses.NtxentLoss 3 | temperature: 0.1 4 | -------------------------------------------------------------------------------- /pytorchvideo_trainer/pytorchvideo_trainer/conf/module/loss/similarity.yaml: -------------------------------------------------------------------------------- 1 | # @package _group_ 2 | _target_: pytorchvideo_trainer.module.losses.SimilarityLoss 3 | temperature: 0.1 4 | -------------------------------------------------------------------------------- /pytorchvideo_trainer/pytorchvideo_trainer/conf/module/loss/soft_cross_entropy.yaml: -------------------------------------------------------------------------------- 1 | # @package _group_ 2 | _target_: pytorchvideo_trainer.module.losses.SoftTargetCrossEntropy 3 | -------------------------------------------------------------------------------- /pytorchvideo_trainer/pytorchvideo_trainer/conf/module/lr_scheduler/cosine_with_warmup.yaml: -------------------------------------------------------------------------------- 1 | lr_policy: 'cosine' 2 | cosine_after_warmup: False 3 | cosine_end_lr: 0 4 | warmup_iters: 34 5 | warmup_start_lr: 0.01 6 | max_iters: ${trainer.max_epochs} 7 | lr: ${module.optim.lr} 8 | -------------------------------------------------------------------------------- /pytorchvideo_trainer/pytorchvideo_trainer/conf/module/metrics/accuracy.yaml: -------------------------------------------------------------------------------- 1 | - name: accuracy_top1 2 | config: 3 | _target_: torchmetrics.Accuracy 4 | top_k: 1 5 | - name: accuracy_top5 6 | config: 7 | _target_: torchmetrics.Accuracy 8 | top_k: 5 9 | -------------------------------------------------------------------------------- /pytorchvideo_trainer/pytorchvideo_trainer/conf/module/metrics/average_precision.yaml: -------------------------------------------------------------------------------- 1 | - name: average_precision 2 | config: 3 | _target_: torchmetrics.AveragePrecision 4 | -------------------------------------------------------------------------------- /pytorchvideo_trainer/pytorchvideo_trainer/conf/module/model/from_lightning_checkpoint.yaml: -------------------------------------------------------------------------------- 1 | _target_: pytorchvideo_trainer.module.video_classification.create_classification_model_from_lightning 2 | checkpoint_path: ??? 
3 | -------------------------------------------------------------------------------- /pytorchvideo_trainer/pytorchvideo_trainer/conf/module/model/from_model_zoo_checkpoint.yaml: -------------------------------------------------------------------------------- 1 | _target_: pytorchvideo_trainer.module.video_classification.create_classification_model_from_modelzoo 2 | checkpoint_path: manifold://fair_logging/tree/kalyanv/hub_models/SLOW_8x8_R50.pyth 3 | model: 4 | _target_: pytorchvideo.models.hub.resnet.slow_r50 5 | pretrained: False 6 | -------------------------------------------------------------------------------- /pytorchvideo_trainer/pytorchvideo_trainer/conf/module/model/from_ssl_checkpoint.yaml: -------------------------------------------------------------------------------- 1 | _target_: pytorchvideo_trainer.module.ssl_helper.create_classification_model_from_ssl_checkpoint 2 | ssl_checkpoint_path: null 3 | checkpoint_type: simclr 4 | mlp: 5 | _target_: pytorchvideo_trainer.module.byol.create_mlp_util 6 | dim_in: null 7 | dim_out: 400 8 | mlp_dim: 256 9 | num_layers: 1 10 | norm: null 11 | detach_backbone: true 12 | -------------------------------------------------------------------------------- /pytorchvideo_trainer/pytorchvideo_trainer/conf/module/model/mvit_base_16x4.yaml: -------------------------------------------------------------------------------- 1 | _target_: pytorchvideo.models.vision_transformers.create_multiscale_vision_transformers 2 | spatial_size: 224 3 | temporal_size: 16 4 | cls_embed_on: True 5 | sep_pos_embed: True 6 | depth: 16 7 | norm: "layernorm" 8 | input_channels: 3 9 | patch_embed_dim: 96 10 | conv_patch_embed_kernel: [3, 7, 7] 11 | conv_patch_embed_stride: [2, 4, 4] 12 | conv_patch_embed_padding: [1, 3, 3] 13 | enable_patch_embed_norm: False 14 | use_2d_patch: False 15 | # Attention block config. 16 | num_heads: 1 17 | mlp_ratio: 4.0 18 | qkv_bias: True 19 | dropout_rate_block: 0.0 20 | droppath_rate_block: 0.2 21 | pooling_mode: "conv" 22 | pool_first: False 23 | embed_dim_mul: [[1, 2.0], [3, 2.0], [14, 2.0]] 24 | atten_head_mul: [[1, 2.0], [3, 2.0], [14, 2.0]] 25 | pool_q_stride_size: [[1, 1, 2, 2], [3, 1, 2, 2], [14, 1, 2, 2]] 26 | pool_kv_stride_size: null 27 | pool_kv_stride_adaptive: [1, 8, 8] 28 | pool_kvq_kernel: [3, 3, 3] 29 | # Head config. 
30 | head_dropout_rate: 0.5 31 | head_activation: null 32 | head_num_classes: 400 33 | -------------------------------------------------------------------------------- /pytorchvideo_trainer/pytorchvideo_trainer/conf/module/model/slow_r50.yaml: -------------------------------------------------------------------------------- 1 | _target_: pytorchvideo.models.resnet.create_resnet 2 | input_channel: 3 3 | model_depth: 50 4 | model_num_class: 400 5 | dropout_rate: 0.5 6 | stem_conv_kernel_size: [1, 7, 7] 7 | head_pool_kernel_size: [8, 7, 7] 8 | -------------------------------------------------------------------------------- /pytorchvideo_trainer/pytorchvideo_trainer/conf/module/model/slow_r50_byol.yaml: -------------------------------------------------------------------------------- 1 | _target_: pytorchvideo_trainer.module.byol.create_byol_resnet_50 2 | backbone_embed_dim: 128 3 | mmt: 0.996 4 | -------------------------------------------------------------------------------- /pytorchvideo_trainer/pytorchvideo_trainer/conf/module/model/slow_r50_moco_v2.yaml: -------------------------------------------------------------------------------- 1 | _target_: pytorchvideo_trainer.module.moco_v2.create_moco_resnet_50 2 | backbone_embed_dim: 128 3 | mmt: 0.994 4 | -------------------------------------------------------------------------------- /pytorchvideo_trainer/pytorchvideo_trainer/conf/module/model/slow_r50_simclr.yaml: -------------------------------------------------------------------------------- 1 | _target_: pytorchvideo_trainer.module.simclr.create_simclr_resnet_50 2 | backbone_embed_dim: 128 3 | mlp_depth: 1 4 | mlp_inner_dim: 2048 5 | -------------------------------------------------------------------------------- /pytorchvideo_trainer/pytorchvideo_trainer/conf/module/model/slowfast_r50.yaml: -------------------------------------------------------------------------------- 1 | _target_: pytorchvideo.models.slowfast.create_slowfast 2 | input_channels: [3,3] 3 | model_depth: 50 4 | model_num_class: 400 5 | dropout_rate: 0.5 6 | slowfast_fusion_conv_kernel_size: [7, 1, 1] 7 | -------------------------------------------------------------------------------- /pytorchvideo_trainer/pytorchvideo_trainer/conf/module/model/x3d_xs.yaml: -------------------------------------------------------------------------------- 1 | _target_: pytorchvideo.models.x3d.create_x3d 2 | input_channel: 3 3 | model_num_class: 400 4 | dropout_rate: 0.5 5 | input_clip_length: 4 6 | input_crop_size: 160 7 | depth_factor: 2.2 8 | head_activation: null 9 | -------------------------------------------------------------------------------- /pytorchvideo_trainer/pytorchvideo_trainer/conf/module/optim/adam.yaml: -------------------------------------------------------------------------------- 1 | method: 'adam' 2 | lr: 0.001 3 | weight_decay: 0 4 | -------------------------------------------------------------------------------- /pytorchvideo_trainer/pytorchvideo_trainer/conf/module/optim/adamw.yaml: -------------------------------------------------------------------------------- 1 | method: 'adamw' 2 | lr: 0.001 3 | weight_decay: 0.01 4 | -------------------------------------------------------------------------------- /pytorchvideo_trainer/pytorchvideo_trainer/conf/module/optim/sgd.yaml: -------------------------------------------------------------------------------- 1 | method: 'sgd' 2 | lr: 0.1 3 | weight_decay: 1e-4 4 | momentum: 0.9 5 | nesterov: True 6 | 
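Note: the optimizer groups above (`adam`, `adamw`, `sgd`) are pulled into each train app through its Hydra defaults list (e.g. `- module/optim: sgd`) and can be swapped or tuned via overrides. A minimal sketch, modeled on the compose calls in the unit tests later in this dump; the chosen config name and override values are illustrative only, not prescribed by these files:

```python
# Sketch only: compose the Slow R50 classification app config, but select the
# AdamW optimizer group and override its learning rate (values are illustrative).
from hydra.experimental import compose, initialize_config_module

with initialize_config_module(config_module="pytorchvideo_trainer.conf"):
    cfg = compose(
        config_name="classification_slow_8x8_r50",
        overrides=["module/optim=adamw", "module.optim.lr=1e-3"],
    )
    # cfg.module.optim now reflects adamw.yaml with the overridden lr.
```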
-------------------------------------------------------------------------------- /pytorchvideo_trainer/pytorchvideo_trainer/conf/module/optim/sgd_ssl.yaml: -------------------------------------------------------------------------------- 1 | method: 'sgd' 2 | lr: 0.6 3 | weight_decay: 1e-6 4 | momentum: 0.9 5 | nesterov: True 6 | -------------------------------------------------------------------------------- /pytorchvideo_trainer/pytorchvideo_trainer/conf/simclr_train_app_conf.yaml: -------------------------------------------------------------------------------- 1 | _target_: pytorchvideo_trainer.train_app.VideoClassificationTrainApp 2 | 3 | defaults: 4 | - schema/module: simclr_module_conf 5 | - schema/module/optim: optim_conf 6 | - schema/datamodule: ptv_video_classification_data_module_conf 7 | - datamodule/dataloader: kinetics_contrastive 8 | - logger: ptl 9 | - datamodule/transforms: kinetics_moco_v2 10 | - module/knn_memory: kinetics_k400 11 | - module/model: slow_r50_simclr 12 | - module/loss: nt_xent 13 | - module/optim: sgd_ssl 14 | - module/metrics: accuracy 15 | - schema/trainer: trainer 16 | - trainer: cpu 17 | - callbacks: null 18 | - _self_ 19 | trainer: 20 | sync_batchnorm: false # set this to true for training 21 | 22 | hydra: 23 | searchpath: 24 | - pkg://pytorchvideo_trainer.conf 25 | - pkg://torchrecipes.core.conf 26 | -------------------------------------------------------------------------------- /pytorchvideo_trainer/pytorchvideo_trainer/conf/submitit_conf/fair_cluster.yaml: -------------------------------------------------------------------------------- 1 | # @package _group_ 2 | log_save_dir: null 3 | name: "ptv_trainer_job" 4 | time: "72:00:00" 5 | cpus_per_task: 10 6 | partition: "learnlab" 7 | mem: "470GB" 8 | constraint: "volta32gb" 9 | mode: "prod" 10 | -------------------------------------------------------------------------------- /pytorchvideo_trainer/pytorchvideo_trainer/conf/trainer/cpu.yaml: -------------------------------------------------------------------------------- 1 | # @package _group_ 2 | max_epochs: 1 3 | -------------------------------------------------------------------------------- /pytorchvideo_trainer/pytorchvideo_trainer/conf/trainer/multi_gpu.yaml: -------------------------------------------------------------------------------- 1 | # @package _group_ 2 | gpus: 8 3 | strategy: ddp 4 | max_epochs: 1 5 | num_sanity_val_steps: 0 6 | log_every_n_steps: 10 7 | -------------------------------------------------------------------------------- /pytorchvideo_trainer/pytorchvideo_trainer/conf/trainer/single_gpu.yaml: -------------------------------------------------------------------------------- 1 | # @package _group_ 2 | gpus: 1 3 | max_epochs: 1 4 | -------------------------------------------------------------------------------- /pytorchvideo_trainer/pytorchvideo_trainer/datamodule/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | 3 | from .datamodule import PyTorchVideoDataModule # noqa 4 | 5 | 6 | __all__ = [ 7 | "PyTorchVideoDataModule", 8 | ] 9 | -------------------------------------------------------------------------------- /pytorchvideo_trainer/pytorchvideo_trainer/datamodule/collators.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 
2 | 3 | from typing import Any, Callable, Dict, List 4 | 5 | from torch.utils.data._utils.collate import default_collate 6 | 7 | 8 | # pyre-ignore[2] 9 | def multiple_samples_collate(batch: List[Dict[str, List[Any]]]) -> Dict[str, Any]: 10 | """ 11 | Collate function for repeated augmentation. Each instance in the batch has 12 | more than one sample. 13 | 14 | To be used when working with 15 | `pytorchvideo_trainer.datamodule.transforms.RepeatandConverttoList`. 16 | """ 17 | batch_dict = {} 18 | for k in batch[0].keys(): 19 | v_iter = [] 20 | for sample_dict in batch: 21 | v_iter += sample_dict[k] 22 | batch_dict[k] = default_collate(v_iter) 23 | 24 | return batch_dict 25 | 26 | 27 | # pyre-ignore[24] 28 | _COLLATORS: Dict[str, Callable] = { 29 | "multiple_samples_collate": multiple_samples_collate, 30 | } 31 | 32 | 33 | def build_collator_from_name(name: str) -> Callable: # pyre-ignore[24] 34 | """ 35 | A utility function that returns the function handle to a specific collator 36 | in the `_COLLATORS` dictionary based on the queried key. Used in 37 | `pytorchvideo_trainer.datamodule.PyTorchVideoDataModule`, etc. 38 | 39 | Args: 40 | name (str): name of the queried collator. The key should be present in 41 | the `_COLLATORS` dictionary. 42 | """ 43 | assert ( 44 | name in _COLLATORS 45 | ), f"Invalid collator name. Available collators are {_COLLATORS.keys()}" 46 | return _COLLATORS[name] 47 | -------------------------------------------------------------------------------- /pytorchvideo_trainer/pytorchvideo_trainer/module/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | 3 | from .byol import BYOLModule # noqa 4 | from .moco_v2 import MOCOV2Module # noqa 5 | from .simclr import SimCLRModule # noqa 6 | from .video_classification import VideoClassificationModule # noqa 7 | 8 | 9 | __all__ = [ 10 | "VideoClassificationModule", 11 | "SimCLRModule", 12 | "BYOLModule", 13 | "MOCOV2Module", 14 | ] 15 | -------------------------------------------------------------------------------- /pytorchvideo_trainer/setup.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 3 | 4 | from setuptools import find_packages, setup 5 | 6 | 7 | setup( 8 | name="pytorchvideo_trainer", 9 | version="0.0.1", 10 | license="Apache 2.0", 11 | author="Facebook AI", 12 | url="https://github.com/facebookresearch/pytorchvideo", 13 | description="PyTorch-Lightning trainer powering PyTorchVideo models.", 14 | python_requires=">=3.8", 15 | install_requires=[ 16 | "submitit", 17 | "pytorchvideo>=0.1.5", 18 | ], 19 | extras_require={ 20 | "test": ["coverage", "pytest", "opencv-python"], 21 | "dev": [ 22 | "opencv-python", 23 | "black==20.8b1", 24 | "sphinx", 25 | "isort==4.3.21", 26 | "flake8==3.8.1", 27 | "flake8-bugbear", 28 | "flake8-comprehensions", 29 | "pre-commit", 30 | "nbconvert", 31 | "bs4", 32 | "autoflake==1.4", 33 | ], 34 | "opencv-python": [ 35 | "opencv-python", 36 | ], 37 | }, 38 | packages=find_packages(exclude=("scripts", "tests")), 39 | ) 40 | -------------------------------------------------------------------------------- /pytorchvideo_trainer/tests/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
2 | -------------------------------------------------------------------------------- /pytorchvideo_trainer/tests/test_conf_datamodule.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | 3 | import unittest 4 | 5 | from hydra.experimental import compose, initialize_config_module 6 | from hydra.utils import instantiate # @manual 7 | from pytorchvideo_trainer.datamodule.datamodule import PyTorchVideoDataModule 8 | 9 | 10 | class TestKineticsDataModuleConf(unittest.TestCase): 11 | def test_init_with_hydra(self) -> None: 12 | with initialize_config_module(config_module="pytorchvideo_trainer.conf"): 13 | test_conf = compose( 14 | config_name="video_classification_train_app_conf", 15 | overrides=[ 16 | "datamodule/dataloader=kinetics_classification", 17 | "datamodule/transforms=kinetics_classification_slow", 18 | ], 19 | ) 20 | print(test_conf) 21 | kinetics_data_module = instantiate( 22 | test_conf.datamodule, 23 | _recursive_=False, 24 | ) 25 | self.assertIsInstance(kinetics_data_module, PyTorchVideoDataModule) 26 | self.assertIsNotNone(kinetics_data_module.transforms["train"]) 27 | self.assertIsNotNone(kinetics_data_module.transforms["val"]) 28 | self.assertIsNotNone(kinetics_data_module.transforms["test"]) 29 | -------------------------------------------------------------------------------- /pytorchvideo_trainer/tests/test_conf_module.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | 3 | import unittest 4 | 5 | import hydra 6 | from hydra.experimental import compose, initialize_config_module 7 | from pytorchvideo_trainer.module.byol import BYOLModule 8 | from pytorchvideo_trainer.module.moco_v2 import MOCOV2Module 9 | from pytorchvideo_trainer.module.simclr import SimCLRModule 10 | from pytorchvideo_trainer.module.video_classification import VideoClassificationModule 11 | 12 | 13 | class TestVideoClassificationModuleConf(unittest.TestCase): 14 | def test_init_with_hydra(self) -> None: 15 | with initialize_config_module(config_module="pytorchvideo_trainer.conf"): 16 | test_conf = compose( 17 | config_name="video_classification_train_app_conf", 18 | overrides=["module/model=slow_r50"], 19 | ) 20 | test_module = hydra.utils.instantiate(test_conf.module, _recursive_=False) 21 | self.assertIsInstance(test_module, VideoClassificationModule) 22 | self.assertIsNotNone(test_module.model) 23 | 24 | 25 | class TestVideoSimCLRModuleConf(unittest.TestCase): 26 | def test_init_with_hydra(self) -> None: 27 | with initialize_config_module(config_module="pytorchvideo_trainer.conf"): 28 | test_conf = compose( 29 | config_name="simclr_train_app_conf", 30 | ) 31 | test_module = hydra.utils.instantiate(test_conf.module, _recursive_=False) 32 | self.assertIsInstance(test_module, SimCLRModule) 33 | self.assertIsNotNone(test_module.model) 34 | 35 | 36 | class TestVideoBYOLModuleConf(unittest.TestCase): 37 | def test_init_with_hydra(self) -> None: 38 | with initialize_config_module(config_module="pytorchvideo_trainer.conf"): 39 | test_conf = compose( 40 | config_name="byol_train_app_conf", 41 | ) 42 | test_module = hydra.utils.instantiate(test_conf.module, _recursive_=False) 43 | self.assertIsInstance(test_module, BYOLModule) 44 | self.assertIsNotNone(test_module.model) 45 | 46 | 47 | class TestVideoMOCOV2ModuleConf(unittest.TestCase): 48 | def test_init_with_hydra(self) -> None: 49 | with 
initialize_config_module(config_module="pytorchvideo_trainer.conf"): 50 | test_conf = compose( 51 | config_name="moco_v2_train_app_conf", 52 | # overrides=["module/model=resnet"], 53 | ) 54 | test_module = hydra.utils.instantiate(test_conf.module, _recursive_=False) 55 | self.assertIsInstance(test_module, MOCOV2Module) 56 | self.assertIsNotNone(test_module.model) 57 | -------------------------------------------------------------------------------- /pytorchvideo_trainer/tests/test_task_byol.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | 3 | # pyre-strict 4 | from torchrecipes.core.base_train_app import BaseTrainApp 5 | from vision.fair.pytorchvideo.pytorchvideo_trainer.tests.util import ( 6 | BaseTrainAppTestCase, 7 | create_small_kinetics_dataset, 8 | run_locally, 9 | tempdir, 10 | ) 11 | 12 | 13 | class TestBYOLTrainApp(BaseTrainAppTestCase): 14 | def get_train_app( 15 | self, 16 | root_dir: str, 17 | fast_dev_run: bool = True, 18 | logger: bool = False, 19 | ) -> BaseTrainApp: 20 | create_small_kinetics_dataset(root_dir) 21 | overrides = [ 22 | f"datamodule.dataloader.train.dataset.data_path={root_dir}/train.csv", 23 | f"datamodule.dataloader.val.dataset.data_path={root_dir}/val.csv", 24 | f"datamodule.dataloader.test.dataset.data_path={root_dir}/val.csv", 25 | f"datamodule.dataloader.train.dataset.video_path_prefix={root_dir}", 26 | f"datamodule.dataloader.val.dataset.video_path_prefix={root_dir}", 27 | f"datamodule.dataloader.test.dataset.video_path_prefix={root_dir}", 28 | "datamodule.dataloader.train.num_workers=0", 29 | "datamodule.dataloader.val.num_workers=0", 30 | "datamodule.dataloader.test.num_workers=0", 31 | "module.knn_memory.length=50", 32 | "module.knn_memory.knn_k=2", 33 | "datamodule.dataloader.train.batch_size=2", 34 | "datamodule.dataloader.val.batch_size=2", 35 | "datamodule.dataloader.test.batch_size=2", 36 | "trainer.logger=false", 37 | ] 38 | app = self.create_app_from_hydra( 39 | config_module="pytorchvideo_trainer.conf", 40 | config_name="byol_train_app_conf", 41 | overrides=overrides, 42 | ) 43 | trainer_overrides = {"fast_dev_run": fast_dev_run, "logger": logger} 44 | self.mock_trainer_params(app, trainer_overrides) 45 | return app 46 | 47 | @run_locally 48 | @tempdir 49 | def test_byol_app_train_test_30_views(self, root_dir: str) -> None: 50 | train_app = self.get_train_app( 51 | root_dir=root_dir, fast_dev_run=False, logger=False 52 | ) 53 | output = train_app.train() 54 | self.assertIsNotNone(output) 55 | output = train_app.test() 56 | self.assertIsNotNone(output) 57 | 58 | video_clips_cnts = getattr(train_app.module, "video_clips_cnts", None) 59 | num_ensemble_views = getattr(train_app.datamodule, "num_ensemble_views", 10) 60 | num_spatial_crops = getattr(train_app.datamodule, "num_spatial_crops", 3) 61 | self.assertIsNotNone(video_clips_cnts) 62 | for _, sample_cnts in video_clips_cnts.items(): 63 | self.assertEqual(num_ensemble_views * num_spatial_crops, sample_cnts) 64 | -------------------------------------------------------------------------------- /pytorchvideo_trainer/tests/test_task_moco_v2.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 
2 | 3 | # pyre-strict 4 | from torchrecipes.core.base_train_app import BaseTrainApp 5 | from vision.fair.pytorchvideo.pytorchvideo_trainer.tests.util import ( 6 | BaseTrainAppTestCase, 7 | create_small_kinetics_dataset, 8 | run_locally, 9 | tempdir, 10 | ) 11 | 12 | 13 | class TestMOCOV2TrainApp(BaseTrainAppTestCase): 14 | def get_train_app( 15 | self, 16 | root_dir: str, 17 | fast_dev_run: bool = True, 18 | logger: bool = False, 19 | ) -> BaseTrainApp: 20 | create_small_kinetics_dataset(root_dir) 21 | overrides = [ 22 | f"datamodule.dataloader.train.dataset.data_path={root_dir}/train.csv", 23 | f"datamodule.dataloader.val.dataset.data_path={root_dir}/val.csv", 24 | f"datamodule.dataloader.test.dataset.data_path={root_dir}/val.csv", 25 | f"datamodule.dataloader.train.dataset.video_path_prefix={root_dir}", 26 | f"datamodule.dataloader.val.dataset.video_path_prefix={root_dir}", 27 | f"datamodule.dataloader.test.dataset.video_path_prefix={root_dir}", 28 | "datamodule.dataloader.train.num_workers=0", 29 | "datamodule.dataloader.val.num_workers=0", 30 | "datamodule.dataloader.test.num_workers=0", 31 | "module.knn_memory.length=50", 32 | "module.knn_memory.knn_k=2", 33 | "datamodule.dataloader.train.batch_size=2", 34 | "datamodule.dataloader.val.batch_size=2", 35 | "datamodule.dataloader.test.batch_size=2", 36 | "trainer.logger=false", 37 | ] 38 | 39 | app = self.create_app_from_hydra( 40 | config_module="pytorchvideo_trainer.conf", 41 | config_name="moco_v2_train_app_conf", 42 | overrides=overrides, 43 | ) 44 | trainer_overrides = {"fast_dev_run": fast_dev_run, "logger": logger} 45 | self.mock_trainer_params(app, trainer_overrides) 46 | return app 47 | 48 | @run_locally 49 | @tempdir 50 | def test_moco_v2_app_train_test_30_views(self, root_dir: str) -> None: 51 | train_app = self.get_train_app( 52 | root_dir=root_dir, fast_dev_run=False, logger=False 53 | ) 54 | output = train_app.train() 55 | self.assertIsNotNone(output) 56 | output = train_app.test() 57 | self.assertIsNotNone(output) 58 | 59 | video_clips_cnts = getattr(train_app.module, "video_clips_cnts", None) 60 | num_ensemble_views = getattr(train_app.datamodule, "num_ensemble_views", 10) 61 | num_spatial_crops = getattr(train_app.datamodule, "num_spatial_crops", 3) 62 | self.assertIsNotNone(video_clips_cnts) 63 | for _, sample_cnts in video_clips_cnts.items(): 64 | self.assertEqual(num_ensemble_views * num_spatial_crops, sample_cnts) 65 | -------------------------------------------------------------------------------- /pytorchvideo_trainer/tests/test_task_simclr.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 
2 | 3 | # pyre-strict 4 | from torchrecipes.core.base_train_app import BaseTrainApp 5 | from vision.fair.pytorchvideo.pytorchvideo_trainer.tests.util import ( 6 | BaseTrainAppTestCase, 7 | create_small_kinetics_dataset, 8 | run_locally, 9 | tempdir, 10 | ) 11 | 12 | 13 | class TestSimCLRTrainApp(BaseTrainAppTestCase): 14 | def get_train_app( 15 | self, 16 | root_dir: str, 17 | fast_dev_run: bool = True, 18 | logger: bool = False, 19 | ) -> BaseTrainApp: 20 | create_small_kinetics_dataset(root_dir) 21 | overrides = [ 22 | f"datamodule.dataloader.train.dataset.data_path={root_dir}/train.csv", 23 | f"datamodule.dataloader.val.dataset.data_path={root_dir}/val.csv", 24 | f"datamodule.dataloader.test.dataset.data_path={root_dir}/val.csv", 25 | f"datamodule.dataloader.train.dataset.video_path_prefix={root_dir}", 26 | f"datamodule.dataloader.val.dataset.video_path_prefix={root_dir}", 27 | f"datamodule.dataloader.test.dataset.video_path_prefix={root_dir}", 28 | "datamodule.dataloader.train.num_workers=0", 29 | "datamodule.dataloader.val.num_workers=0", 30 | "datamodule.dataloader.test.num_workers=0", 31 | "module.knn_memory.length=50", 32 | "module.knn_memory.knn_k=2", 33 | "datamodule.dataloader.train.batch_size=2", 34 | "datamodule.dataloader.val.batch_size=2", 35 | "datamodule.dataloader.test.batch_size=2", 36 | "trainer.logger=false", 37 | ] 38 | app = self.create_app_from_hydra( 39 | config_module="pytorchvideo_trainer.conf", 40 | config_name="simclr_train_app_conf", 41 | overrides=overrides, 42 | ) 43 | trainer_overrides = {"fast_dev_run": fast_dev_run, "logger": logger} 44 | self.mock_trainer_params(app, trainer_overrides) 45 | return app 46 | 47 | @run_locally 48 | @tempdir 49 | def test_simclr_app_train_test_30_views(self, root_dir: str) -> None: 50 | train_app = self.get_train_app( 51 | root_dir=root_dir, fast_dev_run=False, logger=False 52 | ) 53 | output = train_app.train() 54 | self.assertIsNotNone(output) 55 | output = train_app.test() 56 | self.assertIsNotNone(output) 57 | 58 | video_clips_cnts = getattr(train_app.module, "video_clips_cnts", None) 59 | num_ensemble_views = getattr(train_app.datamodule, "num_ensemble_views", 10) 60 | num_spatial_crops = getattr(train_app.datamodule, "num_spatial_crops", 3) 61 | self.assertIsNotNone(video_clips_cnts) 62 | for _, sample_cnts in video_clips_cnts.items(): 63 | self.assertEqual(num_ensemble_views * num_spatial_crops, sample_cnts) 64 | -------------------------------------------------------------------------------- /setup.cfg: -------------------------------------------------------------------------------- 1 | [isort] 2 | line_length = 88 3 | multi_line_output = 3 4 | include_trailing_comma = True 5 | force_grid_wrap = 0 6 | default_section = THIRDPARTY 7 | lines_after_imports = 2 8 | combine_as_imports = True 9 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
3 | 4 | import os 5 | 6 | from setuptools import find_packages, setup 7 | 8 | 9 | def get_version(): 10 | init_py_path = os.path.join( 11 | os.path.abspath(os.path.dirname(__file__)), "pytorchvideo", "__init__.py" 12 | ) 13 | init_py = open(init_py_path, "r").readlines() 14 | version_line = [ 15 | lines.strip() for lines in init_py if lines.startswith("__version__") 16 | ][0] 17 | version = version_line.split("=")[-1].strip().strip("'\"") 18 | 19 | # Used by CI to build nightly packages. Users should never use it. 20 | # To build a nightly wheel, run: 21 | # BUILD_NIGHTLY=1 python setup.py sdist 22 | if os.getenv("BUILD_NIGHTLY", "0") == "1": 23 | from datetime import datetime 24 | 25 | date_str = datetime.today().strftime("%Y%m%d") 26 | # pip can perform proper comparison for ".post" suffix, 27 | # i.e., "1.1.post1234" >= "1.1" 28 | version = version + ".post" + date_str 29 | 30 | new_init_py = [l for l in init_py if not l.startswith("__version__")] 31 | new_init_py.append('__version__ = "{}"\n'.format(version)) 32 | with open(init_py_path, "w") as f: 33 | f.write("".join(new_init_py)) 34 | 35 | return version 36 | 37 | 38 | def get_name(): 39 | name = "pytorchvideo" 40 | if os.getenv("BUILD_NIGHTLY", "0") == "1": 41 | name += "-nightly" 42 | return name 43 | 44 | 45 | setup( 46 | name=get_name(), 47 | version=get_version(), 48 | license="Apache 2.0", 49 | author="Facebook AI", 50 | url="https://github.com/facebookresearch/pytorchvideo", 51 | description="A video understanding deep learning library.", 52 | python_requires=">=3.7", 53 | install_requires=[ 54 | "fvcore", 55 | "av", 56 | "parameterized", 57 | "iopath", 58 | "networkx", 59 | ], 60 | extras_require={ 61 | "test": ["coverage", "pytest", "opencv-python", "decord"], 62 | "dev": [ 63 | "opencv-python", 64 | "decord", 65 | "black==20.8b1", 66 | "sphinx", 67 | "isort==4.3.21", 68 | "flake8==3.8.1", 69 | "flake8-bugbear", 70 | "flake8-comprehensions", 71 | "pre-commit", 72 | "nbconvert", 73 | "bs4", 74 | "autoflake==1.4", 75 | ], 76 | "opencv-python": [ 77 | "opencv-python", 78 | ], 79 | }, 80 | packages=find_packages(exclude=("scripts", "tests")), 81 | ) 82 | -------------------------------------------------------------------------------- /tests/README.md: -------------------------------------------------------------------------------- 1 | ## Unit Tests 2 | 3 | 4 | Before running the tests, please ensure that you have installed the necessary additional test dependencies. 5 | If not installed, check the [install-README](https://github.com/facebookresearch/pytorchvideo/blob/main/INSTALL.md) on how to do it. 6 | 7 | Use the following command to run the tests: 8 | ``` 9 | # From root of the project 10 | python -m unittest discover -v -s ./tests 11 | ``` 12 | 13 | To generate the coverage reports, please run the following command: 14 | ``` 15 | # Install Coverage using 16 | pip install coverage 17 | 18 | # From root of the project 19 | coverage run -m unittest discover -v -s tests 20 | ``` 21 | 22 | -------------------------------------------------------------------------------- /tests/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | -------------------------------------------------------------------------------- /tests/benchmark_transforms.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
2 | 3 | import unittest 4 | from typing import Callable 5 | 6 | import torch 7 | from fvcore.common.benchmark import benchmark 8 | from pytorchvideo.data.utils import thwc_to_cthw 9 | from pytorchvideo.transforms.functional import short_side_scale 10 | from utils import create_dummy_video_frames 11 | 12 | 13 | class TestBenchmarkTransforms(unittest.TestCase): 14 | def setUp(self): 15 | super().setUp() 16 | torch.set_rng_state(torch.manual_seed(42).get_state()) 17 | 18 | def test_benchmark_short_side_scale_pytorch(self, num_iters: int = 10) -> None: 19 | """ 20 | Benchmark scale operation with pytorch backend. 21 | Args: 22 | num_iters (int): number of iterations to perform benchmarking. 23 | """ 24 | kwargs_list = [ 25 | {"temporal_size": 8, "ori_spatial_size": (128, 128), "dst_short_size": 112}, 26 | { 27 | "temporal_size": 16, 28 | "ori_spatial_size": (128, 128), 29 | "dst_short_size": 112, 30 | }, 31 | { 32 | "temporal_size": 32, 33 | "ori_spatial_size": (128, 128), 34 | "dst_short_size": 112, 35 | }, 36 | {"temporal_size": 8, "ori_spatial_size": (256, 256), "dst_short_size": 224}, 37 | { 38 | "temporal_size": 16, 39 | "ori_spatial_size": (256, 256), 40 | "dst_short_size": 224, 41 | }, 42 | { 43 | "temporal_size": 32, 44 | "ori_spatial_size": (256, 256), 45 | "dst_short_size": 224, 46 | }, 47 | {"temporal_size": 8, "ori_spatial_size": (320, 320), "dst_short_size": 224}, 48 | { 49 | "temporal_size": 16, 50 | "ori_spatial_size": (320, 320), 51 | "dst_short_size": 224, 52 | }, 53 | { 54 | "temporal_size": 32, 55 | "ori_spatial_size": (320, 320), 56 | "dst_short_size": 224, 57 | }, 58 | ] 59 | 60 | def _init_benchmark_short_side_scale(**kwargs) -> Callable: 61 | x = thwc_to_cthw( 62 | create_dummy_video_frames( 63 | kwargs["temporal_size"], 64 | kwargs["ori_spatial_size"][0], 65 | kwargs["ori_spatial_size"][1], 66 | ) 67 | ).to(dtype=torch.float32) 68 | 69 | def func_to_benchmark() -> None: 70 | _ = short_side_scale(x, kwargs["dst_short_size"]) 71 | return 72 | 73 | return func_to_benchmark 74 | 75 | benchmark( 76 | _init_benchmark_short_side_scale, 77 | "benchmark_short_side_scale_pytorch", 78 | kwargs_list, 79 | num_iters=num_iters, 80 | warmup_iters=2, 81 | ) 82 | self.assertTrue(True) 83 | -------------------------------------------------------------------------------- /tests/test_accelerator_efficient_blocks_mobile_cpu_activation_attention.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 
2 | 3 | import logging 4 | import unittest 5 | from copy import deepcopy 6 | 7 | import torch 8 | from pytorchvideo.layers.accelerator.mobile_cpu.activation_functions import ( 9 | supported_act_functions, 10 | ) 11 | from pytorchvideo.layers.accelerator.mobile_cpu.attention import SqueezeExcitation 12 | 13 | 14 | class TestActivationAttentionEquivalency(unittest.TestCase): 15 | def test_activation_equivalency(self): 16 | # Input tensor 17 | input_tensor = torch.randn(1, 3, 4, 6, 6) 18 | for iter_activation_name in supported_act_functions: 19 | act_func_ref = supported_act_functions[iter_activation_name]() 20 | act_func_convert = deepcopy(act_func_ref) 21 | act_func_convert.convert() 22 | # Get output of both activations 23 | out0 = act_func_ref(input_tensor) 24 | out1 = act_func_convert(input_tensor) 25 | # Check arithmetic equivalency 26 | max_err = float(torch.max(torch.abs(out0 - out1))) 27 | 28 | logging.info( 29 | f"test_activation_equivalency: {iter_activation_name} max_err {max_err}" 30 | ) 31 | self.assertTrue(max_err < 1e-3) 32 | 33 | def test_squeeze_excite_equivalency(self): 34 | # Input tensor 35 | input_tensor = torch.randn(1, 16, 4, 6, 6) 36 | # Instantiate ref and convert se modules. 37 | se_ref = SqueezeExcitation(16, num_channels_reduced=2, is_3d=True) 38 | se_ref.eval() 39 | se_convert = deepcopy(se_ref) 40 | se_convert.convert((1, 16, 4, 6, 6)) 41 | # Get output of both activations 42 | out0 = se_ref(input_tensor) 43 | out1 = se_convert(input_tensor) 44 | # Check arithmetic equivalency 45 | max_err = float(torch.max(torch.abs(out0 - out1))) 46 | rel_err = torch.abs((out0 - out1) / out0) 47 | max_rel_err = float(torch.max(rel_err)) 48 | 49 | logging.info( 50 | ( 51 | "test_squeeze_excite_equivalency: " 52 | f"max_err {max_err}, max_rel_err {max_rel_err}" 53 | ) 54 | ) 55 | self.assertTrue(max_err < 1e-3) 56 | -------------------------------------------------------------------------------- /tests/test_accelerator_efficient_blocks_mobile_cpu_head_layer.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 
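# --- Illustrative sketch (added note, not part of the original test) ---
# The test below checks a common classification head built from accelerator
# blocks: global average pooling followed by a fully connected projection.
# A minimal version of that head, converted for deployment, looks like this;
# the input size (1, 3, 4, 6, 6) matches the 5D case used in the test.
def _example_efficient_head():
    import torch
    from pytorchvideo.layers.accelerator.mobile_cpu.fully_connected import FullyConnected
    from pytorchvideo.layers.accelerator.mobile_cpu.pool import AdaptiveAvgPool3dOutSize1

    pool = AdaptiveAvgPool3dOutSize1()
    fc = FullyConnected(3, 8)

    pool.convert((1, 3, 4, 6, 6))  # fixed input size enables the optimized pooling path
    fc.convert()

    x = torch.randn(1, 3, 4, 6, 6)
    out = fc(pool(x).permute((0, 2, 3, 4, 1)))  # channels-last before the linear layer
    return out.shape  # (1, 1, 1, 1, 8)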
2 | 3 | import logging 4 | import unittest 5 | from copy import deepcopy 6 | 7 | import torch 8 | from pytorchvideo.layers.accelerator.mobile_cpu.fully_connected import FullyConnected 9 | from pytorchvideo.layers.accelerator.mobile_cpu.pool import ( 10 | AdaptiveAvgPool2d, 11 | AdaptiveAvgPool2dOutSize1, 12 | AdaptiveAvgPool3d, 13 | AdaptiveAvgPool3dOutSize1, 14 | ) 15 | 16 | 17 | class TestHeadLayerEquivalency(unittest.TestCase): 18 | def test_head_layer_equivalency(self): 19 | for input_dim in (4, 5): # 4 for BCHW, 5 for BCTHW 20 | input_tensor_size = (1, 3, 4, 6, 6) if input_dim == 5 else (1, 3, 6, 6) 21 | input_tensor = torch.randn(input_tensor_size) 22 | # Build up common head layer: pool + linear 23 | if input_dim == 5: 24 | pool_efficient_block_ref = AdaptiveAvgPool3d(1) 25 | pool_efficient_block_1 = AdaptiveAvgPool3d(1) 26 | pool_efficient_block_2 = AdaptiveAvgPool3dOutSize1() 27 | 28 | else: 29 | pool_efficient_block_ref = AdaptiveAvgPool2d(1) 30 | pool_efficient_block_1 = AdaptiveAvgPool2d(1) 31 | pool_efficient_block_2 = AdaptiveAvgPool2dOutSize1() 32 | pool_efficient_block_1.convert() 33 | pool_efficient_block_2.convert(input_tensor_size) 34 | linear_ref = FullyConnected(3, 8) 35 | linear_1 = deepcopy(linear_ref) 36 | linear_1.convert() 37 | 38 | ref_out = pool_efficient_block_ref(input_tensor) 39 | if input_dim == 5: 40 | ref_out = ref_out.permute((0, 2, 3, 4, 1)) 41 | else: 42 | ref_out = ref_out.permute((0, 2, 3, 1)) 43 | ref_out = linear_ref(ref_out) 44 | 45 | head_out_1 = pool_efficient_block_1(input_tensor) 46 | if input_dim == 5: 47 | head_out_1 = head_out_1.permute((0, 2, 3, 4, 1)) 48 | else: 49 | head_out_1 = head_out_1.permute((0, 2, 3, 1)) 50 | head_out_1 = linear_1(head_out_1) 51 | # Check arithmetic equivalency 52 | max_err = float(torch.max(torch.abs(ref_out - head_out_1))) 53 | rel_err = torch.abs((ref_out - head_out_1) / ref_out) 54 | max_rel_err = float(torch.max(rel_err)) 55 | logging.info( 56 | ( 57 | "test_head_layer_equivalency: AdaptiveAvgPool + Linear" 58 | f"input tensor size: {input_tensor_size}" 59 | f"max_err {max_err}, max_rel_err {max_rel_err}" 60 | ) 61 | ) 62 | self.assertTrue(max_err < 1e-3) 63 | 64 | head_out_2 = pool_efficient_block_2(input_tensor) 65 | if input_dim == 5: 66 | head_out_2 = head_out_2.permute((0, 2, 3, 4, 1)) 67 | else: 68 | head_out_2 = head_out_2.permute((0, 2, 3, 1)) 69 | head_out_2 = linear_1(head_out_2) 70 | # Check arithmetic equivalency 71 | max_err = float(torch.max(torch.abs(ref_out - head_out_2))) 72 | rel_err = torch.abs((ref_out - head_out_2) / ref_out) 73 | max_rel_err = float(torch.max(rel_err)) 74 | logging.info( 75 | ( 76 | "test_head_layer_equivalency: AdaptiveAvgPoolOutSize1 + Linear" 77 | f"input tensor size: {input_tensor_size}" 78 | f"max_err {max_err}, max_rel_err {max_rel_err}" 79 | ) 80 | ) 81 | self.assertTrue(max_err < 1e-3) 82 | -------------------------------------------------------------------------------- /tests/test_accelerator_efficient_blocks_mobile_cpu_residual_block.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 
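# --- Illustrative sketch (added note, not part of the original test) ---
# The test below sweeps many configurations; the underlying deployment flow for
# a single X3D bottleneck block is simply: build, switch to eval, deep-copy,
# convert the copy with a fixed input blob size, and compare outputs.
def _example_convert_x3d_bottleneck_block():
    from copy import deepcopy

    import torch
    from pytorchvideo.models.accelerator.mobile_cpu.residual_blocks import (
        X3dBottleneckBlock,
    )

    block = X3dBottleneckBlock(
        3, 16, 3, use_residual=True, spatial_stride=1, se_ratio=0.5
    )
    block.eval()  # equivalency is checked in eval mode

    deploy_block = deepcopy(block)
    deploy_block.convert((1, 3, 4, 6, 6))  # input blob size is fixed at convert time

    x = torch.randn(1, 3, 4, 6, 6)
    assert float(torch.max(torch.abs(block(x) - deploy_block(x)))) < 1e-3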
2 | 3 | import logging 4 | import unittest 5 | from copy import deepcopy 6 | 7 | import torch 8 | from pytorchvideo.models.accelerator.mobile_cpu.residual_blocks import ( 9 | X3dBottleneckBlock, 10 | ) 11 | 12 | 13 | class TestConv3dBlockEquivalency(unittest.TestCase): 14 | def test_X3dBottleneckBlock_equivalency(self): 15 | # Input tensor 16 | input_blob_size = (1, 3, 4, 6, 6) 17 | input_tensor = torch.randn(input_blob_size) 18 | for use_residual in (True, False): 19 | for spatial_stride in (1, 2): 20 | for se_ratio in (0, 0.5): 21 | for act_func_0 in ("relu", "swish", "hswish", "identity"): 22 | for act_func_1 in ("relu", "swish", "hswish", "identity"): 23 | for act_func_2 in ("relu", "swish", "hswish", "identity"): 24 | act_func_tuple = (act_func_0, act_func_1, act_func_2) 25 | # X3dBottleneckBlock 26 | x3d_block_ref = X3dBottleneckBlock( 27 | 3, 28 | 16, 29 | 3, 30 | use_residual=use_residual, 31 | spatial_stride=spatial_stride, 32 | se_ratio=se_ratio, 33 | act_functions=act_func_tuple, 34 | ) 35 | x3d_block = deepcopy(x3d_block_ref) 36 | # Get ref output 37 | x3d_block_ref.eval() 38 | out_ref = x3d_block_ref(input_tensor) 39 | # Convert into deployment mode 40 | x3d_block.convert(input_blob_size) 41 | out = x3d_block(input_tensor) 42 | # Check arithmetic equivalency 43 | max_err = float(torch.max(torch.abs(out_ref - out))) 44 | rel_err = torch.abs((out_ref - out) / out_ref) 45 | max_rel_err = float(torch.max(rel_err)) 46 | logging.info( 47 | ( 48 | "test_X3dBottleneckBlock_equivalency: " 49 | f"current setting: use_residual {use_residual}, " 50 | f"spatial_stride {spatial_stride}, " 51 | f"se_ratio {se_ratio}, " 52 | f"act_func_tuple {act_func_tuple}, " 53 | f"max_err {max_err}, max_rel_err {max_rel_err}" 54 | ) 55 | ) 56 | self.assertTrue(max_err < 1e-3) 57 | -------------------------------------------------------------------------------- /tests/test_data_frame_video.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | 3 | import unittest 4 | 5 | import pytest 6 | from pytorchvideo.data.frame_video import FrameVideo 7 | from utils import temp_frame_video 8 | 9 | 10 | class TestFrameVideo(unittest.TestCase): 11 | def test_frame_video_works(self): 12 | frame_names = [f"{str(i)}.png" for i in range(3)] 13 | with temp_frame_video(frame_names) as (f_name, data): 14 | frame_paths = [f_name / x for x in frame_names] 15 | test_video = FrameVideo.from_frame_paths(frame_paths) 16 | expected_duration = ( 17 | 0.1 # Total duration of 3 frames at 30fps is 0.1 seconds. 
18 | ) 19 | self.assertEqual(test_video.duration, expected_duration) 20 | 21 | # All frames (0 - 0.1 seconds) 22 | clip = test_video.get_clip(0, 0.1) 23 | frames, indices = clip["video"], clip["frame_indices"] 24 | self.assertTrue(frames.equal(data)) 25 | self.assertEqual(indices, [0, 1, 2]) 26 | 27 | # All frames (0 - 0.1 seconds), filtred to middle frame 28 | clip = test_video.get_clip(0, 0.1, lambda lst: lst[1:2]) 29 | frames, indices = clip["video"], clip["frame_indices"] 30 | self.assertTrue(frames.equal(data[:, 1:2])) 31 | self.assertEqual(indices, [1]) 32 | 33 | # 2 frames (0 - 0.066 seconds) 34 | clip = test_video.get_clip(0, 0.066) 35 | frames, indices = clip["video"], clip["frame_indices"] 36 | self.assertTrue(frames.equal(data[:, :2])) 37 | self.assertEqual(indices, [0, 1]) 38 | 39 | # No frames (3 - 5 seconds) 40 | result = test_video.get_clip(3, 5) 41 | self.assertEqual(result, None) 42 | 43 | def test_open_video_failure(self): 44 | test_video = FrameVideo.from_frame_paths(["non_existent_file.txt"]) 45 | with pytest.raises(Exception): 46 | test_video.get_clip(0, 0.01) # duration is 1 / 30 because one frame 47 | 48 | def test_empty_frames_failure(self): 49 | with pytest.raises(AssertionError): 50 | FrameVideo.from_frame_paths([]) 51 | -------------------------------------------------------------------------------- /tests/test_fuse_bn.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | 3 | import unittest 4 | 5 | import torch 6 | from pytorchvideo.models.vision_transformers import ( 7 | create_multiscale_vision_transformers, 8 | ) 9 | 10 | 11 | class TestFuseBN(unittest.TestCase): 12 | def setUp(self): 13 | super().setUp() 14 | torch.set_rng_state(torch.manual_seed(42).get_state()) 15 | 16 | def test_fuse_bn(self): 17 | model = create_multiscale_vision_transformers( 18 | spatial_size=224, 19 | temporal_size=8, 20 | norm="batchnorm", 21 | embed_dim_mul=[[1, 2.0], [3, 2.0], [14, 2.0]], 22 | atten_head_mul=[[1, 2.0], [3, 2.0], [14, 2.0]], 23 | pool_q_stride_size=[[1, 1, 2, 2], [3, 1, 2, 2], [14, 1, 2, 2]], 24 | pool_kv_stride_adaptive=[1, 8, 8], 25 | pool_kvq_kernel=[3, 3, 3], 26 | cls_embed_on=False, 27 | ) 28 | 29 | for blk in model.blocks: 30 | blk.norm1 = rand_init_bn(blk.norm1) 31 | blk.norm2 = rand_init_bn(blk.norm2) 32 | if blk.attn.norm_q: 33 | blk.attn.norm_q = rand_init_bn(blk.attn.norm_q) 34 | if blk.attn.norm_k: 35 | blk.attn.norm_k = rand_init_bn(blk.attn.norm_k) 36 | if blk.attn.norm_v: 37 | blk.attn.norm_v = rand_init_bn(blk.attn.norm_v) 38 | 39 | model.eval() 40 | 41 | x = torch.randn((4, 3, 8, 224, 224)) 42 | expected_output = model(x) 43 | model.fuse_bn() 44 | output = model(x) 45 | self.assertTrue(torch.all(torch.isclose(output, expected_output, atol=1e-5))) 46 | self.assertTrue( 47 | len( 48 | [ 49 | layer 50 | for layer in model.modules() 51 | if isinstance(layer, (torch.nn.BatchNorm1d, torch.nn.BatchNorm3d)) 52 | ] 53 | ) 54 | == 0 55 | ) 56 | 57 | 58 | def rand_init_bn(bn): 59 | bn.weight.data.uniform_(0.5, 1.5) 60 | bn.bias.data.uniform_(-0.5, 0.5) 61 | bn.running_var.data.uniform_(0.5, 1.5) 62 | bn.running_mean.data.uniform_(-0.5, 0.5) 63 | return bn 64 | -------------------------------------------------------------------------------- /tests/test_layers_drop_path.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 
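# --- Illustrative sketch (added note, not part of the original test) ---
# DropPath implements stochastic depth: in training mode it zeroes the residual
# branch for a random subset of samples (with probability drop_prob) and
# rescales the rest. The residual wiring shown here is the conventional usage
# pattern, assumed for illustration rather than taken from this test.
def _example_drop_path_residual_block():
    import torch
    from torch import nn
    from pytorchvideo.layers import DropPath

    class ResidualMLPBlock(nn.Module):
        def __init__(self, dim: int, drop_prob: float = 0.2) -> None:
            super().__init__()
            self.mlp = nn.Sequential(nn.Linear(dim, dim), nn.GELU(), nn.Linear(dim, dim))
            self.drop_path = DropPath(drop_prob=drop_prob)

        def forward(self, x: torch.Tensor) -> torch.Tensor:
            # The branch output, not the identity path, is subject to dropping.
            return x + self.drop_path(self.mlp(x))

    block = ResidualMLPBlock(dim=20)
    return block(torch.rand(64, 10, 20)).shape  # same shape as the input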
2 | 3 | import unittest 4 | 5 | import torch 6 | from pytorchvideo.layers import DropPath 7 | 8 | 9 | class TestDropPath(unittest.TestCase): 10 | def setUp(self): 11 | super().setUp() 12 | torch.set_rng_state(torch.manual_seed(42).get_state()) 13 | 14 | def test_dropPath(self): 15 | # Input should be same if drop_prob = 0. 16 | net_drop_path = DropPath(drop_prob=0.0) 17 | fake_input = torch.rand(64, 10, 20) 18 | output = net_drop_path(fake_input) 19 | self.assertTrue(output.equal(fake_input)) 20 | # Test when drop_prob > 0. 21 | net_drop_path = DropPath(drop_prob=0.5) 22 | fake_input = torch.rand(64, 10, 20) 23 | output = net_drop_path(fake_input) 24 | self.assertTrue(output.shape, fake_input.shape) 25 | -------------------------------------------------------------------------------- /tests/test_layers_fusion.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | 3 | import unittest 4 | 5 | import torch 6 | from pytorchvideo.layers import make_fusion_layer 7 | 8 | 9 | class TestFusion(unittest.TestCase): 10 | def setUp(self): 11 | super().setUp() 12 | torch.set_rng_state(torch.manual_seed(42).get_state()) 13 | 14 | self.fake_input_1 = torch.Tensor( 15 | [[[4, -2], [3, 0]], [[0, 2], [4, 3]], [[3, 1], [5, 2]]] 16 | ).float() 17 | self.fake_input_2 = torch.Tensor( 18 | [[[1, 2], [3, 4]], [[5, 6], [6, 5]], [[4, 3], [2, 1]]] 19 | ).float() 20 | 21 | def test_reduce_fusion_layers(self): 22 | expected_output_for_method = { 23 | "max": torch.Tensor( 24 | [[[4, 2], [3, 4]], [[5, 6], [6, 5]], [[4, 3], [5, 2]]] 25 | ).float(), 26 | "sum": torch.Tensor( 27 | [[[5, 0], [6, 4]], [[5, 8], [10, 8]], [[7, 4], [7, 3]]] 28 | ).float(), 29 | "prod": torch.Tensor( 30 | [[[4, -4], [9, 0]], [[0, 12], [24, 15]], [[12, 3], [10, 2]]] 31 | ).float(), 32 | } 33 | 34 | for method, expected_output in expected_output_for_method.items(): 35 | model = make_fusion_layer( 36 | method, [self.fake_input_1.shape[-1], self.fake_input_2.shape[-1]] 37 | ) 38 | output = model([self.fake_input_1, self.fake_input_2]) 39 | self.assertTrue(torch.equal(output, expected_output)) 40 | self.assertEqual(model.output_dim, self.fake_input_1.shape[-1]) 41 | 42 | def test_concat_fusion(self): 43 | model = make_fusion_layer( 44 | "concat", [self.fake_input_1.shape[-1], self.fake_input_2.shape[-1]] 45 | ) 46 | input_list = [self.fake_input_1, self.fake_input_2] 47 | output = model(input_list) 48 | expected_output = torch.cat(input_list, dim=-1) 49 | self.assertTrue(torch.equal(output, expected_output)) 50 | 51 | expected_shape = self.fake_input_1.shape[-1] + self.fake_input_2.shape[-1] 52 | self.assertEqual(model.output_dim, expected_shape) 53 | 54 | def test_temporal_concat_fusion(self): 55 | model = make_fusion_layer( 56 | "temporal_concat", 57 | [self.fake_input_1.shape[-1], self.fake_input_2.shape[-1]], 58 | ) 59 | input_list = [self.fake_input_1, self.fake_input_2] 60 | output = model(input_list) 61 | 62 | expected_output = torch.cat(input_list, dim=-2) 63 | self.assertTrue(torch.equal(output, expected_output)) 64 | self.assertEqual(model.output_dim, self.fake_input_2.shape[-1]) 65 | -------------------------------------------------------------------------------- /tests/test_layers_mlp.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 
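# --- Illustrative sketch (added note, not part of the original test) ---
# `make_multilayer_perceptron` builds an MLP from a list of layer widths (the
# first entry is the input dimension) and returns both the module and its
# output dimension. The arguments below are a trimmed version of the
# combinations swept in the test that follows.
def _example_make_multilayer_perceptron():
    import torch
    from torch import nn
    from pytorchvideo.layers import make_multilayer_perceptron

    mlp, output_dim = make_multilayer_perceptron(
        fully_connected_dims=[64, 128, 32],
        mid_activation=nn.ReLU,
        final_activation=None,
        norm=None,
    )
    out = mlp(torch.rand(8, 64))
    return out.shape, output_dim  # (torch.Size([8, 32]), 32)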
2 | 3 | import itertools 4 | import unittest 5 | 6 | import torch 7 | import torch.nn as nn 8 | from pytorchvideo.layers import make_multilayer_perceptron 9 | 10 | 11 | class TestMLP(unittest.TestCase): 12 | def setUp(self): 13 | super().setUp() 14 | torch.set_rng_state(torch.manual_seed(42).get_state()) 15 | 16 | def test_make_multilayer_perceptron(self): 17 | fake_input = torch.rand((8, 64)) 18 | fcs = [64, 128, 64, 32] 19 | mid_activations = [nn.ReLU, nn.Sigmoid] 20 | final_activations = [nn.ReLU, nn.Sigmoid, None] 21 | norms = [nn.LayerNorm, nn.BatchNorm1d, None] 22 | for mid_act, final_act, norm in itertools.product( 23 | mid_activations, final_activations, norms 24 | ): 25 | mlp, output_dim = make_multilayer_perceptron( 26 | fully_connected_dims=fcs, 27 | mid_activation=mid_act, 28 | final_activation=final_act, 29 | norm=norm, 30 | dropout_rate=0.5, 31 | ) 32 | 33 | self.assertEqual(output_dim, 32) 34 | 35 | output = mlp(fake_input) 36 | self.assertTrue(output.shape, torch.Size([8, 32])) 37 | -------------------------------------------------------------------------------- /tests/test_layers_squeeze_excitation.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | 3 | import copy 4 | import unittest 5 | 6 | import torch 7 | import torch.nn as nn 8 | from pytorchvideo.layers.squeeze_excitation import ( 9 | create_audio_2d_squeeze_excitation_block, 10 | ) 11 | 12 | 13 | class Test2DSqueezeExcitationBlock(unittest.TestCase): 14 | def setUp(self): 15 | self.layer_args = { 16 | "dim_in": 32, 17 | "dim_out": 32, 18 | "use_se": True, 19 | "se_reduction_ratio": 16, 20 | "branch_fusion": lambda x, y: x + y, 21 | "conv_a_kernel_size": 3, 22 | "conv_a_stride": 1, 23 | "conv_a_padding": 1, 24 | "conv_b_kernel_size": 3, 25 | "conv_b_stride": 1, 26 | "conv_b_padding": 1, 27 | "norm": nn.BatchNorm2d, 28 | "norm_eps": 1e-5, 29 | "norm_momentum": 0.1, 30 | "activation": nn.ReLU, 31 | } 32 | 33 | self.batchsize = 1 34 | self.forward_pass_configs = [ 35 | { 36 | "input": torch.rand(self.batchsize, self.layer_args["dim_in"], 100, 40), 37 | "output_shape": torch.Size( 38 | [self.batchsize, self.layer_args["dim_out"], 100, 40] 39 | ), 40 | }, 41 | ] 42 | 43 | def test_forward_pass(self): 44 | for split_config in self.forward_pass_configs: 45 | layer_args = copy.deepcopy(self.layer_args) 46 | model = create_audio_2d_squeeze_excitation_block(**layer_args) 47 | 48 | out = model(split_config["input"]) 49 | self.assertTrue(isinstance(out, torch.Tensor)) 50 | self.assertEqual(out.size(), split_config["output_shape"]) 51 | -------------------------------------------------------------------------------- /tests/test_losses_soft_target_cross_entropy.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | 3 | import itertools 4 | import unittest 5 | 6 | import torch 7 | import torch.nn.functional as F 8 | from pytorchvideo.losses.soft_target_cross_entropy import SoftTargetCrossEntropyLoss 9 | 10 | 11 | class TestSoftTargetCrossEntropyLoss(unittest.TestCase): 12 | def setUp(self): 13 | super().setUp() 14 | torch.set_rng_state(torch.manual_seed(42).get_state()) 15 | 16 | def test_soft_target_cross_entropy_loss(self): 17 | """ 18 | Test the soft target cross entropy loss. 
19 | """ 20 | for batch_size, num_class, use_1D_target in itertools.product( 21 | (1, 8), (2, 10), (True, False) 22 | ): 23 | loss = SoftTargetCrossEntropyLoss() 24 | 25 | # Test forwarding. 26 | for ( 27 | input_tensor, 28 | target_tensor, 29 | ) in TestSoftTargetCrossEntropyLoss._get_inputs( 30 | batch_size=batch_size, num_class=num_class, use_1D_target=use_1D_target 31 | ): 32 | output_tensor = loss(input_tensor, target_tensor) 33 | output_shape = output_tensor.shape 34 | 35 | self.assertEqual( 36 | output_shape, 37 | torch.Size([]), 38 | "Output shape {} is different from expected.".format(output_shape), 39 | ) 40 | 41 | # If target is normalized, output_tensor must match direct eval 42 | if target_tensor.ndim == 1 or all(target_tensor.sum(dim=-1) == 1): 43 | _target_tensor = target_tensor 44 | if target_tensor.ndim == 1: 45 | _target_tensor = torch.nn.functional.one_hot( 46 | target_tensor, num_class 47 | ) 48 | 49 | _output_tensor = torch.sum( 50 | -_target_tensor * F.log_softmax(input_tensor, dim=-1), dim=-1 51 | ).mean() 52 | 53 | self.assertTrue(abs(_output_tensor - output_tensor) < 1e-6) 54 | 55 | @staticmethod 56 | def _get_inputs( 57 | batch_size: int = 16, num_class: int = 400, use_1D_target: bool = True 58 | ) -> torch.tensor: 59 | """ 60 | Provide different tensors as test cases. 61 | 62 | Yield: 63 | (torch.tensor): tensor as test case input. 64 | """ 65 | # Prepare random tensor as test cases. 66 | if use_1D_target: 67 | target_shape = (batch_size,) 68 | else: 69 | target_shape = (batch_size, num_class) 70 | input_shape = (batch_size, num_class) 71 | 72 | yield torch.rand(input_shape), torch.randint(num_class, target_shape) 73 | -------------------------------------------------------------------------------- /tests/test_models_audio_visual_slowfast.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | 3 | import itertools 4 | import unittest 5 | from typing import Tuple 6 | 7 | import torch 8 | from pytorchvideo.models.audio_visual_slowfast import create_audio_visual_slowfast 9 | from pytorchvideo.transforms.functional import uniform_temporal_subsample_repeated 10 | from torch import nn 11 | 12 | 13 | class TestAVSlowFast(unittest.TestCase): 14 | def setUp(self): 15 | super().setUp() 16 | torch.set_rng_state(torch.manual_seed(42).get_state()) 17 | 18 | def test_create_avslowfast_with_callable(self): 19 | """ 20 | Test builder `create_audio_visual_slowfast` with callable inputs. 21 | """ 22 | for norm, activation in itertools.product( 23 | (nn.BatchNorm3d, None), (nn.ReLU, nn.Sigmoid, None) 24 | ): 25 | input_channel = 3 26 | 27 | model = create_audio_visual_slowfast( 28 | input_channels=(input_channel, input_channel, 1), 29 | model_depth=18, 30 | norm=norm, 31 | activation=activation, 32 | ) 33 | 34 | # Test forwarding. 35 | for tensor in TestAVSlowFast._get_inputs(input_channel): 36 | with torch.no_grad(): 37 | if tensor[0].shape[1] != input_channel: 38 | with self.assertRaises(RuntimeError): 39 | model(tensor) 40 | continue 41 | 42 | model(tensor) 43 | 44 | @staticmethod 45 | def _get_inputs( 46 | channel: int = 3, 47 | clip_length: int = 64, 48 | audio_clip_length: int = 128, 49 | crop_size: int = 224, 50 | audio_size: int = 80, 51 | frame_ratios: Tuple[int] = (8, 2), 52 | audio_frame_ratio: int = 1, 53 | ) -> Tuple[torch.Tensor]: 54 | """ 55 | Provide different tensors as test cases. 56 | 57 | Yield: 58 | Tuple[torch.Tensor]: tensors as test case input. 
59 | """ 60 | # Prepare random inputs as test cases. 61 | shape = (1, channel, clip_length, crop_size, crop_size) 62 | audio_shape = (1, 1, audio_clip_length, 1, audio_size) 63 | output = uniform_temporal_subsample_repeated( 64 | torch.rand(shape), frame_ratios=frame_ratios, temporal_dim=2 65 | ) 66 | yield output + uniform_temporal_subsample_repeated( 67 | torch.rand(audio_shape), frame_ratios=(audio_frame_ratio,), temporal_dim=2 68 | ) 69 | -------------------------------------------------------------------------------- /tests/test_models_byol.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | 3 | import unittest 4 | 5 | import torch 6 | from pytorchvideo.models.byol import BYOL 7 | from torch import nn 8 | 9 | 10 | class TestBYOL(unittest.TestCase): 11 | def setUp(self): 12 | super().setUp() 13 | torch.set_rng_state(torch.manual_seed(42).get_state()) 14 | 15 | def test_byol(self): 16 | byol = BYOL( 17 | backbone=nn.Linear(8, 4), 18 | projector=nn.Linear(4, 4), 19 | feature_dim=4, 20 | norm=nn.BatchNorm1d, 21 | ) 22 | for crop1, crop2 in TestBYOL._get_inputs(): 23 | byol(crop1, crop2) 24 | 25 | @staticmethod 26 | def _get_inputs() -> torch.tensor: 27 | """ 28 | Provide different tensors as test cases. 29 | 30 | Yield: 31 | (torch.tensor): tensor as test case input. 32 | """ 33 | # Prepare random inputs as test cases. 34 | shapes = ((2, 8),) 35 | for shape in shapes: 36 | yield torch.rand(shape), torch.rand(shape) 37 | -------------------------------------------------------------------------------- /tests/test_models_hub_vision_transformers.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | 3 | import os 4 | import unittest 5 | 6 | import torch 7 | import torch.nn as nn 8 | from pytorchvideo.models.hub.utils import hub_model_builder 9 | 10 | 11 | class TestHubVisionTransformers(unittest.TestCase): 12 | def setUp(self): 13 | super().setUp() 14 | torch.set_rng_state(torch.manual_seed(42).get_state()) 15 | 16 | def test_load_hubconf(self): 17 | def test_load_mvit_(model_name, pretrained): 18 | path = os.path.join( 19 | os.path.dirname(os.path.realpath(__file__)), 20 | "..", 21 | ) 22 | model = torch.hub.load( 23 | repo_or_dir=path, 24 | source="local", 25 | model=model_name, 26 | pretrained=pretrained, 27 | ) 28 | self.assertIsNotNone(model) 29 | 30 | models = [ 31 | "mvit_base_16x4", 32 | "mvit_base_16", 33 | "mvit_base_32x3", 34 | ] 35 | pretrains = [False, False, False] 36 | 37 | for model_name, pretrain in zip(models, pretrains): 38 | test_load_mvit_(model_name, pretrain) 39 | 40 | def test_hub_model_builder(self): 41 | def _fake_model(in_features=10, out_features=10) -> nn.Module: 42 | """ 43 | A fake model builder with a linear layer. 44 | """ 45 | model = nn.Linear(in_features, out_features) 46 | return model 47 | 48 | in_fea = 5 49 | default_config = {"in_features": in_fea} 50 | model = hub_model_builder( 51 | model_builder_func=_fake_model, default_config=default_config 52 | ) 53 | self.assertEqual(model.in_features, in_fea) 54 | self.assertEqual(model.out_features, 10) 55 | 56 | # Test case where add_config overwrites default_config. 
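        # (Added clarification: keyword arguments passed directly to
        # hub_model_builder act as user overrides, so the in_features /
        # out_features below take precedence over the in_features entry
        # supplied via default_config.)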
57 | in_fea = 5 58 | default_config = {"in_features": in_fea} 59 | add_in_fea = 2 60 | add_out_fea = 3 61 | 62 | model = hub_model_builder( 63 | model_builder_func=_fake_model, 64 | default_config=default_config, 65 | in_features=add_in_fea, 66 | out_features=add_out_fea, 67 | ) 68 | self.assertEqual(model.in_features, add_in_fea) 69 | self.assertEqual(model.out_features, add_out_fea) 70 | 71 | # Test assertions. 72 | self.assertRaises( 73 | AssertionError, 74 | hub_model_builder, 75 | model_builder_func=_fake_model, 76 | pretrained=True, 77 | default_config={}, 78 | fake_input=None, 79 | ) 80 | -------------------------------------------------------------------------------- /tests/test_models_memory_bank.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | 3 | import unittest 4 | 5 | import torch 6 | from pytorchvideo.models.memory_bank import MemoryBank 7 | from torch import nn 8 | 9 | 10 | class TestMemoryBank(unittest.TestCase): 11 | def setUp(self): 12 | super().setUp() 13 | torch.set_rng_state(torch.manual_seed(42).get_state()) 14 | 15 | def test_memory_bank(self): 16 | simclr = MemoryBank( 17 | backbone=nn.Linear(8, 4), 18 | mlp=nn.Linear(4, 2), 19 | temperature=0.07, 20 | bank_size=8, 21 | dim=2, 22 | ) 23 | for crop, ind in TestMemoryBank._get_inputs(): 24 | simclr(crop, ind) 25 | 26 | @staticmethod 27 | def _get_inputs(bank_size: int = 8) -> torch.tensor: 28 | """ 29 | Provide different tensors as test cases. 30 | 31 | Yield: 32 | (torch.tensor): tensor as test case input. 33 | """ 34 | # Prepare random inputs as test cases. 35 | shapes = ((2, 8),) 36 | for shape in shapes: 37 | yield torch.rand(shape), torch.randint(0, bank_size, size=(shape[0],)) 38 | -------------------------------------------------------------------------------- /tests/test_simclr.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | 3 | import unittest 4 | 5 | import torch 6 | from pytorchvideo.models.simclr import SimCLR 7 | from torch import nn 8 | 9 | 10 | class TestSimCLR(unittest.TestCase): 11 | def setUp(self): 12 | super().setUp() 13 | torch.set_rng_state(torch.manual_seed(42).get_state()) 14 | 15 | def test_simclr(self): 16 | simclr = SimCLR( 17 | backbone=nn.Linear(8, 4), 18 | mlp=nn.Linear(4, 2), 19 | temperature=0.07, 20 | ) 21 | for crop1, crop2 in TestSimCLR._get_inputs(): 22 | simclr(crop1, crop2) 23 | 24 | @staticmethod 25 | def _get_inputs() -> torch.tensor: 26 | """ 27 | Provide different tensors as test cases. 28 | 29 | Yield: 30 | (torch.tensor): tensor as test case input. 31 | """ 32 | # Prepare random inputs as test cases. 
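        # (Added clarification: each yielded pair plays the role of two
        # augmented views of the same batch, and the trailing dimension (8)
        # matches the nn.Linear(8, 4) backbone used in the test above.)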
33 | shapes = ( 34 | (1, 8), 35 | (2, 8), 36 | ) 37 | for shape in shapes: 38 | yield torch.rand(shape), torch.rand(shape) 39 | -------------------------------------------------------------------------------- /tutorials/video_classification_example/environment.yml: -------------------------------------------------------------------------------- 1 | # Conda environment file 2 | # Usage: `conda env update -f environment.yml` 3 | 4 | name: video_classification_example 5 | 6 | channels: 7 | - conda-forge 8 | - pytorch-nightly 9 | 10 | dependencies: 11 | - pytorch-lightning 12 | - submitit 13 | -------------------------------------------------------------------------------- /tutorials/video_classification_example/slurm.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 3 | 4 | import os 5 | import pathlib 6 | import shutil 7 | 8 | import submitit 9 | 10 | 11 | def init_and_run(run_fn, run_config): 12 | os.environ["RANK"] = os.environ["SLURM_LOCALID"] 13 | os.environ["LOCAL_RANK"] = os.environ["SLURM_LOCALID"] 14 | os.environ["NODE_RANK"] = os.environ["SLURM_LOCALID"] 15 | os.environ["WORLD_SIZE"] = os.environ["SLURM_NTASKS"] 16 | run_fn(run_config) 17 | 18 | 19 | def copy_and_run_with_config(run_fn, run_config, directory, **cluster_config): 20 | working_directory = pathlib.Path(directory) / cluster_config["job_name"] 21 | ignore_list = [ 22 | "lightning_logs", 23 | "logs", 24 | "checkpoints", 25 | "experiments", 26 | ".git", 27 | "output", 28 | "val.csv", 29 | "train.csv", 30 | ] 31 | shutil.copytree(".", working_directory, ignore=lambda x, y: ignore_list) 32 | os.chdir(working_directory) 33 | print(f"Running at {working_directory}") 34 | 35 | executor = submitit.SlurmExecutor(folder=working_directory) 36 | executor.update_parameters(**cluster_config) 37 | job = executor.submit(init_and_run, run_fn, run_config) 38 | print(f"job_id: {job}") 39 | -------------------------------------------------------------------------------- /website/.dockerignore: -------------------------------------------------------------------------------- 1 | */node_modules 2 | *.log 3 | -------------------------------------------------------------------------------- /website/.gitignore: -------------------------------------------------------------------------------- 1 | .DS_Store 2 | 3 | node_modules 4 | 5 | lib/core/metadata.js 6 | lib/core/MetadataBlog.js 7 | 8 | website/translated_docs 9 | website/build/ 10 | website/yarn.lock 11 | website/node_modules 12 | website/i18n/* 13 | -------------------------------------------------------------------------------- /website/docs/tutorial_overview.md: -------------------------------------------------------------------------------- 1 | --- 2 | id: tutorial_overview 3 | title: Tutorials 4 | sidebar_label: Overview 5 | --- 6 | 7 | PyTorchVideo tutorials are designed to help you get acquainted with the library and also give you an idea on how to incorporate different PyTorchVideo components into your own video-research workflow. In the tutorials, through examples, we also show how PyTorchVideo makes it easy to address some of the common deeplearning video use cases. 8 | 9 | PyTorchVideo is built on PyTorch. If you are new to PyTorch, the easiest way to get started is with the [PyTorch: A 60 Minute Blitz](https://pytorch.org/tutorials/beginner/blitz/tensor_tutorial.html#sphx-glr-beginner-blitz-tensor-tutorial-py) tutorial. 
10 | -------------------------------------------------------------------------------- /website/website/core/Footer.js: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (c) 2017-present, Facebook, Inc. 3 | * 4 | * This source code is licensed under the MIT license found in the 5 | * LICENSE file in the root directory of this source tree. 6 | */ 7 | 8 | 9 | const PropTypes = require("prop-types"); 10 | const React = require('react'); 11 | 12 | function SocialFooter(props) { 13 | const repoUrl = `https://github.com/${props.config.organizationName}/${props.config.projectName}`; 14 | return ( 15 | 29 | ); 30 | } 31 | 32 | SocialFooter.propTypes = { 33 | config: PropTypes.object 34 | }; 35 | 36 | class Footer extends React.Component { 37 | docUrl(doc, language) { 38 | const baseUrl = this.props.config.baseUrl; 39 | const docsUrl = this.props.config.docsUrl; 40 | const docsPart = `${docsUrl ? `${docsUrl}/` : ''}`; 41 | const langPart = `${language ? `${language}/` : ''}`; 42 | return `${baseUrl}${docsPart}${langPart}${doc}`; 43 | } 44 | 45 | pageUrl(doc, language) { 46 | const baseUrl = this.props.config.baseUrl; 47 | return baseUrl + (language ? `${language}/` : '') + doc; 48 | } 49 | 50 | render() { 51 | const repoUrl = `https://github.com/${this.props.config.organizationName}/${this.props.config.projectName}`; 52 | return ( 53 | 87 | ); 88 | } 89 | } 90 | 91 | module.exports = Footer; -------------------------------------------------------------------------------- /website/website/package.json: -------------------------------------------------------------------------------- 1 | { 2 | "scripts": { 3 | "examples": "docusaurus-examples", 4 | "start": "docusaurus-start", 5 | "build": "docusaurus-build", 6 | "publish-gh-pages": "docusaurus-publish", 7 | "write-translations": "docusaurus-write-translations", 8 | "version": "docusaurus-version", 9 | "rename-version": "docusaurus-rename-version" 10 | }, 11 | "devDependencies": { 12 | "docusaurus": "^1.14.6" 13 | } 14 | } 15 | -------------------------------------------------------------------------------- /website/website/sidebars.json: -------------------------------------------------------------------------------- 1 | { 2 | "docs-other": { 3 | "Tutorials": ["tutorial_overview"], 4 | "Classification": ["tutorial_classification", "tutorial_torchhub_inference"], 5 | "Detection": ["tutorial_torchhub_detection_inference"], 6 | "Accelerator": ["tutorial_accelerator_build_your_model", "tutorial_accelerator_use_accelerator_model_zoo", "tutorial_accelerator_use_model_transmuter"] 7 | } 8 | } 9 | -------------------------------------------------------------------------------- /website/website/siteConfig.js: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (c) Facebook, Inc. and its affiliates. 3 | * 4 | * This source code is licensed under the MIT license found in the 5 | * LICENSE file in the root directory of this source tree. 6 | */ 7 | 8 | // See https://docusaurus.io/docs/site-config for all the possible 9 | // site configuration options. 10 | 11 | 12 | const siteConfig = { 13 | title: 'PyTorchVideo', // Title for your website. 
14 | tagline: 'A deep learning library for video understanding research', 15 | url: 'https://pytorchvideo.org', // Your website URL 16 | baseUrl: '/', 17 | 18 | // Used for publishing and more 19 | projectName: 'pytorchvideo', 20 | organizationName: 'facebookresearch', 21 | 22 | // For no header links in the top nav bar -> headerLinks: [], 23 | headerLinks: [ 24 | {doc: 'tutorial_overview', label: 'Tutorials'}, 25 | {href: "https://pytorchvideo.readthedocs.io/en/latest/index.html", label: 'Docs'}, // TODO: Change this after the repo becomes public. 26 | {href: "https://github.com/facebookresearch/pytorchvideo/", label: 'GitHub'}, //TODO: Change this after repo becomes public 27 | ], 28 | 29 | 30 | /* path to images for header/footer */ 31 | headerIcon: 'img/logo.svg', 32 | footerIcon: 'img/logo.svg', 33 | favicon: 'img/favicon.png', 34 | 35 | /* Colors for website */ 36 | colors: { 37 | primaryColor: '#812ce5', 38 | secondaryColor: '#cc33cc', 39 | }, 40 | 41 | // This copyright info is used in /core/Footer.js and blog RSS/Atom feeds. 42 | copyright: `Copyright © ${new Date().getFullYear()} Facebook, Inc`, 43 | 44 | highlight: { 45 | // Highlight.js theme to use for syntax highlighting in code blocks. 46 | theme: 'atom-one-dark', 47 | }, 48 | 49 | // Add custom scripts here that would be placed in