├── .gitignore ├── CODE_OF_CONDUCT.md ├── CONTRIBUTING.md ├── GETTING_STARTED.md ├── INSTALL.md ├── LICENSE ├── MODEL_ZOO.md ├── README.md ├── configs ├── AVA │ ├── SLOWFAST_32x2_R50_SHORT.yaml │ ├── SLOW_8x8_R50_SHORT.yaml │ └── c2 │ │ ├── SLOWFAST_32x2_R101_50_50.yaml │ │ ├── SLOWFAST_32x2_R101_50_50_v2.1.yaml │ │ ├── SLOWFAST_32x2_R50.yaml │ │ ├── SLOWFAST_64x2_R101_50_50.yaml │ │ └── SLOW_8x8_R50.yaml ├── Charades │ ├── SLOWFAST_16x8_R50.yaml │ └── SLOWFAST_16x8_R50_multigrid.yaml ├── Kinetics │ ├── AVSLOWFAST_4x16_R50.yaml │ ├── AVSLOWFAST_8x8_R50.yaml │ ├── C2D_8x8_R50.yaml │ ├── C2D_8x8_R50_IN1K.yaml │ ├── C2D_NLN_8x8_R50.yaml │ ├── C2D_NLN_8x8_R50_IN1K.yaml │ ├── I3D_8x8_R101.yaml │ ├── I3D_8x8_R50.yaml │ ├── I3D_8x8_R50_IN1K.yaml │ ├── I3D_NLN_8x8_R101.yaml │ ├── I3D_NLN_8x8_R50.yaml │ ├── I3D_NLN_8x8_R50_IN1K.yaml │ ├── SLOWFAST_4x16_R50.yaml │ ├── SLOWFAST_8x8_R50.yaml │ ├── SLOWFAST_8x8_R50_stepwise.yaml │ ├── SLOWFAST_8x8_R50_stepwise_multigrid.yaml │ ├── SLOWFAST_NLN_4x16_R50.yaml │ ├── SLOWFAST_NLN_8x8_R50.yaml │ ├── SLOW_4x16_R50.yaml │ ├── SLOW_8x8_R50.yaml │ ├── SLOW_NLN_4x16_R50.yaml │ ├── SLOW_NLN_8x8_R50.yaml │ └── c2 │ │ ├── C2D_NOPOOL_8x8_R50.yaml │ │ ├── I3D_8x8_R50.yaml │ │ ├── I3D_NLN_8x8_R50.yaml │ │ ├── SLOWFAST_16x8_R101_50_50.yaml │ │ ├── SLOWFAST_4x16_R50.yaml │ │ ├── SLOWFAST_8x8_R101_101_101.yaml │ │ ├── SLOWFAST_8x8_R101_50_101.yaml │ │ ├── SLOWFAST_8x8_R101_50_50.yaml │ │ ├── SLOWFAST_8x8_R50.yaml │ │ ├── SLOWFAST_NLN_16x8_R101_50_50.yaml │ │ ├── SLOW_4x16_R50.yaml │ │ └── SLOW_8x8_R50.yaml └── SSv2 │ ├── SLOWFAST_16x8_R50.yaml │ └── SLOWFAST_16x8_R50_multigrid.yaml ├── demo ├── AVA │ └── SLOWFAST_32x2_R101_50_50.yaml ├── Kinetics │ └── SLOWFAST_8x8_R50.yaml └── ava_demo.gif ├── linter.sh ├── projects ├── avslowfast │ └── README.md └── multigrid │ ├── README.md │ └── multigrid.png ├── setup.cfg ├── setup.py ├── slowfast ├── __init__.py ├── config │ ├── __init__.py │ ├── custom_config.py │ └── defaults.py ├── datasets │ ├── DATASET.md │ ├── __init__.py │ ├── ava_dataset.py │ ├── ava_helper.py │ ├── build.py │ ├── charades.py │ ├── cv2_transform.py │ ├── decoder.py │ ├── kinetics.py │ ├── loader.py │ ├── multigrid_helper.py │ ├── ssv2.py │ ├── transform.py │ ├── utils.py │ └── video_container.py ├── models │ ├── __init__.py │ ├── batchnorm_helper.py │ ├── build.py │ ├── custom_video_model_builder.py │ ├── head_helper.py │ ├── losses.py │ ├── nonlocal_helper.py │ ├── optimizer.py │ ├── resnet_helper.py │ ├── stem_helper.py │ └── video_model_builder.py ├── utils │ ├── __init__.py │ ├── ava_eval_helper.py │ ├── ava_evaluation │ │ ├── README.md │ │ ├── __init__.py │ │ ├── ava_action_list_v2.1_for_activitynet_2018.pbtxt.txt │ │ ├── label_map_util.py │ │ ├── metrics.py │ │ ├── np_box_list.py │ │ ├── np_box_list_ops.py │ │ ├── np_box_mask_list.py │ │ ├── np_box_mask_list_ops.py │ │ ├── np_box_ops.py │ │ ├── np_mask_ops.py │ │ ├── object_detection_evaluation.py │ │ ├── per_image_evaluation.py │ │ └── standard_fields.py │ ├── benchmark.py │ ├── bn_helper.py │ ├── c2_model_loading.py │ ├── checkpoint.py │ ├── distributed.py │ ├── env.py │ ├── logging.py │ ├── lr_policy.py │ ├── meters.py │ ├── metrics.py │ ├── misc.py │ ├── multigrid.py │ ├── multiprocessing.py │ ├── parser.py │ └── weight_init_helper.py └── visualization │ ├── __init__.py │ ├── async_predictor.py │ ├── ava_demo_precomputed_boxes.py │ ├── demo_loader.py │ ├── gradcam_utils.py │ ├── predictor.py │ ├── tensorboard_vis.py │ ├── utils.py │ └── video_visualizer.py └── tools ├── benchmark.py ├── 
demo_net.py ├── run_net.py ├── test_net.py ├── train_net.py └── visualization.py /.gitignore: -------------------------------------------------------------------------------- 1 | # LaTex 2 | main.pdf 3 | supp.pdf 4 | **/*.aux 5 | **/*.log 6 | **/*.synctex.gz 7 | **/*.aux 8 | **/*.bbl 9 | **/*.blg 10 | **/*.brf 11 | **/*.sublime-project 12 | **/*.sublime-workspace 13 | **/*.fdb_latexmk 14 | **/*.fls 15 | **/*.toc 16 | 17 | tools/debug.sh 18 | 19 | # MacOS stuff 20 | .DS_Store 21 | **/.DS_Store 22 | 23 | **/__pycache__ 24 | **/*.pyc 25 | **/.settings 26 | .project 27 | .pydevproject 28 | 29 | # external/* 30 | 31 | # Byte-compiled / optimized / DLL files 32 | __pycache__/ 33 | *.py[cod] 34 | *$py.class 35 | 36 | # C extensions 37 | *.so 38 | 39 | # Distribution / packaging 40 | .Python 41 | build/ 42 | develop-eggs/ 43 | dist/ 44 | downloads/ 45 | eggs/ 46 | .eggs/ 47 | lib/ 48 | lib64/ 49 | parts/ 50 | sdist/ 51 | var/ 52 | wheels/ 53 | *.egg-info/ 54 | .installed.cfg 55 | *.egg 56 | MANIFEST 57 | 58 | # PyInstaller 59 | # Usually these files are written by a python script from a template 60 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 61 | *.manifest 62 | *.spec 63 | 64 | # Installer logs 65 | pip-log.txt 66 | pip-delete-this-directory.txt 67 | 68 | # Unit test / coverage reports 69 | htmlcov/ 70 | .tox/ 71 | .coverage 72 | .coverage.* 73 | .cache 74 | nosetests.xml 75 | coverage.xml 76 | *.cover 77 | .hypothesis/ 78 | .pytest_cache/ 79 | 80 | # Translations 81 | *.mo 82 | *.pot 83 | 84 | # Django stuff: 85 | *.log 86 | local_settings.py 87 | db.sqlite3 88 | 89 | # Flask stuff: 90 | instance/ 91 | .webassets-cache 92 | 93 | # Scrapy stuff: 94 | .scrapy 95 | 96 | # Sphinx documentation 97 | docs/_build/ 98 | 99 | # PyBuilder 100 | target/ 101 | 102 | # Jupyter Notebook 103 | .ipynb_checkpoints 104 | 105 | # pyenv 106 | .python-version 107 | 108 | # celery beat schedule file 109 | celerybeat-schedule 110 | 111 | # SageMath parsed files 112 | *.sage.py 113 | 114 | # Environments 115 | .env 116 | .venv 117 | env/ 118 | venv/ 119 | ENV/ 120 | env.bak/ 121 | venv.bak/ 122 | 123 | # Spyder project settings 124 | .spyderproject 125 | .spyproject 126 | 127 | # Rope project settings 128 | .ropeproject 129 | 130 | # mkdocs documentation 131 | /site 132 | 133 | # mypy 134 | .mypy_cache/ 135 | -------------------------------------------------------------------------------- /CODE_OF_CONDUCT.md: -------------------------------------------------------------------------------- 1 | # Code of Conduct 2 | 3 | Facebook has adopted a Code of Conduct that we expect project participants to adhere to. 4 | Please read the [full text](https://code.fb.com/codeofconduct/) 5 | so that you can understand what actions will and will not be tolerated. 6 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # Contributing to PySlowFast 2 | We want to make contributing to this project as easy and transparent as 3 | possible. 4 | 5 | ## Pull Requests 6 | We actively welcome your pull requests. 7 | 8 | 1. Fork the repo and create your branch from `master`. 9 | 2. If you've changed APIs, update the documentation. 10 | 3. Ensure the test suite passes. 11 | 4. Make sure your code lints. 12 | 5. Ensure no regressions in baseline model speed and accuracy. 13 | 6. If you haven't already, complete the Contributor License Agreement ("CLA"). 
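For the linting step (item 4 above), note that the repository ships a `linter.sh` script at its root; a minimal sketch, assuming it is run from the repository root:
```
./linter.sh
```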
14 | 15 | ## Contributor License Agreement ("CLA") 16 | In order to accept your pull request, we need you to submit a CLA. You only need 17 | to do this once to work on any of Facebook's open source projects. 18 | 19 | Complete your CLA here: <https://code.facebook.com/cla> 20 | 21 | ## Issues 22 | 23 | Please ensure your description is clear and has sufficient instructions to reproduce the issue. The recommended issue format is: 24 | ------ 25 | 26 | #### To Reproduce 27 | ```How to reproduce the issue.``` 28 | #### Expected behavior 29 | ```Expected output.``` 30 | #### Environment 31 | ```Your environment.``` 32 | 33 | ------ 34 | 35 | ## Coding Style 36 | * 4 spaces for indentation rather than tabs 37 | * 80 character line length 38 | * PEP8 formatting 39 | 40 | ## License 41 | By contributing to PySlowFast, you agree that your contributions will be licensed under the LICENSE file in the root directory of this source tree. 42 | -------------------------------------------------------------------------------- /GETTING_STARTED.md: -------------------------------------------------------------------------------- 1 | # Getting Started with PySlowFast 2 | 3 | This document provides a brief introduction to launching training and testing jobs in PySlowFast. Before launching any job, make sure you have properly installed PySlowFast following the instructions in [README.md](README.md) and prepared the dataset in the correct format following [DATASET.md](slowfast/datasets/DATASET.md). 4 | 5 | ## Train a Standard Model from Scratch 6 | 7 | Here we start by training a simple C2D model: 8 | 9 | ``` 10 | python tools/run_net.py \ 11 | --cfg configs/Kinetics/C2D_8x8_R50.yaml \ 12 | DATA.PATH_TO_DATA_DIR path_to_your_dataset \ 13 | NUM_GPUS 2 \ 14 | TRAIN.BATCH_SIZE 16 \ 15 | ``` 16 | You may need to pass the location of your dataset on the command line by adding `DATA.PATH_TO_DATA_DIR path_to_your_dataset`, or you can simply add 17 | 18 | ``` 19 | DATA: 20 | PATH_TO_DATA_DIR: path_to_your_dataset 21 | ``` 22 | to the YAML config file, so you do not need to pass it on the command line every time. 23 | 24 | 25 | If you want to launch a quick job for debugging on your local machine, you may also want to add: 26 | ``` 27 | DATA_LOADER.NUM_WORKERS 0 \ 28 | NUM_GPUS 2 \ 29 | TRAIN.BATCH_SIZE 16 \ 30 | ``` 31 | 32 | 33 | 34 | ## Resume from an Existing Checkpoint 35 | If your checkpoint was trained with PyTorch, add the following option on the command line, or add it to the YAML config: 36 | 37 | ``` 38 | TRAIN.CHECKPOINT_FILE_PATH path_to_your_PyTorch_checkpoint 39 | ``` 40 | 41 | If the checkpoint was trained with Caffe2, then do the following: 42 | 43 | ``` 44 | TRAIN.CHECKPOINT_FILE_PATH path_to_your_Caffe2_checkpoint \ 45 | TRAIN.CHECKPOINT_TYPE caffe2 46 | ``` 47 | 48 | If you need to perform inflation on the checkpoint, remember to set `TRAIN.CHECKPOINT_INFLATE` to True. 49 | 50 | 51 | ## Perform Test 52 | We have `TRAIN.ENABLE` and `TEST.ENABLE` to control whether training or testing is required for the current job. If only testing is needed, set `TRAIN.ENABLE` to False, and do not forget to pass the path of the model you want to test via `TEST.CHECKPOINT_FILE_PATH`.
53 | ``` 54 | python tools/run_net.py \ 55 | --cfg configs/Kinetics/C2D_8x8_R50.yaml \ 56 | DATA.PATH_TO_DATA_DIR path_to_your_dataset \ 57 | TEST.CHECKPOINT_FILE_PATH path_to_your_checkpoint \ 58 | TRAIN.ENABLE False \ 59 | ``` 60 | ## Run the Demo on Videos/Camera 61 | 62 | Currently, the demo is not supported on multiple GPUs. Set the following in your config file: 63 | * `NUM_GPUS: 1` 64 | * `NUM_SHARDS: 1` 65 | * `DEMO.WEBCAM`: Set this to the index of a camera to run the demo from a webcam. Otherwise, set `DEMO.INPUT_VIDEO` to the path of an input video. 66 | * `DEMO.ENABLE: True` 67 | * `DEMO.LABEL_FILE_PATH`: path to a JSON label file that maps {label: label_id}. 68 | * `CHECKPOINT_FILE_PATH: "path/to/the/pre-trained/model.pkl"` (skip this if you decide to place the model in `OUTPUT_DIR`, which defaults to `./checkpoints/`) 69 | 70 | Optional: 71 | * `DEMO.DISPLAY_WIDTH`: custom display window width. 72 | * `DEMO.DISPLAY_HEIGHT`: custom display window height. 73 | * `DEMO.OUTPUT_FILE`: set this to a path if you want to write the output to a video file instead of displaying it in a window. 74 | * `DEMO.BUFFER_SIZE`: number of overlapping frames between two consecutive input clips. Set this to a positive number to make more frequent predictions 75 | (at the expense of slower speed). 76 | 77 | If you want to run only the demo process, set `TRAIN.ENABLE` and `TEST.ENABLE` to False. 78 | 79 | ### Classification 80 | Modify a `.yaml` in `configs/Kinetics/` corresponding to the pretrained model you want to use (you can look at `demo/Kinetics/SLOWFAST_8x8_R50.yaml` for reference). 81 | 82 | ### Detection 83 | Modify a `.yaml` in `configs/AVA/` corresponding to the pretrained model you want to use (you can look at `demo/AVA/SLOWFAST_32x2_R101_50_50.yaml` for reference). 84 | 85 | Optional: 86 | * `DEMO.DETECTRON2_THRESH`: threshold for choosing bounding boxes output by Detectron2 (defaults to 0.9). 87 | * Pick a different [Detectron2](https://github.com/facebookresearch/detectron2/blob/master/MODEL_ZOO.md) object detection model config and weights. Set the parameters `DEMO.DETECTRON2_CFG` and `DEMO.DETECTRON2_WEIGHTS` (defaults to the `faster_rcnn_R_50_FPN_3x.yaml` config and the corresponding weights). 88 | 89 | ### Run command 90 | ``` 91 | python tools/run_net.py --cfg path/to/.yaml 92 | ``` 93 | -------------------------------------------------------------------------------- /INSTALL.md: -------------------------------------------------------------------------------- 1 | # Installation 2 | 3 | ## Requirements 4 | - Python >= 3.6 5 | - Numpy 6 | - PyTorch 1.3 7 | - [fvcore](https://github.com/facebookresearch/fvcore/): `pip install 'git+https://github.com/facebookresearch/fvcore'` 8 | - [torchvision](https://github.com/pytorch/vision/) that matches the PyTorch installation. 9 | You can install them together at [pytorch.org](https://pytorch.org) to make sure of this.
10 | - simplejson: `pip install simplejson` 11 | - GCC >= 4.9 12 | - PyAV: `conda install av -c conda-forge` 13 | - ffmpeg (4.0 is preferred; it will be installed along with PyAV) 14 | - PyYaml: (will be installed along with fvcore) 15 | - tqdm: (will be installed along with fvcore) 16 | - psutil: `pip install psutil` 17 | - OpenCV: `pip install opencv-python` 18 | - torchvision: `pip install torchvision` or `conda install torchvision -c pytorch` 19 | - librosa: `pip install librosa` (if using Audiovisual SlowFast Networks) 20 | - tensorboard: `pip install tensorboard` 21 | - moviepy: (optional, for visualizing video on tensorboard) `conda install -c conda-forge moviepy` or `pip install moviepy` 22 | - [Detectron2](https://github.com/facebookresearch/detectron2): 23 | ``` 24 | pip install -U torch torchvision cython 25 | pip install -U 'git+https://github.com/facebookresearch/fvcore.git' 'git+https://github.com/cocodataset/cocoapi.git#subdirectory=PythonAPI' 26 | git clone https://github.com/facebookresearch/detectron2 detectron2_repo 27 | pip install -e detectron2_repo 28 | # You can find more details at https://github.com/facebookresearch/detectron2/blob/master/INSTALL.md 29 | ``` 30 | 31 | ## PyTorch 32 | Please follow the official PyTorch instructions to install from source: 33 | ``` 34 | git clone --recursive https://github.com/pytorch/pytorch 35 | ``` 36 | 37 | ## PySlowFast 38 | 39 | Clone the PySlowFast Video Understanding repository. 40 | ``` 41 | git clone https://github.com/facebookresearch/slowfast 42 | ``` 43 | 44 | Add this repository to $PYTHONPATH. 45 | ``` 46 | export PYTHONPATH=/path/to/SlowFast/slowfast:$PYTHONPATH 47 | ``` 48 | 49 | ### Build PySlowFast 50 | 51 | After installing the above dependencies, run: 52 | ``` 53 | git clone https://github.com/facebookresearch/slowfast 54 | cd SlowFast 55 | python setup.py build develop 56 | ``` 57 | 58 | Now that the installation is finished, run the pipeline with: 59 | ``` 60 | python tools/run_net.py --cfg configs/Kinetics/C2D_8x8_R50.yaml NUM_GPUS 1 TRAIN.BATCH_SIZE 8 SOLVER.BASE_LR 0.0125 DATA.PATH_TO_DATA_DIR path_to_your_data_folder 61 | ``` 62 | -------------------------------------------------------------------------------- /MODEL_ZOO.md: -------------------------------------------------------------------------------- 1 | # PySlowFast Model Zoo and Baselines 2 | 3 | ## Kinetics 4 | 5 | We provide the original Caffe2 pretrained models for the heavy models (testing a Caffe2 pretrained model in PyTorch may give a small difference in performance): 6 | 7 | | architecture | depth | pretrain | frame length x sample rate | top1 | top5 | model | config | 8 | | ------------- | ------------- | ------------- | ------------- | ------------- | ------------- | ------------- | ------------- | 9 | | C2D | R50 | Train From Scratch | 8 x 8 | 67.2 | 87.8 | [`link`](https://dl.fbaipublicfiles.com/pyslowfast/model_zoo/kinetics400/C2D_NOPOOL_8x8_R50.pkl) | Kinetics/c2/C2D_NOPOOL_8x8_R50 | 10 | | I3D | R50 | Train From Scratch | 8 x 8 | 73.5 | 90.8 | [`link`](https://dl.fbaipublicfiles.com/pyslowfast/model_zoo/kinetics400/I3D_8x8_R50.pkl) | Kinetics/c2/I3D_8x8_R50 | 11 | | I3D NLN | R50 | Train From Scratch | 8 x 8 | 74.0 | 91.1 | [`link`](https://dl.fbaipublicfiles.com/pyslowfast/model_zoo/kinetics400/I3D_NLN_8x8_R50.pkl) | Kinetics/c2/I3D_NLN_8x8_R50 | 12 | | Slow | R50 | Train From Scratch | 4 x 16 | 72.7 | 90.3 | [`link`](https://dl.fbaipublicfiles.com/pyslowfast/model_zoo/kinetics400/SLOWONLY_4x16_R50.pkl) | Kinetics/c2/SLOW_4x16_R50 | 13 | | Slow | R50 |
Train From Scratch | 8 x 8 | 74.8 | 91.6 | [`link`](https://dl.fbaipublicfiles.com/pyslowfast/model_zoo/kinetics400/SLOWONLY_8x8_R50.pkl) | Kinetics/c2/SLOW_8x8_R50 | 14 | | SlowFast | R50 | Train From Scratch | 4 x 16 | 75.6 | 92.0 | [`link`](https://dl.fbaipublicfiles.com/pyslowfast/model_zoo/kinetics400/SLOWFAST_4x16_R50.pkl) | Kinetics/c2/SLOWFAST_4x16_R50 | 15 | | SlowFast | R50 | Train From Scratch | 8 x 8 | 77.0 | 92.6 | [`link`](https://dl.fbaipublicfiles.com/pyslowfast/model_zoo/kinetics400/SLOWFAST_8x8_R50.pkl) | Kinetics/c2/SLOWFAST_8x8_R50 | 16 | | SlowFast | R101 | Train From Scratch | 8 x 8 | 78.0 | 93.3 | [`link`](coming_soon) | Kinetics/c2/SLOWFAST_8x8_R101_101_101 | 17 | | SlowFast | R101 | Train From Scratch | 16 x 8 | 78.9 | 93.5 | [`link`](coming_soon) | Kinetics/c2/SLOWFAST_16x8_R101_50_50 | 18 | 19 | 20 | ## AVA 21 | 22 | | architecture | depth | Pretrain Model | frame length x sample rate | mAP | AVA version | model | 23 | | ------------- | ------------- | ------------- | ------------- | ------------- | ------------- |------------- | 24 | | Slow | R50 | [Kinetics 400](https://dl.fbaipublicfiles.com/pyslowfast/model_zoo/ava/pretrain/C2D_8x8_R50.pkl) | 4 x 16 | 19.5 | 2.2 | [`link`](https://dl.fbaipublicfiles.com/pyslowfast/model_zoo/ava/C2D_8x8_R50.pkl) | 25 | | SlowFast | R101 | [Kinetics 600](https://dl.fbaipublicfiles.com/pyslowfast/model_zoo/ava/pretrain/SLOWFAST_32x2_R101_50_50_v2.1.pkl) | 8 x 8 | 28.2 | 2.1 | [`link`](https://dl.fbaipublicfiles.com/pyslowfast/model_zoo/ava/SLOWFAST_32x2_R101_50_50_v2.1.pkl) | 26 | | SlowFast | R101 | [Kinetics 600](https://dl.fbaipublicfiles.com/pyslowfast/model_zoo/ava/pretrain/SLOWFAST_32x2_R101_50_50.pkl) | 8 x 8 | 29.1 | 2.2 | [`link`](https://dl.fbaipublicfiles.com/pyslowfast/model_zoo/ava/SLOWFAST_32x2_R101_50_50.pkl) | 27 | | SlowFast | R101 | [Kinetics 600](https://dl.fbaipublicfiles.com/pyslowfast/model_zoo/ava/pretrain/SLOWFAST_64x2_R101_50_50.pkl) | 16 x 8 | 29.4 | 2.2 | [`link`](https://dl.fbaipublicfiles.com/pyslowfast/model_zoo/ava/SLOWFAST_64x2_R101_50_50.pkl) | 28 | 29 | ## Multigrid Training 30 | 31 | ***Update June 2020:*** In the following we provide (reimplemented) models from the "[A Multigrid Method for Efficiently Training Video Models](https://arxiv.org/abs/1912.00998)" paper. The multigrid method trains about 3-6x faster than standard training on multiple datasets. See [projects/multigrid](projects/multigrid/README.md) for more information. The following provides models, results, and example config files. 32 | 33 | 34 | #### Kinetics: 35 | | architecture | depth | pretrain | frame length x sample rate | training | top1 | top5 | model | config | 36 | | ------------- | ------------- | ------------- | ------------- | ------------- | ------------- | ------------- | ------------- | ------------- | 37 | | SlowFast | R50 | Train From Scratch | 8 x 8 | Standard | 76.8 | 92.7 | [`link`](https://dl.fbaipublicfiles.com/pyslowfast/pyslowfast/model_zoo/multigrid/model_zoo/Kinetics/SLOWFAST_8x8_R50_stepwise.pkl) | Kinetics/SLOWFAST_8x8_R50_stepwise | 38 | | SlowFast | R50 | Train From Scratch | 8 x 8 | Multigrid | 76.6 | 92.7 | [`link`](https://dl.fbaipublicfiles.com/pyslowfast/pyslowfast/model_zoo/multigrid/model_zoo/Kinetics/SLOWFAST_8x8_R50_stepwise_multigrid.pkl) | Kinetics/SLOWFAST_8x8_R50_stepwise_multigrid | 39 | 40 | (Here we use a stepwise learning rate schedule.)
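A multigrid job is launched the same way as any other config (a sketch following the pattern in GETTING_STARTED.md; `path_to_your_dataset` is a placeholder):
```
python tools/run_net.py \
  --cfg configs/Kinetics/SLOWFAST_8x8_R50_stepwise_multigrid.yaml \
  DATA.PATH_TO_DATA_DIR path_to_your_dataset
```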
41 | 42 | #### Something-Something V2: 43 | | architecture | depth | pretrain | frame length x sample rate | training | top1 | top5 | model | config | 44 | | ------------- | ------------- | ------------- | ------------- | ------------- | ------------- | ------------- | ------------- | ------------- | 45 | | SlowFast | R50 | [Kinetics 400](https://dl.fbaipublicfiles.com/pyslowfast/model_zoo/kinetics400/SLOWFAST_8x8_R50.pkl) | 16 x 8 | Standard | 63.0 | 88.5 | [`link`](https://dl.fbaipublicfiles.com/pyslowfast/pyslowfast/model_zoo/multigrid/model_zoo/SSv2/SLOWFAST_16x8_R50.pkl) | SSv2/SLOWFAST_16x8_R50 | 46 | | SlowFast | R50 | [Kinetics 400](https://dl.fbaipublicfiles.com/pyslowfast/model_zoo/kinetics400/SLOWFAST_8x8_R50.pkl) | 16 x 8 | Multigrid | 63.5 | 88.7 | [`link`](https://dl.fbaipublicfiles.com/pyslowfast/pyslowfast/model_zoo/multigrid/model_zoo/SSv2/SLOWFAST_16x8_R50_multigrid.pkl) | SSv2/SLOWFAST_16x8_R50_multigrid | 47 | 48 | 49 | #### Charades 50 | | architecture | depth | pretrain | frame length x sample rate | training | mAP | model | config | 51 | | ------------- | ------------- | ------------- | ------------- | ------------- | ------------- | ------------- | ------------- | 52 | | SlowFast | R50 | [Kinetics 400](https://dl.fbaipublicfiles.com/pyslowfast/model_zoo/kinetics400/SLOWFAST_8x8_R50.pkl) | 16 x 8 | Standard | 38.9 | [`link`](https://dl.fbaipublicfiles.com/pyslowfast/pyslowfast/model_zoo/multigrid/model_zoo/Charades/SLOWFAST_16x8_R50.pkl) | Charades/SLOWFAST_16x8_R50 | 53 | | SlowFast | R50 | [Kinetics 400](https://dl.fbaipublicfiles.com/pyslowfast/model_zoo/kinetics400/SLOWFAST_8x8_R50.pkl) | 16 x 8 | Multigrid | 38.6 | [`link`](https://dl.fbaipublicfiles.com/pyslowfast/pyslowfast/model_zoo/multigrid/model_zoo/Charades/SLOWFAST_16x8_R50_multigrid.pkl) | Charades/SLOWFAST_16x8_R50_multigrid | 54 | 55 | 56 | ## ImageNet 57 | 58 | We also release an ImageNet pretrained model in case finetuning from ImageNet pretraining is preferred. The reported numbers are top-1 and top-5 error rates obtained by center-crop testing on the validation set. 59 | 60 | | architecture | depth | Top1 err | Top5 err | model | 61 | | ------------- | ------------- | ------------- | ------------- | ------------- | 62 | | ResNet | R50 | 23.6 | 6.8 | [`link`](https://dl.fbaipublicfiles.com/pyslowfast/model_zoo/kinetics400/R50_IN1K.pyth) | 63 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # PySlowFast 2 | 3 | PySlowFast is an open source video understanding codebase from FAIR that provides state-of-the-art video classification models with efficient training. This repository includes implementations of the following methods: 4 | 5 | - [SlowFast Networks for Video Recognition](https://arxiv.org/abs/1812.03982) 6 | - [Non-local Neural Networks](https://arxiv.org/abs/1711.07971) 7 | - [A Multigrid Method for Efficiently Training Video Models](https://arxiv.org/abs/1912.00998) 8 | 9 |
10 | ![PySlowFast demo](demo/ava_demo.gif) 11 |
12 | 13 | ## Introduction 14 | 15 | The goal of PySlowFast is to provide a high-performance, lightweight PyTorch codebase with state-of-the-art video backbones for video understanding research on different tasks (classification, detection, etc.). It is designed to support rapid implementation and evaluation of novel video research ideas. PySlowFast includes implementations of the following backbone network architectures: 16 | 17 | - SlowFast 18 | - Slow 19 | - C2D 20 | - I3D 21 | - Non-local Network 22 | 23 | ## Updates 24 | - We now support [Multigrid Training](https://arxiv.org/abs/1912.00998) for efficiently training video models. See [`projects/multigrid`](./projects/multigrid/README.md) for more information. 25 | - PySlowFast is released in conjunction with our [ICCV 2019 Tutorial](https://alexander-kirillov.github.io/tutorials/visual-recognition-iccv19/). 26 | 27 | ## License 28 | 29 | PySlowFast is released under the [Apache 2.0 license](LICENSE). 30 | 31 | ## Model Zoo and Baselines 32 | 33 | We provide a large set of baseline results and trained models available for download in the PySlowFast [Model Zoo](MODEL_ZOO.md). 34 | 35 | ## Installation 36 | 37 | Please find installation instructions for PyTorch and PySlowFast in [INSTALL.md](INSTALL.md). You may follow the instructions in [DATASET.md](slowfast/datasets/DATASET.md) to prepare the datasets. 38 | 39 | ## Quick Start 40 | 41 | Follow the example in [GETTING_STARTED.md](GETTING_STARTED.md) to start playing with video models in PySlowFast. 42 | 43 | ## Contributors 44 | PySlowFast is written and maintained by [Haoqi Fan](https://haoqifan.github.io/), [Yanghao Li](https://lyttonhao.github.io/), [Bo Xiong](https://www.cs.utexas.edu/~bxiong/), [Wan-Yen Lo](https://www.linkedin.com/in/wanyenlo/), [Christoph Feichtenhofer](https://feichtenhofer.github.io/). 45 | 46 | ## Citing PySlowFast 47 | If you find PySlowFast useful in your research, please use the following BibTeX entry for citation. 48 | ```BibTeX 49 | @misc{fan2020pyslowfast, 50 | author = {Haoqi Fan and Yanghao Li and Bo Xiong and Wan-Yen Lo and 51 | Christoph Feichtenhofer}, 52 | title = {PySlowFast}, 53 | howpublished = {\url{https://github.com/facebookresearch/slowfast}}, 54 | year = {2020} 55 | } 56 | ``` 57 | -------------------------------------------------------------------------------- /configs/AVA/SLOWFAST_32x2_R50_SHORT.yaml: -------------------------------------------------------------------------------- 1 | TRAIN: 2 | ENABLE: True 3 | DATASET: ava 4 | BATCH_SIZE: 64 5 | EVAL_PERIOD: 5 6 | CHECKPOINT_PERIOD: 1 7 | AUTO_RESUME: True 8 | # CHECKPOINT_FILE_PATH: path to the pretrain checkpoint file.
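# (A note based on MODEL_ZOO.md, not part of the original config: for AVA training
# this is typically a Kinetics-pretrained checkpoint, e.g. one of the "Pretrain
# Model" links in the AVA table of MODEL_ZOO.md, in Caffe2 .pkl format to match
# CHECKPOINT_TYPE below.)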
9 | CHECKPOINT_TYPE: caffe2 10 | DATA: 11 | NUM_FRAMES: 32 12 | SAMPLING_RATE: 2 13 | TRAIN_JITTER_SCALES: [256, 320] 14 | TRAIN_CROP_SIZE: 224 15 | TEST_CROP_SIZE: 224 16 | INPUT_CHANNEL_NUM: [3, 3] 17 | DETECTION: 18 | ENABLE: True 19 | ALIGNED: True 20 | AVA: 21 | DETECTION_SCORE_THRESH: 0.8 22 | TRAIN_PREDICT_BOX_LISTS: [ 23 | "ava_train_v2.2.csv", 24 | "person_box_67091280_iou90/ava_detection_train_boxes_and_labels_include_negative_v2.2.csv", 25 | ] 26 | TEST_PREDICT_BOX_LISTS: ["person_box_67091280_iou90/ava_detection_val_boxes_and_labels.csv"] 27 | SLOWFAST: 28 | ALPHA: 4 29 | BETA_INV: 8 30 | FUSION_CONV_CHANNEL_RATIO: 2 31 | FUSION_KERNEL_SZ: 7 32 | RESNET: 33 | ZERO_INIT_FINAL_BN: True 34 | WIDTH_PER_GROUP: 64 35 | NUM_GROUPS: 1 36 | DEPTH: 50 37 | TRANS_FUNC: bottleneck_transform 38 | STRIDE_1X1: False 39 | NUM_BLOCK_TEMP_KERNEL: [[3, 3], [4, 4], [6, 6], [3, 3]] 40 | SPATIAL_DILATIONS: [[1, 1], [1, 1], [1, 1], [2, 2]] 41 | SPATIAL_STRIDES: [[1, 1], [2, 2], [2, 2], [1, 1]] 42 | NONLOCAL: 43 | LOCATION: [[[], []], [[], []], [[], []], [[], []]] 44 | GROUP: [[1, 1], [1, 1], [1, 1], [1, 1]] 45 | INSTANTIATION: dot_product 46 | POOL: [[[1, 2, 2], [1, 2, 2]], [[1, 2, 2], [1, 2, 2]], [[1, 2, 2], [1, 2, 2]], [[1, 2, 2], [1, 2, 2]]] 47 | BN: 48 | USE_PRECISE_STATS: False 49 | NUM_BATCHES_PRECISE: 200 50 | SOLVER: 51 | BASE_LR: 0.1 52 | LR_POLICY: steps_with_relative_lrs 53 | STEPS: [0, 10, 15, 20] 54 | LRS: [1, 0.1, 0.01, 0.001] 55 | MAX_EPOCH: 20 56 | MOMENTUM: 0.9 57 | WEIGHT_DECAY: 1e-7 58 | WARMUP_EPOCHS: 5.0 59 | WARMUP_START_LR: 0.000125 60 | OPTIMIZING_METHOD: sgd 61 | MODEL: 62 | NUM_CLASSES: 80 63 | ARCH: slowfast 64 | MODEL_NAME: SlowFast 65 | LOSS_FUNC: bce 66 | DROPOUT_RATE: 0.5 67 | HEAD_ACT: sigmoid 68 | TEST: 69 | ENABLE: True 70 | DATASET: ava 71 | BATCH_SIZE: 8 72 | DATA_LOADER: 73 | NUM_WORKERS: 2 74 | PIN_MEMORY: True 75 | NUM_GPUS: 8 76 | NUM_SHARDS: 1 77 | RNG_SEED: 0 78 | OUTPUT_DIR: . 79 | -------------------------------------------------------------------------------- /configs/AVA/SLOW_8x8_R50_SHORT.yaml: -------------------------------------------------------------------------------- 1 | TRAIN: 2 | ENABLE: True 3 | DATASET: ava 4 | BATCH_SIZE: 64 5 | EVAL_PERIOD: 5 6 | CHECKPOINT_PERIOD: 1 7 | AUTO_RESUME: True 8 | # CHECKPOINT_FILE_PATH: path to the pretrain checkpoint file. 
9 | CHECKPOINT_TYPE: caffe2 10 | DATA: 11 | NUM_FRAMES: 4 12 | SAMPLING_RATE: 16 13 | TRAIN_JITTER_SCALES: [256, 320] 14 | TRAIN_CROP_SIZE: 224 15 | TEST_CROP_SIZE: 256 16 | INPUT_CHANNEL_NUM: [3] 17 | DETECTION: 18 | ENABLE: True 19 | ALIGNED: True 20 | AVA: 21 | DETECTION_SCORE_THRESH: 0.9 22 | TRAIN_PREDICT_BOX_LISTS: [ 23 | "ava_train_v2.2.csv", 24 | "person_box_67091280_iou90/ava_detection_train_boxes_and_labels_include_negative_v2.2.csv", 25 | ] 26 | TEST_PREDICT_BOX_LISTS: ["person_box_67091280_iou90/ava_detection_val_boxes_and_labels.csv"] 27 | RESNET: 28 | ZERO_INIT_FINAL_BN: True 29 | WIDTH_PER_GROUP: 64 30 | NUM_GROUPS: 1 31 | DEPTH: 50 32 | TRANS_FUNC: bottleneck_transform 33 | STRIDE_1X1: False 34 | NUM_BLOCK_TEMP_KERNEL: [[3], [4], [6], [3]] 35 | SPATIAL_DILATIONS: [[1], [1], [1], [2]] 36 | SPATIAL_STRIDES: [[1], [2], [2], [1]] 37 | NONLOCAL: 38 | LOCATION: [[[]], [[]], [[]], [[]]] 39 | GROUP: [[1], [1], [1], [1]] 40 | INSTANTIATION: softmax 41 | BN: 42 | USE_PRECISE_STATS: False 43 | NUM_BATCHES_PRECISE: 200 44 | SOLVER: 45 | BASE_LR: 0.1 46 | LR_POLICY: steps_with_relative_lrs 47 | STEPS: [0, 10, 15, 20] 48 | LRS: [1, 0.1, 0.01, 0.001] 49 | MAX_EPOCH: 20 50 | MOMENTUM: 0.9 51 | WEIGHT_DECAY: 1e-7 52 | WARMUP_EPOCHS: 5.0 53 | WARMUP_START_LR: 0.000125 54 | OPTIMIZING_METHOD: sgd 55 | MODEL: 56 | NUM_CLASSES: 80 57 | ARCH: slow 58 | MODEL_NAME: ResNet 59 | LOSS_FUNC: bce 60 | DROPOUT_RATE: 0.5 61 | HEAD_ACT: sigmoid 62 | TEST: 63 | ENABLE: True 64 | DATASET: ava 65 | BATCH_SIZE: 8 66 | DATA_LOADER: 67 | NUM_WORKERS: 2 68 | PIN_MEMORY: True 69 | NUM_GPUS: 8 70 | NUM_SHARDS: 1 71 | RNG_SEED: 0 72 | OUTPUT_DIR: . 73 | -------------------------------------------------------------------------------- /configs/AVA/c2/SLOWFAST_32x2_R101_50_50.yaml: -------------------------------------------------------------------------------- 1 | TRAIN: 2 | ENABLE: False 3 | DATASET: ava 4 | BATCH_SIZE: 16 5 | EVAL_PERIOD: 1 6 | CHECKPOINT_PERIOD: 1 7 | AUTO_RESUME: True 8 | # CHECKPOINT_FILE_PATH: path to pretrain model 9 | CHECKPOINT_TYPE: pytorch 10 | DATA: 11 | NUM_FRAMES: 32 12 | SAMPLING_RATE: 2 13 | TRAIN_JITTER_SCALES: [256, 320] 14 | TRAIN_CROP_SIZE: 224 15 | TEST_CROP_SIZE: 256 16 | INPUT_CHANNEL_NUM: [3, 3] 17 | DETECTION: 18 | ENABLE: True 19 | ALIGNED: False 20 | AVA: 21 | BGR: False 22 | DETECTION_SCORE_THRESH: 0.8 23 | TEST_PREDICT_BOX_LISTS: ["person_box_67091280_iou90/ava_detection_val_boxes_and_labels.csv"] 24 | SLOWFAST: 25 | ALPHA: 4 26 | BETA_INV: 8 27 | FUSION_CONV_CHANNEL_RATIO: 2 28 | FUSION_KERNEL_SZ: 5 29 | RESNET: 30 | ZERO_INIT_FINAL_BN: True 31 | WIDTH_PER_GROUP: 64 32 | NUM_GROUPS: 1 33 | DEPTH: 101 34 | TRANS_FUNC: bottleneck_transform 35 | STRIDE_1X1: False 36 | NUM_BLOCK_TEMP_KERNEL: [[3, 3], [4, 4], [6, 6], [3, 3]] 37 | SPATIAL_DILATIONS: [[1, 1], [1, 1], [1, 1], [2, 2]] 38 | SPATIAL_STRIDES: [[1, 1], [2, 2], [2, 2], [1, 1]] 39 | NONLOCAL: 40 | LOCATION: [[[], []], [[], []], [[6, 13, 20], []], [[], []]] 41 | GROUP: [[1, 1], [1, 1], [1, 1], [1, 1]] 42 | INSTANTIATION: dot_product 43 | POOL: [[[2, 2, 2], [2, 2, 2]], [[2, 2, 2], [2, 2, 2]], [[2, 2, 2], [2, 2, 2]], [[2, 2, 2], [2, 2, 2]]] 44 | BN: 45 | USE_PRECISE_STATS: False 46 | NUM_BATCHES_PRECISE: 200 47 | SOLVER: 48 | MOMENTUM: 0.9 49 | WEIGHT_DECAY: 1e-7 50 | OPTIMIZING_METHOD: sgd 51 | MODEL: 52 | NUM_CLASSES: 80 53 | ARCH: slowfast 54 | MODEL_NAME: SlowFast 55 | LOSS_FUNC: bce 56 | DROPOUT_RATE: 0.5 57 | HEAD_ACT: sigmoid 58 | TEST: 59 | ENABLE: True 60 | DATASET: ava 61 | BATCH_SIZE: 8 62 | DATA_LOADER: 63 | 
NUM_WORKERS: 2 64 | PIN_MEMORY: True 65 | NUM_GPUS: 8 66 | NUM_SHARDS: 1 67 | RNG_SEED: 0 68 | OUTPUT_DIR: . 69 | -------------------------------------------------------------------------------- /configs/AVA/c2/SLOWFAST_32x2_R101_50_50_v2.1.yaml: -------------------------------------------------------------------------------- 1 | TRAIN: 2 | ENABLE: False 3 | DATASET: ava 4 | BATCH_SIZE: 16 5 | EVAL_PERIOD: 1 6 | CHECKPOINT_PERIOD: 1 7 | AUTO_RESUME: True 8 | # CHECKPOINT_FILE_PATH: path to pretrain model 9 | CHECKPOINT_TYPE: caffe2 10 | DATA: 11 | NUM_FRAMES: 32 12 | SAMPLING_RATE: 2 13 | TRAIN_JITTER_SCALES: [256, 320] 14 | TRAIN_CROP_SIZE: 224 15 | TEST_CROP_SIZE: 256 16 | INPUT_CHANNEL_NUM: [3, 3] 17 | DETECTION: 18 | ENABLE: True 19 | ALIGNED: False 20 | AVA: 21 | BGR: False 22 | DETECTION_SCORE_THRESH: 0.8 23 | TEST_PREDICT_BOX_LISTS: ["person_box_67091280_iou90/ava_detection_val_boxes_and_labels.csv"] 24 | TRAIN_GT_BOX_LISTS: ["ava_train_v2.1.csv"] 25 | LABEL_MAP_FILE: ava_action_list_v2.1_for_activitynet_2018.pbtxt 26 | EXCLUSION_FILE: ava_val_excluded_timestamps_v2.1.csv 27 | GROUNDTRUTH_FILE: ava_val_v2.1.csv 28 | SLOWFAST: 29 | ALPHA: 4 30 | BETA_INV: 8 31 | FUSION_CONV_CHANNEL_RATIO: 2 32 | FUSION_KERNEL_SZ: 5 33 | RESNET: 34 | ZERO_INIT_FINAL_BN: True 35 | WIDTH_PER_GROUP: 64 36 | NUM_GROUPS: 1 37 | DEPTH: 101 38 | TRANS_FUNC: bottleneck_transform 39 | STRIDE_1X1: False 40 | NUM_BLOCK_TEMP_KERNEL: [[3, 3], [4, 4], [6, 6], [3, 3]] 41 | SPATIAL_DILATIONS: [[1, 1], [1, 1], [1, 1], [2, 2]] 42 | SPATIAL_STRIDES: [[1, 1], [2, 2], [2, 2], [1, 1]] 43 | NONLOCAL: 44 | LOCATION: [[[], []], [[], []], [[6, 13, 20], []], [[], []]] 45 | GROUP: [[1, 1], [1, 1], [1, 1], [1, 1]] 46 | INSTANTIATION: dot_product 47 | POOL: [[[2, 2, 2], [2, 2, 2]], [[2, 2, 2], [2, 2, 2]], [[2, 2, 2], [2, 2, 2]], [[2, 2, 2], [2, 2, 2]]] 48 | BN: 49 | USE_PRECISE_STATS: False 50 | NUM_BATCHES_PRECISE: 200 51 | SOLVER: 52 | MOMENTUM: 0.9 53 | WEIGHT_DECAY: 1e-7 54 | OPTIMIZING_METHOD: sgd 55 | MODEL: 56 | NUM_CLASSES: 80 57 | ARCH: slowfast 58 | MODEL_NAME: SlowFast 59 | LOSS_FUNC: bce 60 | DROPOUT_RATE: 0.5 61 | HEAD_ACT: sigmoid 62 | TEST: 63 | ENABLE: True 64 | DATASET: ava 65 | BATCH_SIZE: 8 66 | DATA_LOADER: 67 | NUM_WORKERS: 2 68 | PIN_MEMORY: True 69 | NUM_GPUS: 8 70 | NUM_SHARDS: 1 71 | RNG_SEED: 0 72 | OUTPUT_DIR: . 
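# Usage sketch (an assumption following GETTING_STARTED.md; the path is a
# placeholder, not a real file): since TRAIN.ENABLE is False, this c2 config is
# meant for evaluation only, e.g.:
#   python tools/run_net.py --cfg configs/AVA/c2/SLOWFAST_32x2_R101_50_50_v2.1.yaml \
#     TEST.CHECKPOINT_FILE_PATH path_to_the_model_zoo_checkpoint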
73 | -------------------------------------------------------------------------------- /configs/AVA/c2/SLOWFAST_32x2_R50.yaml: -------------------------------------------------------------------------------- 1 | TRAIN: 2 | ENABLE: False 3 | DATASET: ava 4 | BATCH_SIZE: 16 5 | EVAL_PERIOD: 1 6 | CHECKPOINT_PERIOD: 1 7 | AUTO_RESUME: True 8 | # CHECKPOINT_FILE_PATH: path to pretrain model 9 | CHECKPOINT_TYPE: caffe2 10 | DATA: 11 | NUM_FRAMES: 32 12 | SAMPLING_RATE: 2 13 | TRAIN_JITTER_SCALES: [256, 320] 14 | TRAIN_CROP_SIZE: 224 15 | TEST_CROP_SIZE: 256 16 | INPUT_CHANNEL_NUM: [3, 3] 17 | DETECTION: 18 | ENABLE: True 19 | ALIGNED: False 20 | AVA: 21 | BGR: False 22 | DETECTION_SCORE_THRESH: 0.8 23 | TEST_PREDICT_BOX_LISTS: ["person_box_67091280_iou90/ava_detection_val_boxes_and_labels.csv"] 24 | SLOWFAST: 25 | ALPHA: 4 26 | BETA_INV: 8 27 | FUSION_CONV_CHANNEL_RATIO: 2 28 | FUSION_KERNEL_SZ: 7 29 | RESNET: 30 | ZERO_INIT_FINAL_BN: True 31 | WIDTH_PER_GROUP: 64 32 | NUM_GROUPS: 1 33 | DEPTH: 50 34 | TRANS_FUNC: bottleneck_transform 35 | STRIDE_1X1: False 36 | NUM_BLOCK_TEMP_KERNEL: [[3, 3], [4, 4], [6, 6], [3, 3]] 37 | SPATIAL_DILATIONS: [[1, 1], [1, 1], [1, 1], [2, 2]] 38 | SPATIAL_STRIDES: [[1, 1], [2, 2], [2, 2], [1, 1]] 39 | NONLOCAL: 40 | LOCATION: [[[], []], [[], []], [[], []], [[], []]] 41 | GROUP: [[1, 1], [1, 1], [1, 1], [1, 1]] 42 | INSTANTIATION: dot_product 43 | POOL: [[[1, 2, 2], [1, 2, 2]], [[1, 2, 2], [1, 2, 2]], [[1, 2, 2], [1, 2, 2]], [[1, 2, 2], [1, 2, 2]]] 44 | BN: 45 | USE_PRECISE_STATS: False 46 | NUM_BATCHES_PRECISE: 200 47 | SOLVER: 48 | MOMENTUM: 0.9 49 | WEIGHT_DECAY: 1e-7 50 | OPTIMIZING_METHOD: sgd 51 | MODEL: 52 | NUM_CLASSES: 80 53 | ARCH: slowfast 54 | MODEL_NAME: SlowFast 55 | LOSS_FUNC: bce 56 | DROPOUT_RATE: 0.5 57 | HEAD_ACT: sigmoid 58 | TEST: 59 | ENABLE: True 60 | DATASET: ava 61 | BATCH_SIZE: 8 62 | DATA_LOADER: 63 | NUM_WORKERS: 2 64 | PIN_MEMORY: True 65 | NUM_GPUS: 8 66 | NUM_SHARDS: 1 67 | RNG_SEED: 0 68 | OUTPUT_DIR: . 
69 | -------------------------------------------------------------------------------- /configs/AVA/c2/SLOWFAST_64x2_R101_50_50.yaml: -------------------------------------------------------------------------------- 1 | TRAIN: 2 | ENABLE: False 3 | DATASET: ava 4 | BATCH_SIZE: 16 5 | EVAL_PERIOD: 1 6 | CHECKPOINT_PERIOD: 1 7 | AUTO_RESUME: True 8 | # CHECKPOINT_FILE_PATH: path to pretrain model 9 | CHECKPOINT_TYPE: caffe2 10 | DATA: 11 | NUM_FRAMES: 64 12 | SAMPLING_RATE: 2 13 | TRAIN_JITTER_SCALES: [256, 320] 14 | TRAIN_CROP_SIZE: 224 15 | TEST_CROP_SIZE: 256 16 | INPUT_CHANNEL_NUM: [3, 3] 17 | DETECTION: 18 | ENABLE: True 19 | ALIGNED: False 20 | AVA: 21 | BGR: False 22 | DETECTION_SCORE_THRESH: 0.8 23 | TEST_PREDICT_BOX_LISTS: ["person_box_67091280_iou90/ava_detection_val_boxes_and_labels.csv"] 24 | SLOWFAST: 25 | ALPHA: 4 26 | BETA_INV: 8 27 | FUSION_CONV_CHANNEL_RATIO: 2 28 | FUSION_KERNEL_SZ: 5 29 | RESNET: 30 | ZERO_INIT_FINAL_BN: True 31 | WIDTH_PER_GROUP: 64 32 | NUM_GROUPS: 1 33 | DEPTH: 101 34 | TRANS_FUNC: bottleneck_transform 35 | STRIDE_1X1: False 36 | NUM_BLOCK_TEMP_KERNEL: [[3, 3], [4, 4], [6, 6], [3, 3]] 37 | SPATIAL_DILATIONS: [[1, 1], [1, 1], [1, 1], [2, 2]] 38 | SPATIAL_STRIDES: [[1, 1], [2, 2], [2, 2], [1, 1]] 39 | NONLOCAL: 40 | LOCATION: [[[], []], [[], []], [[6, 13, 20], []], [[], []]] 41 | GROUP: [[1, 1], [1, 1], [1, 1], [1, 1]] 42 | INSTANTIATION: dot_product 43 | POOL: [[[2, 2, 2], [2, 2, 2]], [[2, 2, 2], [2, 2, 2]], [[2, 2, 2], [2, 2, 2]], [[2, 2, 2], [2, 2, 2]]] 44 | BN: 45 | USE_PRECISE_STATS: False 46 | NUM_BATCHES_PRECISE: 200 47 | SOLVER: 48 | MOMENTUM: 0.9 49 | WEIGHT_DECAY: 1e-7 50 | OPTIMIZING_METHOD: sgd 51 | MODEL: 52 | NUM_CLASSES: 80 53 | ARCH: slowfast 54 | MODEL_NAME: SlowFast 55 | LOSS_FUNC: bce 56 | DROPOUT_RATE: 0.5 57 | HEAD_ACT: sigmoid 58 | TEST: 59 | ENABLE: True 60 | DATASET: ava 61 | BATCH_SIZE: 8 62 | DATA_LOADER: 63 | NUM_WORKERS: 0 64 | PIN_MEMORY: True 65 | NUM_GPUS: 8 66 | NUM_SHARDS: 1 67 | RNG_SEED: 0 68 | OUTPUT_DIR: . 
69 | -------------------------------------------------------------------------------- /configs/AVA/c2/SLOW_8x8_R50.yaml: -------------------------------------------------------------------------------- 1 | TRAIN: 2 | ENABLE: False 3 | DATASET: ava 4 | BATCH_SIZE: 64 5 | EVAL_PERIOD: 1 6 | CHECKPOINT_PERIOD: 1 7 | AUTO_RESUME: True 8 | # CHECKPOINT_FILE_PATH: path to pretrain model 9 | CHECKPOINT_TYPE: caffe2 10 | DATA: 11 | NUM_FRAMES: 4 12 | SAMPLING_RATE: 16 13 | TRAIN_JITTER_SCALES: [256, 320] 14 | TRAIN_CROP_SIZE: 224 15 | TEST_CROP_SIZE: 256 16 | INPUT_CHANNEL_NUM: [3] 17 | DETECTION: 18 | ENABLE: True 19 | ALIGNED: False 20 | AVA: 21 | BGR: False 22 | DETECTION_SCORE_THRESH: 0.9 23 | TEST_PREDICT_BOX_LISTS: ["person_box_67091280_iou75/ava_detection_val_boxes_and_labels.csv"] 24 | RESNET: 25 | ZERO_INIT_FINAL_BN: True 26 | WIDTH_PER_GROUP: 64 27 | NUM_GROUPS: 1 28 | DEPTH: 50 29 | TRANS_FUNC: bottleneck_transform 30 | STRIDE_1X1: False 31 | NUM_BLOCK_TEMP_KERNEL: [[3], [4], [6], [3]] 32 | SPATIAL_DILATIONS: [[1], [1], [1], [2]] 33 | SPATIAL_STRIDES: [[1], [2], [2], [1]] 34 | NONLOCAL: 35 | LOCATION: [[[]], [[]], [[]], [[]]] 36 | GROUP: [[1], [1], [1], [1]] 37 | INSTANTIATION: softmax 38 | BN: 39 | USE_PRECISE_STATS: False 40 | NUM_BATCHES_PRECISE: 200 41 | SOLVER: 42 | MOMENTUM: 0.9 43 | WEIGHT_DECAY: 1e-7 44 | OPTIMIZING_METHOD: sgd 45 | MODEL: 46 | NUM_CLASSES: 80 47 | ARCH: slow 48 | MODEL_NAME: ResNet 49 | LOSS_FUNC: bce 50 | DROPOUT_RATE: 0.5 51 | HEAD_ACT: sigmoid 52 | TEST: 53 | ENABLE: True 54 | DATASET: ava 55 | BATCH_SIZE: 8 56 | DATA_LOADER: 57 | NUM_WORKERS: 2 58 | PIN_MEMORY: True 59 | NUM_GPUS: 8 60 | NUM_SHARDS: 1 61 | RNG_SEED: 0 62 | OUTPUT_DIR: . 63 | -------------------------------------------------------------------------------- /configs/Charades/SLOWFAST_16x8_R50.yaml: -------------------------------------------------------------------------------- 1 | TRAIN: 2 | ENABLE: True 3 | DATASET: charades 4 | BATCH_SIZE: 16 5 | EVAL_PERIOD: 6 6 | CHECKPOINT_PERIOD: 6 7 | AUTO_RESUME: True 8 | CHECKPOINT_FILE_PATH: SLOWFAST_8x8_R50.pkl # please download from the model zoo. 
9 | CHECKPOINT_TYPE: caffe2 10 | DATA: 11 | NUM_FRAMES: 64 12 | SAMPLING_RATE: 2 13 | TRAIN_JITTER_SCALES: [256, 340] 14 | TRAIN_CROP_SIZE: 224 15 | TEST_CROP_SIZE: 256 16 | INPUT_CHANNEL_NUM: [3, 3] 17 | MULTI_LABEL: True 18 | INV_UNIFORM_SAMPLE: True 19 | ENSEMBLE_METHOD: max 20 | REVERSE_INPUT_CHANNEL: True 21 | SLOWFAST: 22 | ALPHA: 4 23 | BETA_INV: 8 24 | FUSION_CONV_CHANNEL_RATIO: 2 25 | FUSION_KERNEL_SZ: 7 26 | RESNET: 27 | SPATIAL_STRIDES: [[1, 1], [2, 2], [2, 2], [2, 2]] 28 | SPATIAL_DILATIONS: [[1, 1], [1, 1], [1, 1], [1, 1]] 29 | ZERO_INIT_FINAL_BN: True 30 | WIDTH_PER_GROUP: 64 31 | NUM_GROUPS: 1 32 | DEPTH: 50 33 | TRANS_FUNC: bottleneck_transform 34 | STRIDE_1X1: False 35 | NUM_BLOCK_TEMP_KERNEL: [[3, 3], [4, 4], [6, 6], [3, 3]] 36 | NONLOCAL: 37 | LOCATION: [[[], []], [[], []], [[], []], [[], []]] 38 | GROUP: [[1, 1], [1, 1], [1, 1], [1, 1]] 39 | INSTANTIATION: dot_product 40 | BN: 41 | USE_PRECISE_STATS: True 42 | NUM_BATCHES_PRECISE: 200 43 | NORM_TYPE: sync_batchnorm 44 | NUM_SYNC_DEVICES: 4 45 | SOLVER: 46 | BASE_LR: 0.0375 47 | LR_POLICY: steps_with_relative_lrs 48 | LRS: [1, 0.1, 0.01, 0.001, 0.0001, 0.00001] 49 | STEPS: [0, 41, 49] 50 | MAX_EPOCH: 57 51 | MOMENTUM: 0.9 52 | WEIGHT_DECAY: 1e-4 53 | WARMUP_EPOCHS: 4.0 54 | WARMUP_START_LR: 0.0001 55 | OPTIMIZING_METHOD: sgd 56 | MODEL: 57 | NUM_CLASSES: 157 58 | ARCH: slowfast 59 | LOSS_FUNC: bce_logit 60 | HEAD_ACT: sigmoid 61 | DROPOUT_RATE: 0.5 62 | TEST: 63 | ENABLE: True 64 | DATASET: charades 65 | BATCH_SIZE: 16 66 | NUM_ENSEMBLE_VIEWS: 10 67 | NUM_SPATIAL_CROPS: 3 68 | DATA_LOADER: 69 | NUM_WORKERS: 8 70 | PIN_MEMORY: True 71 | NUM_GPUS: 8 72 | NUM_SHARDS: 1 73 | RNG_SEED: 0 74 | OUTPUT_DIR: . 75 | LOG_MODEL_INFO: False 76 | -------------------------------------------------------------------------------- /configs/Charades/SLOWFAST_16x8_R50_multigrid.yaml: -------------------------------------------------------------------------------- 1 | MULTIGRID: 2 | SHORT_CYCLE: True 3 | LONG_CYCLE: True 4 | TRAIN: 5 | ENABLE: True 6 | DATASET: charades 7 | BATCH_SIZE: 16 8 | EVAL_PERIOD: 6 9 | CHECKPOINT_PERIOD: 6 10 | AUTO_RESUME: True 11 | CHECKPOINT_FILE_PATH: SLOWFAST_8x8_R50.pkl # please download from the model zoo. 
12 | CHECKPOINT_TYPE: caffe2 13 | DATA: 14 | NUM_FRAMES: 64 15 | SAMPLING_RATE: 2 16 | TRAIN_JITTER_SCALES: [256, 340] 17 | TRAIN_CROP_SIZE: 224 18 | TEST_CROP_SIZE: 256 19 | INPUT_CHANNEL_NUM: [3, 3] 20 | MULTI_LABEL: True 21 | INV_UNIFORM_SAMPLE: True 22 | ENSEMBLE_METHOD: max 23 | REVERSE_INPUT_CHANNEL: True 24 | SLOWFAST: 25 | ALPHA: 4 26 | BETA_INV: 8 27 | FUSION_CONV_CHANNEL_RATIO: 2 28 | FUSION_KERNEL_SZ: 7 29 | RESNET: 30 | SPATIAL_STRIDES: [[1, 1], [2, 2], [2, 2], [2, 2]] 31 | SPATIAL_DILATIONS: [[1, 1], [1, 1], [1, 1], [1, 1]] 32 | ZERO_INIT_FINAL_BN: True 33 | WIDTH_PER_GROUP: 64 34 | NUM_GROUPS: 1 35 | DEPTH: 50 36 | TRANS_FUNC: bottleneck_transform 37 | STRIDE_1X1: False 38 | NUM_BLOCK_TEMP_KERNEL: [[3, 3], [4, 4], [6, 6], [3, 3]] 39 | NONLOCAL: 40 | LOCATION: [[[], []], [[], []], [[], []], [[], []]] 41 | GROUP: [[1, 1], [1, 1], [1, 1], [1, 1]] 42 | INSTANTIATION: dot_product 43 | BN: 44 | USE_PRECISE_STATS: True 45 | NUM_BATCHES_PRECISE: 200 46 | NORM_TYPE: sync_batchnorm 47 | NUM_SYNC_DEVICES: 4 48 | SOLVER: 49 | BASE_LR: 0.0375 50 | LR_POLICY: steps_with_relative_lrs 51 | LRS: [1, 0.1, 0.01, 0.001, 0.0001, 0.00001] 52 | STEPS: [0, 41, 49] 53 | MAX_EPOCH: 57 54 | MOMENTUM: 0.9 55 | WEIGHT_DECAY: 1e-4 56 | WARMUP_EPOCHS: 4.0 57 | WARMUP_START_LR: 0.0001 58 | OPTIMIZING_METHOD: sgd 59 | MODEL: 60 | NUM_CLASSES: 157 61 | ARCH: slowfast 62 | LOSS_FUNC: bce_logit 63 | HEAD_ACT: sigmoid 64 | DROPOUT_RATE: 0.5 65 | TEST: 66 | ENABLE: True 67 | DATASET: charades 68 | BATCH_SIZE: 16 69 | NUM_ENSEMBLE_VIEWS: 10 70 | NUM_SPATIAL_CROPS: 3 71 | DATA_LOADER: 72 | NUM_WORKERS: 8 73 | PIN_MEMORY: True 74 | NUM_GPUS: 8 75 | NUM_SHARDS: 1 76 | RNG_SEED: 0 77 | OUTPUT_DIR: . 78 | LOG_MODEL_INFO: False 79 | -------------------------------------------------------------------------------- /configs/Kinetics/AVSLOWFAST_4x16_R50.yaml: -------------------------------------------------------------------------------- 1 | TRAIN: 2 | ENABLE: True 3 | DATASET: kinetics 4 | BATCH_SIZE: 64 5 | EVAL_PERIOD: 10 6 | CHECKPOINT_PERIOD: 1 7 | AUTO_RESUME: True 8 | # CHECKPOINT_FILE_PATH: ../../data/output/checkpoints/avslowfast.pth 9 | # CHECKPOINT_TYPE: pytorch # caffe2 or pytorch 10 | DATA: 11 | USE_BGR_ORDER: False # False 12 | NUM_FRAMES: 32 13 | SAMPLING_RATE: 2 14 | TRAIN_JITTER_SCALES: [256, 320] 15 | TRAIN_CROP_SIZE: 224 16 | TEST_CROP_SIZE: 256 17 | INPUT_CHANNEL_NUM: [3, 3, 1] 18 | USE_AUDIO: True 19 | GET_MISALIGNED_AUDIO: True 20 | AUDIO_SAMPLE_RATE: 16000 21 | AUDIO_WIN_SZ: 32 22 | AUDIO_STEP_SZ: 16 23 | AUDIO_FRAME_NUM: 128 24 | AUDIO_MEL_NUM: 80 25 | AUDIO_MISALIGNED_GAP: 32 # half second 26 | LOGMEL_MEAN: -7.03 # -7.03, -24.227 27 | LOGMEL_STD: 4.66 # 4.66, 1.0 28 | EASY_NEG_RATIO: 0.75 29 | MIX_NEG_EPOCH: 96 30 | SLOWFAST: 31 | ALPHA: 8 32 | BETA_INV: 8 33 | FUSION_CONV_CHANNEL_RATIO: 2 34 | FUSION_KERNEL_SZ: 5 35 | AU_ALPHA: 32 36 | AU_BETA_INV: 2 37 | AU_FUSION_CONV_CHANNEL_MODE: ByDim # ByDim, ByRatio 38 | AU_FUSION_CONV_CHANNEL_RATIO: 0.25 39 | AU_FUSION_CONV_CHANNEL_DIM: 64 40 | AU_FUSION_KERNEL_SZ: 5 41 | AU_FUSION_CONV_NUM: 2 42 | AU_REDUCE_TF_DIM: True 43 | FS_FUSION: [False, False, True, True] 44 | AFS_FUSION: [False, False, True, True] 45 | AVS_FLAG: [False, False, True, True, True] 46 | AVS_PROJ_DIM: 64 47 | AVS_VAR_THRESH: 0.01 48 | AVS_DUPLICATE_THRESH: 0.99999 49 | DROPPATHWAY_RATE: 0.8 # 0.8 50 | RESNET: 51 | ZERO_INIT_FINAL_BN: True 52 | WIDTH_PER_GROUP: 64 53 | NUM_GROUPS: 1 54 | DEPTH: 50 55 | TRANS_FUNC: bottleneck_transform 56 | AUDIO_TRANS_FUNC: tf_bottleneck_transform_v1 57 
| AUDIO_TRANS_NUM: 2 58 | STRIDE_1X1: False 59 | # 18: [[2, 2, 2], [2, 2, 2], [2, 2, 2], [2, 2, 2]] 60 | # 34: [[3, 3, 3], [4, 4, 4], [6, 6, 6], [3, 3, 3]] 61 | # 50: [[3, 3, 3], [4, 4, 4], [6, 6, 6], [3, 3, 3]] 62 | # 101: [[3, 3, 3], [4, 4, 4], [23, 23, 23], [3, 3, 3]] 63 | # 152: [[3, 3, 3], [8, 8, 8], [36, 36, 36], [3, 3, 3]] 64 | NUM_BLOCK_TEMP_KERNEL: [[3, 3, 3], [4, 4, 4], [6, 6, 6], [3, 3, 3]] 65 | SPATIAL_DILATIONS: [[1, 1, 1], [1, 1, 1], [1, 1, 1], [1, 1, 1]] 66 | NONLOCAL: 67 | LOCATION: [[[], [], []], [[], [], []], [[], [], []], [[], [], []]] 68 | GROUP: [[1, 1, 1], [1, 1, 1], [1, 1, 1], [1, 1, 1]] 69 | POOL: [ 70 | [[1, 2, 2], [1, 2, 2], [1, 2, 2]], 71 | [[1, 2, 2], [1, 2, 2], [1, 2, 2]], 72 | [[1, 2, 2], [1, 2, 2], [1, 2, 2]], 73 | [[1, 2, 2], [1, 2, 2], [1, 2, 2]], 74 | ] 75 | INSTANTIATION: dot_product 76 | BN: 77 | USE_PRECISE_STATS: True 78 | NUM_BATCHES_PRECISE: 200 79 | MOMENTUM: 0.1 80 | WEIGHT_DECAY: 0.0 81 | SOLVER: 82 | BASE_LR: 0.1 # 0.1 83 | LR_POLICY: cosine 84 | MAX_EPOCH: 196 85 | MOMENTUM: 0.9 86 | WEIGHT_DECAY: 1e-4 87 | WARMUP_EPOCHS: 34.0 # 34.0 88 | WARMUP_START_LR: 0.01 # 0.01 89 | OPTIMIZING_METHOD: sgd 90 | MODEL: 91 | NUM_CLASSES: 400 92 | MODEL_NAME: AVSlowFast 93 | ARCH: avslowfast 94 | LOSS_FUNC: cross_entropy 95 | DROPOUT_RATE: 0.5 96 | TEST: 97 | ENABLE: True 98 | DATASET: kinetics 99 | BATCH_SIZE: 64 100 | # CHECKPOINT_FILE_PATH: ../../data/output/checkpoints/avslowfast.pth 101 | # CHECKPOINT_TYPE: pytorch # caffe2 or pytorch 102 | DATA_LOADER: 103 | NUM_WORKERS: 8 # 8 104 | PIN_MEMORY: True 105 | NUM_GPUS: 8 106 | NUM_SHARDS: 1 107 | RNG_SEED: 0 108 | OUTPUT_DIR: ./output/AVSlowFast-R50-4x16 109 | -------------------------------------------------------------------------------- /configs/Kinetics/AVSLOWFAST_8x8_R50.yaml: -------------------------------------------------------------------------------- 1 | TRAIN: 2 | ENABLE: True 3 | DATASET: kinetics 4 | BATCH_SIZE: 32 5 | EVAL_PERIOD: 10 6 | CHECKPOINT_PERIOD: 1 7 | AUTO_RESUME: True 8 | # CHECKPOINT_FILE_PATH: ../../data/output/checkpoints/avslowfast.pth 9 | # CHECKPOINT_TYPE: pytorch # caffe2 or pytorch 10 | DATA: 11 | USE_BGR_ORDER: False # False 12 | NUM_FRAMES: 32 13 | SAMPLING_RATE: 2 14 | TRAIN_JITTER_SCALES: [256, 320] 15 | TRAIN_CROP_SIZE: 224 16 | TEST_CROP_SIZE: 256 17 | INPUT_CHANNEL_NUM: [3, 3, 1] 18 | USE_AUDIO: True 19 | GET_MISALIGNED_AUDIO: True 20 | AUDIO_SAMPLE_RATE: 16000 21 | AUDIO_WIN_SZ: 32 22 | AUDIO_STEP_SZ: 16 23 | AUDIO_FRAME_NUM: 128 24 | AUDIO_MEL_NUM: 80 25 | AUDIO_MISALIGNED_GAP: 32 # half second 26 | LOGMEL_MEAN: -7.03 # -7.03, -24.227 27 | LOGMEL_STD: 4.66 # 4.66, 1.0 28 | EASY_NEG_RATIO: 0.75 29 | MIX_NEG_EPOCH: 96 30 | SLOWFAST: 31 | ALPHA: 4 32 | BETA_INV: 8 33 | FUSION_CONV_CHANNEL_RATIO: 2 34 | FUSION_KERNEL_SZ: 7 35 | AU_ALPHA: 16 36 | AU_BETA_INV: 2 37 | AU_FUSION_CONV_CHANNEL_MODE: ByDim # ByDim, ByRatio 38 | AU_FUSION_CONV_CHANNEL_RATIO: 0.25 39 | AU_FUSION_CONV_CHANNEL_DIM: 64 40 | AU_FUSION_KERNEL_SZ: 5 41 | AU_FUSION_CONV_NUM: 2 42 | AU_REDUCE_TF_DIM: True 43 | FS_FUSION: [False, False, True, True] 44 | AFS_FUSION: [False, False, True, True] 45 | AVS_FLAG: [False, False, True, True, True] 46 | AVS_PROJ_DIM: 64 47 | AVS_VAR_THRESH: 0.01 48 | AVS_DUPLICATE_THRESH: 0.99999 49 | DROPPATHWAY_RATE: 0.8 # 0.8 50 | RESNET: 51 | ZERO_INIT_FINAL_BN: True 52 | WIDTH_PER_GROUP: 64 53 | NUM_GROUPS: 1 54 | DEPTH: 50 55 | TRANS_FUNC: bottleneck_transform 56 | AUDIO_TRANS_FUNC: tf_bottleneck_transform_v1 57 | AUDIO_TRANS_NUM: 2 58 | STRIDE_1X1: False 59 | # 18: [[2, 2, 
2], [2, 2, 2], [2, 2, 2], [2, 2, 2]] 60 | # 34: [[3, 3, 3], [4, 4, 4], [6, 6, 6], [3, 3, 3]] 61 | # 50: [[3, 3, 3], [4, 4, 4], [6, 6, 6], [3, 3, 3]] 62 | # 101: [[3, 3, 3], [4, 4, 4], [23, 23, 23], [3, 3, 3]] 63 | # 152: [[3, 3, 3], [8, 8, 8], [36, 36, 36], [3, 3, 3]] 64 | NUM_BLOCK_TEMP_KERNEL: [[3, 3, 3], [4, 4, 4], [6, 6, 6], [3, 3, 3]] 65 | SPATIAL_DILATIONS: [[1, 1, 1], [1, 1, 1], [1, 1, 1], [1, 1, 1]] 66 | NONLOCAL: 67 | LOCATION: [[[], [], []], [[], [], []], [[], [], []], [[], [], []]] 68 | GROUP: [[1, 1, 1], [1, 1, 1], [1, 1, 1], [1, 1, 1]] 69 | POOL: [ 70 | [[1, 2, 2], [1, 2, 2], [1, 2, 2]], 71 | [[1, 2, 2], [1, 2, 2], [1, 2, 2]], 72 | [[1, 2, 2], [1, 2, 2], [1, 2, 2]], 73 | [[1, 2, 2], [1, 2, 2], [1, 2, 2]], 74 | ] 75 | INSTANTIATION: dot_product 76 | BN: 77 | USE_PRECISE_STATS: True 78 | NUM_BATCHES_PRECISE: 400 79 | MOMENTUM: 0.1 80 | WEIGHT_DECAY: 0.0 81 | SOLVER: 82 | BASE_LR: 0.1 # 0.1 83 | LR_POLICY: cosine 84 | MAX_EPOCH: 196 85 | MOMENTUM: 0.9 86 | WEIGHT_DECAY: 1e-4 87 | WARMUP_EPOCHS: 34.0 # 34.0 88 | WARMUP_START_LR: 0.01 # 0.01 89 | OPTIMIZING_METHOD: sgd 90 | MODEL: 91 | NUM_CLASSES: 400 92 | MODEL_NAME: AVSlowFast 93 | ARCH: avslowfast 94 | LOSS_FUNC: cross_entropy 95 | DROPOUT_RATE: 0.5 96 | TEST: 97 | ENABLE: True 98 | DATASET: kinetics 99 | BATCH_SIZE: 32 100 | # CHECKPOINT_FILE_PATH: ../../data/output/checkpoints/avslowfast.pth 101 | # CHECKPOINT_TYPE: pytorch # caffe2 or pytorch 102 | DATA_LOADER: 103 | NUM_WORKERS: 8 # 8 104 | PIN_MEMORY: True 105 | NUM_GPUS: 8 106 | NUM_SHARDS: 1 107 | RNG_SEED: 0 108 | OUTPUT_DIR: ./output/AVSlowFast-R50-8x8 109 | -------------------------------------------------------------------------------- /configs/Kinetics/C2D_8x8_R50.yaml: -------------------------------------------------------------------------------- 1 | TRAIN: 2 | ENABLE: True 3 | DATASET: kinetics 4 | BATCH_SIZE: 64 5 | EVAL_PERIOD: 10 6 | CHECKPOINT_PERIOD: 1 7 | AUTO_RESUME: True 8 | DATA: 9 | NUM_FRAMES: 8 10 | SAMPLING_RATE: 8 11 | TRAIN_JITTER_SCALES: [256, 320] 12 | TRAIN_CROP_SIZE: 224 13 | TEST_CROP_SIZE: 256 14 | INPUT_CHANNEL_NUM: [3] 15 | RESNET: 16 | ZERO_INIT_FINAL_BN: True 17 | WIDTH_PER_GROUP: 64 18 | NUM_GROUPS: 1 19 | DEPTH: 50 20 | TRANS_FUNC: bottleneck_transform 21 | STRIDE_1X1: False 22 | NUM_BLOCK_TEMP_KERNEL: [[3], [4], [6], [3]] 23 | NONLOCAL: 24 | LOCATION: [[[]], [[]], [[]], [[]]] 25 | GROUP: [[1], [1], [1], [1]] 26 | INSTANTIATION: softmax 27 | BN: 28 | USE_PRECISE_STATS: True 29 | NUM_BATCHES_PRECISE: 200 30 | SOLVER: 31 | BASE_LR: 0.1 32 | LR_POLICY: cosine 33 | MAX_EPOCH: 196 34 | MOMENTUM: 0.9 35 | WEIGHT_DECAY: 1e-4 36 | WARMUP_EPOCHS: 34.0 37 | WARMUP_START_LR: 0.01 38 | OPTIMIZING_METHOD: sgd 39 | MODEL: 40 | NUM_CLASSES: 400 41 | ARCH: c2d 42 | MODEL_NAME: ResNet 43 | LOSS_FUNC: cross_entropy 44 | DROPOUT_RATE: 0.5 45 | TEST: 46 | ENABLE: True 47 | DATASET: kinetics 48 | BATCH_SIZE: 64 49 | DATA_LOADER: 50 | NUM_WORKERS: 8 51 | PIN_MEMORY: True 52 | NUM_GPUS: 8 53 | NUM_SHARDS: 1 54 | RNG_SEED: 0 55 | OUTPUT_DIR: . 
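# Naming note: the "8x8" in this config's name is frame length x sample rate,
# i.e. DATA.NUM_FRAMES x DATA.SAMPLING_RATE above (see the "frame length x
# sample rate" column in MODEL_ZOO.md); the same convention applies to the
# other Kinetics configs.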
56 | -------------------------------------------------------------------------------- /configs/Kinetics/C2D_8x8_R50_IN1K.yaml: -------------------------------------------------------------------------------- 1 | TRAIN: 2 | ENABLE: True 3 | DATASET: kinetics 4 | BATCH_SIZE: 64 5 | EVAL_PERIOD: 10 6 | CHECKPOINT_PERIOD: 1 7 | AUTO_RESUME: True 8 | CHECKPOINT_FILE_PATH: /mnt/vol/gfsai-bistro2-east/ai-group/bistro/gpu/haoqifan/pySlowFastModelZoo/imagenet50_pretrain.pyth 9 | CHECKPOINT_INFLATE: True 10 | DATA: 11 | NUM_FRAMES: 8 12 | SAMPLING_RATE: 8 13 | TRAIN_JITTER_SCALES: [256, 320] 14 | TRAIN_CROP_SIZE: 224 15 | TEST_CROP_SIZE: 256 16 | INPUT_CHANNEL_NUM: [3] 17 | PATH_TO_DATA_DIR: /mnt/vol/gfsai-east/ai-group/users/haoqifan/kinetics/alllist/py_slowfast 18 | RESNET: 19 | ZERO_INIT_FINAL_BN: True 20 | WIDTH_PER_GROUP: 64 21 | NUM_GROUPS: 1 22 | DEPTH: 50 23 | TRANS_FUNC: bottleneck_transform 24 | STRIDE_1X1: False 25 | NUM_BLOCK_TEMP_KERNEL: [[3], [4], [6], [3]] 26 | NONLOCAL: 27 | LOCATION: [[[]], [[]], [[]], [[]]] 28 | GROUP: [[1], [1], [1], [1]] 29 | INSTANTIATION: softmax 30 | BN: 31 | USE_PRECISE_STATS: True 32 | NUM_BATCHES_PRECISE: 200 33 | SOLVER: 34 | BASE_LR: 0.01 35 | LR_POLICY: steps_with_relative_lrs 36 | STEPS: [0, 44, 88, 118] 37 | LRS: [1, 0.1, 0.01, 0.001] 38 | MAX_EPOCH: 118 39 | MOMENTUM: 0.9 40 | WEIGHT_DECAY: 1e-4 41 | OPTIMIZING_METHOD: sgd 42 | MODEL: 43 | NUM_CLASSES: 400 44 | ARCH: c2d 45 | MODEL_NAME: ResNet 46 | LOSS_FUNC: cross_entropy 47 | DROPOUT_RATE: 0.5 48 | TEST: 49 | ENABLE: True 50 | DATASET: kinetics 51 | BATCH_SIZE: 64 52 | DATA_LOADER: 53 | NUM_WORKERS: 8 54 | PIN_MEMORY: True 55 | NUM_GPUS: 8 56 | NUM_SHARDS: 1 57 | RNG_SEED: 0 58 | OUTPUT_DIR: . 59 | -------------------------------------------------------------------------------- /configs/Kinetics/C2D_NLN_8x8_R50.yaml: -------------------------------------------------------------------------------- 1 | TRAIN: 2 | ENABLE: True 3 | DATASET: kinetics 4 | BATCH_SIZE: 64 5 | EVAL_PERIOD: 10 6 | CHECKPOINT_PERIOD: 1 7 | AUTO_RESUME: True 8 | DATA: 9 | NUM_FRAMES: 8 10 | SAMPLING_RATE: 8 11 | TRAIN_JITTER_SCALES: [256, 320] 12 | TRAIN_CROP_SIZE: 224 13 | TEST_CROP_SIZE: 256 14 | INPUT_CHANNEL_NUM: [3] 15 | RESNET: 16 | ZERO_INIT_FINAL_BN: True 17 | WIDTH_PER_GROUP: 64 18 | NUM_GROUPS: 1 19 | DEPTH: 50 20 | TRANS_FUNC: bottleneck_transform 21 | STRIDE_1X1: False 22 | NUM_BLOCK_TEMP_KERNEL: [[3], [4], [6], [3]] 23 | NONLOCAL: 24 | LOCATION: [[[]], [[1, 3]], [[1, 3, 5]], [[]]] 25 | GROUP: [[1], [1], [1], [1]] 26 | INSTANTIATION: softmax 27 | BN: 28 | USE_PRECISE_STATS: True 29 | NUM_BATCHES_PRECISE: 200 30 | SOLVER: 31 | BASE_LR: 0.1 32 | LR_POLICY: cosine 33 | MAX_EPOCH: 196 34 | MOMENTUM: 0.9 35 | WEIGHT_DECAY: 1e-4 36 | WARMUP_EPOCHS: 34.0 37 | WARMUP_START_LR: 0.01 38 | OPTIMIZING_METHOD: sgd 39 | MODEL: 40 | NUM_CLASSES: 400 41 | ARCH: c2d 42 | MODEL_NAME: ResNet 43 | LOSS_FUNC: cross_entropy 44 | DROPOUT_RATE: 0.5 45 | TEST: 46 | ENABLE: True 47 | DATASET: kinetics 48 | BATCH_SIZE: 64 49 | DATA_LOADER: 50 | NUM_WORKERS: 8 51 | PIN_MEMORY: True 52 | NUM_GPUS: 8 53 | NUM_SHARDS: 1 54 | RNG_SEED: 0 55 | OUTPUT_DIR: . 
56 | -------------------------------------------------------------------------------- /configs/Kinetics/C2D_NLN_8x8_R50_IN1K.yaml: -------------------------------------------------------------------------------- 1 | TRAIN: 2 | ENABLE: True 3 | DATASET: kinetics 4 | BATCH_SIZE: 64 5 | EVAL_PERIOD: 10 6 | CHECKPOINT_PERIOD: 1 7 | AUTO_RESUME: True 8 | CHECKPOINT_FILE_PATH: /mnt/vol/gfsai-bistro2-east/ai-group/bistro/gpu/haoqifan/pySlowFastModelZoo/imagenet50_pretrain.pyth 9 | CHECKPOINT_INFLATE: True 10 | DATA: 11 | NUM_FRAMES: 8 12 | SAMPLING_RATE: 8 13 | TRAIN_JITTER_SCALES: [256, 320] 14 | TRAIN_CROP_SIZE: 224 15 | TEST_CROP_SIZE: 256 16 | INPUT_CHANNEL_NUM: [3] 17 | PATH_TO_DATA_DIR: /mnt/vol/gfsai-east/ai-group/users/haoqifan/kinetics/alllist/py_slowfast 18 | RESNET: 19 | ZERO_INIT_FINAL_BN: True 20 | WIDTH_PER_GROUP: 64 21 | NUM_GROUPS: 1 22 | DEPTH: 50 23 | TRANS_FUNC: bottleneck_transform 24 | STRIDE_1X1: False 25 | NUM_BLOCK_TEMP_KERNEL: [[3], [4], [6], [3]] 26 | NONLOCAL: 27 | LOCATION: [[[]], [[1, 3]], [[1, 3, 5]], [[]]] 28 | GROUP: [[1], [1], [1], [1]] 29 | INSTANTIATION: softmax 30 | BN: 31 | USE_PRECISE_STATS: True 32 | NUM_BATCHES_PRECISE: 200 33 | SOLVER: 34 | BASE_LR: 0.01 35 | LR_POLICY: steps_with_relative_lrs 36 | STEPS: [0, 44, 88, 118] 37 | LRS: [1, 0.1, 0.01, 0.001] 38 | MAX_EPOCH: 118 39 | MOMENTUM: 0.9 40 | WEIGHT_DECAY: 1e-4 41 | OPTIMIZING_METHOD: sgd 42 | MODEL: 43 | NUM_CLASSES: 400 44 | ARCH: c2d 45 | MODEL_NAME: ResNet 46 | LOSS_FUNC: cross_entropy 47 | DROPOUT_RATE: 0.5 48 | TEST: 49 | ENABLE: True 50 | DATASET: kinetics 51 | BATCH_SIZE: 64 52 | DATA_LOADER: 53 | NUM_WORKERS: 8 54 | PIN_MEMORY: True 55 | NUM_GPUS: 8 56 | NUM_SHARDS: 1 57 | RNG_SEED: 0 58 | OUTPUT_DIR: . 59 | -------------------------------------------------------------------------------- /configs/Kinetics/I3D_8x8_R101.yaml: -------------------------------------------------------------------------------- 1 | TRAIN: 2 | ENABLE: True 3 | DATASET: kinetics 4 | BATCH_SIZE: 64 5 | EVAL_PERIOD: 10 6 | CHECKPOINT_PERIOD: 1 7 | AUTO_RESUME: True 8 | DATA: 9 | NUM_FRAMES: 8 10 | SAMPLING_RATE: 8 11 | TRAIN_JITTER_SCALES: [256, 320] 12 | TRAIN_CROP_SIZE: 224 13 | TEST_CROP_SIZE: 256 14 | INPUT_CHANNEL_NUM: [3] 15 | RESNET: 16 | ZERO_INIT_FINAL_BN: True 17 | WIDTH_PER_GROUP: 64 18 | NUM_GROUPS: 1 19 | DEPTH: 101 20 | TRANS_FUNC: bottleneck_transform 21 | STRIDE_1X1: False 22 | NUM_BLOCK_TEMP_KERNEL: [[3], [4], [23], [3]] 23 | NONLOCAL: 24 | LOCATION: [[[]], [[]], [[]], [[]]] 25 | GROUP: [[1], [1], [1], [1]] 26 | INSTANTIATION: softmax 27 | BN: 28 | USE_PRECISE_STATS: True 29 | NUM_BATCHES_PRECISE: 200 30 | SOLVER: 31 | BASE_LR: 0.1 32 | LR_POLICY: cosine 33 | MAX_EPOCH: 196 34 | MOMENTUM: 0.9 35 | WEIGHT_DECAY: 1e-4 36 | WARMUP_EPOCHS: 34.0 37 | WARMUP_START_LR: 0.01 38 | OPTIMIZING_METHOD: sgd 39 | MODEL: 40 | NUM_CLASSES: 400 41 | ARCH: i3d 42 | MODEL_NAME: ResNet 43 | LOSS_FUNC: cross_entropy 44 | DROPOUT_RATE: 0.5 45 | TEST: 46 | ENABLE: True 47 | DATASET: kinetics 48 | BATCH_SIZE: 64 49 | DATA_LOADER: 50 | NUM_WORKERS: 8 51 | PIN_MEMORY: True 52 | NUM_GPUS: 8 53 | NUM_SHARDS: 1 54 | RNG_SEED: 0 55 | OUTPUT_DIR: . 
56 | -------------------------------------------------------------------------------- /configs/Kinetics/I3D_8x8_R50.yaml: -------------------------------------------------------------------------------- 1 | TRAIN: 2 | ENABLE: True 3 | DATASET: kinetics 4 | BATCH_SIZE: 64 5 | EVAL_PERIOD: 10 6 | CHECKPOINT_PERIOD: 1 7 | AUTO_RESUME: True 8 | DATA: 9 | NUM_FRAMES: 8 10 | SAMPLING_RATE: 8 11 | TRAIN_JITTER_SCALES: [256, 320] 12 | TRAIN_CROP_SIZE: 224 13 | TEST_CROP_SIZE: 256 14 | INPUT_CHANNEL_NUM: [3] 15 | RESNET: 16 | ZERO_INIT_FINAL_BN: True 17 | WIDTH_PER_GROUP: 64 18 | NUM_GROUPS: 1 19 | DEPTH: 50 20 | TRANS_FUNC: bottleneck_transform 21 | STRIDE_1X1: False 22 | NUM_BLOCK_TEMP_KERNEL: [[3], [4], [6], [3]] 23 | NONLOCAL: 24 | LOCATION: [[[]], [[]], [[]], [[]]] 25 | GROUP: [[1], [1], [1], [1]] 26 | INSTANTIATION: softmax 27 | BN: 28 | USE_PRECISE_STATS: True 29 | NUM_BATCHES_PRECISE: 200 30 | SOLVER: 31 | BASE_LR: 0.1 32 | LR_POLICY: cosine 33 | MAX_EPOCH: 196 34 | MOMENTUM: 0.9 35 | WEIGHT_DECAY: 1e-4 36 | WARMUP_EPOCHS: 34.0 37 | WARMUP_START_LR: 0.01 38 | OPTIMIZING_METHOD: sgd 39 | MODEL: 40 | NUM_CLASSES: 400 41 | ARCH: i3d 42 | MODEL_NAME: ResNet 43 | LOSS_FUNC: cross_entropy 44 | DROPOUT_RATE: 0.5 45 | TEST: 46 | ENABLE: True 47 | DATASET: kinetics 48 | BATCH_SIZE: 64 49 | DATA_LOADER: 50 | NUM_WORKERS: 8 51 | PIN_MEMORY: True 52 | NUM_GPUS: 8 53 | NUM_SHARDS: 1 54 | RNG_SEED: 0 55 | OUTPUT_DIR: . 56 | -------------------------------------------------------------------------------- /configs/Kinetics/I3D_8x8_R50_IN1K.yaml: -------------------------------------------------------------------------------- 1 | TRAIN: 2 | ENABLE: True 3 | DATASET: kinetics 4 | BATCH_SIZE: 64 5 | EVAL_PERIOD: 10 6 | CHECKPOINT_PERIOD: 1 7 | AUTO_RESUME: True 8 | CHECKPOINT_FILE_PATH: /mnt/vol/gfsai-bistro2-east/ai-group/bistro/gpu/haoqifan/pySlowFastModelZoo/imagenet50_pretrain.pyth 9 | CHECKPOINT_INFLATE: True 10 | DATA: 11 | NUM_FRAMES: 8 12 | SAMPLING_RATE: 8 13 | TRAIN_JITTER_SCALES: [256, 320] 14 | TRAIN_CROP_SIZE: 224 15 | TEST_CROP_SIZE: 256 16 | INPUT_CHANNEL_NUM: [3] 17 | PATH_TO_DATA_DIR: /mnt/vol/gfsai-east/ai-group/users/haoqifan/kinetics/alllist/py_slowfast 18 | RESNET: 19 | ZERO_INIT_FINAL_BN: True 20 | WIDTH_PER_GROUP: 64 21 | NUM_GROUPS: 1 22 | DEPTH: 50 23 | TRANS_FUNC: bottleneck_transform 24 | STRIDE_1X1: False 25 | NUM_BLOCK_TEMP_KERNEL: [[3], [4], [6], [3]] 26 | NONLOCAL: 27 | LOCATION: [[[]], [[]], [[]], [[]]] 28 | GROUP: [[1], [1], [1], [1]] 29 | INSTANTIATION: softmax 30 | BN: 31 | USE_PRECISE_STATS: True 32 | NUM_BATCHES_PRECISE: 200 33 | SOLVER: 34 | BASE_LR: 0.01 35 | LR_POLICY: steps_with_relative_lrs 36 | STEPS: [0, 44, 88, 118] 37 | LRS: [1, 0.1, 0.01, 0.001] 38 | MAX_EPOCH: 118 39 | MOMENTUM: 0.9 40 | WEIGHT_DECAY: 1e-4 41 | OPTIMIZING_METHOD: sgd 42 | MODEL: 43 | NUM_CLASSES: 400 44 | ARCH: i3d 45 | MODEL_NAME: ResNet 46 | LOSS_FUNC: cross_entropy 47 | DROPOUT_RATE: 0.5 48 | TEST: 49 | ENABLE: True 50 | DATASET: kinetics 51 | BATCH_SIZE: 64 52 | DATA_LOADER: 53 | NUM_WORKERS: 8 54 | PIN_MEMORY: True 55 | NUM_GPUS: 8 56 | NUM_SHARDS: 1 57 | RNG_SEED: 0 58 | OUTPUT_DIR: . 
59 | -------------------------------------------------------------------------------- /configs/Kinetics/I3D_NLN_8x8_R101.yaml: -------------------------------------------------------------------------------- 1 | TRAIN: 2 | ENABLE: True 3 | DATASET: kinetics 4 | BATCH_SIZE: 64 5 | EVAL_PERIOD: 10 6 | CHECKPOINT_PERIOD: 1 7 | AUTO_RESUME: True 8 | DATA: 9 | NUM_FRAMES: 8 10 | SAMPLING_RATE: 8 11 | TRAIN_JITTER_SCALES: [256, 320] 12 | TRAIN_CROP_SIZE: 224 13 | TEST_CROP_SIZE: 256 14 | INPUT_CHANNEL_NUM: [3] 15 | RESNET: 16 | ZERO_INIT_FINAL_BN: True 17 | WIDTH_PER_GROUP: 64 18 | NUM_GROUPS: 1 19 | DEPTH: 101 20 | TRANS_FUNC: bottleneck_transform 21 | STRIDE_1X1: False 22 | NUM_BLOCK_TEMP_KERNEL: [[3], [4], [23], [3]] 23 | NONLOCAL: 24 | LOCATION: [[[]], [[1, 3]], [[1, 3, 5]], [[]]] 25 | GROUP: [[1], [1], [1], [1]] 26 | INSTANTIATION: softmax 27 | BN: 28 | USE_PRECISE_STATS: True 29 | NUM_BATCHES_PRECISE: 200 30 | SOLVER: 31 | BASE_LR: 0.1 32 | LR_POLICY: cosine 33 | MAX_EPOCH: 196 34 | MOMENTUM: 0.9 35 | WEIGHT_DECAY: 1e-4 36 | WARMUP_EPOCHS: 34.0 37 | WARMUP_START_LR: 0.01 38 | OPTIMIZING_METHOD: sgd 39 | MODEL: 40 | NUM_CLASSES: 400 41 | ARCH: i3d 42 | MODEL_NAME: ResNet 43 | LOSS_FUNC: cross_entropy 44 | DROPOUT_RATE: 0.5 45 | TEST: 46 | ENABLE: True 47 | DATASET: kinetics 48 | BATCH_SIZE: 64 49 | DATA_LOADER: 50 | NUM_WORKERS: 8 51 | PIN_MEMORY: True 52 | NUM_GPUS: 8 53 | NUM_SHARDS: 1 54 | RNG_SEED: 0 55 | OUTPUT_DIR: . 56 | -------------------------------------------------------------------------------- /configs/Kinetics/I3D_NLN_8x8_R50.yaml: -------------------------------------------------------------------------------- 1 | TRAIN: 2 | ENABLE: True 3 | DATASET: kinetics 4 | BATCH_SIZE: 64 5 | EVAL_PERIOD: 10 6 | CHECKPOINT_PERIOD: 1 7 | AUTO_RESUME: True 8 | DATA: 9 | NUM_FRAMES: 8 10 | SAMPLING_RATE: 8 11 | TRAIN_JITTER_SCALES: [256, 320] 12 | TRAIN_CROP_SIZE: 224 13 | TEST_CROP_SIZE: 256 14 | INPUT_CHANNEL_NUM: [3] 15 | RESNET: 16 | ZERO_INIT_FINAL_BN: True 17 | WIDTH_PER_GROUP: 64 18 | NUM_GROUPS: 1 19 | DEPTH: 50 20 | TRANS_FUNC: bottleneck_transform 21 | STRIDE_1X1: False 22 | NUM_BLOCK_TEMP_KERNEL: [[3], [4], [6], [3]] 23 | NONLOCAL: 24 | LOCATION: [[[]], [[1, 3]], [[1, 3, 5]], [[]]] 25 | GROUP: [[1], [1], [1], [1]] 26 | INSTANTIATION: softmax 27 | BN: 28 | USE_PRECISE_STATS: True 29 | NUM_BATCHES_PRECISE: 200 30 | SOLVER: 31 | BASE_LR: 0.1 32 | LR_POLICY: cosine 33 | MAX_EPOCH: 196 34 | MOMENTUM: 0.9 35 | WEIGHT_DECAY: 1e-4 36 | WARMUP_EPOCHS: 34.0 37 | WARMUP_START_LR: 0.01 38 | OPTIMIZING_METHOD: sgd 39 | MODEL: 40 | NUM_CLASSES: 400 41 | ARCH: i3d 42 | MODEL_NAME: ResNet 43 | LOSS_FUNC: cross_entropy 44 | DROPOUT_RATE: 0.5 45 | TEST: 46 | ENABLE: True 47 | DATASET: kinetics 48 | BATCH_SIZE: 64 49 | DATA_LOADER: 50 | NUM_WORKERS: 8 51 | PIN_MEMORY: True 52 | NUM_GPUS: 8 53 | NUM_SHARDS: 1 54 | RNG_SEED: 0 55 | OUTPUT_DIR: . 
56 | -------------------------------------------------------------------------------- /configs/Kinetics/I3D_NLN_8x8_R50_IN1K.yaml: -------------------------------------------------------------------------------- 1 | TRAIN: 2 | ENABLE: True 3 | DATASET: kinetics 4 | BATCH_SIZE: 64 5 | EVAL_PERIOD: 10 6 | CHECKPOINT_PERIOD: 1 7 | AUTO_RESUME: True 8 | CHECKPOINT_FILE_PATH: /mnt/vol/gfsai-bistro2-east/ai-group/bistro/gpu/haoqifan/pySlowFastModelZoo/imagenet50_pretrain.pyth 9 | CHECKPOINT_INFLATE: True 10 | DATA: 11 | NUM_FRAMES: 8 12 | SAMPLING_RATE: 8 13 | TRAIN_JITTER_SCALES: [256, 320] 14 | TRAIN_CROP_SIZE: 224 15 | TEST_CROP_SIZE: 256 16 | INPUT_CHANNEL_NUM: [3] 17 | PATH_TO_DATA_DIR: /mnt/vol/gfsai-east/ai-group/users/haoqifan/kinetics/alllist/py_slowfast 18 | RESNET: 19 | ZERO_INIT_FINAL_BN: True 20 | WIDTH_PER_GROUP: 64 21 | NUM_GROUPS: 1 22 | DEPTH: 50 23 | TRANS_FUNC: bottleneck_transform 24 | STRIDE_1X1: False 25 | NUM_BLOCK_TEMP_KERNEL: [[3], [4], [6], [3]] 26 | NONLOCAL: 27 | LOCATION: [[[]], [[1, 3]], [[1, 3, 5]], [[]]] 28 | GROUP: [[1], [1], [1], [1]] 29 | INSTANTIATION: softmax 30 | BN: 31 | USE_PRECISE_STATS: True 32 | NUM_BATCHES_PRECISE: 200 33 | SOLVER: 34 | BASE_LR: 0.01 35 | LR_POLICY: steps_with_relative_lrs 36 | STEPS: [0, 44, 88, 118] 37 | LRS: [1, 0.1, 0.01, 0.001] 38 | MAX_EPOCH: 118 39 | MOMENTUM: 0.9 40 | WEIGHT_DECAY: 1e-4 41 | OPTIMIZING_METHOD: sgd 42 | MODEL: 43 | NUM_CLASSES: 400 44 | ARCH: i3d 45 | MODEL_NAME: ResNet 46 | LOSS_FUNC: cross_entropy 47 | DROPOUT_RATE: 0.5 48 | TEST: 49 | ENABLE: True 50 | DATASET: kinetics 51 | BATCH_SIZE: 64 52 | DATA_LOADER: 53 | NUM_WORKERS: 8 54 | PIN_MEMORY: True 55 | NUM_GPUS: 8 56 | NUM_SHARDS: 1 57 | RNG_SEED: 0 58 | OUTPUT_DIR: . 59 | -------------------------------------------------------------------------------- /configs/Kinetics/SLOWFAST_4x16_R50.yaml: -------------------------------------------------------------------------------- 1 | TRAIN: 2 | ENABLE: True 3 | DATASET: kinetics 4 | BATCH_SIZE: 64 5 | EVAL_PERIOD: 10 6 | CHECKPOINT_PERIOD: 1 7 | AUTO_RESUME: True 8 | DATA: 9 | NUM_FRAMES: 32 10 | SAMPLING_RATE: 2 11 | TRAIN_JITTER_SCALES: [256, 320] 12 | TRAIN_CROP_SIZE: 224 13 | TEST_CROP_SIZE: 256 14 | INPUT_CHANNEL_NUM: [3, 3] 15 | SLOWFAST: 16 | ALPHA: 8 17 | BETA_INV: 8 18 | FUSION_CONV_CHANNEL_RATIO: 2 19 | FUSION_KERNEL_SZ: 5 20 | RESNET: 21 | ZERO_INIT_FINAL_BN: True 22 | WIDTH_PER_GROUP: 64 23 | NUM_GROUPS: 1 24 | DEPTH: 50 25 | TRANS_FUNC: bottleneck_transform 26 | STRIDE_1X1: False 27 | NUM_BLOCK_TEMP_KERNEL: [[3, 3], [4, 4], [6, 6], [3, 3]] 28 | SPATIAL_STRIDES: [[1, 1], [2, 2], [2, 2], [2, 2]] 29 | SPATIAL_DILATIONS: [[1, 1], [1, 1], [1, 1], [1, 1]] 30 | NONLOCAL: 31 | LOCATION: [[[], []], [[], []], [[], []], [[], []]] 32 | GROUP: [[1, 1], [1, 1], [1, 1], [1, 1]] 33 | INSTANTIATION: dot_product 34 | BN: 35 | USE_PRECISE_STATS: True 36 | NUM_BATCHES_PRECISE: 200 37 | SOLVER: 38 | BASE_LR: 0.1 39 | LR_POLICY: cosine 40 | MAX_EPOCH: 196 41 | MOMENTUM: 0.9 42 | WEIGHT_DECAY: 1e-4 43 | WARMUP_EPOCHS: 34.0 44 | WARMUP_START_LR: 0.01 45 | OPTIMIZING_METHOD: sgd 46 | MODEL: 47 | NUM_CLASSES: 400 48 | ARCH: slowfast 49 | MODEL_NAME: SlowFast 50 | LOSS_FUNC: cross_entropy 51 | DROPOUT_RATE: 0.5 52 | TEST: 53 | ENABLE: True 54 | DATASET: kinetics 55 | BATCH_SIZE: 64 56 | DATA_LOADER: 57 | NUM_WORKERS: 8 58 | PIN_MEMORY: True 59 | NUM_GPUS: 8 60 | NUM_SHARDS: 1 61 | RNG_SEED: 0 62 | OUTPUT_DIR: . 
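A note on the naming, inferred from the fields in these configs rather than from any comment in them: in `SLOWFAST_TxS_R50`, T is the number of frames the slow pathway sees and S its temporal stride in raw frames. Decoding `SLOWFAST_4x16_R50` above:

```
# NUM_FRAMES = 32, SAMPLING_RATE = 2, ALPHA = 8
# fast pathway : 32 frames, one every 2 raw frames
# slow pathway : keeps every ALPHA-th frame  -> 32 / 8 = 4 frames
# slow stride  : 2 * 8 = 16 raw frames       -> hence "4x16"
```

The same arithmetic matches `SLOWFAST_8x8_R50` next, where ALPHA is 4: 32 / 4 = 8 frames at stride 2 * 4 = 8.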
63 | -------------------------------------------------------------------------------- /configs/Kinetics/SLOWFAST_8x8_R50.yaml: -------------------------------------------------------------------------------- 1 | TRAIN: 2 | ENABLE: True 3 | DATASET: kinetics 4 | BATCH_SIZE: 64 5 | EVAL_PERIOD: 10 6 | CHECKPOINT_PERIOD: 1 7 | AUTO_RESUME: True 8 | DATA: 9 | NUM_FRAMES: 32 10 | SAMPLING_RATE: 2 11 | TRAIN_JITTER_SCALES: [256, 320] 12 | TRAIN_CROP_SIZE: 224 13 | TEST_CROP_SIZE: 256 14 | INPUT_CHANNEL_NUM: [3, 3] 15 | SLOWFAST: 16 | ALPHA: 4 17 | BETA_INV: 8 18 | FUSION_CONV_CHANNEL_RATIO: 2 19 | FUSION_KERNEL_SZ: 7 20 | RESNET: 21 | ZERO_INIT_FINAL_BN: True 22 | WIDTH_PER_GROUP: 64 23 | NUM_GROUPS: 1 24 | DEPTH: 50 25 | TRANS_FUNC: bottleneck_transform 26 | STRIDE_1X1: False 27 | NUM_BLOCK_TEMP_KERNEL: [[3, 3], [4, 4], [6, 6], [3, 3]] 28 | SPATIAL_STRIDES: [[1, 1], [2, 2], [2, 2], [2, 2]] 29 | SPATIAL_DILATIONS: [[1, 1], [1, 1], [1, 1], [1, 1]] 30 | NONLOCAL: 31 | LOCATION: [[[], []], [[], []], [[], []], [[], []]] 32 | GROUP: [[1, 1], [1, 1], [1, 1], [1, 1]] 33 | INSTANTIATION: dot_product 34 | BN: 35 | USE_PRECISE_STATS: True 36 | NUM_BATCHES_PRECISE: 200 37 | SOLVER: 38 | BASE_LR: 0.1 39 | LR_POLICY: cosine 40 | MAX_EPOCH: 196 41 | MOMENTUM: 0.9 42 | WEIGHT_DECAY: 1e-4 43 | WARMUP_EPOCHS: 34.0 44 | WARMUP_START_LR: 0.01 45 | OPTIMIZING_METHOD: sgd 46 | MODEL: 47 | NUM_CLASSES: 400 48 | ARCH: slowfast 49 | MODEL_NAME: SlowFast 50 | LOSS_FUNC: cross_entropy 51 | DROPOUT_RATE: 0.5 52 | TEST: 53 | ENABLE: True 54 | DATASET: kinetics 55 | BATCH_SIZE: 64 56 | DATA_LOADER: 57 | NUM_WORKERS: 8 58 | PIN_MEMORY: True 59 | NUM_GPUS: 8 60 | NUM_SHARDS: 1 61 | RNG_SEED: 0 62 | OUTPUT_DIR: . 63 | -------------------------------------------------------------------------------- /configs/Kinetics/SLOWFAST_8x8_R50_stepwise.yaml: -------------------------------------------------------------------------------- 1 | TRAIN: 2 | ENABLE: True 3 | DATASET: kinetics 4 | BATCH_SIZE: 64 5 | EVAL_PERIOD: 10 6 | CHECKPOINT_PERIOD: 1 7 | AUTO_RESUME: True 8 | DATA: 9 | NUM_FRAMES: 32 10 | SAMPLING_RATE: 2 11 | TRAIN_JITTER_SCALES: [256, 320] 12 | TRAIN_CROP_SIZE: 224 13 | TEST_CROP_SIZE: 256 14 | INPUT_CHANNEL_NUM: [3, 3] 15 | SLOWFAST: 16 | ALPHA: 4 17 | BETA_INV: 8 18 | FUSION_CONV_CHANNEL_RATIO: 2 19 | FUSION_KERNEL_SZ: 7 20 | RESNET: 21 | ZERO_INIT_FINAL_BN: True 22 | WIDTH_PER_GROUP: 64 23 | NUM_GROUPS: 1 24 | DEPTH: 50 25 | TRANS_FUNC: bottleneck_transform 26 | STRIDE_1X1: False 27 | NUM_BLOCK_TEMP_KERNEL: [[3, 3], [4, 4], [6, 6], [3, 3]] 28 | SPATIAL_STRIDES: [[1, 1], [2, 2], [2, 2], [2, 2]] 29 | SPATIAL_DILATIONS: [[1, 1], [1, 1], [1, 1], [1, 1]] 30 | NONLOCAL: 31 | LOCATION: [[[], []], [[], []], [[], []], [[], []]] 32 | GROUP: [[1, 1], [1, 1], [1, 1], [1, 1]] 33 | INSTANTIATION: dot_product 34 | BN: 35 | USE_PRECISE_STATS: True 36 | NUM_BATCHES_PRECISE: 200 37 | SOLVER: 38 | BASE_LR: 0.1 39 | LR_POLICY: steps_with_relative_lrs 40 | LRS: [1, 0.1, 0.01, 0.001, 0.0001, 0.00001] 41 | STEPS: [0, 94, 154, 196] 42 | MAX_EPOCH: 239 43 | MOMENTUM: 0.9 44 | WEIGHT_DECAY: 1e-4 45 | WARMUP_EPOCHS: 34.0 46 | WARMUP_START_LR: 0.01 47 | OPTIMIZING_METHOD: sgd 48 | MODEL: 49 | NUM_CLASSES: 400 50 | ARCH: slowfast 51 | MODEL_NAME: SlowFast 52 | LOSS_FUNC: cross_entropy 53 | DROPOUT_RATE: 0.5 54 | TEST: 55 | ENABLE: True 56 | DATASET: kinetics 57 | BATCH_SIZE: 64 58 | DATA_LOADER: 59 | NUM_WORKERS: 8 60 | PIN_MEMORY: True 61 | NUM_GPUS: 8 62 | NUM_SHARDS: 1 63 | RNG_SEED: 0 64 | OUTPUT_DIR: . 
65 | -------------------------------------------------------------------------------- /configs/Kinetics/SLOWFAST_8x8_R50_stepwise_multigrid.yaml: -------------------------------------------------------------------------------- 1 | MULTIGRID: 2 | SHORT_CYCLE: True 3 | LONG_CYCLE: True 4 | TRAIN: 5 | ENABLE: True 6 | DATASET: kinetics 7 | BATCH_SIZE: 64 8 | EVAL_PERIOD: 10 9 | CHECKPOINT_PERIOD: 1 10 | AUTO_RESUME: True 11 | DATA: 12 | NUM_FRAMES: 32 13 | SAMPLING_RATE: 2 14 | TRAIN_JITTER_SCALES: [256, 320] 15 | TRAIN_CROP_SIZE: 224 16 | TEST_CROP_SIZE: 224 17 | INPUT_CHANNEL_NUM: [3, 3] 18 | SLOWFAST: 19 | ALPHA: 4 20 | BETA_INV: 8 21 | FUSION_CONV_CHANNEL_RATIO: 2 22 | FUSION_KERNEL_SZ: 7 23 | RESNET: 24 | ZERO_INIT_FINAL_BN: True 25 | WIDTH_PER_GROUP: 64 26 | NUM_GROUPS: 1 27 | DEPTH: 50 28 | TRANS_FUNC: bottleneck_transform 29 | STRIDE_1X1: False 30 | NUM_BLOCK_TEMP_KERNEL: [[3, 3], [4, 4], [6, 6], [3, 3]] 31 | SPATIAL_STRIDES: [[1, 1], [2, 2], [2, 2], [2, 2]] 32 | SPATIAL_DILATIONS: [[1, 1], [1, 1], [1, 1], [1, 1]] 33 | NONLOCAL: 34 | LOCATION: [[[], []], [[], []], [[], []], [[], []]] 35 | GROUP: [[1, 1], [1, 1], [1, 1], [1, 1]] 36 | INSTANTIATION: dot_product 37 | BN: 38 | USE_PRECISE_STATS: True 39 | NUM_BATCHES_PRECISE: 200 40 | SOLVER: 41 | BASE_LR: 0.1 42 | LR_POLICY: steps_with_relative_lrs 43 | LRS: [1, 0.1, 0.01, 0.001, 0.0001, 0.00001] 44 | STEPS: [0, 94, 154, 196] 45 | MAX_EPOCH: 239 46 | MOMENTUM: 0.9 47 | WEIGHT_DECAY: 1e-4 48 | WARMUP_EPOCHS: 34.0 49 | WARMUP_START_LR: 0.01 50 | OPTIMIZING_METHOD: sgd 51 | MODEL: 52 | NUM_CLASSES: 400 53 | ARCH: slowfast 54 | MODEL_NAME: SlowFast 55 | LOSS_FUNC: cross_entropy 56 | DROPOUT_RATE: 0.5 57 | TEST: 58 | ENABLE: True 59 | DATASET: kinetics 60 | BATCH_SIZE: 64 61 | DATA_LOADER: 62 | NUM_WORKERS: 8 63 | PIN_MEMORY: True 64 | NUM_GPUS: 8 65 | NUM_SHARDS: 1 66 | RNG_SEED: 0 67 | OUTPUT_DIR: . 
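`SLOWFAST_8x8_R50_stepwise_multigrid` above differs from the plain `_stepwise` config only in the leading `MULTIGRID` block and in `TEST_CROP_SIZE` (224 vs. 256), so the same training can also be launched by overriding the stepwise config on the command line -- a sketch, assuming the dataset paths are already configured:

```
python tools/run_net.py \
  --cfg configs/Kinetics/SLOWFAST_8x8_R50_stepwise.yaml \
  MULTIGRID.LONG_CYCLE True \
  MULTIGRID.SHORT_CYCLE True \
  DATA.TEST_CROP_SIZE 224
```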
68 | -------------------------------------------------------------------------------- /configs/Kinetics/SLOWFAST_NLN_4x16_R50.yaml: -------------------------------------------------------------------------------- 1 | TRAIN: 2 | ENABLE: True 3 | DATASET: kinetics 4 | BATCH_SIZE: 64 5 | EVAL_PERIOD: 10 6 | CHECKPOINT_PERIOD: 1 7 | AUTO_RESUME: True 8 | DATA: 9 | NUM_FRAMES: 32 10 | SAMPLING_RATE: 2 11 | TRAIN_JITTER_SCALES: [256, 320] 12 | TRAIN_CROP_SIZE: 224 13 | TEST_CROP_SIZE: 256 14 | INPUT_CHANNEL_NUM: [3, 3] 15 | SLOWFAST: 16 | ALPHA: 8 17 | BETA_INV: 8 18 | FUSION_CONV_CHANNEL_RATIO: 2 19 | FUSION_KERNEL_SZ: 5 20 | RESNET: 21 | ZERO_INIT_FINAL_BN: True 22 | WIDTH_PER_GROUP: 64 23 | NUM_GROUPS: 1 24 | DEPTH: 50 25 | TRANS_FUNC: bottleneck_transform 26 | STRIDE_1X1: False 27 | NUM_BLOCK_TEMP_KERNEL: [[3, 3], [4, 4], [6, 6], [3, 3]] 28 | SPATIAL_STRIDES: [[1, 1], [2, 2], [2, 2], [2, 2]] 29 | SPATIAL_DILATIONS: [[1, 1], [1, 1], [1, 1], [1, 1]] 30 | NONLOCAL: 31 | LOCATION: [[[], []], [[1, 3], []], [[1, 3, 5], []], [[], []]] 32 | GROUP: [[1, 1], [1, 1], [1, 1], [1, 1]] 33 | INSTANTIATION: dot_product 34 | BN: 35 | USE_PRECISE_STATS: True 36 | NUM_BATCHES_PRECISE: 200 37 | SOLVER: 38 | BASE_LR: 0.1 39 | LR_POLICY: cosine 40 | MAX_EPOCH: 196 41 | MOMENTUM: 0.9 42 | WEIGHT_DECAY: 1e-4 43 | WARMUP_EPOCHS: 34.0 44 | WARMUP_START_LR: 0.01 45 | OPTIMIZING_METHOD: sgd 46 | MODEL: 47 | NUM_CLASSES: 400 48 | ARCH: slowfast 49 | MODEL_NAME: SlowFast 50 | LOSS_FUNC: cross_entropy 51 | DROPOUT_RATE: 0.5 52 | TEST: 53 | ENABLE: True 54 | DATASET: kinetics 55 | BATCH_SIZE: 64 56 | DATA_LOADER: 57 | NUM_WORKERS: 8 58 | PIN_MEMORY: True 59 | NUM_GPUS: 8 60 | NUM_SHARDS: 1 61 | RNG_SEED: 0 62 | OUTPUT_DIR: . 63 | -------------------------------------------------------------------------------- /configs/Kinetics/SLOWFAST_NLN_8x8_R50.yaml: -------------------------------------------------------------------------------- 1 | TRAIN: 2 | ENABLE: True 3 | DATASET: kinetics 4 | BATCH_SIZE: 64 5 | EVAL_PERIOD: 10 6 | CHECKPOINT_PERIOD: 1 7 | AUTO_RESUME: True 8 | DATA: 9 | NUM_FRAMES: 32 10 | SAMPLING_RATE: 2 11 | TRAIN_JITTER_SCALES: [256, 320] 12 | TRAIN_CROP_SIZE: 224 13 | TEST_CROP_SIZE: 256 14 | INPUT_CHANNEL_NUM: [3, 3] 15 | SLOWFAST: 16 | ALPHA: 4 17 | BETA_INV: 8 18 | FUSION_CONV_CHANNEL_RATIO: 2 19 | FUSION_KERNEL_SZ: 5 20 | RESNET: 21 | ZERO_INIT_FINAL_BN: True 22 | WIDTH_PER_GROUP: 64 23 | NUM_GROUPS: 1 24 | DEPTH: 50 25 | TRANS_FUNC: bottleneck_transform 26 | STRIDE_1X1: False 27 | NUM_BLOCK_TEMP_KERNEL: [[3, 3], [4, 4], [6, 6], [3, 3]] 28 | SPATIAL_STRIDES: [[1, 1], [2, 2], [2, 2], [2, 2]] 29 | SPATIAL_DILATIONS: [[1, 1], [1, 1], [1, 1], [1, 1]] 30 | NONLOCAL: 31 | LOCATION: [[[], []], [[1, 3], []], [[1, 3, 5], []], [[], []]] 32 | GROUP: [[1, 1], [1, 1], [1, 1], [1, 1]] 33 | INSTANTIATION: dot_product 34 | BN: 35 | USE_PRECISE_STATS: True 36 | NUM_BATCHES_PRECISE: 200 37 | SOLVER: 38 | BASE_LR: 0.1 39 | LR_POLICY: cosine 40 | MAX_EPOCH: 196 41 | MOMENTUM: 0.9 42 | WEIGHT_DECAY: 1e-4 43 | WARMUP_EPOCHS: 34.0 44 | WARMUP_START_LR: 0.01 45 | OPTIMIZING_METHOD: sgd 46 | MODEL: 47 | NUM_CLASSES: 400 48 | ARCH: slowfast 49 | MODEL_NAME: SlowFast 50 | LOSS_FUNC: cross_entropy 51 | DROPOUT_RATE: 0.5 52 | TEST: 53 | ENABLE: True 54 | DATASET: kinetics 55 | BATCH_SIZE: 64 56 | DATA_LOADER: 57 | NUM_WORKERS: 8 58 | PIN_MEMORY: True 59 | NUM_GPUS: 8 60 | NUM_SHARDS: 1 61 | RNG_SEED: 0 62 | OUTPUT_DIR: . 
63 | -------------------------------------------------------------------------------- /configs/Kinetics/SLOW_4x16_R50.yaml: -------------------------------------------------------------------------------- 1 | TRAIN: 2 | ENABLE: True 3 | DATASET: kinetics 4 | BATCH_SIZE: 64 5 | EVAL_PERIOD: 10 6 | CHECKPOINT_PERIOD: 1 7 | AUTO_RESUME: True 8 | DATA: 9 | NUM_FRAMES: 4 10 | SAMPLING_RATE: 16 11 | TRAIN_JITTER_SCALES: [256, 320] 12 | TRAIN_CROP_SIZE: 224 13 | TEST_CROP_SIZE: 256 14 | INPUT_CHANNEL_NUM: [3] 15 | RESNET: 16 | ZERO_INIT_FINAL_BN: True 17 | WIDTH_PER_GROUP: 64 18 | NUM_GROUPS: 1 19 | DEPTH: 50 20 | TRANS_FUNC: bottleneck_transform 21 | STRIDE_1X1: False 22 | NUM_BLOCK_TEMP_KERNEL: [[3], [4], [6], [3]] 23 | NONLOCAL: 24 | LOCATION: [[[]], [[]], [[]], [[]]] 25 | GROUP: [[1], [1], [1], [1]] 26 | INSTANTIATION: dot_product 27 | BN: 28 | USE_PRECISE_STATS: True 29 | NUM_BATCHES_PRECISE: 200 30 | SOLVER: 31 | BASE_LR: 0.1 32 | LR_POLICY: cosine 33 | MAX_EPOCH: 196 34 | MOMENTUM: 0.9 35 | WEIGHT_DECAY: 1e-4 36 | WARMUP_EPOCHS: 34.0 37 | WARMUP_START_LR: 0.01 38 | OPTIMIZING_METHOD: sgd 39 | MODEL: 40 | NUM_CLASSES: 400 41 | ARCH: slow 42 | MODEL_NAME: ResNet 43 | LOSS_FUNC: cross_entropy 44 | DROPOUT_RATE: 0.5 45 | TEST: 46 | ENABLE: True 47 | DATASET: kinetics 48 | BATCH_SIZE: 64 49 | DATA_LOADER: 50 | NUM_WORKERS: 8 51 | PIN_MEMORY: True 52 | NUM_GPUS: 8 53 | NUM_SHARDS: 1 54 | RNG_SEED: 0 55 | OUTPUT_DIR: . 56 | -------------------------------------------------------------------------------- /configs/Kinetics/SLOW_8x8_R50.yaml: -------------------------------------------------------------------------------- 1 | TRAIN: 2 | ENABLE: True 3 | DATASET: kinetics 4 | BATCH_SIZE: 64 5 | EVAL_PERIOD: 10 6 | CHECKPOINT_PERIOD: 1 7 | AUTO_RESUME: True 8 | DATA: 9 | NUM_FRAMES: 8 10 | SAMPLING_RATE: 8 11 | TRAIN_JITTER_SCALES: [256, 320] 12 | TRAIN_CROP_SIZE: 224 13 | TEST_CROP_SIZE: 256 14 | INPUT_CHANNEL_NUM: [3] 15 | RESNET: 16 | ZERO_INIT_FINAL_BN: True 17 | WIDTH_PER_GROUP: 64 18 | NUM_GROUPS: 1 19 | DEPTH: 50 20 | TRANS_FUNC: bottleneck_transform 21 | STRIDE_1X1: False 22 | NUM_BLOCK_TEMP_KERNEL: [[3], [4], [6], [3]] 23 | NONLOCAL: 24 | LOCATION: [[[]], [[]], [[]], [[]]] 25 | GROUP: [[1], [1], [1], [1]] 26 | INSTANTIATION: dot_product 27 | BN: 28 | USE_PRECISE_STATS: True 29 | NUM_BATCHES_PRECISE: 200 30 | SOLVER: 31 | BASE_LR: 0.1 32 | LR_POLICY: cosine 33 | MAX_EPOCH: 196 34 | MOMENTUM: 0.9 35 | WEIGHT_DECAY: 1e-4 36 | WARMUP_EPOCHS: 34.0 37 | WARMUP_START_LR: 0.01 38 | OPTIMIZING_METHOD: sgd 39 | MODEL: 40 | NUM_CLASSES: 400 41 | ARCH: slow 42 | MODEL_NAME: ResNet 43 | LOSS_FUNC: cross_entropy 44 | DROPOUT_RATE: 0.5 45 | TEST: 46 | ENABLE: True 47 | DATASET: kinetics 48 | BATCH_SIZE: 64 49 | DATA_LOADER: 50 | NUM_WORKERS: 8 51 | PIN_MEMORY: True 52 | NUM_GPUS: 8 53 | NUM_SHARDS: 1 54 | RNG_SEED: 0 55 | OUTPUT_DIR: . 
56 | -------------------------------------------------------------------------------- /configs/Kinetics/SLOW_NLN_4x16_R50.yaml: -------------------------------------------------------------------------------- 1 | TRAIN: 2 | ENABLE: True 3 | DATASET: kinetics 4 | BATCH_SIZE: 64 5 | EVAL_PERIOD: 10 6 | CHECKPOINT_PERIOD: 1 7 | AUTO_RESUME: True 8 | DATA: 9 | NUM_FRAMES: 4 10 | SAMPLING_RATE: 16 11 | TRAIN_JITTER_SCALES: [256, 320] 12 | TRAIN_CROP_SIZE: 224 13 | TEST_CROP_SIZE: 256 14 | INPUT_CHANNEL_NUM: [3] 15 | RESNET: 16 | ZERO_INIT_FINAL_BN: True 17 | WIDTH_PER_GROUP: 64 18 | NUM_GROUPS: 1 19 | DEPTH: 50 20 | TRANS_FUNC: bottleneck_transform 21 | STRIDE_1X1: False 22 | NUM_BLOCK_TEMP_KERNEL: [[3], [4], [6], [3]] 23 | NONLOCAL: 24 | LOCATION: [[[]], [[1, 3]], [[1, 3, 5]], [[]]] 25 | GROUP: [[1], [1], [1], [1]] 26 | INSTANTIATION: dot_product 27 | BN: 28 | USE_PRECISE_STATS: True 29 | NUM_BATCHES_PRECISE: 200 30 | SOLVER: 31 | BASE_LR: 0.1 32 | LR_POLICY: cosine 33 | MAX_EPOCH: 196 34 | MOMENTUM: 0.9 35 | WEIGHT_DECAY: 1e-4 36 | WARMUP_EPOCHS: 34.0 37 | WARMUP_START_LR: 0.01 38 | OPTIMIZING_METHOD: sgd 39 | MODEL: 40 | NUM_CLASSES: 400 41 | ARCH: slow 42 | MODEL_NAME: ResNet 43 | LOSS_FUNC: cross_entropy 44 | DROPOUT_RATE: 0.5 45 | TEST: 46 | ENABLE: True 47 | DATASET: kinetics 48 | BATCH_SIZE: 64 49 | DATA_LOADER: 50 | NUM_WORKERS: 8 51 | PIN_MEMORY: True 52 | NUM_GPUS: 8 53 | NUM_SHARDS: 1 54 | RNG_SEED: 0 55 | OUTPUT_DIR: . 56 | -------------------------------------------------------------------------------- /configs/Kinetics/SLOW_NLN_8x8_R50.yaml: -------------------------------------------------------------------------------- 1 | TRAIN: 2 | ENABLE: True 3 | DATASET: kinetics 4 | BATCH_SIZE: 64 5 | EVAL_PERIOD: 10 6 | CHECKPOINT_PERIOD: 1 7 | AUTO_RESUME: True 8 | DATA: 9 | NUM_FRAMES: 8 10 | SAMPLING_RATE: 8 11 | TRAIN_JITTER_SCALES: [256, 320] 12 | TRAIN_CROP_SIZE: 224 13 | TEST_CROP_SIZE: 256 14 | INPUT_CHANNEL_NUM: [3] 15 | RESNET: 16 | ZERO_INIT_FINAL_BN: True 17 | WIDTH_PER_GROUP: 64 18 | NUM_GROUPS: 1 19 | DEPTH: 50 20 | TRANS_FUNC: bottleneck_transform 21 | STRIDE_1X1: False 22 | NUM_BLOCK_TEMP_KERNEL: [[3], [4], [6], [3]] 23 | NONLOCAL: 24 | LOCATION: [[[]], [[1, 3]], [[1, 3, 5]], [[]]] 25 | GROUP: [[1], [1], [1], [1]] 26 | INSTANTIATION: dot_product 27 | BN: 28 | USE_PRECISE_STATS: True 29 | NUM_BATCHES_PRECISE: 200 30 | SOLVER: 31 | BASE_LR: 0.1 32 | LR_POLICY: cosine 33 | MAX_EPOCH: 196 34 | MOMENTUM: 0.9 35 | WEIGHT_DECAY: 1e-4 36 | WARMUP_EPOCHS: 34.0 37 | WARMUP_START_LR: 0.01 38 | OPTIMIZING_METHOD: sgd 39 | MODEL: 40 | NUM_CLASSES: 400 41 | ARCH: slow 42 | MODEL_NAME: ResNet 43 | LOSS_FUNC: cross_entropy 44 | DROPOUT_RATE: 0.5 45 | TEST: 46 | ENABLE: True 47 | DATASET: kinetics 48 | BATCH_SIZE: 64 49 | DATA_LOADER: 50 | NUM_WORKERS: 8 51 | PIN_MEMORY: True 52 | NUM_GPUS: 8 53 | NUM_SHARDS: 1 54 | RNG_SEED: 0 55 | OUTPUT_DIR: . 
56 | -------------------------------------------------------------------------------- /configs/Kinetics/c2/C2D_NOPOOL_8x8_R50.yaml: -------------------------------------------------------------------------------- 1 | TRAIN: 2 | ENABLE: False 3 | DATASET: kinetics 4 | BATCH_SIZE: 64 5 | EVAL_PERIOD: 10 6 | CHECKPOINT_PERIOD: 1 7 | AUTO_RESUME: True 8 | # CHECKPOINT_FILE_PATH: path to the model to test 9 | CHECKPOINT_TYPE: caffe2 10 | DATA: 11 | NUM_FRAMES: 8 12 | SAMPLING_RATE: 8 13 | TRAIN_JITTER_SCALES: [256, 320] 14 | TRAIN_CROP_SIZE: 224 15 | TEST_CROP_SIZE: 256 16 | INPUT_CHANNEL_NUM: [3] 17 | RESNET: 18 | ZERO_INIT_FINAL_BN: True 19 | WIDTH_PER_GROUP: 64 20 | NUM_GROUPS: 1 21 | DEPTH: 50 22 | TRANS_FUNC: bottleneck_transform 23 | STRIDE_1X1: False 24 | NUM_BLOCK_TEMP_KERNEL: [[3], [4], [6], [3]] 25 | NONLOCAL: 26 | LOCATION: [[[]], [[]], [[]], [[]]] 27 | GROUP: [[1], [1], [1], [1]] 28 | INSTANTIATION: softmax 29 | BN: 30 | USE_PRECISE_STATS: True 31 | NUM_BATCHES_PRECISE: 200 32 | SOLVER: 33 | BASE_LR: 0.1 34 | LR_POLICY: cosine 35 | MAX_EPOCH: 196 36 | MOMENTUM: 0.9 37 | WEIGHT_DECAY: 1e-4 38 | WARMUP_EPOCHS: 34.0 39 | WARMUP_START_LR: 0.01 40 | OPTIMIZING_METHOD: sgd 41 | MODEL: 42 | NUM_CLASSES: 400 43 | ARCH: c2d 44 | MODEL_NAME: ResNet_nopool 45 | LOSS_FUNC: cross_entropy 46 | DROPOUT_RATE: 0.5 47 | TEST: 48 | ENABLE: True 49 | DATASET: kinetics 50 | BATCH_SIZE: 64 51 | DATA_LOADER: 52 | NUM_WORKERS: 8 53 | PIN_MEMORY: True 54 | NUM_GPUS: 8 55 | NUM_SHARDS: 1 56 | RNG_SEED: 0 57 | OUTPUT_DIR: . 58 | -------------------------------------------------------------------------------- /configs/Kinetics/c2/I3D_8x8_R50.yaml: -------------------------------------------------------------------------------- 1 | TRAIN: 2 | ENABLE: False 3 | DATASET: kinetics 4 | BATCH_SIZE: 64 5 | EVAL_PERIOD: 10 6 | CHECKPOINT_PERIOD: 1 7 | AUTO_RESUME: True 8 | # CHECKPOINT_FILE_PATH: path to the model to test 9 | CHECKPOINT_TYPE: caffe2 10 | DATA: 11 | NUM_FRAMES: 8 12 | SAMPLING_RATE: 8 13 | TRAIN_JITTER_SCALES: [256, 320] 14 | TRAIN_CROP_SIZE: 224 15 | TEST_CROP_SIZE: 256 16 | INPUT_CHANNEL_NUM: [3] 17 | RESNET: 18 | ZERO_INIT_FINAL_BN: True 19 | WIDTH_PER_GROUP: 64 20 | NUM_GROUPS: 1 21 | DEPTH: 50 22 | TRANS_FUNC: bottleneck_transform 23 | STRIDE_1X1: False 24 | NUM_BLOCK_TEMP_KERNEL: [[3], [4], [6], [3]] 25 | NONLOCAL: 26 | LOCATION: [[[]], [[]], [[]], [[]]] 27 | GROUP: [[1], [1], [1], [1]] 28 | INSTANTIATION: softmax 29 | BN: 30 | USE_PRECISE_STATS: True 31 | NUM_BATCHES_PRECISE: 200 32 | SOLVER: 33 | BASE_LR: 0.1 34 | LR_POLICY: cosine 35 | MAX_EPOCH: 196 36 | MOMENTUM: 0.9 37 | WEIGHT_DECAY: 1e-4 38 | WARMUP_EPOCHS: 34.0 39 | WARMUP_START_LR: 0.01 40 | OPTIMIZING_METHOD: sgd 41 | MODEL: 42 | NUM_CLASSES: 400 43 | ARCH: i3d 44 | MODEL_NAME: ResNet 45 | LOSS_FUNC: cross_entropy 46 | DROPOUT_RATE: 0.5 47 | TEST: 48 | ENABLE: True 49 | DATASET: kinetics 50 | BATCH_SIZE: 64 51 | DATA_LOADER: 52 | NUM_WORKERS: 8 53 | PIN_MEMORY: True 54 | NUM_GPUS: 8 55 | NUM_SHARDS: 1 56 | RNG_SEED: 0 57 | OUTPUT_DIR: . 
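The `configs/Kinetics/c2/*` files in this group are evaluation-only recipes for Caffe2-converted model-zoo weights: note `TRAIN.ENABLE: False`, `CHECKPOINT_TYPE: caffe2`, and the commented-out `CHECKPOINT_FILE_PATH`. A minimal test invocation, following the pattern the configs themselves suggest (the `.pkl` path is a placeholder for a checkpoint downloaded from the model zoo):

```
python tools/run_net.py \
  --cfg configs/Kinetics/c2/I3D_8x8_R50.yaml \
  TRAIN.CHECKPOINT_FILE_PATH /path/to/I3D_8x8_R50.pkl
```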
58 | -------------------------------------------------------------------------------- /configs/Kinetics/c2/I3D_NLN_8x8_R50.yaml: -------------------------------------------------------------------------------- 1 | TRAIN: 2 | ENABLE: False 3 | DATASET: kinetics 4 | BATCH_SIZE: 64 5 | EVAL_PERIOD: 10 6 | CHECKPOINT_PERIOD: 1 7 | AUTO_RESUME: True 8 | # CHECKPOINT_FILE_PATH: path to the model to test 9 | CHECKPOINT_TYPE: caffe2 10 | DATA: 11 | NUM_FRAMES: 8 12 | SAMPLING_RATE: 8 13 | TRAIN_JITTER_SCALES: [256, 320] 14 | TRAIN_CROP_SIZE: 224 15 | TEST_CROP_SIZE: 256 16 | INPUT_CHANNEL_NUM: [3] 17 | RESNET: 18 | ZERO_INIT_FINAL_BN: True 19 | WIDTH_PER_GROUP: 64 20 | NUM_GROUPS: 1 21 | DEPTH: 50 22 | TRANS_FUNC: bottleneck_transform 23 | STRIDE_1X1: False 24 | NUM_BLOCK_TEMP_KERNEL: [[3], [4], [6], [3]] 25 | NONLOCAL: 26 | LOCATION: [[[]], [[1, 3]], [[1, 3, 5]], [[]]] 27 | GROUP: [[1], [1], [1], [1]] 28 | INSTANTIATION: softmax 29 | BN: 30 | USE_PRECISE_STATS: True 31 | NUM_BATCHES_PRECISE: 200 32 | SOLVER: 33 | BASE_LR: 0.1 34 | LR_POLICY: cosine 35 | MAX_EPOCH: 196 36 | MOMENTUM: 0.9 37 | WEIGHT_DECAY: 1e-4 38 | WARMUP_EPOCHS: 34.0 39 | WARMUP_START_LR: 0.01 40 | OPTIMIZING_METHOD: sgd 41 | MODEL: 42 | NUM_CLASSES: 400 43 | ARCH: i3d 44 | MODEL_NAME: ResNet 45 | LOSS_FUNC: cross_entropy 46 | DROPOUT_RATE: 0.5 47 | TEST: 48 | ENABLE: True 49 | DATASET: kinetics 50 | BATCH_SIZE: 64 51 | DATA_LOADER: 52 | NUM_WORKERS: 8 53 | PIN_MEMORY: True 54 | NUM_GPUS: 8 55 | NUM_SHARDS: 1 56 | RNG_SEED: 0 57 | OUTPUT_DIR: . 58 | -------------------------------------------------------------------------------- /configs/Kinetics/c2/SLOWFAST_16x8_R101_50_50.yaml: -------------------------------------------------------------------------------- 1 | TRAIN: 2 | ENABLE: False 3 | DATASET: kinetics 4 | BATCH_SIZE: 64 5 | EVAL_PERIOD: 10 6 | CHECKPOINT_PERIOD: 1 7 | AUTO_RESUME: True 8 | # CHECKPOINT_FILE_PATH: path to the model to test 9 | CHECKPOINT_TYPE: caffe2 10 | DATA: 11 | NUM_FRAMES: 64 12 | SAMPLING_RATE: 2 13 | TRAIN_JITTER_SCALES: [256, 320] 14 | TRAIN_CROP_SIZE: 224 15 | TEST_CROP_SIZE: 256 16 | INPUT_CHANNEL_NUM: [3, 3] 17 | SLOWFAST: 18 | ALPHA: 4 19 | BETA_INV: 8 20 | FUSION_CONV_CHANNEL_RATIO: 2 21 | FUSION_KERNEL_SZ: 5 22 | RESNET: 23 | ZERO_INIT_FINAL_BN: True 24 | WIDTH_PER_GROUP: 64 25 | NUM_GROUPS: 1 26 | DEPTH: 101 27 | TRANS_FUNC: bottleneck_transform 28 | STRIDE_1X1: False 29 | NUM_BLOCK_TEMP_KERNEL: [[3, 3], [4, 4], [6, 6], [3, 3]] 30 | SPATIAL_STRIDES: [[1, 1], [2, 2], [2, 2], [2, 2]] 31 | SPATIAL_DILATIONS: [[1, 1], [1, 1], [1, 1], [1, 1]] 32 | NONLOCAL: 33 | LOCATION: [[[], []], [[], []], [[], []], [[], []]] 34 | GROUP: [[1, 1], [1, 1], [1, 1], [1, 1]] 35 | INSTANTIATION: dot_product 36 | BN: 37 | USE_PRECISE_STATS: True 38 | NUM_BATCHES_PRECISE: 200 39 | SOLVER: 40 | BASE_LR: 0.1 41 | LR_POLICY: cosine 42 | MAX_EPOCH: 196 43 | MOMENTUM: 0.9 44 | WEIGHT_DECAY: 1e-4 45 | WARMUP_EPOCHS: 34.0 46 | WARMUP_START_LR: 0.01 47 | OPTIMIZING_METHOD: sgd 48 | MODEL: 49 | NUM_CLASSES: 400 50 | ARCH: slowfast 51 | MODEL_NAME: SlowFast 52 | LOSS_FUNC: cross_entropy 53 | DROPOUT_RATE: 0.5 54 | TEST: 55 | ENABLE: True 56 | DATASET: kinetics 57 | BATCH_SIZE: 64 58 | DATA_LOADER: 59 | NUM_WORKERS: 8 60 | PIN_MEMORY: True 61 | NUM_GPUS: 8 62 | NUM_SHARDS: 1 63 | RNG_SEED: 0 64 | OUTPUT_DIR: . 
65 | -------------------------------------------------------------------------------- /configs/Kinetics/c2/SLOWFAST_4x16_R50.yaml: -------------------------------------------------------------------------------- 1 | TRAIN: 2 | ENABLE: False 3 | DATASET: kinetics 4 | BATCH_SIZE: 64 5 | EVAL_PERIOD: 10 6 | CHECKPOINT_PERIOD: 1 7 | AUTO_RESUME: True 8 | # CHECKPOINT_FILE_PATH: path to the model to test 9 | CHECKPOINT_TYPE: caffe2 10 | DATA: 11 | NUM_FRAMES: 32 12 | SAMPLING_RATE: 2 13 | TRAIN_JITTER_SCALES: [256, 320] 14 | TRAIN_CROP_SIZE: 224 15 | TEST_CROP_SIZE: 256 16 | INPUT_CHANNEL_NUM: [3, 3] 17 | SLOWFAST: 18 | ALPHA: 8 19 | BETA_INV: 8 20 | FUSION_CONV_CHANNEL_RATIO: 2 21 | FUSION_KERNEL_SZ: 5 22 | RESNET: 23 | ZERO_INIT_FINAL_BN: True 24 | WIDTH_PER_GROUP: 64 25 | NUM_GROUPS: 1 26 | DEPTH: 50 27 | TRANS_FUNC: bottleneck_transform 28 | STRIDE_1X1: False 29 | NUM_BLOCK_TEMP_KERNEL: [[3, 3], [4, 4], [6, 6], [3, 3]] 30 | SPATIAL_STRIDES: [[1, 1], [2, 2], [2, 2], [2, 2]] 31 | SPATIAL_DILATIONS: [[1, 1], [1, 1], [1, 1], [1, 1]] 32 | NONLOCAL: 33 | LOCATION: [[[], []], [[], []], [[], []], [[], []]] 34 | GROUP: [[1, 1], [1, 1], [1, 1], [1, 1]] 35 | INSTANTIATION: dot_product 36 | BN: 37 | USE_PRECISE_STATS: True 38 | NUM_BATCHES_PRECISE: 200 39 | SOLVER: 40 | BASE_LR: 0.1 41 | LR_POLICY: cosine 42 | MAX_EPOCH: 196 43 | MOMENTUM: 0.9 44 | WEIGHT_DECAY: 1e-4 45 | WARMUP_EPOCHS: 34.0 46 | WARMUP_START_LR: 0.01 47 | OPTIMIZING_METHOD: sgd 48 | MODEL: 49 | NUM_CLASSES: 400 50 | ARCH: slowfast 51 | MODEL_NAME: SlowFast 52 | LOSS_FUNC: cross_entropy 53 | DROPOUT_RATE: 0.5 54 | TEST: 55 | ENABLE: True 56 | DATASET: kinetics 57 | BATCH_SIZE: 64 58 | DATA_LOADER: 59 | NUM_WORKERS: 8 60 | PIN_MEMORY: True 61 | NUM_GPUS: 8 62 | NUM_SHARDS: 1 63 | RNG_SEED: 0 64 | OUTPUT_DIR: . 
65 | -------------------------------------------------------------------------------- /configs/Kinetics/c2/SLOWFAST_8x8_R101_101_101.yaml: -------------------------------------------------------------------------------- 1 | TRAIN: 2 | ENABLE: False 3 | DATASET: kinetics 4 | BATCH_SIZE: 64 5 | EVAL_PERIOD: 10 6 | CHECKPOINT_PERIOD: 1 7 | AUTO_RESUME: True 8 | # CHECKPOINT_FILE_PATH: path to the model to test 9 | CHECKPOINT_TYPE: caffe2 10 | DATA: 11 | NUM_FRAMES: 32 12 | SAMPLING_RATE: 2 13 | TRAIN_JITTER_SCALES: [256, 320] 14 | TRAIN_CROP_SIZE: 224 15 | TEST_CROP_SIZE: 256 16 | INPUT_CHANNEL_NUM: [3, 3] 17 | SLOWFAST: 18 | ALPHA: 4 19 | BETA_INV: 8 20 | FUSION_CONV_CHANNEL_RATIO: 2 21 | FUSION_KERNEL_SZ: 5 22 | RESNET: 23 | ZERO_INIT_FINAL_BN: True 24 | WIDTH_PER_GROUP: 64 25 | NUM_GROUPS: 1 26 | DEPTH: 101 27 | TRANS_FUNC: bottleneck_transform 28 | STRIDE_1X1: False 29 | NUM_BLOCK_TEMP_KERNEL: [[3, 3], [4, 4], [23, 23], [3, 3]] 30 | SPATIAL_STRIDES: [[1, 1], [2, 2], [2, 2], [2, 2]] 31 | SPATIAL_DILATIONS: [[1, 1], [1, 1], [1, 1], [1, 1]] 32 | NONLOCAL: 33 | LOCATION: [[[], []], [[], []], [[], []], [[], []]] 34 | GROUP: [[1, 1], [1, 1], [1, 1], [1, 1]] 35 | INSTANTIATION: dot_product 36 | BN: 37 | USE_PRECISE_STATS: True 38 | NUM_BATCHES_PRECISE: 200 39 | SOLVER: 40 | BASE_LR: 0.1 41 | LR_POLICY: cosine 42 | MAX_EPOCH: 196 43 | MOMENTUM: 0.9 44 | WEIGHT_DECAY: 1e-4 45 | WARMUP_EPOCHS: 34.0 46 | WARMUP_START_LR: 0.01 47 | OPTIMIZING_METHOD: sgd 48 | MODEL: 49 | NUM_CLASSES: 400 50 | ARCH: slowfast 51 | MODEL_NAME: SlowFast 52 | LOSS_FUNC: cross_entropy 53 | DROPOUT_RATE: 0.5 54 | TEST: 55 | ENABLE: True 56 | DATASET: kinetics 57 | BATCH_SIZE: 64 58 | DATA_LOADER: 59 | NUM_WORKERS: 8 60 | PIN_MEMORY: True 61 | NUM_GPUS: 8 62 | NUM_SHARDS: 1 63 | RNG_SEED: 0 64 | OUTPUT_DIR: . 
65 | -------------------------------------------------------------------------------- /configs/Kinetics/c2/SLOWFAST_8x8_R101_50_101.yaml: -------------------------------------------------------------------------------- 1 | TRAIN: 2 | ENABLE: False 3 | DATASET: kinetics 4 | BATCH_SIZE: 64 5 | EVAL_PERIOD: 10 6 | CHECKPOINT_PERIOD: 1 7 | AUTO_RESUME: True 8 | # CHECKPOINT_FILE_PATH: path to the model to test 9 | CHECKPOINT_TYPE: caffe2 10 | DATA: 11 | NUM_FRAMES: 32 12 | SAMPLING_RATE: 2 13 | TRAIN_JITTER_SCALES: [256, 320] 14 | TRAIN_CROP_SIZE: 224 15 | TEST_CROP_SIZE: 256 16 | INPUT_CHANNEL_NUM: [3, 3] 17 | SLOWFAST: 18 | ALPHA: 4 19 | BETA_INV: 8 20 | FUSION_CONV_CHANNEL_RATIO: 2 21 | FUSION_KERNEL_SZ: 5 22 | RESNET: 23 | ZERO_INIT_FINAL_BN: True 24 | WIDTH_PER_GROUP: 64 25 | NUM_GROUPS: 1 26 | DEPTH: 101 27 | TRANS_FUNC: bottleneck_transform 28 | STRIDE_1X1: False 29 | NUM_BLOCK_TEMP_KERNEL: [[3, 3], [4, 4], [6, 23], [3, 3]] 30 | SPATIAL_STRIDES: [[1, 1], [2, 2], [2, 2], [2, 2]] 31 | SPATIAL_DILATIONS: [[1, 1], [1, 1], [1, 1], [1, 1]] 32 | NONLOCAL: 33 | LOCATION: [[[], []], [[], []], [[], []], [[], []]] 34 | GROUP: [[1, 1], [1, 1], [1, 1], [1, 1]] 35 | INSTANTIATION: dot_product 36 | BN: 37 | USE_PRECISE_STATS: True 38 | NUM_BATCHES_PRECISE: 200 39 | SOLVER: 40 | BASE_LR: 0.1 41 | LR_POLICY: cosine 42 | MAX_EPOCH: 196 43 | MOMENTUM: 0.9 44 | WEIGHT_DECAY: 1e-4 45 | WARMUP_EPOCHS: 34.0 46 | WARMUP_START_LR: 0.01 47 | OPTIMIZING_METHOD: sgd 48 | MODEL: 49 | NUM_CLASSES: 400 50 | ARCH: slowfast 51 | MODEL_NAME: SlowFast 52 | LOSS_FUNC: cross_entropy 53 | DROPOUT_RATE: 0.5 54 | TEST: 55 | ENABLE: True 56 | DATASET: kinetics 57 | BATCH_SIZE: 64 58 | DATA_LOADER: 59 | NUM_WORKERS: 8 60 | PIN_MEMORY: True 61 | NUM_GPUS: 8 62 | NUM_SHARDS: 1 63 | RNG_SEED: 0 64 | OUTPUT_DIR: . 
65 | -------------------------------------------------------------------------------- /configs/Kinetics/c2/SLOWFAST_8x8_R101_50_50.yaml: -------------------------------------------------------------------------------- 1 | TRAIN: 2 | ENABLE: False 3 | DATASET: kinetics 4 | BATCH_SIZE: 64 5 | EVAL_PERIOD: 10 6 | CHECKPOINT_PERIOD: 1 7 | AUTO_RESUME: True 8 | # CHECKPOINT_FILE_PATH: path to the model to test 9 | CHECKPOINT_TYPE: caffe2 10 | DATA: 11 | NUM_FRAMES: 32 12 | SAMPLING_RATE: 2 13 | TRAIN_JITTER_SCALES: [256, 320] 14 | TRAIN_CROP_SIZE: 224 15 | TEST_CROP_SIZE: 256 16 | INPUT_CHANNEL_NUM: [3, 3] 17 | SLOWFAST: 18 | ALPHA: 4 19 | BETA_INV: 8 20 | FUSION_CONV_CHANNEL_RATIO: 2 21 | FUSION_KERNEL_SZ: 5 22 | RESNET: 23 | ZERO_INIT_FINAL_BN: True 24 | WIDTH_PER_GROUP: 64 25 | NUM_GROUPS: 1 26 | DEPTH: 101 27 | TRANS_FUNC: bottleneck_transform 28 | STRIDE_1X1: False 29 | NUM_BLOCK_TEMP_KERNEL: [[3, 3], [4, 4], [6, 6], [3, 3]] 30 | SPATIAL_STRIDES: [[1, 1], [2, 2], [2, 2], [2, 2]] 31 | SPATIAL_DILATIONS: [[1, 1], [1, 1], [1, 1], [1, 1]] 32 | NONLOCAL: 33 | LOCATION: [[[], []], [[], []], [[], []], [[], []]] 34 | GROUP: [[1, 1], [1, 1], [1, 1], [1, 1]] 35 | INSTANTIATION: dot_product 36 | BN: 37 | USE_PRECISE_STATS: True 38 | NUM_BATCHES_PRECISE: 200 39 | SOLVER: 40 | BASE_LR: 0.1 41 | LR_POLICY: cosine 42 | MAX_EPOCH: 196 43 | MOMENTUM: 0.9 44 | WEIGHT_DECAY: 1e-4 45 | WARMUP_EPOCHS: 34.0 46 | WARMUP_START_LR: 0.01 47 | OPTIMIZING_METHOD: sgd 48 | MODEL: 49 | NUM_CLASSES: 400 50 | ARCH: slowfast 51 | MODEL_NAME: SlowFast 52 | LOSS_FUNC: cross_entropy 53 | DROPOUT_RATE: 0.5 54 | TEST: 55 | ENABLE: True 56 | DATASET: kinetics 57 | BATCH_SIZE: 64 58 | DATA_LOADER: 59 | NUM_WORKERS: 8 60 | PIN_MEMORY: True 61 | NUM_GPUS: 8 62 | NUM_SHARDS: 1 63 | RNG_SEED: 0 64 | OUTPUT_DIR: . 
65 | -------------------------------------------------------------------------------- /configs/Kinetics/c2/SLOWFAST_8x8_R50.yaml: -------------------------------------------------------------------------------- 1 | TRAIN: 2 | ENABLE: False 3 | DATASET: kinetics 4 | BATCH_SIZE: 64 5 | EVAL_PERIOD: 10 6 | CHECKPOINT_PERIOD: 1 7 | AUTO_RESUME: True 8 | # CHECKPOINT_FILE_PATH: path to the model to test 9 | CHECKPOINT_TYPE: caffe2 10 | DATA: 11 | NUM_FRAMES: 32 12 | SAMPLING_RATE: 2 13 | TRAIN_JITTER_SCALES: [256, 320] 14 | TRAIN_CROP_SIZE: 224 15 | TEST_CROP_SIZE: 256 16 | INPUT_CHANNEL_NUM: [3, 3] 17 | SLOWFAST: 18 | ALPHA: 4 19 | BETA_INV: 8 20 | FUSION_CONV_CHANNEL_RATIO: 2 21 | FUSION_KERNEL_SZ: 7 22 | RESNET: 23 | ZERO_INIT_FINAL_BN: True 24 | WIDTH_PER_GROUP: 64 25 | NUM_GROUPS: 1 26 | DEPTH: 50 27 | TRANS_FUNC: bottleneck_transform 28 | STRIDE_1X1: False 29 | NUM_BLOCK_TEMP_KERNEL: [[3, 3], [4, 4], [6, 6], [3, 3]] 30 | SPATIAL_STRIDES: [[1, 1], [2, 2], [2, 2], [2, 2]] 31 | SPATIAL_DILATIONS: [[1, 1], [1, 1], [1, 1], [1, 1]] 32 | NONLOCAL: 33 | LOCATION: [[[], []], [[], []], [[], []], [[], []]] 34 | GROUP: [[1, 1], [1, 1], [1, 1], [1, 1]] 35 | INSTANTIATION: dot_product 36 | BN: 37 | USE_PRECISE_STATS: True 38 | NUM_BATCHES_PRECISE: 200 39 | SOLVER: 40 | BASE_LR: 0.1 41 | LR_POLICY: cosine 42 | MAX_EPOCH: 196 43 | MOMENTUM: 0.9 44 | WEIGHT_DECAY: 1e-4 45 | WARMUP_EPOCHS: 34.0 46 | WARMUP_START_LR: 0.01 47 | OPTIMIZING_METHOD: sgd 48 | MODEL: 49 | NUM_CLASSES: 400 50 | ARCH: slowfast 51 | MODEL_NAME: SlowFast 52 | LOSS_FUNC: cross_entropy 53 | DROPOUT_RATE: 0.5 54 | TEST: 55 | ENABLE: True 56 | DATASET: kinetics 57 | BATCH_SIZE: 64 58 | DATA_LOADER: 59 | NUM_WORKERS: 8 60 | PIN_MEMORY: True 61 | NUM_GPUS: 8 62 | NUM_SHARDS: 1 63 | RNG_SEED: 0 64 | OUTPUT_DIR: . 
65 | -------------------------------------------------------------------------------- /configs/Kinetics/c2/SLOWFAST_NLN_16x8_R101_50_50.yaml: -------------------------------------------------------------------------------- 1 | TRAIN: 2 | ENABLE: False 3 | DATASET: kinetics 4 | BATCH_SIZE: 64 5 | EVAL_PERIOD: 10 6 | CHECKPOINT_PERIOD: 1 7 | AUTO_RESUME: True 8 | # CHECKPOINT_FILE_PATH: path to the model to test 9 | CHECKPOINT_TYPE: caffe2 10 | DATA: 11 | NUM_FRAMES: 64 12 | SAMPLING_RATE: 2 13 | TRAIN_JITTER_SCALES: [256, 320] 14 | TRAIN_CROP_SIZE: 224 15 | TEST_CROP_SIZE: 256 16 | INPUT_CHANNEL_NUM: [3, 3] 17 | SLOWFAST: 18 | ALPHA: 4 19 | BETA_INV: 8 20 | FUSION_CONV_CHANNEL_RATIO: 2 21 | FUSION_KERNEL_SZ: 5 22 | RESNET: 23 | ZERO_INIT_FINAL_BN: True 24 | WIDTH_PER_GROUP: 64 25 | NUM_GROUPS: 1 26 | DEPTH: 101 27 | TRANS_FUNC: bottleneck_transform 28 | STRIDE_1X1: False 29 | NUM_BLOCK_TEMP_KERNEL: [[3, 3], [4, 4], [6, 6], [3, 3]] 30 | SPATIAL_STRIDES: [[1, 1], [2, 2], [2, 2], [2, 2]] 31 | SPATIAL_DILATIONS: [[1, 1], [1, 1], [1, 1], [1, 1]] 32 | NONLOCAL: 33 | LOCATION: [[[], []], [[], []], [[6, 13, 20], []], [[], []]] 34 | GROUP: [[1, 1], [1, 1], [1, 1], [1, 1]] 35 | INSTANTIATION: dot_product 36 | BN: 37 | USE_PRECISE_STATS: True 38 | NUM_BATCHES_PRECISE: 200 39 | SOLVER: 40 | BASE_LR: 0.1 41 | LR_POLICY: cosine 42 | MAX_EPOCH: 196 43 | MOMENTUM: 0.9 44 | WEIGHT_DECAY: 1e-4 45 | WARMUP_EPOCHS: 34.0 46 | WARMUP_START_LR: 0.01 47 | OPTIMIZING_METHOD: sgd 48 | MODEL: 49 | NUM_CLASSES: 400 50 | ARCH: slowfast 51 | MODEL_NAME: SlowFast 52 | LOSS_FUNC: cross_entropy 53 | DROPOUT_RATE: 0.5 54 | TEST: 55 | ENABLE: True 56 | DATASET: kinetics 57 | BATCH_SIZE: 64 58 | DATA_LOADER: 59 | NUM_WORKERS: 8 60 | PIN_MEMORY: True 61 | NUM_GPUS: 8 62 | NUM_SHARDS: 1 63 | RNG_SEED: 0 64 | OUTPUT_DIR: . 65 | -------------------------------------------------------------------------------- /configs/Kinetics/c2/SLOW_4x16_R50.yaml: -------------------------------------------------------------------------------- 1 | TRAIN: 2 | ENABLE: False 3 | DATASET: kinetics 4 | BATCH_SIZE: 64 5 | EVAL_PERIOD: 10 6 | CHECKPOINT_PERIOD: 1 7 | AUTO_RESUME: True 8 | # CHECKPOINT_FILE_PATH: path to the model to test 9 | CHECKPOINT_TYPE: caffe2 10 | DATA: 11 | NUM_FRAMES: 4 12 | SAMPLING_RATE: 16 13 | TRAIN_JITTER_SCALES: [256, 320] 14 | TRAIN_CROP_SIZE: 224 15 | TEST_CROP_SIZE: 256 16 | INPUT_CHANNEL_NUM: [3] 17 | RESNET: 18 | ZERO_INIT_FINAL_BN: True 19 | WIDTH_PER_GROUP: 64 20 | NUM_GROUPS: 1 21 | DEPTH: 50 22 | TRANS_FUNC: bottleneck_transform 23 | STRIDE_1X1: False 24 | NUM_BLOCK_TEMP_KERNEL: [[3], [4], [6], [3]] 25 | NONLOCAL: 26 | LOCATION: [[[]], [[]], [[]], [[]]] 27 | GROUP: [[1], [1], [1], [1]] 28 | INSTANTIATION: dot_product 29 | BN: 30 | USE_PRECISE_STATS: True 31 | NUM_BATCHES_PRECISE: 200 32 | SOLVER: 33 | BASE_LR: 0.1 34 | LR_POLICY: cosine 35 | MAX_EPOCH: 196 36 | MOMENTUM: 0.9 37 | WEIGHT_DECAY: 1e-4 38 | WARMUP_EPOCHS: 34.0 39 | WARMUP_START_LR: 0.01 40 | OPTIMIZING_METHOD: sgd 41 | MODEL: 42 | NUM_CLASSES: 400 43 | ARCH: slow 44 | MODEL_NAME: ResNet 45 | LOSS_FUNC: cross_entropy 46 | DROPOUT_RATE: 0.5 47 | TEST: 48 | ENABLE: True 49 | DATASET: kinetics 50 | BATCH_SIZE: 64 51 | DATA_LOADER: 52 | NUM_WORKERS: 8 53 | PIN_MEMORY: True 54 | NUM_GPUS: 8 55 | NUM_SHARDS: 1 56 | RNG_SEED: 0 57 | OUTPUT_DIR: . 
58 | -------------------------------------------------------------------------------- /configs/Kinetics/c2/SLOW_8x8_R50.yaml: -------------------------------------------------------------------------------- 1 | TRAIN: 2 | ENABLE: False 3 | DATASET: kinetics 4 | BATCH_SIZE: 64 5 | EVAL_PERIOD: 10 6 | CHECKPOINT_PERIOD: 1 7 | AUTO_RESUME: True 8 | # CHECKPOINT_FILE_PATH: path to the model to test 9 | CHECKPOINT_TYPE: caffe2 10 | DATA: 11 | NUM_FRAMES: 8 12 | SAMPLING_RATE: 8 13 | TRAIN_JITTER_SCALES: [256, 320] 14 | TRAIN_CROP_SIZE: 224 15 | TEST_CROP_SIZE: 256 16 | INPUT_CHANNEL_NUM: [3] 17 | RESNET: 18 | ZERO_INIT_FINAL_BN: True 19 | WIDTH_PER_GROUP: 64 20 | NUM_GROUPS: 1 21 | DEPTH: 50 22 | TRANS_FUNC: bottleneck_transform 23 | STRIDE_1X1: False 24 | NUM_BLOCK_TEMP_KERNEL: [[3], [4], [6], [3]] 25 | NONLOCAL: 26 | LOCATION: [[[]], [[]], [[]], [[]]] 27 | GROUP: [[1], [1], [1], [1]] 28 | INSTANTIATION: dot_product 29 | BN: 30 | USE_PRECISE_STATS: True 31 | NUM_BATCHES_PRECISE: 200 32 | SOLVER: 33 | BASE_LR: 0.1 34 | LR_POLICY: cosine 35 | MAX_EPOCH: 196 36 | MOMENTUM: 0.9 37 | WEIGHT_DECAY: 1e-4 38 | WARMUP_EPOCHS: 34.0 39 | WARMUP_START_LR: 0.01 40 | OPTIMIZING_METHOD: sgd 41 | MODEL: 42 | NUM_CLASSES: 400 43 | ARCH: slow 44 | MODEL_NAME: ResNet 45 | LOSS_FUNC: cross_entropy 46 | DROPOUT_RATE: 0.5 47 | TEST: 48 | ENABLE: True 49 | DATASET: kinetics 50 | BATCH_SIZE: 64 51 | DATA_LOADER: 52 | NUM_WORKERS: 8 53 | PIN_MEMORY: True 54 | NUM_GPUS: 8 55 | NUM_SHARDS: 1 56 | RNG_SEED: 0 57 | OUTPUT_DIR: . 58 | -------------------------------------------------------------------------------- /configs/SSv2/SLOWFAST_16x8_R50.yaml: -------------------------------------------------------------------------------- 1 | TRAIN: 2 | ENABLE: True 3 | DATASET: ssv2 4 | BATCH_SIZE: 16 5 | EVAL_PERIOD: 2 6 | CHECKPOINT_PERIOD: 2 7 | AUTO_RESUME: True 8 | CHECKPOINT_FILE_PATH: SLOWFAST_8x8_R50.pkl # please download from the model zoo. 
9 | CHECKPOINT_TYPE: caffe2 10 | DATA: 11 | NUM_FRAMES: 64 12 | SAMPLING_RATE: 2 13 | TRAIN_JITTER_SCALES: [256, 320] 14 | TRAIN_CROP_SIZE: 224 15 | TEST_CROP_SIZE: 256 16 | INPUT_CHANNEL_NUM: [3, 3] 17 | INV_UNIFORM_SAMPLE: True 18 | RANDOM_FLIP: False 19 | REVERSE_INPUT_CHANNEL: True 20 | SLOWFAST: 21 | ALPHA: 4 22 | BETA_INV: 8 23 | FUSION_CONV_CHANNEL_RATIO: 2 24 | FUSION_KERNEL_SZ: 7 25 | RESNET: 26 | SPATIAL_STRIDES: [[1, 1], [2, 2], [2, 2], [2, 2]] 27 | SPATIAL_DILATIONS: [[1, 1], [1, 1], [1, 1], [1, 1]] 28 | ZERO_INIT_FINAL_BN: True 29 | WIDTH_PER_GROUP: 64 30 | NUM_GROUPS: 1 31 | DEPTH: 50 32 | TRANS_FUNC: bottleneck_transform 33 | STRIDE_1X1: False 34 | NUM_BLOCK_TEMP_KERNEL: [[3, 3], [4, 4], [6, 6], [3, 3]] 35 | NONLOCAL: 36 | LOCATION: [[[], []], [[], []], [[], []], [[], []]] 37 | GROUP: [[1, 1], [1, 1], [1, 1], [1, 1]] 38 | INSTANTIATION: dot_product 39 | BN: 40 | USE_PRECISE_STATS: True 41 | NUM_BATCHES_PRECISE: 200 42 | NORM_TYPE: sync_batchnorm 43 | NUM_SYNC_DEVICES: 4 44 | SOLVER: 45 | BASE_LR: 0.03 46 | LR_POLICY: steps_with_relative_lrs 47 | LRS: [1, 0.1, 0.01, 0.001, 0.0001, 0.00001] 48 | STEPS: [0, 14, 18] 49 | MAX_EPOCH: 22 50 | MOMENTUM: 0.9 51 | WEIGHT_DECAY: 1e-6 52 | WARMUP_EPOCHS: 0.19 53 | WARMUP_START_LR: 0.0001 54 | OPTIMIZING_METHOD: sgd 55 | MODEL: 56 | NUM_CLASSES: 174 57 | ARCH: slowfast 58 | LOSS_FUNC: cross_entropy 59 | DROPOUT_RATE: 0.5 60 | TEST: 61 | ENABLE: True 62 | DATASET: ssv2 63 | BATCH_SIZE: 16 64 | NUM_ENSEMBLE_VIEWS: 1 65 | NUM_SPATIAL_CROPS: 1 66 | DATA_LOADER: 67 | NUM_WORKERS: 4 68 | PIN_MEMORY: True 69 | NUM_GPUS: 8 70 | NUM_SHARDS: 1 71 | RNG_SEED: 0 72 | OUTPUT_DIR: . 73 | LOG_MODEL_INFO: False 74 | -------------------------------------------------------------------------------- /configs/SSv2/SLOWFAST_16x8_R50_multigrid.yaml: -------------------------------------------------------------------------------- 1 | MULTIGRID: 2 | SHORT_CYCLE: True 3 | LONG_CYCLE: True 4 | TRAIN: 5 | ENABLE: True 6 | DATASET: ssv2 7 | BATCH_SIZE: 16 8 | EVAL_PERIOD: 2 9 | CHECKPOINT_PERIOD: 2 10 | AUTO_RESUME: True 11 | CHECKPOINT_FILE_PATH: SLOWFAST_8x8_R50.pkl # please download from the model zoo. 
12 | CHECKPOINT_TYPE: caffe2 13 | DATA: 14 | NUM_FRAMES: 64 15 | SAMPLING_RATE: 2 16 | TRAIN_JITTER_SCALES: [256, 320] 17 | TRAIN_CROP_SIZE: 224 18 | TEST_CROP_SIZE: 224 19 | INPUT_CHANNEL_NUM: [3, 3] 20 | INV_UNIFORM_SAMPLE: True 21 | RANDOM_FLIP: False 22 | REVERSE_INPUT_CHANNEL: True 23 | SLOWFAST: 24 | ALPHA: 4 25 | BETA_INV: 8 26 | FUSION_CONV_CHANNEL_RATIO: 2 27 | FUSION_KERNEL_SZ: 7 28 | RESNET: 29 | SPATIAL_STRIDES: [[1, 1], [2, 2], [2, 2], [2, 2]] 30 | SPATIAL_DILATIONS: [[1, 1], [1, 1], [1, 1], [1, 1]] 31 | ZERO_INIT_FINAL_BN: True 32 | WIDTH_PER_GROUP: 64 33 | NUM_GROUPS: 1 34 | DEPTH: 50 35 | TRANS_FUNC: bottleneck_transform 36 | STRIDE_1X1: False 37 | NUM_BLOCK_TEMP_KERNEL: [[3, 3], [4, 4], [6, 6], [3, 3]] 38 | NONLOCAL: 39 | LOCATION: [[[], []], [[], []], [[], []], [[], []]] 40 | GROUP: [[1, 1], [1, 1], [1, 1], [1, 1]] 41 | INSTANTIATION: dot_product 42 | BN: 43 | USE_PRECISE_STATS: True 44 | NUM_BATCHES_PRECISE: 200 45 | NORM_TYPE: sync_batchnorm 46 | NUM_SYNC_DEVICES: 4 47 | SOLVER: 48 | BASE_LR: 0.03 49 | LR_POLICY: steps_with_relative_lrs 50 | LRS: [1, 0.1, 0.01, 0.001, 0.0001, 0.00001] 51 | STEPS: [0, 14, 18] 52 | MAX_EPOCH: 22 53 | MOMENTUM: 0.9 54 | WEIGHT_DECAY: 1e-6 55 | WARMUP_EPOCHS: 0.19 56 | WARMUP_START_LR: 0.0001 57 | OPTIMIZING_METHOD: sgd 58 | MODEL: 59 | NUM_CLASSES: 174 60 | ARCH: slowfast 61 | LOSS_FUNC: cross_entropy 62 | DROPOUT_RATE: 0.5 63 | TEST: 64 | ENABLE: True 65 | DATASET: ssv2 66 | BATCH_SIZE: 16 67 | NUM_ENSEMBLE_VIEWS: 1 68 | NUM_SPATIAL_CROPS: 1 69 | DATA_LOADER: 70 | NUM_WORKERS: 4 71 | PIN_MEMORY: True 72 | NUM_GPUS: 8 73 | NUM_SHARDS: 1 74 | RNG_SEED: 0 75 | OUTPUT_DIR: . 76 | LOG_MODEL_INFO: False 77 | -------------------------------------------------------------------------------- /demo/AVA/SLOWFAST_32x2_R101_50_50.yaml: -------------------------------------------------------------------------------- 1 | TRAIN: 2 | ENABLE: False 3 | DATASET: ava 4 | BATCH_SIZE: 16 5 | EVAL_PERIOD: 1 6 | CHECKPOINT_PERIOD: 1 7 | AUTO_RESUME: True 8 | CHECKPOINT_FILE_PATH: ./SLOWFAST_32x2_R101_50_50.pkl # path to pretrained model 9 | CHECKPOINT_TYPE: pytorch 10 | DATA: 11 | NUM_FRAMES: 32 12 | SAMPLING_RATE: 2 13 | TRAIN_JITTER_SCALES: [256, 320] 14 | TRAIN_CROP_SIZE: 224 15 | TEST_CROP_SIZE: 256 16 | INPUT_CHANNEL_NUM: [3, 3] 17 | DETECTION: 18 | ENABLE: True 19 | ALIGNED: False 20 | AVA: 21 | BGR: False 22 | DETECTION_SCORE_THRESH: 0.8 23 | TEST_PREDICT_BOX_LISTS: ["person_box_67091280_iou90/ava_detection_val_boxes_and_labels.csv"] 24 | SLOWFAST: 25 | ALPHA: 4 26 | BETA_INV: 8 27 | FUSION_CONV_CHANNEL_RATIO: 2 28 | FUSION_KERNEL_SZ: 5 29 | RESNET: 30 | ZERO_INIT_FINAL_BN: True 31 | WIDTH_PER_GROUP: 64 32 | NUM_GROUPS: 1 33 | DEPTH: 101 34 | TRANS_FUNC: bottleneck_transform 35 | STRIDE_1X1: False 36 | NUM_BLOCK_TEMP_KERNEL: [[3, 3], [4, 4], [6, 6], [3, 3]] 37 | SPATIAL_DILATIONS: [[1, 1], [1, 1], [1, 1], [2, 2]] 38 | SPATIAL_STRIDES: [[1, 1], [2, 2], [2, 2], [1, 1]] 39 | NONLOCAL: 40 | LOCATION: [[[], []], [[], []], [[6, 13, 20], []], [[], []]] 41 | GROUP: [[1, 1], [1, 1], [1, 1], [1, 1]] 42 | INSTANTIATION: dot_product 43 | POOL: [[[2, 2, 2], [2, 2, 2]], [[2, 2, 2], [2, 2, 2]], [[2, 2, 2], [2, 2, 2]], [[2, 2, 2], [2, 2, 2]]] 44 | BN: 45 | USE_PRECISE_STATS: False 46 | NUM_BATCHES_PRECISE: 200 47 | SOLVER: 48 | MOMENTUM: 0.9 49 | WEIGHT_DECAY: 1e-7 50 | OPTIMIZING_METHOD: sgd 51 | MODEL: 52 | NUM_CLASSES: 80 53 | ARCH: slowfast 54 | MODEL_NAME: SlowFast 55 | LOSS_FUNC: bce 56 | DROPOUT_RATE: 0.5 57 | HEAD_ACT: sigmoid 58 | TEST: 59 | ENABLE: False 60 |
DATASET: ava 61 | BATCH_SIZE: 8 62 | DATA_LOADER: 63 | NUM_WORKERS: 2 64 | PIN_MEMORY: True 65 | 66 | NUM_GPUS: 1 67 | NUM_SHARDS: 1 68 | RNG_SEED: 0 69 | OUTPUT_DIR: . 70 | TENSORBOARD: 71 | MODEL_VIS: 72 | TOPK: 2 73 | DEMO: 74 | ENABLE: True 75 | LABEL_FILE_PATH: # Add local label file path here. 76 | WEBCAM: 0 77 | DETECTRON2_CFG: "COCO-Detection/faster_rcnn_R_50_FPN_3x.yaml" 78 | DETECTRON2_WEIGHTS: detectron2://COCO-Detection/faster_rcnn_R_50_FPN_3x/137849458/model_final_280758.pkl 79 | -------------------------------------------------------------------------------- /demo/Kinetics/SLOWFAST_8x8_R50.yaml: -------------------------------------------------------------------------------- 1 | TRAIN: 2 | ENABLE: False 3 | DATASET: kinetics 4 | BATCH_SIZE: 64 5 | EVAL_PERIOD: 10 6 | CHECKPOINT_PERIOD: 1 7 | AUTO_RESUME: True 8 | CHECKPOINT_FILE_PATH: "./SLOWFAST_8x8_R50.pkl" # path to pretrained model to run the demo 9 | CHECKPOINT_TYPE: caffe2 10 | DATA: 11 | NUM_FRAMES: 32 12 | SAMPLING_RATE: 2 13 | TRAIN_JITTER_SCALES: [256, 320] 14 | TRAIN_CROP_SIZE: 224 15 | TEST_CROP_SIZE: 256 16 | INPUT_CHANNEL_NUM: [3, 3] 17 | SLOWFAST: 18 | ALPHA: 4 19 | BETA_INV: 8 20 | FUSION_CONV_CHANNEL_RATIO: 2 21 | FUSION_KERNEL_SZ: 7 22 | RESNET: 23 | ZERO_INIT_FINAL_BN: True 24 | WIDTH_PER_GROUP: 64 25 | NUM_GROUPS: 1 26 | DEPTH: 50 27 | TRANS_FUNC: bottleneck_transform 28 | STRIDE_1X1: False 29 | NUM_BLOCK_TEMP_KERNEL: [[3, 3], [4, 4], [6, 6], [3, 3]] 30 | SPATIAL_STRIDES: [[1, 1], [2, 2], [2, 2], [2, 2]] 31 | SPATIAL_DILATIONS: [[1, 1], [1, 1], [1, 1], [1, 1]] 32 | NONLOCAL: 33 | LOCATION: [[[], []], [[], []], [[], []], [[], []]] 34 | GROUP: [[1, 1], [1, 1], [1, 1], [1, 1]] 35 | INSTANTIATION: dot_product 36 | BN: 37 | USE_PRECISE_STATS: True 38 | NUM_BATCHES_PRECISE: 200 39 | SOLVER: 40 | BASE_LR: 0.1 41 | LR_POLICY: cosine 42 | MAX_EPOCH: 196 43 | MOMENTUM: 0.9 44 | WEIGHT_DECAY: 1e-4 45 | WARMUP_EPOCHS: 34 46 | WARMUP_START_LR: 0.01 47 | OPTIMIZING_METHOD: sgd 48 | MODEL: 49 | NUM_CLASSES: 400 50 | ARCH: slowfast 51 | LOSS_FUNC: cross_entropy 52 | DROPOUT_RATE: 0.5 53 | TEST: 54 | ENABLE: False 55 | DATASET: kinetics 56 | BATCH_SIZE: 64 57 | DATA_LOADER: 58 | NUM_WORKERS: 8 59 | PIN_MEMORY: True 60 | DEMO: 61 | ENABLE: True 62 | LABEL_FILE_PATH: # Add local label file path here. 63 | WEBCAM: 0 64 | NUM_GPUS: 1 65 | NUM_SHARDS: 1 66 | RNG_SEED: 0 67 | OUTPUT_DIR: . 68 | -------------------------------------------------------------------------------- /demo/ava_demo.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fanyix/SlowFast/629fd1bf00e2d3b320b6e46c652331819fe9d4e7/demo/ava_demo.gif -------------------------------------------------------------------------------- /linter.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash -e 2 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 3 | # Run this script at project root by "./linter.sh" before you commit. 4 | echo "Running isort..." 5 | isort -y -sp . 6 | 7 | echo "Running black..." 8 | black -l 80 . 9 | 10 | echo "Running flake..." 11 | flake8 . 12 | 13 | command -v arc > /dev/null && { 14 | echo "Running arc lint ..."
15 | arc lint 16 | } 17 | -------------------------------------------------------------------------------- /projects/avslowfast/README.md: -------------------------------------------------------------------------------- 1 | # Getting Started with PyAVSlowFast 2 | 3 | This section supplements the original doc in PySlowFast (attached below) and provides instructions on how to start training an AVSlowFast model with this codebase. 4 | 5 | First, note that `DATA.PATH_TO_DATA_DIR` points to the directory where the annotation csv files reside and `DATA.PATH_PREFIX` to the root of the data directory. 6 | 7 | Then, issue the following training command: 8 | ``` 9 | python tools/run_net.py \ 10 | --cfg configs/Kinetics/AVSLOWFAST_4x16_R50.yaml \ 11 | DATA.PATH_TO_DATA_DIR path_to_your_annotation \ 12 | DATA.PATH_PREFIX path_to_your_dataset_root \ 13 | NUM_GPUS 8 \ 14 | DATA_LOADER.NUM_WORKERS 8 \ 15 | TRAIN.BATCH_SIZE 64 16 | ``` 17 | 18 | For testing, run the following: 19 | ``` 20 | python tools/run_net.py \ 21 | --cfg configs/Kinetics/AVSLOWFAST_4x16_R50.yaml \ 22 | DATA.PATH_TO_DATA_DIR path_to_your_annotation \ 23 | DATA.PATH_PREFIX path_to_your_dataset_root \ 24 | TEST.BATCH_SIZE 32 \ 25 | TEST.CHECKPOINT_FILE_PATH path_to_your_checkpoint \ 26 | TRAIN.ENABLE False 27 | ``` 28 | 29 | ## Citing AVSlowFast 30 | Please cite AVSlowFast if you use it in your research; you can use the following BibTeX entry. 31 | ```BibTeX 32 | @article{xiao-avslowfast2020, 33 | author = {Xiao, Fanyi and Lee, Yong Jae and Grauman, Kristen and Malik, Jitendra and Feichtenhofer, Christoph}, 34 | title = {{Audiovisual SlowFast Networks for Video Recognition}}, 35 | journal = {arXiv preprint arXiv:2001.08740}, 36 | year = {2020}} 37 | ``` 38 | -------------------------------------------------------------------------------- /projects/multigrid/README.md: -------------------------------------------------------------------------------- 1 | # A Multigrid Method for Efficiently Training Video Models 2 | [Chao-Yuan Wu](https://www.cs.utexas.edu/~cywu/), 3 | [Ross Girshick](http://rossgirshick.info), 4 | [Kaiming He](http://kaiminghe.com), 5 | [Christoph Feichtenhofer](http://feichtenhofer.github.io/), 6 | [Philipp Krähenbühl](http://www.philkr.net/) 7 |
8 | In CVPR, 2020. [[Paper](https://arxiv.org/abs/1912.00998)] 9 |
10 | ![multigrid](multigrid.png) 11 | 12 | 13 |
14 | 15 | 16 | ## Getting started 17 | To enable multigrid training, add `MULTIGRID.LONG_CYCLE True` and/or `MULTIGRID.SHORT_CYCLE True` when training your model. (Default multigrid training uses both long and short cycles; see the [paper](https://arxiv.org/abs/1912.00998) for details.) For example, 18 | 19 | ``` 20 | python tools/run_net.py \ 21 | --cfg configs/Charades/SLOWFAST_16x8_R50.yaml \ 22 | DATA.PATH_TO_DATA_DIR path_to_your_dataset \ 23 | MULTIGRID.LONG_CYCLE True \ 24 | MULTIGRID.SHORT_CYCLE True 25 | ``` 26 | This should train several times faster than training *without* multigrid. 27 | Note that multigrid training can incur higher IO overhead, 28 | so systems with faster IO (e.g., an efficient local disk) may see a larger speedup. 29 | Please see [MODEL_ZOO.md](https://github.com/facebookresearch/SlowFast/blob/master/MODEL_ZOO.md) for more examples of multigrid training. 30 | 31 | ## Citing Multigrid Training 32 | If you use multigrid training or the models from MODEL_ZOO in your research, please use the following BibTeX entry. 33 | ```BibTeX 34 | @inproceedings{multigrid2020, 35 | Author = {Chao-Yuan Wu and Ross Girshick and Kaiming He and Christoph Feichtenhofer 36 | and Philipp Kr\"{a}henb\"{u}hl}, 37 | Title = {{A Multigrid Method for Efficiently Training Video Models}}, 38 | Booktitle = {{CVPR}}, 39 | Year = {2020}} 40 | ``` 41 | -------------------------------------------------------------------------------- /projects/multigrid/multigrid.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fanyix/SlowFast/629fd1bf00e2d3b320b6e46c652331819fe9d4e7/projects/multigrid/multigrid.png -------------------------------------------------------------------------------- /setup.cfg: -------------------------------------------------------------------------------- 1 | [isort] 2 | line_length=100 3 | multi_line_output=4 4 | known_standard_library=numpy,setuptools 5 | known_myself=slowfast 6 | known_third_party=fvcore,av,torch,pycocotools,yacs,termcolor,scipy,simplejson,matplotlib,detectron2,torchvision,yaml,tqdm,psutil,opencv-python,pandas,tensorboard,moviepy 7 | no_lines_before=STDLIB,THIRDPARTY 8 | sections=FUTURE,STDLIB,THIRDPARTY,myself,FIRSTPARTY,LOCALFOLDER 9 | default_section=FIRSTPARTY 10 | 11 | [mypy] 12 | python_version=3.6 13 | ignore_missing_imports = True 14 | warn_unused_configs = True 15 | disallow_untyped_defs = True 16 | check_untyped_defs = True 17 | warn_unused_ignores = True 18 | warn_redundant_casts = True 19 | show_column_numbers = True 20 | follow_imports = silent 21 | allow_redefinition = True 22 | ; Require all functions to be annotated 23 | disallow_incomplete_defs = True 24 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
3 | 4 | from setuptools import find_packages, setup 5 | 6 | setup( 7 | name="slowfast", 8 | version="1.0", 9 | author="FAIR", 10 | url="unknown", 11 | description="SlowFast Video Understanding", 12 | install_requires=[ 13 | "yacs>=0.1.6", 14 | "pyyaml>=5.1", 15 | "av", 16 | "matplotlib", 17 | "termcolor>=1.1", 18 | "simplejson", 19 | "tqdm", 20 | "psutil", 21 | "detectron2", 22 | "opencv-python", 23 | "pandas", 24 | "torchvision>=0.4.2", 25 | "sklearn", 26 | ], 27 | extras_require={"tensorboard_video_visualization": ["moviepy"]}, 28 | packages=find_packages(exclude=("configs", "tests")), 29 | ) 30 | -------------------------------------------------------------------------------- /slowfast/__init__.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 3 | 4 | from slowfast.utils.env import setup_environment 5 | 6 | setup_environment() 7 | -------------------------------------------------------------------------------- /slowfast/config/__init__.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 3 | -------------------------------------------------------------------------------- /slowfast/config/custom_config.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 3 | 4 | """Add custom configs and default values""" 5 | 6 | 7 | def add_custom_config(_C): 8 | # Add your own customized configs. 9 | pass 10 | -------------------------------------------------------------------------------- /slowfast/datasets/DATASET.md: -------------------------------------------------------------------------------- 1 | # Dataset Preparation 2 | 3 | ## Kinetics 4 | 5 | The Kinetics Dataset can be downloaded via the code released by ActivityNet: 6 | 7 | 1. Download the videos via the official [scripts](https://github.com/activitynet/ActivityNet/tree/master/Crawler/Kinetics). 8 | 9 | 2. After all the videos have been downloaded, resize them so that the short edge is 256 pixels, then prepare the csv files for the training, validation, and testing sets as `train.csv`, `val.csv`, `test.csv`. The format of the csv file is: 10 | 11 | ``` 12 | path_to_video_1 label_1 13 | path_to_video_2 label_2 14 | path_to_video_3 label_3 15 | ... 16 | path_to_video_N label_N 17 | ``` 18 | 19 | All the Kinetics models in the Model Zoo are trained and tested with the same data as [Non-local Network](https://github.com/facebookresearch/video-nonlocal-net/blob/master/DATASET.md). For dataset-specific issues, please reach out to the [dataset provider](https://deepmind.com/research/open-source/kinetics). 20 | 21 | ## AVA 22 | 23 | The AVA Dataset can be downloaded from the [official site](https://research.google.com/ava/download.html#ava_actions_download). 24 | 25 | We follow the same [downloading and preprocessing procedure](https://github.com/facebookresearch/video-long-term-feature-banks/blob/master/DATASET.md) as [Long-Term Feature Banks for Detailed Video Understanding](https://arxiv.org/abs/1812.05038). 26 | 27 | You can follow these steps to download and preprocess the data: 28 | 29 | 1. Download videos 30 | 31 | ``` 32 | DATA_DIR="../../data/ava/videos" 33 | 34 | if [[ !
-d "${DATA_DIR}" ]]; then 35 | echo "${DATA_DIR} doesn't exist. Creating it."; 36 | mkdir -p ${DATA_DIR} 37 | fi 38 | 39 | wget https://s3.amazonaws.com/ava-dataset/annotations/ava_file_names_trainval_v2.1.txt 40 | 41 | for line in $(cat ava_file_names_trainval_v2.1.txt) 42 | do 43 | wget https://s3.amazonaws.com/ava-dataset/trainval/$line -P ${DATA_DIR} 44 | done 45 | ``` 46 | 47 | 2. Cut each video from its 15th to 30th minute 48 | 49 | ``` 50 | IN_DATA_DIR="../../data/ava/videos" 51 | OUT_DATA_DIR="../../data/ava/videos_15min" 52 | 53 | if [[ ! -d "${OUT_DATA_DIR}" ]]; then 54 | echo "${OUT_DATA_DIR} doesn't exist. Creating it."; 55 | mkdir -p ${OUT_DATA_DIR} 56 | fi 57 | 58 | for video in $(ls -A1 -U ${IN_DATA_DIR}/*) 59 | do 60 | out_name="${OUT_DATA_DIR}/${video##*/}" 61 | if [ ! -f "${out_name}" ]; then 62 | ffmpeg -ss 900 -t 901 -i "${video}" "${out_name}" 63 | fi 64 | done 65 | ``` 66 | 67 | 3. Extract frames 68 | 69 | ``` 70 | IN_DATA_DIR="../../data/ava/videos_15min" 71 | OUT_DATA_DIR="../../data/ava/frames" 72 | 73 | if [[ ! -d "${OUT_DATA_DIR}" ]]; then 74 | echo "${OUT_DATA_DIR} doesn't exist. Creating it."; 75 | mkdir -p ${OUT_DATA_DIR} 76 | fi 77 | 78 | for video in $(ls -A1 -U ${IN_DATA_DIR}/*) 79 | do 80 | video_name=${video##*/} 81 | 82 | if [[ $video_name = *".webm" ]]; then 83 | video_name=${video_name::-5} 84 | else 85 | video_name=${video_name::-4} 86 | fi 87 | 88 | out_video_dir=${OUT_DATA_DIR}/${video_name}/ 89 | mkdir -p "${out_video_dir}" 90 | 91 | out_name="${out_video_dir}/${video_name}_%06d.jpg" 92 | 93 | ffmpeg -i "${video}" -r 30 -q:v 1 "${out_name}" 94 | done 95 | ``` 96 | 97 | 4. Download annotations 98 | 99 | ``` 100 | DATA_DIR="../../data/ava/annotations" 101 | 102 | if [[ ! -d "${DATA_DIR}" ]]; then 103 | echo "${DATA_DIR} doesn't exist. Creating it."; 104 | mkdir -p ${DATA_DIR} 105 | fi 106 | 107 | wget https://research.google.com/ava/download/ava_train_v2.1.csv -P ${DATA_DIR} 108 | wget https://research.google.com/ava/download/ava_val_v2.1.csv -P ${DATA_DIR} 109 | wget https://research.google.com/ava/download/ava_action_list_v2.1_for_activitynet_2018.pbtxt -P ${DATA_DIR} 110 | wget https://research.google.com/ava/download/ava_train_excluded_timestamps_v2.1.csv -P ${DATA_DIR} 111 | wget https://research.google.com/ava/download/ava_val_excluded_timestamps_v2.1.csv -P ${DATA_DIR} 112 | ``` 113 | 114 | 5. Download "frame lists" ([train](https://dl.fbaipublicfiles.com/video-long-term-feature-banks/data/ava/frame_lists/train.csv), [val](https://dl.fbaipublicfiles.com/video-long-term-feature-banks/data/ava/frame_lists/val.csv)) and put them in 115 | the `frame_lists` folder (see structure above). 116 | 117 | 6. Download person boxes ([train](https://dl.fbaipublicfiles.com/video-long-term-feature-banks/data/ava/annotations/ava_train_predicted_boxes.csv), [val](https://dl.fbaipublicfiles.com/video-long-term-feature-banks/data/ava/annotations/ava_val_predicted_boxes.csv), [test](https://dl.fbaipublicfiles.com/video-long-term-feature-banks/data/ava/annotations/ava_test_predicted_boxes.csv)) and put them in the `annotations` folder (see structure above). 118 | If you prefer to use your own person detector, please see details 119 | in [here](https://github.com/facebookresearch/video-long-term-feature-banks/blob/master/GETTING_STARTED.md#ava-person-detector). 
120 | 121 | 122 | After these steps, the ava data directory should have the following structure: 123 | 124 | ``` 125 | ava 126 | |_ frames 127 | | |_ [video name 0] 128 | | | |_ [video name 0]_000001.jpg 129 | | | |_ [video name 0]_000002.jpg 130 | | | |_ ... 131 | | |_ [video name 1] 132 | | |_ [video name 1]_000001.jpg 133 | | |_ [video name 1]_000002.jpg 134 | | |_ ... 135 | |_ frame_lists 136 | | |_ train.csv 137 | | |_ val.csv 138 | |_ annotations 139 | |_ [official AVA annotation files] 140 | |_ ava_train_predicted_boxes.csv 141 | |_ ava_val_predicted_boxes.csv 142 | ``` 143 | 144 | You can also replace `v2.1` with `v2.2` if you need the AVA v2.2 annotations. You can also download some pre-prepared annotations from [here](https://dl.fbaipublicfiles.com/pyslowfast/annotation/ava/ava_annotations.tar). 145 | 146 | 147 | ## Charades 148 | 1. Please download the Charades RGB frames from the [dataset provider](http://ai2-website.s3.amazonaws.com/data/Charades_v1_rgb.tar). 149 | 150 | 2. Download the *frame list* from the following links: ([train](https://dl.fbaipublicfiles.com/pyslowfast/dataset/charades/frame_lists/train.csv), [val](https://dl.fbaipublicfiles.com/pyslowfast/dataset/charades/frame_lists/val.csv)). 151 | 152 | Please set `DATA.PATH_TO_DATA_DIR` to point to the folder containing the frame lists, and `DATA.PATH_PREFIX` to the folder containing the RGB frames. 153 | 154 | 155 | ## Something-Something V2 156 | 1. Please download the dataset and annotations from the [dataset provider](https://20bn.com/datasets/something-something). 157 | 158 | 2. Download the *frame list* from the following links: ([train](https://dl.fbaipublicfiles.com/pyslowfast/dataset/ssv2/frame_lists/train.csv), [val](https://dl.fbaipublicfiles.com/pyslowfast/dataset/ssv2/frame_lists/val.csv)). 159 | 160 | 3. Extract the frames at 30 FPS. (We used ffmpeg-4.1.3 with the command 161 | `ffmpeg -i "${video}" -r 30 -q:v 1 "${out_name}"` 162 | in our experiments.) Please put the frames in a structure consistent with the frame lists. 163 | 164 | 165 | Please put all annotation json files and the frame lists in the same folder, and set `DATA.PATH_TO_DATA_DIR` to that path. Set `DATA.PATH_PREFIX` to the path of the folder containing the extracted frames. 166 | -------------------------------------------------------------------------------- /slowfast/datasets/__init__.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 3 | 4 | from .ava_dataset import Ava # noqa 5 | from .build import DATASET_REGISTRY, build_dataset # noqa 6 | from .charades import Charades # noqa 7 | from .kinetics import Kinetics # noqa 8 | from .ssv2 import Ssv2 # noqa 9 | -------------------------------------------------------------------------------- /slowfast/datasets/build.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 3 | 4 | from fvcore.common.registry import Registry 5 | 6 | DATASET_REGISTRY = Registry("DATASET") 7 | DATASET_REGISTRY.__doc__ = """ 8 | Registry for dataset. 9 | 10 | The registered object will be called with `obj(cfg, split)`. 11 | The call should return a `torch.utils.data.Dataset` object. 12 | """ 13 | 14 | 15 | def build_dataset(dataset_name, cfg, split): 16 | """ 17 | Build a dataset, defined by `dataset_name`. 18 | Args: 19 | dataset_name (str): the name of the dataset to be constructed.
20 | cfg (CfgNode): configs. Details can be found in 21 | slowfast/config/defaults.py 22 | split (str): the split of the data loader. Options include `train`, 23 | `val`, and `test`. 24 | Returns: 25 | Dataset: a constructed dataset specified by dataset_name. 26 | """ 27 | # Capitalize the first letter of dataset_name, since the dataset_name 28 | # in configs may be in lowercase but the name of a dataset class should always 29 | # start with an uppercase letter. 30 | name = dataset_name.capitalize() 31 | return DATASET_REGISTRY.get(name)(cfg, split) 32 | -------------------------------------------------------------------------------- /slowfast/datasets/loader.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 3 | 4 | """Data loader.""" 5 | 6 | import itertools 7 | import numpy as np 8 | import torch 9 | from torch.utils.data._utils.collate import default_collate 10 | from torch.utils.data.distributed import DistributedSampler 11 | from torch.utils.data.sampler import RandomSampler 12 | 13 | from slowfast.datasets.multigrid_helper import ShortCycleBatchSampler 14 | 15 | from .build import build_dataset 16 | 17 | 18 | def detection_collate(batch): 19 | """ 20 | Collate function for the detection task. Concatenate bboxes, labels and 21 | metadata from different samples in the first dimension instead of 22 | stacking them to have a batch-size dimension. 23 | Args: 24 | batch (tuple or list): data batch to collate. 25 | Returns: 26 | (tuple): collated detection data batch. 27 | """ 28 | inputs, labels, video_idx, extra_data = zip(*batch) 29 | inputs, video_idx = default_collate(inputs), default_collate(video_idx) 30 | labels = torch.tensor(np.concatenate(labels, axis=0)).float() 31 | 32 | collated_extra_data = {} 33 | for key in extra_data[0].keys(): 34 | data = [d[key] for d in extra_data] 35 | if key == "boxes" or key == "ori_boxes": 36 | # Append idx info to the bboxes before concatenating them. 37 | bboxes = [ 38 | np.concatenate( 39 | [np.full((data[i].shape[0], 1), float(i)), data[i]], axis=1 40 | ) 41 | for i in range(len(data)) 42 | ] 43 | bboxes = np.concatenate(bboxes, axis=0) 44 | collated_extra_data[key] = torch.tensor(bboxes).float() 45 | elif key == "metadata": 46 | collated_extra_data[key] = torch.tensor( 47 | list(itertools.chain(*data)) 48 | ).view(-1, 2) 49 | else: 50 | collated_extra_data[key] = default_collate(data) 51 | 52 | return inputs, labels, video_idx, collated_extra_data 53 | 54 | 55 | def shuffle_misaligned_audio(epoch, inputs, cfg): 56 | """ 57 | Shuffle the misaligned (negative) input audio clips 58 | so that positive/negative pairs are created 59 | from different videos. 60 | 61 | Args: 62 | epoch (int): the current epoch number. 63 | inputs (list of tensors): inputs to model, 64 | inputs[2] corresponds to audio inputs. 65 | cfg (CfgNode): configs. Details can be found in 66 | slowfast/config/defaults.py 67 | """ 68 | 69 | if len(inputs) > 2 and cfg.DATA.GET_MISALIGNED_AUDIO: 70 | N = inputs[2].size(0) 71 | # Leave only "hard negatives" after 72 | # cfg.DATA.MIX_NEG_EPOCH epochs 73 | SN = max(int(cfg.DATA.EASY_NEG_RATIO * N), 1) if \ 74 | epoch >= cfg.DATA.MIX_NEG_EPOCH else N 75 | with torch.no_grad(): 76 | idx = torch.arange(N) 77 | idx[:SN] = torch.arange(1, SN+1) % SN 78 | inputs[2][:, 1, ...] = inputs[2][idx, 1, ...]
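# (Illustrative note, added) With N = 4 and SN = 4, idx = [1, 2, 3, 0]: the
# misaligned audio (index 1 along dim 1) of clip i is replaced by that of
# clip (i + 1) % 4, so every negative pair now mixes two different videos.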
79 | return inputs 80 | 81 | 82 | def construct_loader(cfg, split, is_precise_bn=False): 83 | """ 84 | Constructs the data loader for the given dataset. 85 | Args: 86 | cfg (CfgNode): configs. Details can be found in 87 | slowfast/config/defaults.py 88 | split (str): the split of the data loader. Options include `train`, 89 | `val`, and `test`. 90 | """ 91 | assert split in ["train", "val", "test"] 92 | if split in ["train"]: 93 | dataset_name = cfg.TRAIN.DATASET 94 | batch_size = int(cfg.TRAIN.BATCH_SIZE / max(1, cfg.NUM_GPUS)) 95 | shuffle = True 96 | drop_last = True 97 | elif split in ["val"]: 98 | dataset_name = cfg.TRAIN.DATASET 99 | batch_size = int(cfg.TRAIN.BATCH_SIZE / max(1, cfg.NUM_GPUS)) 100 | shuffle = False 101 | drop_last = False 102 | elif split in ["test"]: 103 | dataset_name = cfg.TEST.DATASET 104 | batch_size = int(cfg.TEST.BATCH_SIZE / max(1, cfg.NUM_GPUS)) 105 | shuffle = False 106 | drop_last = False 107 | 108 | # Construct the dataset 109 | dataset = build_dataset(dataset_name, cfg, split) 110 | 111 | if cfg.MULTIGRID.SHORT_CYCLE and split in ["train"] and not is_precise_bn: 112 | # Create a sampler for multi-process training 113 | sampler = ( 114 | DistributedSampler(dataset) 115 | if cfg.NUM_GPUS > 1 116 | else RandomSampler(dataset) 117 | ) 118 | batch_sampler = ShortCycleBatchSampler( 119 | sampler, batch_size=batch_size, drop_last=drop_last, cfg=cfg 120 | ) 121 | # Create a loader 122 | loader = torch.utils.data.DataLoader( 123 | dataset, 124 | batch_sampler=batch_sampler, 125 | num_workers=cfg.DATA_LOADER.NUM_WORKERS, 126 | pin_memory=cfg.DATA_LOADER.PIN_MEMORY, 127 | ) 128 | else: 129 | # Create a sampler for multi-process training 130 | sampler = DistributedSampler(dataset) if cfg.NUM_GPUS > 1 else None 131 | # Create a loader 132 | loader = torch.utils.data.DataLoader( 133 | dataset, 134 | batch_size=batch_size, 135 | shuffle=(False if sampler else shuffle), 136 | sampler=sampler, 137 | num_workers=cfg.DATA_LOADER.NUM_WORKERS, 138 | pin_memory=cfg.DATA_LOADER.PIN_MEMORY, 139 | drop_last=drop_last, 140 | collate_fn=detection_collate if cfg.DETECTION.ENABLE else None, 141 | ) 142 | return loader 143 | 144 | 145 | def shuffle_dataset(loader, cur_epoch): 146 | """ 147 | Shuffles the data. 148 | Args: 149 | loader (loader): the data loader to shuffle. 150 | cur_epoch (int): number of the current epoch. 151 | """ 152 | sampler = ( 153 | loader.batch_sampler.sampler 154 | if isinstance(loader.batch_sampler, ShortCycleBatchSampler) 155 | else loader.sampler 156 | ) 157 | assert isinstance( 158 | sampler, (RandomSampler, DistributedSampler) 159 | ), "Sampler type '{}' not supported".format(type(sampler)) 160 | # RandomSampler handles shuffling automatically 161 | if isinstance(sampler, DistributedSampler): 162 | # DistributedSampler shuffles data based on epoch 163 | sampler.set_epoch(cur_epoch) 164 | -------------------------------------------------------------------------------- /slowfast/datasets/multigrid_helper.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 3 | 4 | """Helper functions for multigrid training.""" 5 | 6 | import numpy as np 7 | from torch._six import int_classes as _int_classes 8 | from torch.utils.data.sampler import Sampler 9 | 10 | 11 | class ShortCycleBatchSampler(Sampler): 12 | """ 13 | Extend Sampler to support "short cycle" sampling.
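Consecutive batches rotate through three spatial shapes, and the batch size
for each shape is rescaled in __init__ below so that the total number of
pixels per batch stays roughly constant. For example, with short-cycle
factors of 1/2 and 1/sqrt(2) (the paper's defaults), batches cycle through
roughly 4x, 2x, and 1x the base batch size.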
14 | See paper "A Multigrid Method for Efficiently Training Video Models", 15 | Wu et al., 2019 (https://arxiv.org/abs/1912.00998) for details. 16 | """ 17 | 18 | def __init__(self, sampler, batch_size, drop_last, cfg): 19 | if not isinstance(sampler, Sampler): 20 | raise ValueError( 21 | "sampler should be an instance of " 22 | "torch.utils.data.Sampler, but got sampler={}".format(sampler) 23 | ) 24 | if ( 25 | not isinstance(batch_size, _int_classes) 26 | or isinstance(batch_size, bool) 27 | or batch_size <= 0 28 | ): 29 | raise ValueError( 30 | "batch_size should be a positive integer value, " 31 | "but got batch_size={}".format(batch_size) 32 | ) 33 | if not isinstance(drop_last, bool): 34 | raise ValueError( 35 | "drop_last should be a boolean value, but got " 36 | "drop_last={}".format(drop_last) 37 | ) 38 | self.sampler = sampler 39 | self.drop_last = drop_last 40 | 41 | bs_factor = [ 42 | int( 43 | round( 44 | ( 45 | float(cfg.DATA.TRAIN_CROP_SIZE) 46 | / (s * cfg.MULTIGRID.DEFAULT_S) 47 | ) 48 | ** 2 49 | ) 50 | ) 51 | for s in cfg.MULTIGRID.SHORT_CYCLE_FACTORS 52 | ] 53 | 54 | self.batch_sizes = [ 55 | batch_size * bs_factor[0], 56 | batch_size * bs_factor[1], 57 | batch_size, 58 | ] 59 | 60 | def __iter__(self): 61 | counter = 0 62 | batch_size = self.batch_sizes[0] 63 | batch = [] 64 | for idx in self.sampler: 65 | batch.append((idx, counter % 3)) 66 | if len(batch) == batch_size: 67 | yield batch 68 | counter += 1 69 | batch_size = self.batch_sizes[counter % 3] 70 | batch = [] 71 | if len(batch) > 0 and not self.drop_last: 72 | yield batch 73 | 74 | def __len__(self): 75 | avg_batch_size = sum(self.batch_sizes) / 3.0 76 | if self.drop_last: 77 | return int(np.floor(len(self.sampler) / avg_batch_size)) 78 | else: 79 | return int(np.ceil(len(self.sampler) / avg_batch_size)) 80 | -------------------------------------------------------------------------------- /slowfast/datasets/video_container.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 3 | 4 | import av 5 | 6 | 7 | def get_video_container(path_to_vid, multi_thread_decode=False, backend="pyav"): 8 | """ 9 | Given the path to the video, return the pyav video container. 10 | Args: 11 | path_to_vid (str): path to the video. 12 | multi_thread_decode (bool): if True, perform multi-thread decoding. 13 | backend (str): decoder backend, options include `pyav` and 14 | `torchvision`, default is `pyav`. 15 | Returns: 16 | container (container): video container. 17 | """ 18 | if backend == "torchvision": 19 | with open(path_to_vid, "rb") as fp: 20 | container = fp.read() 21 | return container 22 | elif backend == "pyav": 23 | container = av.open(path_to_vid) 24 | if multi_thread_decode: 25 | # Enable multiple threads for decoding. 26 | container.streams.video[0].thread_type = "AUTO" 27 | return container 28 | else: 29 | raise NotImplementedError("Unknown backend {}".format(backend)) 30 | -------------------------------------------------------------------------------- /slowfast/models/__init__.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 
3 | 4 | from .build import MODEL_REGISTRY, build_model # noqa 5 | from .custom_video_model_builder import * # noqa 6 | from .video_model_builder import ResNet, SlowFast # noqa 7 | -------------------------------------------------------------------------------- /slowfast/models/build.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 3 | 4 | """Model construction functions.""" 5 | 6 | import torch 7 | from fvcore.common.registry import Registry 8 | 9 | MODEL_REGISTRY = Registry("MODEL") 10 | MODEL_REGISTRY.__doc__ = """ 11 | Registry for video model. 12 | 13 | The registered object will be called with `obj(cfg)`. 14 | The call should return a `torch.nn.Module` object. 15 | """ 16 | 17 | 18 | def build_model(cfg, gpu_id=None):  19 | """ 20 | Builds the video model. 21 | Args: 22 | cfg (configs): configs that contain the hyper-parameters to build the 23 | backbone. Details can be seen in slowfast/config/defaults.py. 24 | gpu_id (Optional[int]): specify the gpu index to build the model on. 25 | """ 26 | if torch.cuda.is_available(): 27 | assert ( 28 | cfg.NUM_GPUS <= torch.cuda.device_count() 29 | ), "Cannot use more GPU devices than available" 30 | else: 31 | assert ( 32 | cfg.NUM_GPUS == 0 33 | ), "CUDA is not available. Please set `NUM_GPUS: 0` to run on CPUs." 34 | 35 | # Construct the model 36 | name = cfg.MODEL.MODEL_NAME 37 | model = MODEL_REGISTRY.get(name)(cfg) 38 | 39 | if cfg.NUM_GPUS: 40 | if gpu_id is None: 41 | # Determine the GPU used by the current process 42 | cur_device = torch.cuda.current_device() 43 | else: 44 | cur_device = gpu_id 45 | # Transfer the model to the current GPU device 46 | model = model.cuda(device=cur_device) 47 | # Use multi-process data parallel model in the multi-gpu setting 48 | if cfg.NUM_GPUS > 1: 49 | # Make model replica operate on the current device 50 | model = torch.nn.parallel.DistributedDataParallel( 51 | module=model, device_ids=[cur_device], output_device=cur_device 52 | ) 53 | return model 54 | -------------------------------------------------------------------------------- /slowfast/models/custom_video_model_builder.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 3 | 4 | 5 | """A more flexible video model builder.""" 6 | -------------------------------------------------------------------------------- /slowfast/models/losses.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 3 | 4 | """Loss functions.""" 5 | 6 | import torch.nn as nn 7 | 8 | _LOSSES = { 9 | "cross_entropy": nn.CrossEntropyLoss, 10 | "bce": nn.BCELoss, 11 | "bce_logit": nn.BCEWithLogitsLoss, 12 | } 13 | 14 | 15 | def get_loss_func(loss_name): 16 | """ 17 | Retrieve the loss given the loss name. 18 | Args: 19 | loss_name (str): the name of the loss to use. 20 | """ 21 | if loss_name not in _LOSSES.keys(): 22 | raise NotImplementedError("Loss {} is not supported".format(loss_name)) 23 | return _LOSSES[loss_name] 24 | -------------------------------------------------------------------------------- /slowfast/models/nonlocal_helper.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # Copyright (c) Facebook, Inc.
and its affiliates. All Rights Reserved. 3 | 4 | """Non-local helper""" 5 | 6 | import torch 7 | import torch.nn as nn 8 | 9 | 10 | class Nonlocal(nn.Module): 11 | """ 12 | Builds Non-local Neural Networks as a generic family of building 13 | blocks for capturing long-range dependencies. Non-local Network 14 | computes the response at a position as a weighted sum of the 15 | features at all positions. This building block can be plugged into 16 | many computer vision architectures. 17 | More details in the paper: https://arxiv.org/pdf/1711.07971.pdf 18 | """ 19 | 20 | def __init__( 21 | self, 22 | dim, 23 | dim_inner, 24 | pool_size=None, 25 | instantiation="softmax", 26 | zero_init_final_conv=False, 27 | zero_init_final_norm=True, 28 | norm_eps=1e-5, 29 | norm_momentum=0.1, 30 | norm_module=nn.BatchNorm3d, 31 | ): 32 | """ 33 | Args: 34 | dim (int): number of input channels. 35 | dim_inner (int): number of channels inside of the Non-local block. 36 | pool_size (list): the kernel sizes of the spatial-temporal pooling, 37 | given as [temporal pool kernel size, spatial pool kernel size, 38 | spatial pool kernel size], in order. By default pool_size is 39 | None, in which case no pooling is used. 40 | instantiation (string): supports two different instantiation methods: 41 | "dot_product": normalizing correlation matrix with L2. 42 | "softmax": normalizing correlation matrix with Softmax. 43 | zero_init_final_conv (bool): If true, zero-initialize the final 44 | convolution of the Non-local block. 45 | zero_init_final_norm (bool): 46 | If true, zero-initialize the final batch norm of the Non-local 47 | block. 48 | norm_module (nn.Module): nn.Module for the normalization layer. The 49 | default is nn.BatchNorm3d. 50 | """ 51 | super(Nonlocal, self).__init__() 52 | self.dim = dim 53 | self.dim_inner = dim_inner 54 | self.pool_size = pool_size 55 | self.instantiation = instantiation 56 | self.use_pool = ( 57 | False 58 | if pool_size is None 59 | else any((size > 1 for size in pool_size)) 60 | ) 61 | self.norm_eps = norm_eps 62 | self.norm_momentum = norm_momentum 63 | self._construct_nonlocal( 64 | zero_init_final_conv, zero_init_final_norm, norm_module 65 | ) 66 | 67 | def _construct_nonlocal( 68 | self, zero_init_final_conv, zero_init_final_norm, norm_module 69 | ): 70 | # Three convolution heads: theta, phi, and g. 71 | self.conv_theta = nn.Conv3d( 72 | self.dim, self.dim_inner, kernel_size=1, stride=1, padding=0 73 | ) 74 | self.conv_phi = nn.Conv3d( 75 | self.dim, self.dim_inner, kernel_size=1, stride=1, padding=0 76 | ) 77 | self.conv_g = nn.Conv3d( 78 | self.dim, self.dim_inner, kernel_size=1, stride=1, padding=0 79 | ) 80 | 81 | # Final convolution output. 82 | self.conv_out = nn.Conv3d( 83 | self.dim_inner, self.dim, kernel_size=1, stride=1, padding=0 84 | ) 85 | # Zero initializing the final convolution output. 86 | self.conv_out.zero_init = zero_init_final_conv 87 | 88 | # TODO: change the name to `norm` 89 | self.bn = norm_module( 90 | num_features=self.dim, 91 | eps=self.norm_eps, 92 | momentum=self.norm_momentum, 93 | ) 94 | # Zero initializing the final bn. 95 | self.bn.transform_final_bn = zero_init_final_norm 96 | 97 | # Optionally add spatial-temporal pooling.
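# (Added note) The pooling below is applied to the input of phi and g in
# forward(), so the affinity matrix shrinks from (TxHxW) x (TxHxW) to
# (TxHxW) x (pooled TxHxW), reducing computation.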
98 | if self.use_pool: 99 | self.pool = nn.MaxPool3d( 100 | kernel_size=self.pool_size, 101 | stride=self.pool_size, 102 | padding=[0, 0, 0], 103 | ) 104 | 105 | def forward(self, x): 106 | x_identity = x 107 | N, C, T, H, W = x.size() 108 | 109 | theta = self.conv_theta(x) 110 | 111 | # Perform temporal-spatial pooling to reduce the computation. 112 | if self.use_pool: 113 | x = self.pool(x) 114 | 115 | phi = self.conv_phi(x) 116 | g = self.conv_g(x) 117 | 118 | theta = theta.view(N, self.dim_inner, -1) 119 | phi = phi.view(N, self.dim_inner, -1) 120 | g = g.view(N, self.dim_inner, -1) 121 | 122 | # (N, C, TxHxW) * (N, C, TxHxW) => (N, TxHxW, TxHxW). 123 | theta_phi = torch.einsum("nct,ncp->ntp", (theta, phi)) 124 | # In the original Non-local paper, there are two main ways to normalize 125 | # the affinity tensor: 126 | # 1) Softmax normalization (norm on exp). 127 | # 2) dot_product normalization. 128 | if self.instantiation == "softmax": 129 | # Normalizing the affinity tensor theta_phi before softmax. 130 | theta_phi = theta_phi * (self.dim_inner ** -0.5) 131 | theta_phi = nn.functional.softmax(theta_phi, dim=2) 132 | elif self.instantiation == "dot_product": 133 | spatial_temporal_dim = theta_phi.shape[2] 134 | theta_phi = theta_phi / spatial_temporal_dim 135 | else: 136 | raise NotImplementedError( 137 | "Unknown norm type {}".format(self.instantiation) 138 | ) 139 | 140 | # (N, TxHxW, TxHxW) * (N, C, TxHxW) => (N, C, TxHxW). 141 | theta_phi_g = torch.einsum("ntg,ncg->nct", (theta_phi, g)) 142 | 143 | # (N, C, TxHxW) => (N, C, T, H, W). 144 | theta_phi_g = theta_phi_g.view(N, self.dim_inner, T, H, W) 145 | 146 | p = self.conv_out(theta_phi_g) 147 | p = self.bn(p) 148 | return x_identity + p 149 | -------------------------------------------------------------------------------- /slowfast/models/optimizer.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 3 | 4 | """Optimizer.""" 5 | 6 | import torch 7 | 8 | import slowfast.utils.lr_policy as lr_policy 9 | 10 | 11 | def construct_optimizer(model, cfg): 12 | """ 13 | Construct an SGD or Adam optimizer. Details can be found in: 14 | Herbert Robbins and Sutton Monro, "A stochastic approximation method." 15 | and 16 | Diederik P. Kingma and Jimmy Ba, 17 | "Adam: A Method for Stochastic Optimization." 18 | 19 | Args: 20 | model (model): the model to perform stochastic gradient descent 21 | or Adam optimization on. 22 | cfg (config): configs of hyper-parameters of SGD or Adam, including the 23 | base learning rate, momentum, weight decay, dampening, etc. 24 | """ 25 | # Batchnorm parameters. 26 | bn_params = [] 27 | # Non-batchnorm parameters. 28 | non_bn_parameters = [] 29 | for name, p in model.named_parameters(): 30 | if "bn" in name: 31 | bn_params.append(p) 32 | else: 33 | non_bn_parameters.append(p) 34 | # Apply different weight decay to Batchnorm and non-batchnorm parameters. 35 | # In Caffe2 classification codebase the weight decay for batchnorm is 0.0. 36 | # Having a different weight decay on batchnorm might cause a performance 37 | # drop. 38 | optim_params = [ 39 | {"params": bn_params, "weight_decay": cfg.BN.WEIGHT_DECAY}, 40 | {"params": non_bn_parameters, "weight_decay": cfg.SOLVER.WEIGHT_DECAY}, 41 | ] 42 | # Check that all parameters will be passed into the optimizer.
44 | assert len(list(model.parameters())) == len(non_bn_parameters) + len( 45 | bn_params 46 | ), "parameter size does not match: {} + {} != {}".format( 47 | len(non_bn_parameters), len(bn_params), len(list(model.parameters())) 48 | ) 49 | 50 | if cfg.SOLVER.OPTIMIZING_METHOD == "sgd": 51 | return torch.optim.SGD( 52 | optim_params, 53 | lr=cfg.SOLVER.BASE_LR, 54 | momentum=cfg.SOLVER.MOMENTUM, 55 | weight_decay=cfg.SOLVER.WEIGHT_DECAY, 56 | dampening=cfg.SOLVER.DAMPENING, 57 | nesterov=cfg.SOLVER.NESTEROV, 58 | ) 59 | elif cfg.SOLVER.OPTIMIZING_METHOD == "adam": 60 | return torch.optim.Adam( 61 | optim_params, 62 | lr=cfg.SOLVER.BASE_LR, 63 | betas=(0.9, 0.999), 64 | weight_decay=cfg.SOLVER.WEIGHT_DECAY, 65 | ) 66 | else: 67 | raise NotImplementedError( 68 | "Does not support {} optimizer".format(cfg.SOLVER.OPTIMIZING_METHOD) 69 | ) 70 | 71 | 72 | def get_epoch_lr(cur_epoch, cfg): 73 | """ 74 | Retrieves the lr for the given epoch (as specified by the lr policy). 75 | Args: 76 | cfg (config): configs including the hyper-parameters of the lr policy 77 | (see slowfast/utils/lr_policy.py). 78 | cur_epoch (float): the epoch number in the current training stage. 79 | """ 80 | return lr_policy.get_lr_at_epoch(cfg, cur_epoch) 81 | 82 | 83 | def set_lr(optimizer, new_lr): 84 | """ 85 | Sets the optimizer lr to the specified value. 86 | Args: 87 | optimizer (optim): the optimizer used to optimize the current network. 88 | new_lr (float): the new learning rate to set. 89 | """ 90 | for param_group in optimizer.param_groups: 91 | param_group["lr"] = new_lr 92 | -------------------------------------------------------------------------------- /slowfast/utils/__init__.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 3 | -------------------------------------------------------------------------------- /slowfast/utils/ava_evaluation/README.md: -------------------------------------------------------------------------------- 1 | The code under this folder is from the official [ActivityNet repo](https://github.com/activitynet/ActivityNet).
2 | -------------------------------------------------------------------------------- /slowfast/utils/ava_evaluation/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fanyix/SlowFast/629fd1bf00e2d3b320b6e46c652331819fe9d4e7/slowfast/utils/ava_evaluation/__init__.py -------------------------------------------------------------------------------- /slowfast/utils/ava_evaluation/ava_action_list_v2.1_for_activitynet_2018.pbtxt.txt: -------------------------------------------------------------------------------- 1 | item { 2 | name: "bend/bow (at the waist)" 3 | id: 1 4 | } 5 | item { 6 | name: "crouch/kneel" 7 | id: 3 8 | } 9 | item { 10 | name: "dance" 11 | id: 4 12 | } 13 | item { 14 | name: "fall down" 15 | id: 5 16 | } 17 | item { 18 | name: "get up" 19 | id: 6 20 | } 21 | item { 22 | name: "jump/leap" 23 | id: 7 24 | } 25 | item { 26 | name: "lie/sleep" 27 | id: 8 28 | } 29 | item { 30 | name: "martial art" 31 | id: 9 32 | } 33 | item { 34 | name: "run/jog" 35 | id: 10 36 | } 37 | item { 38 | name: "sit" 39 | id: 11 40 | } 41 | item { 42 | name: "stand" 43 | id: 12 44 | } 45 | item { 46 | name: "swim" 47 | id: 13 48 | } 49 | item { 50 | name: "walk" 51 | id: 14 52 | } 53 | item { 54 | name: "answer phone" 55 | id: 15 56 | } 57 | item { 58 | name: "carry/hold (an object)" 59 | id: 17 60 | } 61 | item { 62 | name: "climb (e.g., a mountain)" 63 | id: 20 64 | } 65 | item { 66 | name: "close (e.g., a door, a box)" 67 | id: 22 68 | } 69 | item { 70 | name: "cut" 71 | id: 24 72 | } 73 | item { 74 | name: "dress/put on clothing" 75 | id: 26 76 | } 77 | item { 78 | name: "drink" 79 | id: 27 80 | } 81 | item { 82 | name: "drive (e.g., a car, a truck)" 83 | id: 28 84 | } 85 | item { 86 | name: "eat" 87 | id: 29 88 | } 89 | item { 90 | name: "enter" 91 | id: 30 92 | } 93 | item { 94 | name: "hit (an object)" 95 | id: 34 96 | } 97 | item { 98 | name: "lift/pick up" 99 | id: 36 100 | } 101 | item { 102 | name: "listen (e.g., to music)" 103 | id: 37 104 | } 105 | item { 106 | name: "open (e.g., a window, a car door)" 107 | id: 38 108 | } 109 | item { 110 | name: "play musical instrument" 111 | id: 41 112 | } 113 | item { 114 | name: "point to (an object)" 115 | id: 43 116 | } 117 | item { 118 | name: "pull (an object)" 119 | id: 45 120 | } 121 | item { 122 | name: "push (an object)" 123 | id: 46 124 | } 125 | item { 126 | name: "put down" 127 | id: 47 128 | } 129 | item { 130 | name: "read" 131 | id: 48 132 | } 133 | item { 134 | name: "ride (e.g., a bike, a car, a horse)" 135 | id: 49 136 | } 137 | item { 138 | name: "sail boat" 139 | id: 51 140 | } 141 | item { 142 | name: "shoot" 143 | id: 52 144 | } 145 | item { 146 | name: "smoke" 147 | id: 54 148 | } 149 | item { 150 | name: "take a photo" 151 | id: 56 152 | } 153 | item { 154 | name: "text on/look at a cellphone" 155 | id: 57 156 | } 157 | item { 158 | name: "throw" 159 | id: 58 160 | } 161 | item { 162 | name: "touch (an object)" 163 | id: 59 164 | } 165 | item { 166 | name: "turn (e.g., a screwdriver)" 167 | id: 60 168 | } 169 | item { 170 | name: "watch (e.g., TV)" 171 | id: 61 172 | } 173 | item { 174 | name: "work on a computer" 175 | id: 62 176 | } 177 | item { 178 | name: "write" 179 | id: 63 180 | } 181 | item { 182 | name: "fight/hit (a person)" 183 | id: 64 184 | } 185 | item { 186 | name: "give/serve (an object) to (a person)" 187 | id: 65 188 | } 189 | item { 190 | name: "grab (a person)" 191 | id: 66 192 | } 193 | item { 194 | name: "hand clap" 195 | id: 67 
196 | } 197 | item { 198 | name: "hand shake" 199 | id: 68 200 | } 201 | item { 202 | name: "hand wave" 203 | id: 69 204 | } 205 | item { 206 | name: "hug (a person)" 207 | id: 70 208 | } 209 | item { 210 | name: "kiss (a person)" 211 | id: 72 212 | } 213 | item { 214 | name: "lift (a person)" 215 | id: 73 216 | } 217 | item { 218 | name: "listen to (a person)" 219 | id: 74 220 | } 221 | item { 222 | name: "push (another person)" 223 | id: 76 224 | } 225 | item { 226 | name: "sing to (e.g., self, a person, a group)" 227 | id: 77 228 | } 229 | item { 230 | name: "take (an object) from (a person)" 231 | id: 78 232 | } 233 | item { 234 | name: "talk to (e.g., self, a person, a group)" 235 | id: 79 236 | } 237 | item { 238 | name: "watch (a person)" 239 | id: 80 240 | } 241 | -------------------------------------------------------------------------------- /slowfast/utils/ava_evaluation/label_map_util.py: -------------------------------------------------------------------------------- 1 | # Copyright 2017 The TensorFlow Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | """Label map utility functions.""" 16 | 17 | from __future__ import ( 18 | absolute_import, 19 | division, 20 | print_function, 21 | unicode_literals, 22 | ) 23 | import logging 24 | 25 | # from google.protobuf import text_format 26 | # from google3.third_party.tensorflow_models.object_detection.protos import string_int_label_map_pb2 27 | 28 | 29 | def _validate_label_map(label_map): 30 | """Checks if a label map is valid. 31 | 32 | Args: 33 | label_map: StringIntLabelMap to validate. 34 | 35 | Raises: 36 | ValueError: if label map is invalid. 37 | """ 38 | for item in label_map.item: 39 | if item.id < 1: 40 | raise ValueError("Label map ids should be >= 1.") 41 | 42 | 43 | def create_category_index(categories): 44 | """Creates dictionary of COCO compatible categories keyed by category id. 45 | 46 | Args: 47 | categories: a list of dicts, each of which has the following keys: 48 | 'id': (required) an integer id uniquely identifying this category. 49 | 'name': (required) string representing category name 50 | e.g., 'cat', 'dog', 'pizza'. 51 | 52 | Returns: 53 | category_index: a dict containing the same entries as categories, but keyed 54 | by the 'id' field of each category. 55 | """ 56 | category_index = {} 57 | for cat in categories: 58 | category_index[cat["id"]] = cat 59 | return category_index 60 | 61 | 62 | def get_max_label_map_index(label_map): 63 | """Get maximum index in label map. 64 | 65 | Args: 66 | label_map: a StringIntLabelMapProto 67 | 68 | Returns: 69 | an integer 70 | """ 71 | return max([item.id for item in label_map.item]) 72 | 73 | 74 | def convert_label_map_to_categories( 75 | label_map, max_num_classes, use_display_name=True 76 | ): 77 | """Loads label map proto and returns categories list compatible with eval. 
78 | 79 | This function loads a label map and returns a list of dicts, each of which 80 | has the following keys: 81 | 'id': (required) an integer id uniquely identifying this category. 82 | 'name': (required) string representing category name 83 | e.g., 'cat', 'dog', 'pizza'. 84 | We only allow class into the list if its id-label_id_offset is 85 | between 0 (inclusive) and max_num_classes (exclusive). 86 | If there are several items mapping to the same id in the label map, 87 | we will only keep the first one in the categories list. 88 | 89 | Args: 90 | label_map: a StringIntLabelMapProto or None. If None, a default categories 91 | list is created with max_num_classes categories. 92 | max_num_classes: maximum number of (consecutive) label indices to include. 93 | use_display_name: (boolean) choose whether to load 'display_name' field 94 | as category name. If False or if the display_name field does not exist, 95 | uses 'name' field as category names instead. 96 | Returns: 97 | categories: a list of dictionaries representing all possible categories. 98 | """ 99 | categories = [] 100 | list_of_ids_already_added = [] 101 | if not label_map: 102 | label_id_offset = 1 103 | for class_id in range(max_num_classes): 104 | categories.append( 105 | { 106 | "id": class_id + label_id_offset, 107 | "name": "category_{}".format(class_id + label_id_offset), 108 | } 109 | ) 110 | return categories 111 | for item in label_map.item: 112 | if not 0 < item.id <= max_num_classes: 113 | logging.info( 114 | "Ignore item %d since it falls outside of requested " 115 | "label range.", 116 | item.id, 117 | ) 118 | continue 119 | if use_display_name and item.HasField("display_name"): 120 | name = item.display_name 121 | else: 122 | name = item.name 123 | if item.id not in list_of_ids_already_added: 124 | list_of_ids_already_added.append(item.id) 125 | categories.append({"id": item.id, "name": name}) 126 | return categories 127 | 128 | 129 | def load_labelmap(path): 130 | """Loads label map proto. 131 | 132 | Args: 133 | path: path to StringIntLabelMap proto text file. 134 | Returns: 135 | a StringIntLabelMapProto 136 | """ 137 | with open(path, "r") as fid: 138 | label_map_string = fid.read() 139 | label_map = string_int_label_map_pb2.StringIntLabelMap() 140 | try: 141 | text_format.Merge(label_map_string, label_map) 142 | except text_format.ParseError: 143 | label_map.ParseFromString(label_map_string) 144 | _validate_label_map(label_map) 145 | return label_map 146 | 147 | 148 | def get_label_map_dict(label_map_path, use_display_name=False): 149 | """Reads a label map and returns a dictionary of label names to id. 150 | 151 | Args: 152 | label_map_path: path to label_map. 153 | use_display_name: whether to use the label map items' display names as keys. 154 | 155 | Returns: 156 | A dictionary mapping label names to id. 157 | """ 158 | label_map = load_labelmap(label_map_path) 159 | label_map_dict = {} 160 | for item in label_map.item: 161 | if use_display_name: 162 | label_map_dict[item.display_name] = item.id 163 | else: 164 | label_map_dict[item.name] = item.id 165 | return label_map_dict 166 | 167 | 168 | def create_category_index_from_labelmap(label_map_path): 169 | """Reads a label map and returns a category index. 170 | 171 | Args: 172 | label_map_path: Path to `StringIntLabelMap` proto text file. 173 | 174 | Returns: 175 | A category index, which is a dictionary that maps integer ids to dicts 176 | containing categories, e.g. 
177 | {1: {'id': 1, 'name': 'dog'}, 2: {'id': 2, 'name': 'cat'}, ...} 178 | """ 179 | label_map = load_labelmap(label_map_path) 180 | max_num_classes = max(item.id for item in label_map.item) 181 | categories = convert_label_map_to_categories(label_map, max_num_classes) 182 | return create_category_index(categories) 183 | 184 | 185 | def create_class_agnostic_category_index(): 186 | """Creates a category index with a single `object` class.""" 187 | return {1: {"id": 1, "name": "object"}} 188 | -------------------------------------------------------------------------------- /slowfast/utils/ava_evaluation/metrics.py: -------------------------------------------------------------------------------- 1 | # Copyright 2017 The TensorFlow Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | 16 | """Functions for computing metrics like precision, recall, CorLoc and etc.""" 17 | from __future__ import division 18 | import numpy as np 19 | 20 | 21 | def compute_precision_recall(scores, labels, num_gt): 22 | """Compute precision and recall. 23 | 24 | Args: 25 | scores: A float numpy array representing detection score 26 | labels: A boolean numpy array representing true/false positive labels 27 | num_gt: Number of ground truth instances 28 | 29 | Raises: 30 | ValueError: if the input is not of the correct format 31 | 32 | Returns: 33 | precision: Fraction of positive instances over detected ones. This value is 34 | None if no ground truth labels are present. 35 | recall: Fraction of detected positive instance over all positive instances. 36 | This value is None if no ground truth labels are present. 37 | 38 | """ 39 | if ( 40 | not isinstance(labels, np.ndarray) 41 | or labels.dtype != np.bool 42 | or len(labels.shape) != 1 43 | ): 44 | raise ValueError("labels must be single dimension bool numpy array") 45 | 46 | if not isinstance(scores, np.ndarray) or len(scores.shape) != 1: 47 | raise ValueError("scores must be single dimension numpy array") 48 | 49 | if num_gt < np.sum(labels): 50 | raise ValueError( 51 | "Number of true positives must be smaller than num_gt." 
52 | ) 53 | 54 | if len(scores) != len(labels): 55 | raise ValueError("scores and labels must be of the same size.") 56 | 57 | if num_gt == 0: 58 | return None, None 59 | 60 | sorted_indices = np.argsort(scores) 61 | sorted_indices = sorted_indices[::-1] 62 | labels = labels.astype(int) 63 | true_positive_labels = labels[sorted_indices] 64 | false_positive_labels = 1 - true_positive_labels 65 | cum_true_positives = np.cumsum(true_positive_labels) 66 | cum_false_positives = np.cumsum(false_positive_labels) 67 | precision = cum_true_positives.astype(float) / ( 68 | cum_true_positives + cum_false_positives 69 | ) 70 | recall = cum_true_positives.astype(float) / num_gt 71 | return precision, recall 72 | 73 | 74 | def compute_average_precision(precision, recall): 75 | """Compute Average Precision according to the definition in VOCdevkit. 76 | 77 | Precision is modified to ensure that it does not decrease as recall 78 | decreases. 79 | 80 | Args: 81 | precision: A float [N, 1] numpy array of precisions 82 | recall: A float [N, 1] numpy array of recalls 83 | 84 | Raises: 85 | ValueError: if the input is not of the correct format 86 | 87 | Returns: 88 | average_precision: The area under the precision recall curve. NaN if 89 | precision and recall are None. 90 | 91 | """ 92 | if precision is None: 93 | if recall is not None: 94 | raise ValueError("If precision is None, recall must also be None") 95 | return np.NAN 96 | 97 | if not isinstance(precision, np.ndarray) or not isinstance( 98 | recall, np.ndarray 99 | ): 100 | raise ValueError("precision and recall must be numpy array") 101 | if precision.dtype != np.float or recall.dtype != np.float: 102 | raise ValueError("input must be float numpy array.") 103 | if len(precision) != len(recall): 104 | raise ValueError("precision and recall must be of the same size.") 105 | if not precision.size: 106 | return 0.0 107 | if np.amin(precision) < 0 or np.amax(precision) > 1: 108 | raise ValueError("Precision must be in the range of [0, 1].") 109 | if np.amin(recall) < 0 or np.amax(recall) > 1: 110 | raise ValueError("recall must be in the range of [0, 1].") 111 | if not all(recall[i] <= recall[i + 1] for i in range(len(recall) - 1)): 112 | raise ValueError("recall must be a non-decreasing array") 113 | 114 | recall = np.concatenate([[0], recall, [1]]) 115 | precision = np.concatenate([[0], precision, [0]]) 116 | 117 | # Preprocess precision to be a non-decreasing array 118 | for i in range(len(precision) - 2, -1, -1): 119 | precision[i] = np.maximum(precision[i], precision[i + 1]) 120 | 121 | indices = np.where(recall[1:] != recall[:-1])[0] + 1 122 | average_precision = np.sum( 123 | (recall[indices] - recall[indices - 1]) * precision[indices] 124 | ) 125 | return average_precision 126 | 127 | 128 | def compute_cor_loc( 129 | num_gt_imgs_per_class, num_images_correctly_detected_per_class 130 | ): 131 | """Compute CorLoc according to the definition in the following paper. 132 | 133 | https://www.robots.ox.ac.uk/~vgg/rg/papers/deselaers-eccv10.pdf 134 | 135 | Returns nans if there are no ground truth images for a class.
136 | 137 | Args: 138 | num_gt_imgs_per_class: 1D array, representing number of images containing 139 | at least one object instance of a particular class 140 | num_images_correctly_detected_per_class: 1D array, representing number of 141 | images that are correctly detected at least one object instance of a 142 | particular class 143 | 144 | Returns: 145 | corloc_per_class: A float numpy array represents the corloc score of each 146 | class 147 | """ 148 | # Divide by zero expected for classes with no gt examples. 149 | with np.errstate(divide="ignore", invalid="ignore"): 150 | return np.where( 151 | num_gt_imgs_per_class == 0, 152 | np.nan, 153 | num_images_correctly_detected_per_class / num_gt_imgs_per_class, 154 | ) 155 | -------------------------------------------------------------------------------- /slowfast/utils/ava_evaluation/np_box_list.py: -------------------------------------------------------------------------------- 1 | # Copyright 2017 The TensorFlow Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | 16 | """Numpy BoxList classes and functions.""" 17 | 18 | from __future__ import ( 19 | absolute_import, 20 | division, 21 | print_function, 22 | unicode_literals, 23 | ) 24 | import numpy as np 25 | 26 | 27 | class BoxList(object): 28 | """Box collection. 29 | 30 | BoxList represents a list of bounding boxes as numpy array, where each 31 | bounding box is represented as a row of 4 numbers, 32 | [y_min, x_min, y_max, x_max]. It is assumed that all bounding boxes within a 33 | given list correspond to a single image. 34 | 35 | Optionally, users can add additional related fields (such as 36 | objectness/classification scores). 37 | """ 38 | 39 | def __init__(self, data): 40 | """Constructs box collection. 41 | 42 | Args: 43 | data: a numpy array of shape [N, 4] representing box coordinates 44 | 45 | Raises: 46 | ValueError: if bbox data is not a numpy array 47 | ValueError: if invalid dimensions for bbox data 48 | """ 49 | if not isinstance(data, np.ndarray): 50 | raise ValueError("data must be a numpy array.") 51 | if len(data.shape) != 2 or data.shape[1] != 4: 52 | raise ValueError("Invalid dimensions for box data.") 53 | if data.dtype != np.float32 and data.dtype != np.float64: 54 | raise ValueError( 55 | "Invalid data type for box data: float is required." 56 | ) 57 | if not self._is_valid_boxes(data): 58 | raise ValueError( 59 | "Invalid box data. 
data must be a numpy array of " 60 | "N*[y_min, x_min, y_max, x_max]" 61 | ) 62 | self.data = {"boxes": data} 63 | 64 | def num_boxes(self): 65 | """Return number of boxes held in collections.""" 66 | return self.data["boxes"].shape[0] 67 | 68 | def get_extra_fields(self): 69 | """Return all non-box fields.""" 70 | return [k for k in self.data.keys() if k != "boxes"] 71 | 72 | def has_field(self, field): 73 | return field in self.data 74 | 75 | def add_field(self, field, field_data): 76 | """Add data to a specified field. 77 | 78 | Args: 79 | field: a string parameter used to specify a related field to be accessed. 80 | field_data: a numpy array of [N, ...] representing the data associated 81 | with the field. 82 | Raises: 83 | ValueError: if the field already exists or the dimension of the field 84 | data does not match the number of boxes. 85 | """ 86 | if self.has_field(field): 87 | raise ValueError("Field " + field + " already exists") 88 | if len(field_data.shape) < 1 or field_data.shape[0] != self.num_boxes(): 89 | raise ValueError("Invalid dimensions for field data") 90 | self.data[field] = field_data 91 | 92 | def get(self): 93 | """Convenience function for accessing box coordinates. 94 | 95 | Returns: 96 | a numpy array of shape [N, 4] representing box corners 97 | """ 98 | return self.get_field("boxes") 99 | 100 | def get_field(self, field): 101 | """Accesses data associated with the specified field in the box collection. 102 | 103 | Args: 104 | field: a string parameter used to specify a related field to be accessed. 105 | 106 | Returns: 107 | a numpy 1-d array representing data of an associated field 108 | 109 | Raises: 110 | ValueError: if invalid field 111 | """ 112 | if not self.has_field(field): 113 | raise ValueError("field {} does not exist".format(field)) 114 | return self.data[field] 115 | 116 | def get_coordinates(self): 117 | """Get corner coordinates of boxes. 118 | 119 | Returns: 120 | a list of 4 1-d numpy arrays [y_min, x_min, y_max, x_max] 121 | """ 122 | box_coordinates = self.get() 123 | y_min = box_coordinates[:, 0] 124 | x_min = box_coordinates[:, 1] 125 | y_max = box_coordinates[:, 2] 126 | x_max = box_coordinates[:, 3] 127 | return [y_min, x_min, y_max, x_max] 128 | 129 | def _is_valid_boxes(self, data): 130 | """Check whether data fulfills the format of N*[ymin, xmin, ymax, xmax]. 131 | 132 | Args: 133 | data: a numpy array of shape [N, 4] representing box coordinates 134 | 135 | Returns: 136 | a boolean indicating whether all ymax of boxes are equal or greater than 137 | ymin, and all xmax of boxes are equal or greater than xmin. 138 | """ 139 | if data.shape[0] > 0: 140 | for i in range(data.shape[0]): 141 | if data[i, 0] > data[i, 2] or data[i, 1] > data[i, 3]: 142 | return False 143 | return True 144 | -------------------------------------------------------------------------------- /slowfast/utils/ava_evaluation/np_box_mask_list.py: -------------------------------------------------------------------------------- 1 | # Copyright 2017 The TensorFlow Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | 16 | """Numpy BoxMaskList classes and functions.""" 17 | 18 | from __future__ import ( 19 | absolute_import, 20 | division, 21 | print_function, 22 | unicode_literals, 23 | ) 24 | import numpy as np 25 | 26 | from . import np_box_list 27 | 28 | 29 | class BoxMaskList(np_box_list.BoxList): 30 | """Convenience wrapper for BoxList with masks. 31 | 32 | BoxMaskList extends the np_box_list.BoxList to contain masks as well. 33 | In particular, its constructor receives both boxes and masks. Note that the 34 | masks correspond to the full image. 35 | """ 36 | 37 | def __init__(self, box_data, mask_data): 38 | """Constructs box collection. 39 | 40 | Args: 41 | box_data: a numpy array of shape [N, 4] representing box coordinates 42 | mask_data: a numpy array of shape [N, height, width] representing masks 43 | with values are in {0,1}. The masks correspond to the full 44 | image. The height and the width will be equal to image height and width. 45 | 46 | Raises: 47 | ValueError: if bbox data is not a numpy array 48 | ValueError: if invalid dimensions for bbox data 49 | ValueError: if mask data is not a numpy array 50 | ValueError: if invalid dimension for mask data 51 | """ 52 | super(BoxMaskList, self).__init__(box_data) 53 | if not isinstance(mask_data, np.ndarray): 54 | raise ValueError("Mask data must be a numpy array.") 55 | if len(mask_data.shape) != 3: 56 | raise ValueError("Invalid dimensions for mask data.") 57 | if mask_data.dtype != np.uint8: 58 | raise ValueError( 59 | "Invalid data type for mask data: uint8 is required." 60 | ) 61 | if mask_data.shape[0] != box_data.shape[0]: 62 | raise ValueError( 63 | "There should be the same number of boxes and masks." 64 | ) 65 | self.data["masks"] = mask_data 66 | 67 | def get_masks(self): 68 | """Convenience function for accessing masks. 69 | 70 | Returns: 71 | a numpy array of shape [N, height, width] representing masks 72 | """ 73 | return self.get_field("masks") 74 | -------------------------------------------------------------------------------- /slowfast/utils/ava_evaluation/np_box_ops.py: -------------------------------------------------------------------------------- 1 | # Copyright 2017 The TensorFlow Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | 16 | """Operations for [N, 4] numpy arrays representing bounding boxes. 
17 | 
18 | Example box operations that are supported:
19 |   * Areas: compute bounding box areas
20 |   * IOU: pairwise intersection-over-union scores
21 | """
22 | from __future__ import (
23 |     absolute_import,
24 |     division,
25 |     print_function,
26 |     unicode_literals,
27 | )
28 | import numpy as np
29 | 
30 | 
31 | def area(boxes):
32 |     """Computes area of boxes.
33 | 
34 |     Args:
35 |         boxes: Numpy array with shape [N, 4] holding N boxes
36 | 
37 |     Returns:
38 |         a numpy array with shape [N] representing box areas
39 |     """
40 |     return (boxes[:, 2] - boxes[:, 0]) * (boxes[:, 3] - boxes[:, 1])
41 | 
42 | 
43 | def intersection(boxes1, boxes2):
44 |     """Compute pairwise intersection areas between boxes.
45 | 
46 |     Args:
47 |         boxes1: a numpy array with shape [N, 4] holding N boxes
48 |         boxes2: a numpy array with shape [M, 4] holding M boxes
49 | 
50 |     Returns:
51 |         a numpy array with shape [N, M] representing pairwise intersection area
52 |     """
53 |     [y_min1, x_min1, y_max1, x_max1] = np.split(boxes1, 4, axis=1)
54 |     [y_min2, x_min2, y_max2, x_max2] = np.split(boxes2, 4, axis=1)
55 | 
56 |     all_pairs_min_ymax = np.minimum(y_max1, np.transpose(y_max2))
57 |     all_pairs_max_ymin = np.maximum(y_min1, np.transpose(y_min2))
58 |     intersect_heights = np.maximum(
59 |         np.zeros(all_pairs_max_ymin.shape),
60 |         all_pairs_min_ymax - all_pairs_max_ymin,
61 |     )
62 |     all_pairs_min_xmax = np.minimum(x_max1, np.transpose(x_max2))
63 |     all_pairs_max_xmin = np.maximum(x_min1, np.transpose(x_min2))
64 |     intersect_widths = np.maximum(
65 |         np.zeros(all_pairs_max_xmin.shape),
66 |         all_pairs_min_xmax - all_pairs_max_xmin,
67 |     )
68 |     return intersect_heights * intersect_widths
69 | 
70 | 
71 | def iou(boxes1, boxes2):
72 |     """Computes pairwise intersection-over-union between box collections.
73 | 
74 |     Args:
75 |         boxes1: a numpy array with shape [N, 4] holding N boxes.
76 |         boxes2: a numpy array with shape [M, 4] holding M boxes.
77 | 
78 |     Returns:
79 |         a numpy array with shape [N, M] representing pairwise iou scores.
80 |     """
81 |     intersect = intersection(boxes1, boxes2)
82 |     area1 = area(boxes1)
83 |     area2 = area(boxes2)
84 |     union = (
85 |         np.expand_dims(area1, axis=1)
86 |         + np.expand_dims(area2, axis=0)
87 |         - intersect
88 |     )
89 |     return intersect / union
90 | 
91 | 
92 | def ioa(boxes1, boxes2):
93 |     """Computes pairwise intersection-over-area between box collections.
94 | 
95 |     Intersection-over-area (ioa) between two boxes, box1 and box2, is defined as
96 |     their intersection area over box2's area. Note that ioa is not symmetric,
97 |     that is, IOA(box1, box2) != IOA(box2, box1).
98 | 
99 |     Args:
100 |         boxes1: a numpy array with shape [N, 4] holding N boxes.
101 |         boxes2: a numpy array with shape [M, 4] holding M boxes.
102 | 
103 |     Returns:
104 |         a numpy array with shape [N, M] representing pairwise ioa scores.
105 |     """
106 |     intersect = intersection(boxes1, boxes2)
107 |     areas = np.expand_dims(area(boxes2), axis=0)
108 |     return intersect / areas
109 | 
--------------------------------------------------------------------------------
/slowfast/utils/ava_evaluation/np_mask_ops.py:
--------------------------------------------------------------------------------
1 | # Copyright 2017 The TensorFlow Authors. All Rights Reserved.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
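As a quick sanity check on the box operations above, a hedged worked example with values traced by hand from the definitions (box format is [y_min, x_min, y_max, x_max]):

    import numpy as np
    from slowfast.utils.ava_evaluation import np_box_ops

    b1 = np.array([[0.0, 0.0, 2.0, 2.0]], dtype=np.float32)  # area 4
    b2 = np.array([[1.0, 1.0, 3.0, 3.0]], dtype=np.float32)  # area 4, 1x1 overlap with b1
    np_box_ops.iou(b1, b2)  # [[1/7]]: intersection 1 over union 4 + 4 - 1
    np_box_ops.ioa(b1, b2)  # [[1/4]]: intersection 1 over area(b2) = 4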
5 | # You may obtain a copy of the License at
6 | #
7 | #     http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | # ==============================================================================
15 | 
16 | """Operations for [N, height, width] numpy arrays representing masks.
17 | 
18 | Example mask operations that are supported:
19 |   * Areas: compute mask areas
20 |   * IOU: pairwise intersection-over-union scores
21 | """
22 | from __future__ import (
23 |     absolute_import,
24 |     division,
25 |     print_function,
26 |     unicode_literals,
27 | )
28 | import numpy as np
29 | 
30 | EPSILON = 1e-7
31 | 
32 | 
33 | def area(masks):
34 |     """Computes area of masks.
35 | 
36 |     Args:
37 |         masks: Numpy array with shape [N, height, width] holding N masks. Masks
38 |             values are of type np.uint8 and values are in {0,1}.
39 | 
40 |     Returns:
41 |         a numpy array with shape [N] representing mask areas.
42 | 
43 |     Raises:
44 |         ValueError: If masks.dtype is not np.uint8
45 |     """
46 |     if masks.dtype != np.uint8:
47 |         raise ValueError("Masks type should be np.uint8")
48 |     return np.sum(masks, axis=(1, 2), dtype=np.float32)
49 | 
50 | 
51 | def intersection(masks1, masks2):
52 |     """Compute pairwise intersection areas between masks.
53 | 
54 |     Args:
55 |         masks1: a numpy array with shape [N, height, width] holding N masks. Masks
56 |             values are of type np.uint8 and values are in {0,1}.
57 |         masks2: a numpy array with shape [M, height, width] holding M masks. Masks
58 |             values are of type np.uint8 and values are in {0,1}.
59 | 
60 |     Returns:
61 |         a numpy array with shape [N, M] representing pairwise intersection area.
62 | 
63 |     Raises:
64 |         ValueError: If masks1 and masks2 are not of type np.uint8.
65 |     """
66 |     if masks1.dtype != np.uint8 or masks2.dtype != np.uint8:
67 |         raise ValueError("masks1 and masks2 should be of type np.uint8")
68 |     n = masks1.shape[0]
69 |     m = masks2.shape[0]
70 |     answer = np.zeros([n, m], dtype=np.float32)
71 |     for i in np.arange(n):
72 |         for j in np.arange(m):
73 |             answer[i, j] = np.sum(
74 |                 np.minimum(masks1[i], masks2[j]), dtype=np.float32
75 |             )
76 |     return answer
77 | 
78 | 
79 | def iou(masks1, masks2):
80 |     """Computes pairwise intersection-over-union between mask collections.
81 | 
82 |     Args:
83 |         masks1: a numpy array with shape [N, height, width] holding N masks. Masks
84 |             values are of type np.uint8 and values are in {0,1}.
85 |         masks2: a numpy array with shape [M, height, width] holding M masks. Masks
86 |             values are of type np.uint8 and values are in {0,1}.
87 | 
88 |     Returns:
89 |         a numpy array with shape [N, M] representing pairwise iou scores.
90 | 
91 |     Raises:
92 |         ValueError: If masks1 and masks2 are not of type np.uint8.
93 |     """
94 |     if masks1.dtype != np.uint8 or masks2.dtype != np.uint8:
95 |         raise ValueError("masks1 and masks2 should be of type np.uint8")
96 |     intersect = intersection(masks1, masks2)
97 |     area1 = area(masks1)
98 |     area2 = area(masks2)
99 |     union = (
100 |         np.expand_dims(area1, axis=1)
101 |         + np.expand_dims(area2, axis=0)
102 |         - intersect
103 |     )
104 |     return intersect / np.maximum(union, EPSILON)
105 | 
106 | 
107 | def ioa(masks1, masks2):
108 |     """Computes pairwise intersection-over-area between mask collections.
109 | 
110 |     Intersection-over-area (ioa) between two masks, mask1 and mask2, is defined as
111 |     their intersection area over mask2's area. Note that ioa is not symmetric,
112 |     that is, IOA(mask1, mask2) != IOA(mask2, mask1).
113 | 
114 |     Args:
115 |         masks1: a numpy array with shape [N, height, width] holding N masks. Masks
116 |             values are of type np.uint8 and values are in {0,1}.
117 |         masks2: a numpy array with shape [M, height, width] holding M masks. Masks
118 |             values are of type np.uint8 and values are in {0,1}.
119 | 
120 |     Returns:
121 |         a numpy array with shape [N, M] representing pairwise ioa scores.
122 | 
123 |     Raises:
124 |         ValueError: If masks1 and masks2 are not of type np.uint8.
125 |     """
126 |     if masks1.dtype != np.uint8 or masks2.dtype != np.uint8:
127 |         raise ValueError("masks1 and masks2 should be of type np.uint8")
128 |     intersect = intersection(masks1, masks2)
129 |     areas = np.expand_dims(area(masks2), axis=0)
130 |     return intersect / (areas + EPSILON)
131 | 
--------------------------------------------------------------------------------
/slowfast/utils/benchmark.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
2 | """
3 | Functions for benchmarks.
4 | """
5 | 
6 | import numpy as np
7 | import pprint
8 | import torch
9 | import tqdm
10 | from fvcore.common.timer import Timer
11 | 
12 | import slowfast.utils.logging as logging
13 | import slowfast.utils.misc as misc
14 | from slowfast.datasets import loader
15 | from slowfast.utils.env import setup_environment
16 | 
17 | logger = logging.get_logger(__name__)
18 | 
19 | 
20 | def benchmark_data_loading(cfg):
21 |     """
22 |     Benchmark the speed of data loading in PySlowFast.
23 |     Args:
24 | 
25 |         cfg (CfgNode): configs. Details can be found in
26 |             slowfast/config/defaults.py
27 |     """
28 |     # Set up environment.
29 |     setup_environment()
30 |     # Set random seed from configs.
31 |     np.random.seed(cfg.RNG_SEED)
32 |     torch.manual_seed(cfg.RNG_SEED)
33 | 
34 |     # Setup logging format.
35 |     logging.setup_logging(cfg.OUTPUT_DIR)
36 | 
37 |     # Print config.
38 |     logger.info("Benchmark data loading with config:")
39 |     logger.info(pprint.pformat(cfg))
40 | 
41 |     timer = Timer()
42 |     dataloader = loader.construct_loader(cfg, "train")
43 |     logger.info(
44 |         "Initialized loader in {:.2f} seconds.".format(timer.seconds())
45 |     )
46 |     # Total batch size across different machines.
47 |     batch_size = cfg.TRAIN.BATCH_SIZE * cfg.NUM_SHARDS
48 |     log_period = cfg.BENCHMARK.LOG_PERIOD
49 |     epoch_times = []
50 |     # Test for a few epochs.
51 |     for cur_epoch in range(cfg.BENCHMARK.NUM_EPOCHS):
52 |         timer = Timer()
53 |         timer_epoch = Timer()
54 |         iter_times = []
55 |         if cfg.BENCHMARK.SHUFFLE:
56 |             loader.shuffle_dataset(dataloader, cur_epoch)
57 |         for cur_iter, _ in enumerate(tqdm.tqdm(dataloader)):
58 |             if cur_iter > 0 and cur_iter % log_period == 0:
59 |                 iter_times.append(timer.seconds())
60 |                 ram_usage, ram_total = misc.cpu_mem_usage()
61 |                 logger.info(
62 |                     "Epoch {}: {} iters ({} videos) in {:.2f} seconds. "
63 |                     "RAM Usage: {:.2f}/{:.2f} GB.".format(
64 |                         cur_epoch,
65 |                         log_period,
66 |                         log_period * batch_size,
67 |                         iter_times[-1],
68 |                         ram_usage,
69 |                         ram_total,
70 |                     )
71 |                 )
72 |                 timer.reset()
73 |         epoch_times.append(timer_epoch.seconds())
74 |         ram_usage, ram_total = misc.cpu_mem_usage()
75 |         logger.info(
76 |             "Epoch {}: in total {} iters ({} videos) in {:.2f} seconds. "
" 77 | "RAM Usage: {:.2f}/{:.2f} GB.".format( 78 | cur_epoch, 79 | len(dataloader), 80 | len(dataloader) * batch_size, 81 | epoch_times[-1], 82 | ram_usage, 83 | ram_total, 84 | ) 85 | ) 86 | logger.info( 87 | "Epoch {}: on average every {} iters ({} videos) take {:.2f}/{:.2f} " 88 | "(avg/std) seconds.".format( 89 | cur_epoch, 90 | log_period, 91 | log_period * batch_size, 92 | np.mean(iter_times), 93 | np.std(iter_times), 94 | ) 95 | ) 96 | logger.info( 97 | "On average every epoch ({} videos) takes {:.2f}/{:.2f} " 98 | "(avg/std) seconds.".format( 99 | len(dataloader) * batch_size, 100 | np.mean(epoch_times), 101 | np.std(epoch_times), 102 | ) 103 | ) 104 | -------------------------------------------------------------------------------- /slowfast/utils/bn_helper.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 3 | 4 | """bn helper.""" 5 | 6 | import itertools 7 | import torch 8 | 9 | 10 | @torch.no_grad() 11 | def compute_and_update_bn_stats(model, data_loader, num_batches=200): 12 | """ 13 | Compute and update the batch norm stats to make it more precise. During 14 | training both bn stats and the weight are changing after every iteration, 15 | so the bn can not precisely reflect the latest stats of the current model. 16 | Here the bn stats is recomputed without change of weights, to make the 17 | running mean and running var more precise. 18 | Args: 19 | model (model): the model using to compute and update the bn stats. 20 | data_loader (dataloader): dataloader using to provide inputs. 21 | num_batches (int): running iterations using to compute the stats. 22 | """ 23 | 24 | # Prepares all the bn layers. 25 | bn_layers = [ 26 | m 27 | for m in model.modules() 28 | if any( 29 | ( 30 | isinstance(m, bn_type) 31 | for bn_type in ( 32 | torch.nn.BatchNorm1d, 33 | torch.nn.BatchNorm2d, 34 | torch.nn.BatchNorm3d, 35 | ) 36 | ) 37 | ) 38 | ] 39 | 40 | # In order to make the running stats only reflect the current batch, the 41 | # momentum is disabled. 42 | # bn.running_mean = (1 - momentum) * bn.running_mean + momentum * batch_mean 43 | # Setting the momentum to 1.0 to compute the stats without momentum. 44 | momentum_actual = [bn.momentum for bn in bn_layers] 45 | for bn in bn_layers: 46 | bn.momentum = 1.0 47 | 48 | # Calculates the running iterations for precise stats computation. 49 | running_mean = [torch.zeros_like(bn.running_mean) for bn in bn_layers] 50 | running_square_mean = [torch.zeros_like(bn.running_var) for bn in bn_layers] 51 | 52 | for ind, (inputs, _, _) in enumerate( 53 | itertools.islice(data_loader, num_batches) 54 | ): 55 | # Forwards the model to update the bn stats. 56 | if isinstance(inputs, (list,)): 57 | for i in range(len(inputs)): 58 | inputs[i] = inputs[i].float().cuda(non_blocking=True) 59 | else: 60 | inputs = inputs.cuda(non_blocking=True) 61 | model(inputs) 62 | 63 | for i, bn in enumerate(bn_layers): 64 | # Accumulates the bn stats. 65 | running_mean[i] += (bn.running_mean - running_mean[i]) / (ind + 1) 66 | # $E(x^2) = Var(x) + E(x)^2$. 67 | cur_square_mean = bn.running_var + bn.running_mean ** 2 68 | running_square_mean[i] += ( 69 | cur_square_mean - running_square_mean[i] 70 | ) / (ind + 1) 71 | 72 | for i, bn in enumerate(bn_layers): 73 | bn.running_mean = running_mean[i] 74 | # Var(x) = $E(x^2) - E(x)^2$. 75 | bn.running_var = running_square_mean[i] - bn.running_mean ** 2 76 | # Sets the precise bn stats. 
77 |         bn.momentum = momentum_actual[i]
78 | 
--------------------------------------------------------------------------------
/slowfast/utils/c2_model_loading.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
3 | 
4 | """Caffe2 to PyTorch checkpoint name converting utility."""
5 | 
6 | import re
7 | 
8 | 
9 | def get_name_convert_func():
10 |     """
11 |     Get the function to convert Caffe2 layer names to PyTorch layer names.
12 |     Returns:
13 |         (func): function to convert parameter name from Caffe2 format to PyTorch
14 |             format.
15 |     """
16 |     pairs = [
17 |         # ------------------------------------------------------------
18 |         # 'nonlocal_conv3_1_theta_w' -> 's3.pathway0_nonlocal1.conv_theta.weight'
19 |         [
20 |             r"^nonlocal_conv([0-9]+)_([0-9]+)_(.*)",
21 |             r"s\1.pathway0_nonlocal\2_\3",
22 |         ],
23 |         # 'theta' -> 'conv_theta'
24 |         [r"^(.*)_nonlocal([0-9]+)_(theta)(.*)", r"\1_nonlocal\2.conv_\3\4"],
25 |         # 'g' -> 'conv_g'
26 |         [r"^(.*)_nonlocal([0-9]+)_(g)(.*)", r"\1_nonlocal\2.conv_\3\4"],
27 |         # 'phi' -> 'conv_phi'
28 |         [r"^(.*)_nonlocal([0-9]+)_(phi)(.*)", r"\1_nonlocal\2.conv_\3\4"],
29 |         # 'out' -> 'conv_out'
30 |         [r"^(.*)_nonlocal([0-9]+)_(out)(.*)", r"\1_nonlocal\2.conv_\3\4"],
31 |         # 'nonlocal_conv4_5_bn_s' -> 's4.pathway0_nonlocal5.bn.weight'
32 |         [r"^(.*)_nonlocal([0-9]+)_(bn)_(.*)", r"\1_nonlocal\2.\3.\4"],
33 |         # ------------------------------------------------------------
34 |         # 't_pool1_subsample_bn_rm' -> 's1_fuse.bn.running_mean'
35 |         [r"^t_pool1_subsample_bn_(.*)", r"s1_fuse.bn.\1"],
36 |         # 't_pool1_subsample' -> 's1_fuse.conv_f2s'
37 |         [r"^t_pool1_subsample_(.*)", r"s1_fuse.conv_f2s.\1"],
38 |         # 't_res4_5_branch2c_bn_subsample_bn_rm' -> 's4_fuse.bn.running_mean'
39 |         [
40 |             r"^t_res([0-9]+)_([0-9]+)_branch2c_bn_subsample_bn_(.*)",
41 |             r"s\1_fuse.bn.\3",
42 |         ],
43 |         # 't_res4_5_branch2c_bn_subsample_w' -> 's4_fuse.conv_f2s.weight'
44 |         [
45 |             r"^t_res([0-9]+)_([0-9]+)_branch2c_bn_subsample_(.*)",
46 |             r"s\1_fuse.conv_f2s.\3",
47 |         ],
48 |         # ------------------------------------------------------------
49 |         # 'res4_4_branch2c_bn_b' -> 's4.pathway0_res4.branch2.c_bn_b'
50 |         [
51 |             r"^res([0-9]+)_([0-9]+)_branch([0-9]+)([a-z])_(.*)",
52 |             r"s\1.pathway0_res\2.branch\3.\4_\5",
53 |         ],
54 |         # 'res_conv1_bn_' -> 's1.pathway0_stem.bn.'
55 |         [r"^res_conv1_bn_(.*)", r"s1.pathway0_stem.bn.\1"],
56 |         # 'conv1_w_momentum' -> 's1.pathway0_stem.conv.w_momentum'
57 |         [r"^conv1_(.*)", r"s1.pathway0_stem.conv.\1"],
58 |         # 'res4_0_branch1_w' -> 's4.pathway0_res0.branch1.weight'
59 |         [
60 |             r"^res([0-9]+)_([0-9]+)_branch([0-9]+)_(.*)",
61 |             r"s\1.pathway0_res\2.branch\3_\4",
62 |         ],
63 |         # 'res_conv1_' -> 's1.pathway0_stem.conv.'
64 |         [r"^res_conv1_(.*)", r"s1.pathway0_stem.conv.\1"],
65 |         # ------------------------------------------------------------
66 |         # 't_res4_4_branch2c_bn_b' -> 's4.pathway1_res4.branch2.c_bn_b'
67 |         [
68 |             r"^t_res([0-9]+)_([0-9]+)_branch([0-9]+)([a-z])_(.*)",
69 |             r"s\1.pathway1_res\2.branch\3.\4_\5",
70 |         ],
71 |         # 't_res_conv1_bn_' -> 's1.pathway1_stem.bn.'
72 |         [r"^t_res_conv1_bn_(.*)", r"s1.pathway1_stem.bn.\1"],
73 |         # 't_conv1_' -> 's1.pathway1_stem.conv.'
74 |         [r"^t_conv1_(.*)", r"s1.pathway1_stem.conv.\1"],
75 |         # 't_res4_0_branch1_w' -> 's4.pathway1_res0.branch1.weight'
76 |         [
77 |             r"^t_res([0-9]+)_([0-9]+)_branch([0-9]+)_(.*)",
78 |             r"s\1.pathway1_res\2.branch\3_\4",
79 |         ],
80 |         # 't_res_conv1_' -> 's1.pathway1_stem.conv.'
81 | [r"^t_res_conv1_(.*)", r"s1.pathway1_stem.conv.\1"], 82 | # ------------------------------------------------------------ 83 | # pred_ -> head.projection. 84 | [r"pred_(.*)", r"head.projection.\1"], 85 | # '.bn_b' -> '.weight' 86 | [r"(.*)bn.b\Z", r"\1bn.bias"], 87 | # '.bn_s' -> '.weight' 88 | [r"(.*)bn.s\Z", r"\1bn.weight"], 89 | # '_bn_rm' -> '.running_mean' 90 | [r"(.*)bn.rm\Z", r"\1bn.running_mean"], 91 | # '_bn_riv' -> '.running_var' 92 | [r"(.*)bn.riv\Z", r"\1bn.running_var"], 93 | # '_b' -> '.bias' 94 | [r"(.*)[\._]b\Z", r"\1.bias"], 95 | # '_w' -> '.weight' 96 | [r"(.*)[\._]w\Z", r"\1.weight"], 97 | ] 98 | 99 | def convert_caffe2_name_to_pytorch(caffe2_layer_name): 100 | """ 101 | Convert the caffe2_layer_name to pytorch format by apply the list of 102 | regular expressions. 103 | Args: 104 | caffe2_layer_name (str): caffe2 layer name. 105 | Returns: 106 | (str): pytorch layer name. 107 | """ 108 | for source, dest in pairs: 109 | caffe2_layer_name = re.sub(source, dest, caffe2_layer_name) 110 | return caffe2_layer_name 111 | 112 | return convert_caffe2_name_to_pytorch 113 | -------------------------------------------------------------------------------- /slowfast/utils/env.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 3 | 4 | """Set up Environment.""" 5 | 6 | import slowfast.utils.logging as logging 7 | 8 | _ENV_SETUP_DONE = False 9 | 10 | 11 | def setup_environment(): 12 | global _ENV_SETUP_DONE 13 | if _ENV_SETUP_DONE: 14 | return 15 | _ENV_SETUP_DONE = True 16 | -------------------------------------------------------------------------------- /slowfast/utils/logging.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 3 | 4 | """Logging.""" 5 | 6 | import builtins 7 | import decimal 8 | import functools 9 | import logging 10 | import os 11 | import sys 12 | import simplejson 13 | from fvcore.common.file_io import PathManager 14 | 15 | import slowfast.utils.distributed as du 16 | 17 | 18 | def _suppress_print(): 19 | """ 20 | Suppresses printing from the current process. 21 | """ 22 | 23 | def print_pass(*objects, sep=" ", end="\n", file=sys.stdout, flush=False): 24 | pass 25 | 26 | builtins.print = print_pass 27 | 28 | 29 | @functools.lru_cache(maxsize=None) 30 | def _cached_log_stream(filename): 31 | return PathManager.open(filename, "a") 32 | 33 | 34 | def setup_logging(output_dir=None): 35 | """ 36 | Sets up the logging for multiple processes. Only enable the logging for the 37 | master process, and suppress logging for the non-master processes. 38 | """ 39 | # Set up logging format. 40 | _FORMAT = "[%(levelname)s: %(filename)s: %(lineno)4d]: %(message)s" 41 | 42 | if du.is_master_proc(): 43 | # Enable logging for the master process. 44 | logging.root.handlers = [] 45 | logging.basicConfig( 46 | level=logging.INFO, format=_FORMAT, stream=sys.stdout 47 | ) 48 | else: 49 | # Suppress logging for non-master processes. 
50 |         _suppress_print()
51 | 
52 |     logger = logging.getLogger()
53 |     logger.setLevel(logging.DEBUG)
54 |     logger.propagate = False
55 |     plain_formatter = logging.Formatter(
56 |         "[%(asctime)s][%(levelname)s] %(name)s: %(lineno)4d: %(message)s",
57 |         datefmt="%m/%d %H:%M:%S",
58 |     )
59 | 
60 |     if du.is_master_proc():
61 |         ch = logging.StreamHandler(stream=sys.stdout)
62 |         ch.setLevel(logging.DEBUG)
63 |         ch.setFormatter(plain_formatter)
64 |         logger.addHandler(ch)
65 | 
66 |     if output_dir is not None and du.is_master_proc(du.get_world_size()):
67 |         filename = os.path.join(output_dir, "stdout.log")
68 |         fh = logging.StreamHandler(_cached_log_stream(filename))
69 |         fh.setLevel(logging.DEBUG)
70 |         fh.setFormatter(plain_formatter)
71 |         logger.addHandler(fh)
72 | 
73 | 
74 | def get_logger(name):
75 |     """
76 |     Retrieve the logger with the specified name or, if name is None, return a
77 |     logger which is the root logger of the hierarchy.
78 |     Args:
79 |         name (string): name of the logger.
80 |     """
81 |     return logging.getLogger(name)
82 | 
83 | 
84 | def log_json_stats(stats):
85 |     """
86 |     Logs json stats.
87 |     Args:
88 |         stats (dict): a dictionary of statistical information to log.
89 |     """
90 |     stats = {
91 |         k: decimal.Decimal("{:.6f}".format(v)) if isinstance(v, float) else v
92 |         for k, v in stats.items()
93 |     }
94 |     json_stats = simplejson.dumps(stats, sort_keys=True, use_decimal=True)
95 |     logger = get_logger(__name__)
96 |     logger.info("json_stats: {:s}".format(json_stats))
97 | 
--------------------------------------------------------------------------------
/slowfast/utils/lr_policy.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
3 | 
4 | """Learning rate policy."""
5 | 
6 | import math
7 | 
8 | 
9 | def get_lr_at_epoch(cfg, cur_epoch):
10 |     """
11 |     Retrieve the learning rate of the current epoch with the option to perform
12 |     warm up at the beginning of the training stage.
13 |     Args:
14 |         cfg (CfgNode): configs. Details can be found in
15 |             slowfast/config/defaults.py
16 |         cur_epoch (float): the current epoch of the training stage.
17 |     """
18 |     lr = get_lr_func(cfg.SOLVER.LR_POLICY)(cfg, cur_epoch)
19 |     # Perform warm up.
20 |     if cur_epoch < cfg.SOLVER.WARMUP_EPOCHS:
21 |         lr_start = cfg.SOLVER.WARMUP_START_LR
22 |         lr_end = get_lr_func(cfg.SOLVER.LR_POLICY)(
23 |             cfg, cfg.SOLVER.WARMUP_EPOCHS
24 |         )
25 |         alpha = (lr_end - lr_start) / cfg.SOLVER.WARMUP_EPOCHS
26 |         lr = cur_epoch * alpha + lr_start
27 |     return lr
28 | 
29 | 
30 | def lr_func_cosine(cfg, cur_epoch):
31 |     """
32 |     Retrieve the learning rate at the specified epoch following the cosine
33 |     learning rate schedule. Details can be found in:
34 |     Ilya Loshchilov and Frank Hutter
35 |     SGDR: Stochastic Gradient Descent With Warm Restarts.
36 |     Args:
37 |         cfg (CfgNode): configs. Details can be found in
38 |             slowfast/config/defaults.py
39 |         cur_epoch (float): the current epoch of the training stage.
40 |     """
41 |     return (
42 |         cfg.SOLVER.BASE_LR
43 |         * (math.cos(math.pi * cur_epoch / cfg.SOLVER.MAX_EPOCH) + 1.0)
44 |         * 0.5
45 |     )
46 | 
47 | 
48 | def lr_func_steps_with_relative_lrs(cfg, cur_epoch):
49 |     """
50 |     Retrieve the learning rate at the specified epoch following the stepwise
51 |     schedule with relative learning rates.
52 |     Args:
53 |         cfg (CfgNode): configs. Details can be found in
54 |             slowfast/config/defaults.py
55 |         cur_epoch (float): the current epoch of the training stage.
56 |     """
57 |     ind = get_step_index(cfg, cur_epoch)
58 |     return cfg.SOLVER.LRS[ind] * cfg.SOLVER.BASE_LR
59 | 
60 | 
61 | def get_step_index(cfg, cur_epoch):
62 |     """
63 |     Retrieves the lr step index for the given epoch.
64 |     Args:
65 |         cfg (CfgNode): configs. Details can be found in
66 |             slowfast/config/defaults.py
67 |         cur_epoch (float): the current epoch of the training stage.
68 |     """
69 |     steps = cfg.SOLVER.STEPS + [cfg.SOLVER.MAX_EPOCH]
70 |     for ind, step in enumerate(steps):  # NoQA
71 |         if cur_epoch < step:
72 |             break
73 |     return ind - 1
74 | 
75 | 
76 | def get_lr_func(lr_policy):
77 |     """
78 |     Given the configs, retrieve the specified lr policy function.
79 |     Args:
80 |         lr_policy (string): the learning rate policy to use for the job.
81 |     """
82 |     policy = "lr_func_" + lr_policy
83 |     if policy not in globals():
84 |         raise NotImplementedError("Unknown LR policy: {}".format(lr_policy))
85 |     else:
86 |         return globals()[policy]
87 | 
--------------------------------------------------------------------------------
/slowfast/utils/metrics.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
3 | 
4 | """Functions for computing metrics."""
5 | 
6 | import torch
7 | 
8 | 
9 | def topks_correct(preds, labels, ks):
10 |     """
11 |     Given the predictions, labels, and a list of top-k values, compute the
12 |     number of correct predictions for each top-k value.
13 | 
14 |     Args:
15 |         preds (array): array of predictions. Dimension is batch size
16 |             N x ClassNum.
17 |         labels (array): array of labels. Dimension is batch size N.
18 |         ks (list): list of top-k values. For example, ks = [1, 5] corresponds
19 |             to top-1 and top-5.
20 | 
21 |     Returns:
22 |         topks_correct (list): list of numbers, where the `i`-th entry
23 |             corresponds to the number of top-`ks[i]` correct predictions.
24 |     """
25 |     assert preds.size(0) == labels.size(
26 |         0
27 |     ), "Batch dim of predictions and labels must match"
28 |     # Find the top max_k predictions for each sample
29 |     _top_max_k_vals, top_max_k_inds = torch.topk(
30 |         preds, max(ks), dim=1, largest=True, sorted=True
31 |     )
32 |     # (batch_size, max_k) -> (max_k, batch_size).
33 |     top_max_k_inds = top_max_k_inds.t()
34 |     # (batch_size, ) -> (max_k, batch_size).
35 |     rep_max_k_labels = labels.view(1, -1).expand_as(top_max_k_inds)
36 |     # (i, j) = 1 if top i-th prediction for the j-th sample is correct.
37 |     top_max_k_correct = top_max_k_inds.eq(rep_max_k_labels)
38 |     # Compute the number of topk correct predictions for each k.
39 |     topks_correct = [
40 |         top_max_k_correct[:k, :].view(-1).float().sum() for k in ks
41 |     ]
42 |     return topks_correct
43 | 
44 | 
45 | def topk_errors(preds, labels, ks):
46 |     """
47 |     Computes the top-k error for each k.
48 |     Args:
49 |         preds (array): array of predictions. Dimension is N.
50 |         labels (array): array of labels. Dimension is N.
51 |         ks (list): list of ks to calculate the top accuracies.
52 |     """
53 |     num_topks_correct = topks_correct(preds, labels, ks)
54 |     return [(1.0 - x / preds.size(0)) * 100.0 for x in num_topks_correct]
55 | 
56 | 
57 | def topk_accuracies(preds, labels, ks):
58 |     """
59 |     Computes the top-k accuracy for each k.
60 |     Args:
61 |         preds (array): array of predictions. Dimension is N.
62 |         labels (array): array of labels. Dimension is N.
63 |         ks (list): list of ks to calculate the top accuracies.
64 |     """
65 |     num_topks_correct = topks_correct(preds, labels, ks)
66 |     return [(x / preds.size(0)) * 100.0 for x in num_topks_correct]
67 | 
--------------------------------------------------------------------------------
/slowfast/utils/multiprocessing.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
3 | 
4 | """Multiprocessing helpers."""
5 | 
6 | import torch
7 | 
8 | 
9 | def run(
10 |     local_rank, num_proc, func, init_method, shard_id, num_shards, backend, cfg
11 | ):
12 |     """
13 |     Runs a function from a child process.
14 |     Args:
15 |         local_rank (int): rank of the current process on the current machine.
16 |         num_proc (int): number of processes per machine.
17 |         func (function): function to execute on each of the processes.
18 |         init_method (string): method to initialize the distributed training.
19 |             TCP initialization: requiring a network address reachable from all
20 |             processes followed by the port.
21 |             Shared file-system initialization: makes use of a file system that
22 |             is shared and visible from all machines. The URL should start with
23 |             file:// and contain a path to a non-existent file on a shared file
24 |             system.
25 |         shard_id (int): the rank of the current machine.
26 |         num_shards (int): number of overall machines for the distributed
27 |             training job.
28 |         backend (string): three distributed backends ('nccl', 'gloo', 'mpi') are
29 |             supported, each with different capabilities. Details can be found
30 |             here:
31 |             https://pytorch.org/docs/stable/distributed.html
32 |         cfg (CfgNode): configs. Details can be found in
33 |             slowfast/config/defaults.py
34 |     """
35 |     # Initialize the process group.
36 |     world_size = num_proc * num_shards
37 |     rank = shard_id * num_proc + local_rank
38 | 
39 |     try:
40 |         torch.distributed.init_process_group(
41 |             backend=backend,
42 |             init_method=init_method,
43 |             world_size=world_size,
44 |             rank=rank,
45 |         )
46 |     except Exception as e:
47 |         raise e
48 | 
49 |     torch.cuda.set_device(local_rank)
50 |     func(cfg)
51 | 
--------------------------------------------------------------------------------
/slowfast/utils/parser.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
3 | 
4 | """Argument parser functions."""
5 | 
6 | import argparse
7 | import sys
8 | 
9 | import slowfast.utils.checkpoint as cu
10 | from slowfast.config.defaults import get_cfg
11 | 
12 | 
13 | def parse_args():
14 |     """
15 |     Parse the following arguments for a default parser for PySlowFast users.
16 |     Args:
17 |         shard_id (int): shard id for the current machine. Ranges from 0 to
18 |             num_shards - 1. If a single machine is used, then set shard id to 0.
19 |         num_shards (int): number of shards used by the job.
20 |         init_method (str): initialization method to launch the job with multiple
21 |             devices. Options include TCP or shared file-system for
22 |             initialization. Details can be found in
23 |             https://pytorch.org/docs/stable/distributed.html#tcp-initialization
24 |         cfg (str): path to the config file.
25 |         opts (argument): provide additional options from the command line; they
26 |             overwrite the config loaded from the file.
27 |     """
28 |     parser = argparse.ArgumentParser(
29 |         description="Provide SlowFast video training and testing pipeline."
30 |     )
31 |     parser.add_argument(
32 |         "--shard_id",
33 |         help="The shard id of the current node; ranges from 0 to num_shards - 1",
34 |         default=0,
35 |         type=int,
36 |     )
37 |     parser.add_argument(
38 |         "--num_shards",
39 |         help="Number of shards used by the job",
40 |         default=1,
41 |         type=int,
42 |     )
43 |     parser.add_argument(
44 |         "--init_method",
45 |         help="Initialization method, includes TCP or shared file-system",
46 |         default="tcp://localhost:9999",
47 |         type=str,
48 |     )
49 |     parser.add_argument(
50 |         "--cfg",
51 |         dest="cfg_file",
52 |         help="Path to the config file",
53 |         default="configs/Kinetics/SLOWFAST_4x16_R50.yaml",
54 |         type=str,
55 |     )
56 |     parser.add_argument(
57 |         "opts",
58 |         help="See slowfast/config/defaults.py for all options",
59 |         default=None,
60 |         nargs=argparse.REMAINDER,
61 |     )
62 |     if len(sys.argv) == 1:
63 |         parser.print_help()
64 |     return parser.parse_args()
65 | 
66 | 
67 | def load_config(args):
68 |     """
69 |     Given the arguments, load and initialize the configs.
70 |     Args:
71 |         args (argument): arguments include `shard_id`, `num_shards`,
72 |             `init_method`, `cfg_file`, and `opts`.
73 |     """
74 |     # Setup cfg.
75 |     cfg = get_cfg()
76 |     # Load config from cfg.
77 |     if args.cfg_file is not None:
78 |         cfg.merge_from_file(args.cfg_file)
79 |     # Load config from command line, overwrite config from opts.
80 |     if args.opts is not None:
81 |         cfg.merge_from_list(args.opts)
82 | 
83 |     # Inherit parameters from args.
84 |     if hasattr(args, "num_shards") and hasattr(args, "shard_id"):
85 |         cfg.NUM_SHARDS = args.num_shards
86 |         cfg.SHARD_ID = args.shard_id
87 |     if hasattr(args, "rng_seed"):
88 |         cfg.RNG_SEED = args.rng_seed
89 |     if hasattr(args, "output_dir"):
90 |         cfg.OUTPUT_DIR = args.output_dir
91 | 
92 |     # Create the checkpoint dir.
93 |     cu.make_checkpoint_dir(cfg.OUTPUT_DIR)
94 |     return cfg
95 | 
--------------------------------------------------------------------------------
/slowfast/utils/weight_init_helper.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
3 | 
4 | """Utility function for weight initialization"""
5 | 
6 | import torch.nn as nn
7 | from fvcore.nn.weight_init import c2_msra_fill
8 | 
9 | 
10 | def init_weights(model, fc_init_std=0.01, zero_init_final_bn=True):
11 |     """
12 |     Performs ResNet style weight initialization.
13 |     Args:
14 |         fc_init_std (float): the expected standard deviation for fc layer.
15 |         zero_init_final_bn (bool): if True, zero initialize the final bn for
16 |             every bottleneck.
17 |     """
18 |     for m in model.modules():
19 |         if isinstance(m, nn.Conv3d):
20 |             """
21 |             Follow the initialization method proposed in:
22 |             {He, Kaiming, et al.
23 |             "Delving deep into rectifiers: Surpassing human-level
24 |             performance on imagenet classification."
25 |             arXiv preprint arXiv:1502.01852 (2015)}
26 |             """
27 |             c2_msra_fill(m)
28 |         elif isinstance(m, nn.BatchNorm3d):
29 |             if (
30 |                 hasattr(m, "transform_final_bn")
31 |                 and m.transform_final_bn
32 |                 and zero_init_final_bn
33 |             ):
34 |                 batchnorm_weight = 0.0
35 |             else:
36 |                 batchnorm_weight = 1.0
37 |             if m.weight is not None:
38 |                 m.weight.data.fill_(batchnorm_weight)
39 |             if m.bias is not None:
40 |                 m.bias.data.zero_()
41 |         if isinstance(m, nn.Linear):
42 |             m.weight.data.normal_(mean=0.0, std=fc_init_std)
43 |             m.bias.data.zero_()
44 | 
--------------------------------------------------------------------------------
/slowfast/visualization/__init__.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
3 | 
--------------------------------------------------------------------------------
/slowfast/visualization/demo_loader.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
3 | 
4 | import cv2
5 | 
6 | from slowfast.visualization.utils import TaskInfo
7 | 
8 | 
9 | class VideoReader:
10 |     """
11 |     VideoReader object for getting frames from a video source for real-time inference.
12 |     """
13 | 
14 |     def __init__(self, cfg):
15 |         """
16 |         Args:
17 |             cfg (CfgNode): configs. Details can be found in
18 |                 slowfast/config/defaults.py
19 |         """
20 |         assert (
21 |             cfg.DEMO.WEBCAM > -1 or cfg.DEMO.INPUT_VIDEO != ""
22 |         ), "Must specify a data source as input."
23 | 
24 |         self.source = (
25 |             cfg.DEMO.WEBCAM if cfg.DEMO.WEBCAM > -1 else cfg.DEMO.INPUT_VIDEO
26 |         )
27 | 
28 |         self.display_width = cfg.DEMO.DISPLAY_WIDTH
29 |         self.display_height = cfg.DEMO.DISPLAY_HEIGHT
30 | 
31 |         self.cap = cv2.VideoCapture(self.source)
32 | 
33 |         if self.display_width > 0 and self.display_height > 0:
34 |             self.cap.set(cv2.CAP_PROP_FRAME_WIDTH, self.display_width)
35 |             self.cap.set(cv2.CAP_PROP_FRAME_HEIGHT, self.display_height)
36 |         else:
37 |             self.display_width = int(self.cap.get(cv2.CAP_PROP_FRAME_WIDTH))
38 |             self.display_height = int(self.cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
39 | 
40 |         if not self.cap.isOpened():
41 |             raise IOError("Video {} cannot be opened".format(self.source))
42 | 
43 |         self.output_file = None
44 |         if cfg.DEMO.OUTPUT_FILE != "":
45 |             if cfg.DEMO.OUTPUT_FPS == -1:
46 |                 output_fps = self.cap.get(cv2.CAP_PROP_FPS)
47 |             else:
48 |                 output_fps = cfg.DEMO.OUTPUT_FPS
49 |             self.output_file = self.get_output_file(
50 |                 cfg.DEMO.OUTPUT_FILE, fps=output_fps
51 |             )
52 |         self.id = -1
53 |         self.buffer = []
54 |         self.buffer_size = cfg.DEMO.BUFFER_SIZE
55 |         self.seq_length = cfg.DATA.NUM_FRAMES * cfg.DATA.SAMPLING_RATE
56 |         self.test_crop_size = cfg.DATA.TEST_CROP_SIZE
57 |         self.clip_vis_size = cfg.DEMO.CLIP_VIS_SIZE
58 | 
59 |     def __iter__(self):
60 |         return self
61 | 
62 |     def __next__(self):
63 |         """
64 |         Read and return the required number of frames for 1 clip.
65 |         Returns:
66 |             was_read (bool): False if not enough frames to return.
67 |             task (TaskInfo object): object containing metadata for the current clip.
68 | """ 69 | self.id += 1 70 | task = TaskInfo() 71 | 72 | task.img_height = self.display_height 73 | task.img_width = self.display_width 74 | task.crop_size = self.test_crop_size 75 | task.clip_vis_size = self.clip_vis_size 76 | 77 | frames = [] 78 | if len(self.buffer) != 0: 79 | frames = self.buffer 80 | was_read = True 81 | while was_read and len(frames) < self.seq_length: 82 | was_read, frame = self.cap.read() 83 | frames.append(frame) 84 | if was_read and self.buffer_size != 0: 85 | self.buffer = frames[-self.buffer_size :] 86 | 87 | task.add_frames(self.id, frames) 88 | task.num_buffer_frames = 0 if self.id == 0 else self.buffer_size 89 | 90 | return was_read, task 91 | 92 | def get_output_file(self, path, fps=30): 93 | """ 94 | Return a video writer object. 95 | Args: 96 | path (str): path to the output video file. 97 | fps (int or float): frames per second. 98 | """ 99 | return cv2.VideoWriter( 100 | filename=path, 101 | fourcc=cv2.VideoWriter_fourcc(*"mp4v"), 102 | fps=float(fps), 103 | frameSize=(self.display_width, self.display_height), 104 | isColor=True, 105 | ) 106 | 107 | def display(self, frame): 108 | """ 109 | Either display a single frame (BGR image) to a window or write to 110 | an output file if output path is provided. 111 | """ 112 | if self.output_file is None: 113 | cv2.imshow("SlowFast", frame) 114 | else: 115 | self.output_file.write(frame) 116 | 117 | def clean(self): 118 | """ 119 | Clean up open video files and windows. 120 | """ 121 | self.cap.release() 122 | if self.output_file is None: 123 | cv2.destroyAllWindows() 124 | else: 125 | self.output_file.release() 126 | -------------------------------------------------------------------------------- /slowfast/visualization/predictor.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 3 | 4 | import cv2 5 | import torch 6 | from detectron2 import model_zoo 7 | from detectron2.config import get_cfg 8 | from detectron2.engine import DefaultPredictor 9 | 10 | import slowfast.utils.checkpoint as cu 11 | from slowfast.datasets import cv2_transform 12 | from slowfast.models import build_model 13 | from slowfast.utils import logging 14 | from slowfast.visualization.utils import process_cv2_inputs 15 | 16 | logger = logging.get_logger(__name__) 17 | 18 | 19 | class Predictor: 20 | """ 21 | Action Predictor for action recognition. 22 | """ 23 | 24 | def __init__(self, cfg, gpu_id=None): 25 | """ 26 | Args: 27 | cfg (CfgNode): configs. Details can be found in 28 | slowfast/config/defaults.py 29 | gpu_id (Optional[int]): GPU id. 30 | """ 31 | if cfg.NUM_GPUS: 32 | self.gpu_id = torch.cuda.current_device() if gpu_id is None else gpu_id 33 | 34 | # Build the video model and print model statistics. 35 | self.model = build_model(cfg, gpu_id=gpu_id) 36 | self.model.eval() 37 | self.cfg = cfg 38 | 39 | if cfg.DETECTION.ENABLE: 40 | self.object_detector = Detectron2Predictor(cfg, gpu_id=self.gpu_id) 41 | 42 | logger.info("Start loading model weights.") 43 | cu.load_test_checkpoint(cfg, self.model) 44 | logger.info("Finish loading model weights") 45 | 46 | def __call__(self, task): 47 | """ 48 | Returns the prediction results for the current task. 49 | Args: 50 | task (TaskInfo object): task object that contain 51 | the necessary information for action prediction. (e.g. 
52 |         Returns:
53 |             task (TaskInfo object): the same task info object but filled with
54 |                 prediction values (a tensor) and the corresponding boxes for
55 |                 action detection task.
56 |         """
57 |         if self.cfg.DETECTION.ENABLE:
58 |             task = self.object_detector(task)
59 | 
60 |         frames, bboxes = task.frames, task.bboxes
61 |         if bboxes is not None:
62 |             bboxes = cv2_transform.scale_boxes(
63 |                 self.cfg.DATA.TEST_CROP_SIZE,
64 |                 bboxes,
65 |                 task.img_height,
66 |                 task.img_width,
67 |             )
68 |         if self.cfg.DEMO.INPUT_FORMAT == "BGR":
69 |             frames = [
70 |                 cv2.cvtColor(frame, cv2.COLOR_BGR2RGB) for frame in frames
71 |             ]
72 | 
73 |         frames = [
74 |             cv2_transform.scale(self.cfg.DATA.TEST_CROP_SIZE, frame)
75 |             for frame in frames
76 |         ]
77 |         inputs = process_cv2_inputs(frames, self.cfg)
78 |         if bboxes is not None:
79 |             index_pad = torch.full(
80 |                 size=(bboxes.shape[0], 1),
81 |                 fill_value=float(0),
82 |                 device=bboxes.device,
83 |             )
84 | 
85 |             # Pad frame index for each box.
86 |             bboxes = torch.cat([index_pad, bboxes], axis=1)
87 |         if self.cfg.NUM_GPUS > 0:
88 |             # Transfer the data to the current GPU device.
89 |             if isinstance(inputs, (list,)):
90 |                 for i in range(len(inputs)):
91 |                     inputs[i] = inputs[i].cuda(
92 |                         device=torch.device(self.gpu_id), non_blocking=True
93 |                     )
94 |             else:
95 |                 inputs = inputs.cuda(
96 |                     device=torch.device(self.gpu_id), non_blocking=True
97 |                 )
98 |         if self.cfg.DETECTION.ENABLE and not bboxes.shape[0]:
99 |             preds = torch.tensor([])
100 |         else:
101 |             preds = self.model(inputs, bboxes)
102 | 
103 |         if self.cfg.NUM_GPUS:
104 |             preds = preds.cpu()
105 |             if bboxes is not None:
106 |                 bboxes = bboxes.detach().cpu()
107 | 
108 |         preds = preds.detach()
109 |         task.add_action_preds(preds)
110 |         if bboxes is not None:
111 |             task.add_bboxes(bboxes[:, 1:])
112 | 
113 |         return task
114 | 
115 | 
116 | class ActionPredictor:
117 |     """
118 |     Synchronous Action Prediction and Visualization pipeline with AsyncVis.
119 |     """
120 |     def __init__(self, cfg, async_vis=None, gpu_id=None):
121 |         """
122 |         Args:
123 |             cfg (CfgNode): configs. Details can be found in
124 |                 slowfast/config/defaults.py
125 |             async_vis (AsyncVis object): asynchronous visualizer.
126 |             gpu_id (Optional[int]): GPU id.
127 |         """
128 |         self.predictor = Predictor(cfg=cfg, gpu_id=gpu_id)
129 |         self.async_vis = async_vis
130 | 
131 |     def put(self, task):
132 |         """
133 |         Make prediction and put the results in `async_vis` task queue.
134 |         Args:
135 |             task (TaskInfo object): task object that contains
136 |                 the necessary information for action prediction. (e.g. frames, boxes)
137 |         """
138 |         task = self.predictor(task)
139 |         self.async_vis.put(task)
140 | 
141 | 
142 | class Detectron2Predictor:
143 |     """
144 |     Wrapper around Detectron2 to return the required predicted bounding boxes
145 |     as an ndarray.
146 |     """
147 | 
148 |     def __init__(self, cfg, gpu_id=None):
149 |         """
150 |         Args:
151 |             cfg (CfgNode): configs. Details can be found in
152 |                 slowfast/config/defaults.py
153 |             gpu_id (Optional[int]): GPU id.
154 | """ 155 | 156 | self.cfg = get_cfg() 157 | self.cfg.merge_from_file( 158 | model_zoo.get_config_file(cfg.DEMO.DETECTRON2_CFG) 159 | ) 160 | self.cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = cfg.DEMO.DETECTRON2_THRESH 161 | self.cfg.MODEL.WEIGHTS = cfg.DEMO.DETECTRON2_WEIGHTS 162 | self.cfg.INPUT.FORMAT = cfg.DEMO.INPUT_FORMAT 163 | if cfg.NUM_GPUS and gpu_id is None: 164 | gpu_id = torch.cuda.current_device() 165 | self.cfg.MODEL.DEVICE = ( 166 | "cuda:{}".format(gpu_id) if cfg.NUM_GPUS > 0 else "cpu" 167 | ) 168 | 169 | logger.info("Initialized Detectron2 Object Detection Model.") 170 | 171 | self.predictor = DefaultPredictor(self.cfg) 172 | 173 | def __call__(self, task): 174 | """ 175 | Return bounding boxes predictions as a tensor. 176 | Args: 177 | task (TaskInfo object): task object that contain 178 | the necessary information for action prediction. (e.g. frames, boxes) 179 | Returns: 180 | task (TaskInfo object): the same task info object but filled with 181 | prediction values (a tensor) and the corresponding boxes for 182 | action detection task. 183 | """ 184 | middle_frame = task.frames[len(task.frames) // 2] 185 | outputs = self.predictor(middle_frame) 186 | # Get only human instances 187 | mask = outputs["instances"].pred_classes == 0 188 | pred_boxes = outputs["instances"].pred_boxes.tensor[mask] 189 | task.add_bboxes(pred_boxes) 190 | 191 | return task 192 | -------------------------------------------------------------------------------- /tools/benchmark.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved 3 | """ 4 | A script to benchmark data loading. 5 | """ 6 | 7 | import slowfast.utils.logging as logging 8 | from slowfast.utils.benchmark import benchmark_data_loading 9 | from slowfast.utils.misc import launch_job 10 | from slowfast.utils.parser import load_config, parse_args 11 | 12 | logger = logging.get_logger(__name__) 13 | 14 | 15 | def main(): 16 | args = parse_args() 17 | cfg = load_config(args) 18 | 19 | launch_job( 20 | cfg=cfg, init_method=args.init_method, func=benchmark_data_loading 21 | ) 22 | 23 | 24 | if __name__ == "__main__": 25 | main() 26 | -------------------------------------------------------------------------------- /tools/demo_net.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 3 | 4 | import numpy as np 5 | import queue 6 | import cv2 7 | import torch 8 | import tqdm 9 | 10 | from slowfast.utils import logging 11 | from slowfast.visualization.async_predictor import ( 12 | AsycnActionPredictor, 13 | AsyncVis, 14 | ) 15 | from slowfast.visualization.ava_demo_precomputed_boxes import ( 16 | AVAVisualizerWithPrecomputedBox, 17 | ) 18 | from slowfast.visualization.demo_loader import VideoReader 19 | from slowfast.visualization.predictor import ActionPredictor 20 | from slowfast.visualization.video_visualizer import VideoVisualizer 21 | 22 | logger = logging.get_logger(__name__) 23 | 24 | 25 | def run_demo(cfg, frame_provider): 26 | """ 27 | Run demo visualization. 28 | Args: 29 | cfg (CfgNode): configs. Details can be found in 30 | slowfast/config/defaults.py 31 | frame_provider (iterator): Python iterator that return task objects that are filled 32 | with necessary information such as `frames`, `id` and `num_buffer_frames` for the 33 | prediction and visualization pipeline. 
34 | """ 35 | # Set random seed from configs. 36 | np.random.seed(cfg.RNG_SEED) 37 | torch.manual_seed(cfg.RNG_SEED) 38 | # Setup logging format. 39 | logging.setup_logging(cfg.OUTPUT_DIR) 40 | # Print config. 41 | logger.info("Run demo with config:") 42 | logger.info(cfg) 43 | 44 | common_classes = ( 45 | cfg.DEMO.COMMON_CLASS_NAMES 46 | if len(cfg.DEMO.LABEL_FILE_PATH) != 0 47 | else None 48 | ) 49 | 50 | video_vis = VideoVisualizer( 51 | num_classes=cfg.MODEL.NUM_CLASSES, 52 | class_names_path=cfg.DEMO.LABEL_FILE_PATH, 53 | top_k=cfg.TENSORBOARD.MODEL_VIS.TOPK_PREDS, 54 | thres=cfg.DEMO.COMMON_CLASS_THRES, 55 | lower_thres=cfg.DEMO.UNCOMMON_CLASS_THRES, 56 | common_class_names=common_classes, 57 | colormap=cfg.TENSORBOARD.MODEL_VIS.COLORMAP, 58 | mode=cfg.DEMO.VIS_MODE, 59 | ) 60 | 61 | async_vis = AsyncVis(video_vis, n_workers=cfg.DEMO.NUM_VIS_INSTANCES) 62 | 63 | if cfg.NUM_GPUS <= 1: 64 | model = ActionPredictor(cfg=cfg, async_vis=async_vis) 65 | else: 66 | model = AsycnActionPredictor(cfg, async_vis.task_queue) 67 | 68 | seq_len = cfg.DATA.NUM_FRAMES * cfg.DATA.SAMPLING_RATE 69 | 70 | assert ( 71 | cfg.DEMO.BUFFER_SIZE <= seq_len // 2 72 | ), "Buffer size cannot be greater than half of sequence length." 73 | num_task = 0 74 | for able_to_read, task in frame_provider: 75 | if not able_to_read: 76 | break 77 | num_task += 1 78 | 79 | model.put(task) 80 | 81 | try: 82 | frames = async_vis.get() 83 | num_task -= 1 84 | yield frames 85 | except queue.Empty: 86 | continue 87 | # hit Esc to quit the demo. 88 | key = cv2.waitKey(1) 89 | if key == 27: 90 | break 91 | 92 | while num_task != 0: 93 | try: 94 | frames = async_vis.get() 95 | num_task -= 1 96 | yield frames 97 | except queue.Empty: 98 | continue 99 | # hit Esc to quit the demo. 100 | key = cv2.waitKey(1) 101 | if key == 27: 102 | break 103 | 104 | 105 | def demo(cfg): 106 | """ 107 | Run inference on an input video or stream from webcam. 108 | Args: 109 | cfg (CfgNode): configs. Details can be found in 110 | slowfast/config/defaults.py 111 | """ 112 | # AVA format-specific visualization with precomputed boxes. 113 | if cfg.DETECTION.ENABLE and cfg.DEMO.PREDS_BOXES != "": 114 | precomputed_box_vis = AVAVisualizerWithPrecomputedBox(cfg) 115 | precomputed_box_vis() 116 | else: 117 | frame_provider = VideoReader(cfg) 118 | 119 | for frames in tqdm.tqdm(run_demo(cfg, frame_provider)): 120 | for frame in frames: 121 | frame_provider.display(frame) 122 | frame_provider.clean() 123 | -------------------------------------------------------------------------------- /tools/run_net.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 3 | 4 | """Wrapper to train and test a video classification model.""" 5 | import torch 6 | 7 | from slowfast.utils.misc import launch_job 8 | from slowfast.utils.parser import load_config, parse_args 9 | 10 | from demo_net import demo 11 | from test_net import test 12 | from train_net import train 13 | from visualization import visualize 14 | 15 | 16 | def main(): 17 | """ 18 | Main function to spawn the train and test process. 19 | """ 20 | args = parse_args() 21 | cfg = load_config(args) 22 | 23 | # Perform training. 24 | if cfg.TRAIN.ENABLE: 25 | launch_job(cfg=cfg, init_method=args.init_method, func=train) 26 | 27 | # Perform multi-clip testing. 28 | if cfg.TEST.ENABLE: 29 | launch_job(cfg=cfg, init_method=args.init_method, func=test) 30 | 31 | # Perform model visualization. 
32 |     if cfg.TENSORBOARD.ENABLE and cfg.TENSORBOARD.MODEL_VIS.ENABLE:
33 |         launch_job(cfg=cfg, init_method=args.init_method, func=visualize)
34 | 
35 |     # Run demo.
36 |     if cfg.DEMO.ENABLE:
37 |         demo(cfg)
38 | 
39 | 
40 | if __name__ == "__main__":
41 |     # torch.multiprocessing.set_start_method("forkserver")
42 |     main()
43 | 
--------------------------------------------------------------------------------
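For reference, a typical invocation of this wrapper might look like the following; the config path is from this repository's own configs directory, `NUM_GPUS` and `TRAIN.BATCH_SIZE` are standard keys in slowfast/config/defaults.py, and the trailing opts are merged by load_config above:

    python tools/run_net.py \
      --cfg configs/Kinetics/SLOWFAST_4x16_R50.yaml \
      NUM_GPUS 1 TRAIN.BATCH_SIZE 8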