├── .gitignore
├── .gitmodules
├── .style.yapf
├── CONTRIBUTING.md
├── DATASET.md
├── DOCKER.md
├── Dockerfile
├── GETTING_STARTED.md
├── INSTALL.md
├── ISSUES.md
├── LICENSE
├── MODEL_ZOO.md
├── README.md
├── compile.sh
├── configs
├── C3D
│ ├── c3d_sports1m_3d_rgb_vgg_c3d_seg1_f16s1.py
│ └── c3d_train01_16_128_171_mean.npy
├── I3D_RGB
│ ├── i3d_kinetics400_3d_rgb_r50_c3d_inflate3x1x1_seg1_f32s2.py
│ └── i3d_kinetics400_3d_rgb_r50_c3d_inflate3x1x1_seg1_f32s2_video.py
├── SlowOnly
│ ├── slowonly_kinetics400_se_rgb_r50_seg1_4x16_finetune.py
│ ├── slowonly_kinetics400_se_rgb_r50_seg1_4x16_scratch.py
│ ├── slowonly_kinetics400_se_rgb_r50_seg1_8x8_finetune.py
│ └── slowonly_kinetics400_se_rgb_r50_seg1_8x8_scratch.py
├── TSN
│ ├── tsn_kinetics400_2d_rgb_r50_seg3_f1s1.py
│ └── ucf101
│ │ ├── tsn_flow_bninception.py
│ │ └── tsn_rgb_bninception.py
├── ava
│ └── ava_fast_rcnn_nl_r50_c4_1x_kinetics_pretrain_crop.py
├── hmdb51
│ ├── tsn_flow_bninception.py
│ └── tsn_rgb_bninception.py
└── thumos14
│ └── ssn_thumos14_rgb_bn_inception.py
├── data
└── .placeholder
├── data_tools
├── ava
│ ├── PREPARING_AVA.md
│ ├── download_annotations.sh
│ ├── download_videos.sh
│ ├── download_videos_parallel.sh
│ ├── extract_frames.sh
│ ├── extract_rgb_frames.sh
│ ├── fetch_ava_proposals.sh
│ ├── obtain_video_resolution.sh
│ └── preprocess_videos.sh
├── build_file_list.py
├── build_rawframes.py
├── hmdb51
│ ├── PREPARING_HMDB51.md
│ ├── download_annotations.sh
│ ├── download_videos.sh
│ ├── extract_frames.sh
│ ├── extract_rgb_frames.sh
│ └── generate_filelist.sh
├── kinetics400
│ ├── PREPARING_KINETICS400.md
│ ├── download_annotations.sh
│ ├── download_videos.sh
│ ├── extract_frames.sh
│ ├── extract_rgb_frames.sh
│ ├── generate_rawframes_filelist.sh
│ ├── generate_video_filelist.sh
│ └── rename_classnames.sh
├── thumos14
│ ├── PREPARING_TH14.md
│ ├── download_annotations.sh
│ ├── download_videos.sh
│ ├── extracted_frames.sh
│ └── fetch_tag_proposals.sh
└── ucf101
│ ├── PREPARING_UCF101.md
│ ├── download_annotations.sh
│ ├── download_videos.sh
│ ├── extract_frames.sh
│ └── generate_filelist.sh
├── mmaction
├── __init__.py
├── apis
│ ├── __init__.py
│ ├── env.py
│ └── train.py
├── core
│ ├── __init__.py
│ ├── anchor2d
│ │ ├── __init__.py
│ │ ├── anchor_generator.py
│ │ └── anchor_target.py
│ ├── bbox1d
│ │ ├── __init__.py
│ │ └── geometry.py
│ ├── bbox2d
│ │ ├── __init__.py
│ │ ├── assign_sampling.py
│ │ ├── assigners
│ │ │ ├── __init__.py
│ │ │ ├── assign_result.py
│ │ │ ├── base_assigner.py
│ │ │ └── max_iou_assigner.py
│ │ ├── bbox_target.py
│ │ ├── geometry.py
│ │ ├── samplers
│ │ │ ├── __init__.py
│ │ │ ├── base_sampler.py
│ │ │ ├── pseudo_sampler.py
│ │ │ ├── random_sampler.py
│ │ │ └── sampling_result.py
│ │ └── transforms.py
│ ├── evaluation
│ │ ├── __init__.py
│ │ ├── accuracy.py
│ │ ├── ava_utils.py
│ │ ├── bbox_overlaps.py
│ │ ├── class_names.py
│ │ ├── eval_hooks.py
│ │ ├── localize_utils.py
│ │ └── recall.py
│ ├── post_processing
│ │ ├── __init__.py
│ │ ├── bbox_nms.py
│ │ └── merge_augs.py
│ └── utils
│ │ ├── __init__.py
│ │ └── dist_utils.py
├── datasets
│ ├── __init__.py
│ ├── ava_dataset.py
│ ├── feature_dataset.py
│ ├── lmdbframes_dataset.py
│ ├── loader
│ │ ├── __init__.py
│ │ ├── build_loader.py
│ │ └── sampler.py
│ ├── rawframes_dataset.py
│ ├── ssn_dataset.py
│ ├── transforms.py
│ ├── utils.py
│ └── video_dataset.py
├── losses
│ ├── __init__.py
│ ├── flow_losses.py
│ ├── losses.py
│ └── ssn_losses.py
├── models
│ ├── __init__.py
│ ├── builder.py
│ ├── detectors
│ │ ├── __init__.py
│ │ ├── base.py
│ │ ├── fast_rcnn.py
│ │ ├── faster_rcnn.py
│ │ ├── test_mixins.py
│ │ └── two_stage.py
│ ├── localizers
│ │ ├── SSN2D.py
│ │ ├── __init__.py
│ │ └── base.py
│ ├── recognizers
│ │ ├── TSN2D.py
│ │ ├── TSN3D.py
│ │ ├── __init__.py
│ │ └── base.py
│ ├── registry.py
│ └── tenons
│ │ ├── anchor_heads
│ │ ├── __init__.py
│ │ ├── anchor_head.py
│ │ └── rpn_head.py
│ │ ├── backbones
│ │ ├── __init__.py
│ │ ├── bninception.py
│ │ ├── c3d.py
│ │ ├── inception_v1_i3d.py
│ │ ├── resnet.py
│ │ ├── resnet_i3d.py
│ │ ├── resnet_i3d_slowfast.py
│ │ ├── resnet_r3d.py
│ │ └── resnet_s3d.py
│ │ ├── bbox_heads
│ │ ├── __init__.py
│ │ └── bbox_head.py
│ │ ├── cls_heads
│ │ ├── __init__.py
│ │ ├── cls_head.py
│ │ └── ssn_head.py
│ │ ├── flownets
│ │ ├── __init__.py
│ │ └── motionnet.py
│ │ ├── necks
│ │ ├── __init__.py
│ │ └── fpn.py
│ │ ├── roi_extractors
│ │ ├── __init__.py
│ │ ├── single_level.py
│ │ └── single_level_straight3d.py
│ │ ├── segmental_consensuses
│ │ ├── TODO.md
│ │ ├── __init__.py
│ │ ├── simple_consensus.py
│ │ └── stpp.py
│ │ ├── shared_heads
│ │ ├── __init__.py
│ │ ├── res_i3d_layer.py
│ │ └── res_layer.py
│ │ ├── spatial_temporal_modules
│ │ ├── __init__.py
│ │ ├── non_local.py
│ │ ├── simple_spatial_module.py
│ │ ├── simple_spatial_temporal_module.py
│ │ └── slowfast_spatial_temporal_module.py
│ │ └── utils
│ │ ├── __init__.py
│ │ ├── conv_module.py
│ │ ├── nonlocal_block.py
│ │ ├── norm.py
│ │ └── resnet_r3d_utils.py
├── ops
│ ├── __init__.py
│ ├── nms
│ │ ├── __init__.py
│ │ ├── nms_wrapper.py
│ │ ├── setup.py
│ │ └── src
│ │ │ ├── nms_cpu.cpp
│ │ │ ├── nms_cuda.cpp
│ │ │ ├── nms_kernel.cu
│ │ │ ├── soft_nms_cpu.cpp
│ │ │ └── soft_nms_cpu.pyx
│ ├── resample2d_package
│ │ ├── __init__.py
│ │ ├── resample2d.py
│ │ ├── resample2d_cuda.cc
│ │ ├── resample2d_kernel.cu
│ │ ├── resample2d_kernel.cuh
│ │ └── setup.py
│ ├── roi_align
│ │ ├── __init__.py
│ │ ├── functions
│ │ │ ├── __init__.py
│ │ │ └── roi_align.py
│ │ ├── gradcheck.py
│ │ ├── modules
│ │ │ ├── __init__.py
│ │ │ └── roi_align.py
│ │ ├── setup.py
│ │ └── src
│ │ │ ├── roi_align_cuda.cpp
│ │ │ └── roi_align_kernel.cu
│ ├── roi_pool
│ │ ├── __init__.py
│ │ ├── functions
│ │ │ ├── __init__.py
│ │ │ └── roi_pool.py
│ │ ├── gradcheck.py
│ │ ├── modules
│ │ │ ├── __init__.py
│ │ │ └── roi_pool.py
│ │ ├── setup.py
│ │ └── src
│ │ │ ├── roi_pool_cuda.cpp
│ │ │ └── roi_pool_kernel.cu
│ └── trajectory_conv_package
│ │ ├── __init__.py
│ │ ├── deform_3d_conv_cuda_kernel.cu
│ │ ├── deform_3d_conv_cuda_kernel.h
│ │ ├── gradcheck.py
│ │ ├── setup.py
│ │ ├── traj_conv.py
│ │ └── traj_conv_cuda.cpp
└── utils
│ └── misc.py
├── modelzoo
└── .placeholder
├── setup.py
├── test_configs
├── CSN
│ ├── ipcsn_kinetics400_se_rgb_r152_seg1_32x2.py
│ └── ircsn_kinetics400_se_rgb_r152_seg1_32x2.py
├── I3D_Flow
│ ├── i3d_hmdb51_3d_tvl1_inception_v1_seg1_f64s1.py
│ ├── i3d_kinetics400_3d_tvl1_inception_v1_seg1_f64s1.py
│ └── i3d_ucf101_3d_tvl1_inception_v1_seg1_f64s1.py
├── I3D_RGB
│ ├── i3d_hmdb51_3d_rgb_inception_v1_seg1_f64s1.py
│ ├── i3d_kinetics400_3d_rgb_inception_v1_seg1_f64s1.py
│ ├── i3d_kinetics400_3d_rgb_r50_c3d_inflate3x1x1_seg1_f32s2.py
│ └── i3d_ucf101_3d_rgb_inception_v1_seg1_f64s1.py
├── R2plus1D
│ ├── r2plus1d_kinetics400_se_rgb_r34_seg1_32x2.py
│ └── r2plus1d_kinetics400_se_rgb_r34_seg1_8x8.py
├── SlowFast
│ └── slowfast_kinetics400_se_rgb_r50_seg1_4x16.py
├── SlowOnly
│ ├── slowonly_kinetics400_se_rgb_r101_seg1_8x8.py
│ ├── slowonly_kinetics400_se_rgb_r50_seg1_4x16.py
│ └── slowonly_kinetics400_se_rgb_r50_seg1_8x8.py
├── TSN
│ ├── tsn_kinetics400_2d_rgb_r50_seg3_f1s1.py
│ └── ucf101
│ │ ├── tsn_flow_bninception.py
│ │ └── tsn_rgb_bninception.py
├── ava
│ └── ava_fast_rcnn_nl_r50_c4_1x_kinetics_pretrain_crop.py
└── thumos14
│ └── ssn_thumos14_rgb_bn_inception.py
└── tools
├── dist_test_detector.sh
├── dist_test_recognizer.sh
├── dist_test_recognizer_heavy.sh
├── dist_train_detector.sh
├── dist_train_localizer.sh
├── dist_train_recognizer.sh
├── eval_localize_results.py
├── generate_lmdb.py
├── test_detector.py
├── test_localizer.py
├── test_recognizer.py
├── test_recognizer_heavy.py
├── train_detector.py
├── train_localizer.py
└── train_recognizer.py
/.gitignore:
--------------------------------------------------------------------------------
1 | # Byte-compiled / optimized / DLL files
2 | __pycache__/
3 | *.py[cod]
4 | *$py.class
5 |
6 | # C extensions
7 | *.so
8 |
9 | # Distribution / packaging
10 | .Python
11 | build/
12 | develop-eggs/
13 | dist/
14 | downloads/
15 | eggs/
16 | .eggs/
17 | lib/
18 | lib64/
19 | parts/
20 | sdist/
21 | var/
22 | wheels/
23 | *.egg-info/
24 | .installed.cfg
25 | *.egg
26 | MANIFEST
27 |
28 | # PyInstaller
29 | # Usually these files are written by a python script from a template
30 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
31 | *.manifest
32 | *.spec
33 |
34 | # Installer logs
35 | pip-log.txt
36 | pip-delete-this-directory.txt
37 |
38 | # Unit test / coverage reports
39 | htmlcov/
40 | .tox/
41 | .coverage
42 | .coverage.*
43 | .cache
44 | nosetests.xml
45 | coverage.xml
46 | *.cover
47 | .hypothesis/
48 | .pytest_cache/
49 |
50 | # Translations
51 | *.mo
52 | *.pot
53 |
54 | # Django stuff:
55 | *.log
56 | local_settings.py
57 | db.sqlite3
58 |
59 | # Flask stuff:
60 | instance/
61 | .webassets-cache
62 |
63 | # Scrapy stuff:
64 | .scrapy
65 |
66 | # Sphinx documentation
67 | docs/_build/
68 |
69 | # PyBuilder
70 | target/
71 |
72 | # Jupyter Notebook
73 | .ipynb_checkpoints
74 |
75 | # pyenv
76 | .python-version
77 |
78 | # celery beat schedule file
79 | celerybeat-schedule
80 |
81 | # SageMath parsed files
82 | *.sage.py
83 |
84 | # Environments
85 | .env
86 | .venv
87 | env/
88 | venv/
89 | ENV/
90 | env.bak/
91 | venv.bak/
92 |
93 | # Spyder project settings
94 | .spyderproject
95 | .spyproject
96 |
97 | # Rope project settings
98 | .ropeproject
99 |
100 | # mkdocs documentation
101 | /site
102 |
103 | # mypy
104 | .mypy_cache/
105 |
106 | # cython generated cpp
107 | mmaction/version.py
108 | data
109 | .vscode
110 | .idea
111 |
--------------------------------------------------------------------------------
/.gitmodules:
--------------------------------------------------------------------------------
1 | [submodule "third_party/ActivityNet"]
2 | path = mmaction/third_party/ActivityNet
3 | url = https://github.com/zhaoyue-zephyrus/ActivityNet
4 | [submodule "mmaction/third_party/decord"]
5 | path = third_party/decord
6 | url = https://github.com/zhreshold/decord.git
7 | [submodule "mmaction/third_party/dense_flow"]
8 | path = third_party/dense_flow
9 | url = https://github.com/yjxiong/dense_flow
10 | branch = master
11 |
--------------------------------------------------------------------------------
/.style.yapf:
--------------------------------------------------------------------------------
1 | [style]
2 | BASED_ON_STYLE = pep8
3 | BLANK_LINE_BEFORE_NESTED_CLASS_OR_DEF = true
4 | SPLIT_BEFORE_EXPRESSION_AFTER_OPENING_PAREN = true
5 |
--------------------------------------------------------------------------------
/CONTRIBUTING.md:
--------------------------------------------------------------------------------
1 | # Contributing to MMAction
2 |
3 | All kinds of contributions are welcome, including but not limited to the following.
4 |
5 | - Fixes (typo, bugs)
6 | - New features and components
7 |
8 | ## Workflow
9 |
10 | 1. fork and pull the latest mmaction
11 | 2. checkout a new branch (do not use master branch for PRs)
12 | 3. commit your changes
13 | 4. create a PR
14 |
15 | Note
16 | - If you plan to add some new features that involve large changes, it is encouraged to open an issue for discussion first.
17 | - If you are the author of some papers and would like to include your method in mmaction,
18 | please contact Yue Zhao (thuzhaoyue@gmail). Your contribution will be much appreciated.
19 |
20 | ## Code style
21 |
22 | ### Python
23 | We adopt [PEP8](https://www.python.org/dev/peps/pep-0008/) as the preferred code style.
24 | We use [flake8](http://flake8.pycqa.org/en/latest/) as the linter and [yapf](https://github.com/google/yapf) as the formatter.
25 | Please upgrade to the latest yapf (>=0.27.0) and refer to the [configuration](.style.yapf).
26 |
27 | >Before you create a PR, make sure that your code lints and is formatted by yapf.
28 |
29 | ### C++ and CUDA
30 | We follow the [Google C++ Style Guide](https://google.github.io/styleguide/cppguide.html)
31 |
--------------------------------------------------------------------------------
/DOCKER.md:
--------------------------------------------------------------------------------
1 | # Using Docker to set up the mmaction environment
2 |
3 | ## Requirements
4 |
5 | We have tested building on Ubuntu 18.04 LTS with Docker 19.03.1 (Docker API version 1.40). To build the Docker image, you will need:
6 |
7 | - Docker Engine
8 | - nvidia-docker (to start container with GPUs)
9 | - Disk space (a lot)
10 |
11 | ## Install Docker Engine (Ubuntu version)
12 |
13 | ```
14 | $ curl -fsSL https://get.docker.com -o get-docker.sh
15 | $ sh get-docker.sh
16 | ```
17 |
18 | ## Install Nvidia-Docker
19 |
20 | Follow the instructions at [nvidia-docker](https://github.com/NVIDIA/nvidia-docker) to install it.
21 |
22 | ## Build the images
23 |
24 | The ```Dockerfile``` is provided in [this](https://github.com/open-mmlab/mmaction) repository, so you can either copy the file and build the image manually, or clone the repository and build from it.
25 |
26 | ```
27 | $ git clone --recursive https://github.com/open-mmlab/mmaction
28 | $ cd mmaction
29 | $ docker build -t mmaction .
30 | ```
31 |
32 | If the image does not build successfully, or if you want to modify the code, you can instead clone the repository manually inside the container, following the next step below.
33 |
34 | ## Run container from images
35 |
36 | ```
37 | $ docker run --name mmaction --gpus all -it -v /path/to/your/data:/root mmaction
38 | ```
39 |
40 | Once the container is running, please follow [GETTING_STARTED.md](https://github.com/open-mmlab/mmaction/blob/master/GETTING_STARTED.md) to use mmaction.
--------------------------------------------------------------------------------
/ISSUES.md:
--------------------------------------------------------------------------------
1 | # Known Issues
2 |
3 | 1. Error on RTX cards with CUDA 10.0
4 |
5 | Description:
6 |
7 | THCudaCheck FAIL file=/pytorch/aten/src/THC/THCGeneral.cpp line=405 error=11 : invalid argument #15797
8 |
9 | Solution: Install the PyTorch wheel built with CUDA 10.0 via
10 |
11 | ```bash
12 | pip install https://download.pytorch.org/whl/cu100/torch-1.0.0-cp36-cp36m-linux_x86_64.whl
13 | ```
14 |
--------------------------------------------------------------------------------
/compile.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | PYTHON=${PYTHON:-"python"}
4 |
5 | echo "Building package resample2d"
6 | cd ./mmaction/ops/resample2d_package
7 | if [ -d "build" ]; then
8 | rm -r build
9 | fi
10 |
11 | $PYTHON setup.py install --user
12 |
13 | echo "Building package trajectory_conv..."
14 | cd ../trajectory_conv_package
15 | if [ -d "build" ]; then
16 | rm -r build
17 | fi
18 |
19 | $PYTHON setup.py install --user
20 |
21 | echo "Building roi align op..."
22 | cd ../roi_align
23 | if [ -d "build" ]; then
24 | rm -r build
25 | fi
26 | $PYTHON setup.py build_ext --inplace
27 |
28 | echo "Building roi pool op..."
29 | cd ../roi_pool
30 | if [ -d "build" ]; then
31 | rm -r build
32 | fi
33 | $PYTHON setup.py build_ext --inplace
34 |
35 | echo "Building nms op..."
36 | cd ../nms
37 | if [ -d "build" ]; then
38 | rm -r build
39 | fi
40 | $PYTHON setup.py build_ext --inplace
41 |
--------------------------------------------------------------------------------
/configs/C3D/c3d_train01_16_128_171_mean.npy:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/open-mmlab/mmaction/c7e3b7c11fb94131be9b48a8e3d510589addc3ce/configs/C3D/c3d_train01_16_128_171_mean.npy
--------------------------------------------------------------------------------
/configs/hmdb51/tsn_rgb_bninception.py:
--------------------------------------------------------------------------------
1 | # model settings
2 | model = dict(
3 | type='TSN2D',
4 | backbone=dict(
5 | type='BNInception',
6 | pretrained='open-mmlab://bninception_caffe',
7 | bn_eval=False,
8 | partial_bn=True),
9 | spatial_temporal_module=dict(
10 | type='SimpleSpatialModule',
11 | spatial_type='avg',
12 | spatial_size=7),
13 | segmental_consensus=dict(
14 | type='SimpleConsensus',
15 | consensus_type='avg'),
16 | cls_head=dict(
17 | type='ClsHead',
18 | with_avg_pool=False,
19 | temporal_feature_size=1,
20 | spatial_feature_size=1,
21 | dropout_ratio=0.8,
22 | in_channels=1024,
23 | init_std=0.001,
24 | num_classes=51))
25 | train_cfg = None
26 | test_cfg = None
27 | # dataset settings
28 | dataset_type = 'RawFramesDataset'
29 | data_root = 'data/hmdb51/rawframes'
30 | img_norm_cfg = dict(
31 | mean=[104, 117, 128], std=[1, 1, 1], to_rgb=False)
32 |
33 | data = dict(
34 | videos_per_gpu=32,
35 | workers_per_gpu=2,
36 | train=dict(
37 | type=dataset_type,
38 | ann_file='data/hmdb51/hmdb51_train_split_1_rawframes.txt',
39 | img_prefix=data_root,
40 | img_norm_cfg=img_norm_cfg,
41 | num_segments=3,
42 | new_length=1,
43 | new_step=1,
44 | random_shift=True,
45 | modality='RGB',
46 | image_tmpl='img_{:05d}.jpg',
47 | img_scale=256,
48 | input_size=224,
49 | div_255=False,
50 | flip_ratio=0.5,
51 | resize_keep_ratio=True,
52 | oversample=None,
53 | random_crop=False,
54 | more_fix_crop=False,
55 | multiscale_crop=True,
56 | scales=[1, 0.875, 0.75, 0.66],
57 | max_distort=1,
58 | test_mode=False),
59 | val=dict(
60 | type=dataset_type,
61 | ann_file='data/hmdb51/hmdb51_val_split_1_rawframes.txt',
62 | img_prefix=data_root,
63 | img_norm_cfg=img_norm_cfg,
64 | num_segments=3,
65 | new_length=1,
66 | new_step=1,
67 | random_shift=False,
68 | modality='RGB',
69 | image_tmpl='img_{:05d}.jpg',
70 | img_scale=256,
71 | input_size=224,
72 | div_255=False,
73 | flip_ratio=0,
74 | resize_keep_ratio=True,
75 | oversample=None,
76 | random_crop=False,
77 | more_fix_crop=False,
78 | multiscale_crop=False,
79 | test_mode=False),
80 | test=dict(
81 | type=dataset_type,
82 | ann_file='data/hmdb51/hmdb51_val_split_1_rawframes.txt',
83 | img_prefix=data_root,
84 | img_norm_cfg=img_norm_cfg,
85 | num_segments=25,
86 | new_length=1,
87 | new_step=1,
88 | random_shift=False,
89 | modality='RGB',
90 | image_tmpl='img_{:05d}.jpg',
91 | img_scale=256,
92 | input_size=224,
93 | div_255=False,
94 | flip_ratio=0,
95 | resize_keep_ratio=True,
96 | oversample='ten_crop',
97 | random_crop=False,
98 | more_fix_crop=False,
99 | multiscale_crop=False,
100 | test_mode=True))
101 | # optimizer
102 | optimizer = dict(type='SGD', lr=0.001, momentum=0.9, weight_decay=0.0005)
103 | optimizer_config = dict(grad_clip=dict(max_norm=40, norm_type=2))
104 | # learning policy
105 | lr_config = dict(
106 | policy='step',
107 | step=[30, 60])
108 | checkpoint_config = dict(interval=1)
109 | # workflow = [('train', 5), ('val', 1)]
110 | workflow = [('train', 1)]
111 | # yapf:disable
112 | log_config = dict(
113 | interval=20,
114 | hooks=[
115 | dict(type='TextLoggerHook'),
116 | # dict(type='TensorboardLoggerHook')
117 | ])
118 | # yapf:enable
119 | # runtime settings
120 | total_epochs = 80
121 | dist_params = dict(backend='nccl')
122 | log_level = 'INFO'
123 | work_dir = './work_dirs/tsn_2d_rgb_bninception_seg_3_f1s1_b32_g8'
124 | load_from = None
125 | resume_from = None
126 |
--------------------------------------------------------------------------------
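The config above is an ordinary Python file, so it can be loaded and inspected programmatically before launching training. Below is a minimal sketch, assuming mmcv is installed and the repository root is the working directory; the printed fields are just examples of what the file defines.

```python
# Minimal sketch: load and inspect an mmaction config with mmcv.
from mmcv import Config

cfg = Config.fromfile('configs/hmdb51/tsn_rgb_bninception.py')

# The config behaves like a nested dict mirroring the file above.
print(cfg.model.type)                   # 'TSN2D'
print(cfg.model.cls_head.num_classes)   # 51 classes for HMDB51
print(cfg.data.videos_per_gpu)          # per-GPU batch size for the data loader
```

The training and testing scripts under `tools/` typically consume configs in this way, overriding fields such as `work_dir` or `resume_from` from the command line.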
/data/.placeholder:
--------------------------------------------------------------------------------
1 |
2 |
--------------------------------------------------------------------------------
/data_tools/ava/PREPARING_AVA.md:
--------------------------------------------------------------------------------
1 | ## Preparing AVA
2 |
3 | For more details, please refer to the [official website](https://research.google.com/ava/). We provide scripts with documentation. Before we start, please make sure that the current directory is `$MMACTION/data_tools/ava/`.
4 |
5 | ### Prepare annotations
6 | First of all, run the following script to prepare annotations.
7 | ```shell
8 | bash download_annotations.sh
9 | ```
10 |
11 | ### Prepare videos
12 | Then, use the following script to prepare videos. The code is adapted from the [official crawler](https://github.com/cvdfoundation/ava-dataset). Note that this might take a long time.
13 | ```shell
14 | bash download_videos.sh
15 | ```
16 | Note that if you have sudo privileges or already have [GNU parallel](https://www.gnu.org/software/parallel/) [1](#1) installed on your machine, you can speed up the procedure by downloading in parallel.
17 |
18 | ```shell
19 | # sudo apt-get install parallel
20 | bash download_videos_parallel.sh
21 | ```
22 |
23 | ### Preprocess videos
24 | The videos vary in length, while the annotations only cover the interval from 15 min to 30 min.
25 | Therefore, we can preprocess the videos to save storage and processing time afterwards.
26 | Run the following script to trim the videos into 17-minute segments (from 00:14:00 to 00:31:00), adjust the frame rate to 30 FPS, and rescale the height to 480 pixels.
27 |
28 | ```shell
29 | bash preprocess_videos.sh
30 | ```
31 |
32 |
33 | ### Extract frames
34 | Now it is time to extract frames from videos.
35 | Before extraction, please refer to `DATASET.md` for installing [dense_flow](https://github.com/yjxiong/dense_flow).
36 | If you have an SSD, then we strongly recommend extracting frames there for better I/O performance.
37 | ```shell
38 | # execute these two lines (assuming the SSD is mounted at "/mnt/SSD/")
39 | mkdir /mnt/SSD/ava_extracted/
40 | ln -s /mnt/SSD/ava_extracted/ ../data/ava/rawframes/
41 | ```
42 | Afterwards, run the following script to extract frames.
43 | ```shell
44 | bash extract_frames.sh
45 | ```
46 | If you only want to play with RGB frames (since extracting optical flow can be both time-consuming and space-hogging), consider running the following script to extract **RGB-only** frames.
47 | ```shell
48 | bash extract_rgb_frames.sh
49 | ```
50 |
51 |
52 | ### Fetch proposal files and other metadata files
53 | Run the following script to fetch the pre-computed proposal lists.
54 | The proposals are adapted from FAIR's [Long-Term Feature Banks](https://github.com/facebookresearch/video-long-term-feature-banks).
55 | ```shell
56 | bash fetch_ava_proposals.sh
57 | ```
58 | In addition, we use the following script to obtain the resolutions of all videos, since they come in varying aspect ratios (a small sketch for parsing the resulting file is given after this document).
59 | ```shell
60 | bash obtain_video_resolution.sh
61 | ```
62 |
63 | ### Folder structure
64 | In the context of the whole project (for ava only), the folder structure will look like:
65 |
66 | ```
67 | mmaction
68 | ├── mmaction
69 | ├── tools
70 | ├── configs
71 | ├── data
72 | │ ├── ava
73 | │ │ ├── ava_video_resolution_stats.csv
74 | │ │ ├── ava_dense_proposals_train.FAIR.recall_93.9.pkl
75 | │ │ ├── ava_dense_proposals_val.FAIR.recall_93.9.pkl
76 | │ │ ├── annotations
77 | │ │ ├── videos_trainval
78 | │ │ │ ├── 053oq2xB3oU.mkv
79 | │ │ │ ├── 0f39OWEqJ24.mp4
80 | │ │ │ ├── ...
81 | │ │ ├── videos_trimmed_trainval
82 | │ │ │ ├── 053oq2xB3oU.mp4
83 | │ │ │ ├── 0f39OWEqJ24.mp4
84 | │ │ │ ├── ...
85 | │ │ ├── rawframes
86 | │ │ │ ├── 053oq2xB3oU.mp4
87 | | │ │ │ ├── img_00001.jpg
88 | | │ │ │ ├── img_00002.jpg
89 | | │ │ │ ├── ...
90 | ```
91 |
92 | For training and evaluating on AVA, please refer to [GETTING_STARTED.md](https://github.com/open-mmlab/mmaction/blob/master/GETTING_STARTED.md).
93 |
94 |
95 | Reference
96 |
97 | [1] O. Tange (2018): GNU Parallel 2018, March 2018, https://doi.org/10.5281/zenodo.1146014
--------------------------------------------------------------------------------
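As a companion to the `obtain_video_resolution.sh` step in the document above, here is a minimal sketch for parsing the resulting `ava_video_resolution_stats.csv`. It assumes each valid line has the form `<video_id> <width>x<height>`, which is what the script's `echo ${vid} ${resolution}` produces; the path is only an example.

```python
# Minimal sketch: parse the resolution stats written by obtain_video_resolution.sh.
# Assumes lines of the form "<video_id> <width>x<height>", e.g. "053oq2xB3oU 640x480".
def load_video_resolutions(path='data/ava/ava_video_resolution_stats.csv'):
    resolutions = {}
    with open(path) as f:
        for line in f:
            parts = line.strip().split()
            if len(parts) != 2 or 'x' not in parts[1]:
                continue  # skip empty or malformed lines (e.g. ffprobe warnings)
            vid, res = parts
            width, height = res.split('x')
            resolutions[vid] = (int(width), int(height))
    return resolutions


if __name__ == '__main__':
    res = load_video_resolutions()
    print(len(res), 'videos with a known resolution')
```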
/data_tools/ava/download_annotations.sh:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 |
3 | DATA_DIR="../../data/ava/annotations"
4 |
5 | if [[ ! -d "${DATA_DIR}" ]]; then
6 | echo "${DATA_DIR} does not exist. Creating";
7 | mkdir -p ${DATA_DIR}
8 | fi
9 |
10 | wget https://research.google.com/ava/download/ava_v2.1.zip
11 | unzip -j ava_v2.1.zip -d ${DATA_DIR}/
12 | rm ava_v2.1.zip
--------------------------------------------------------------------------------
/data_tools/ava/download_videos.sh:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 |
3 | wget -c https://s3.amazonaws.com/ava-dataset/annotations/ava_file_names_trainval_v2.1.txt -P ../../data/ava/annotations/
4 |
5 |
6 | cat ../../data/ava/annotations/ava_file_names_trainval_v2.1.txt | while read vid; do wget -c "https://s3.amazonaws.com/ava-dataset/trainval/${vid}" -P ../../data/ava/videos_trainval/; done
7 |
8 | echo "Downloading finished."
--------------------------------------------------------------------------------
/data_tools/ava/download_videos_parallel.sh:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 |
3 | wget -c https://s3.amazonaws.com/ava-dataset/annotations/ava_file_names_trainval_v2.1.txt -P ../../data/ava/annotations/
4 |
5 |
6 | # sudo apt-get install parallel
7 | # parallel downloading to speed up
8 | awk '{print "https://s3.amazonaws.com/ava-dataset/trainval/"$0}' ../../data/ava/annotations/ava_file_names_trainval_v2.1.txt | parallel -j 8 wget -c -q {} -P ../../data/ava/videos_trainval/
9 | echo "Parallel downloading finished."
--------------------------------------------------------------------------------
/data_tools/ava/extract_frames.sh:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 |
3 | cd ../
4 | python build_rawframes.py ../data/ava/videos_trimmed_trainval/ ../data/ava/rawframes/ --level 1 --flow_type tvl1 --ext mp4
5 | echo "Raw frames (RGB and tv-l1) Generated for train+val set"
6 |
7 | cd ava/
8 |
--------------------------------------------------------------------------------
/data_tools/ava/extract_rgb_frames.sh:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 |
3 | cd ../
4 | python build_rawframes.py ../data/ava/videos_trimmed_trainval/ ../data/ava/rawframes/ --level 1 --ext mp4
5 | echo "Raw frames (RGB only) generated for train and val set"
6 |
7 | cd ava/
--------------------------------------------------------------------------------
/data_tools/ava/fetch_ava_proposals.sh:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 |
3 | wget https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmaction/filelist/ava_dense_proposals_train.FAIR.recall_93.9.pkl -P ../../data/ava/
4 | wget https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmaction/filelist/ava_dense_proposals_val.FAIR.recall_93.9.pkl -P ../../data/ava/
5 | wget https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmaction/filelist/ava_dense_proposals_test.FAIR.recall_93.9.pkl -P ../../data/ava/
6 |
--------------------------------------------------------------------------------
/data_tools/ava/obtain_video_resolution.sh:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 |
3 | cd ../../data/ava/
4 |
5 | ls ./videos_trimmed_trainval | while read filename; do \
6 | vid="$(echo ${filename} | cut -d'.' -f1)";
7 | resolution=`ffprobe -v error -select_streams v:0 -show_entries stream=width,height -of csv=s=x:p=0 ./videos_trimmed_trainval/${filename}`
8 | echo ${vid} ${resolution}
9 | done &> ava_video_resolution_stats.csv
10 |
11 | echo $PWD
12 |
13 | cd ../../data_tools/ava/
14 |
--------------------------------------------------------------------------------
/data_tools/ava/preprocess_videos.sh:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 |
3 | cd ../../data/ava/
4 |
5 | mkdir ./videos_trimmed_trainval/
6 | ls videos_trainval/ | while read filename; do \
7 | vid="$(echo ${filename} | cut -d'.' -f1)";
8 | ffmpeg -nostdin -i "./videos_trainval/${filename}" \
9 | -ss 00:14:00 -t 00:17:00 \
10 | -filter:v fps=fps=30 \
11 | "./${vid}.tmp.mp4";
12 | ffmpeg -nostdin -i "./${vid}.tmp.mp4" \
13 | -vf scale=-2:480 \
14 | -c:a copy \
15 | "./videos_trimmed_trainval/${vid}.mp4";
16 | rm "./${vid}.tmp.mp4";
17 | done
18 |
19 | cd ../../data_tools/ava/
20 |
--------------------------------------------------------------------------------
/data_tools/hmdb51/PREPARING_HMDB51.md:
--------------------------------------------------------------------------------
1 | ## Preparing HMDB51
2 |
3 | For more details, please refer to the official [website](http://serre-lab.clps.brown.edu/resource/hmdb-a-large-human-motion-database/). We provide scripts with documentation. Before we start, please make sure that the current directory is `$MMACTION/data_tools/hmdb51/`.
4 |
5 | ### Prepare annotations
6 | First of all, run the following script to prepare annotations.
7 | ```shell
8 | bash download_annotations.sh
9 | ```
10 |
11 | ### Prepare videos
12 | Then, use the following script to prepare videos.
13 | ```shell
14 | bash download_videos.sh
15 | ```
16 |
17 | ### Extract frames
18 | Now it is time to extract frames from videos.
19 | Before extraction, please refer to `DATASET.md` for installing [dense_flow](https://github.com/yjxiong/dense_flow).
20 | If you have an SSD, then we recommend extracting frames there for better I/O performance. The extracted frames (RGB + Flow) will take up ~24GB.
21 | ```shell
22 | # execute these two lines (assuming the SSD is mounted at "/mnt/SSD/")
23 | mkdir /mnt/SSD/hmdb51_extracted/
24 | ln -s /mnt/SSD/hmdb51_extracted/ ../data/hmdb51/rawframes
25 | ```
26 |
27 | If you did not install dense_flow during installation, or only want to play with RGB frames (since extracting optical flow can be both time-consuming and space-hogging), consider running the following script to extract **RGB-only** frames.
28 | ```shell
29 | bash extract_rgb_frames.sh
30 | ```
31 |
32 | If both RGB and optical flow frames are required, run the following script instead.
33 | ```shell
34 | bash extract_frames.sh
35 | ```
36 |
37 | ### Generate filelist
38 | Run the following script to generate filelists in both the rawframes and videos formats (a sketch for parsing the rawframes filelist is given after this document).
39 | ```shell
40 | bash generate_filelist.sh
41 | ```
42 |
43 | ### Folder structure
44 | In the context of the whole project (for hmdb51 only), the folder structure will look like:
45 | ```
46 | mmaction
47 | ├── mmaction
48 | ├── tools
49 | ├── configs
50 | ├── data
51 | │ ├── hmdb51
52 | │ │ ├── hmdb51_{train,val}_split_{1,2,3}_rawframes.txt
53 | │ │ ├── hmdb51_{train,val}_split_{1,2,3}_videos.txt
54 | │ │ ├── annotations
55 | │ │ ├── videos
56 | │ │ │ ├── brush_hair
57 | │ │ │ │ ├── April_09_brush_hair_u_nm_np1_ba_goo_0.avi
58 |
59 | │ │ │ ├── wave
60 | │ │ │ │ ├── 20060723sfjffbartsinger_wave_f_cm_np1_ba_med_0.avi
61 | │ │ ├── rawframes
62 | │ │ │ ├── brush_hair
63 | │ │ │ │ ├── April_09_brush_hair_u_nm_np1_ba_goo_0
64 | │ │ │ │ │ ├── img_00001.jpg
65 | │ │ │ │ │ ├── img_00002.jpg
66 | │ │ │ │ │ ├── ...
67 | │ │ │ │ │ ├── flow_x_00001.jpg
68 | │ │ │ │ │ ├── flow_x_00002.jpg
69 | │ │ │ │ │ ├── ...
70 | │ │ │ │ │ ├── flow_y_00001.jpg
71 | │ │ │ │ │ ├── flow_y_00002.jpg
72 | │ │ │ ├── ...
73 | │ │ │ ├── wave
74 | │ │ │ │ ├── 20060723sfjffbartsinger_wave_f_cm_np1_ba_med_0
75 | │ │ │ │ ├── ...
76 | │ │ │ │ ├── winKen_wave_u_cm_np1_ri_bad_1
77 |
78 | ```
79 |
80 | For training and evaluating on HMDB51, please refer to [GETTING_STARTED.md](https://github.com/open-mmlab/mmaction/blob/master/GETTING_STARTED.md).
--------------------------------------------------------------------------------
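For reference, a minimal sketch for reading the rawframes filelist generated by `generate_filelist.sh` above. It assumes each line follows the `<frame_dir> <total_frames> <label>` convention used by the rawframes annotation files referenced in the configs (e.g. `hmdb51_train_split_1_rawframes.txt`); the path is only an example.

```python
# Minimal sketch: read a rawframes filelist, assuming the line format
# "<frame_dir> <total_frames> <label>".
def load_rawframes_filelist(path):
    samples = []
    with open(path) as f:
        for line in f:
            frame_dir, num_frames, label = line.strip().split()
            samples.append((frame_dir, int(num_frames), int(label)))
    return samples


if __name__ == '__main__':
    samples = load_rawframes_filelist(
        'data/hmdb51/hmdb51_train_split_1_rawframes.txt')
    print(len(samples), 'clips; first entry:', samples[0])
```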
/data_tools/hmdb51/download_annotations.sh:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 |
3 | DATA_DIR="../../data/hmdb51/annotations"
4 |
5 | if [[ ! -d "${DATA_DIR}" ]]; then
6 | echo "${DATA_DIR} does not exist. Creating";
7 | mkdir -p ${DATA_DIR}
8 | fi
9 |
10 | cd ${DATA_DIR}
11 | wget http://serre-lab.clps.brown.edu/wp-content/uploads/2013/10/test_train_splits.rar
12 |
13 | # sudo apt-get install unrar
14 | unrar x test_train_splits.rar
15 | rm test_train_splits.rar
16 |
17 | mv testTrainMulti_7030_splits/*.txt ./
18 | rmdir testTrainMulti_7030_splits
19 |
--------------------------------------------------------------------------------
/data_tools/hmdb51/download_videos.sh:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 |
3 | DATA_DIR="../../data/hmdb51/"
4 |
5 | cd ${DATA_DIR}
6 |
7 | mkdir -p ./videos
8 | cd ./videos
9 |
10 | wget http://serre-lab.clps.brown.edu/wp-content/uploads/2013/10/hmdb51_org.rar
11 |
12 | unrar x ./hmdb51_org.rar
13 | rm ./hmdb51_org.rar
14 |
15 | # extract all rar files with full path
16 | for file in *.rar; do unrar x $file; done
17 |
18 | rm ./*.rar
19 | cd "../../../data_tools/hmdb51"
20 |
--------------------------------------------------------------------------------
/data_tools/hmdb51/extract_frames.sh:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 |
3 | num_gpu=($(nvidia-smi -L | wc -l))
4 | num_worker=${num_gpu}
5 |
6 | cd ../
7 | python build_rawframes.py ../data/hmdb51/videos/ ../data/hmdb51/rawframes/ --level 2 --flow_type tvl1 --num_gpu ${num_gpu} --num_worker ${num_worker}
8 | echo "Raw frames (RGB and tv-l1) Generated"
9 |
10 | cd hmdb51/
11 |
--------------------------------------------------------------------------------
/data_tools/hmdb51/extract_rgb_frames.sh:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 |
3 | num_gpu=($(nvidia-smi -L | wc -l))
4 | num_worker=${num_gpu}
5 |
6 | cd ../
7 | python build_rawframes.py ../data/hmdb51/videos/ ../data/hmdb51/rawframes/ --level 2 --ext avi --num_gpu ${num_gpu} --num_worker ${num_worker}
8 | echo "Raw frames (RGB only) generated for train and val set"
9 |
10 | cd hmdb51/
--------------------------------------------------------------------------------
/data_tools/hmdb51/generate_filelist.sh:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 |
3 | cd ../../
4 | PYTHONPATH=. python data_tools/build_file_list.py hmdb51 data/hmdb51/rawframes/ --level 2 --format rawframes --shuffle
5 | echo "Filelist for rawframes generated."
6 |
7 | PYTHONPATH=. python data_tools/build_file_list.py hmdb51 data/hmdb51/videos/ --level 2 --format videos --shuffle
8 | echo "Filelist for videos generated."
9 |
10 | cd data_tools/hmdb51/
--------------------------------------------------------------------------------
/data_tools/kinetics400/PREPARING_KINETICS400.md:
--------------------------------------------------------------------------------
1 | ## Preparing Kinetics-400
2 |
3 | For more details, please refer to the official [website](https://deepmind.com/research/open-source/open-source-datasets/kinetics/). We provide scripts with documentation. Before we start, please make sure that the current directory is `$MMACTION/data_tools/kinetics400/`.
4 |
5 | ### Prepare annotations
6 | First of all, run the following script to prepare annotations.
7 | ```shell
8 | bash download_annotations.sh
9 | ```
10 |
11 | ### Prepare videos
12 | Then, use the following script to prepare videos. The code is adapted from the [official crawler](https://github.com/activitynet/ActivityNet/tree/master/Crawler/Kinetics). Note that this might take a long time.
13 | ```shell
14 | bash download_videos.sh
15 | ```
16 | Note that some people may already have a backup of the kinetics-400 dataset using the [official crawler](https://github.com/activitynet/ActivityNet/tree/master/Crawler/Kinetics).
17 | If this is the case, then you only need to replace all whitespaces in the class names for ease of processing, either with [detox](http://manpages.ubuntu.com/manpages/bionic/man1/detox.1.html)
18 |
19 | ```shell
20 | # sudo apt-get install detox
21 | detox -r ../../data/kinetics400/videos_train/
22 | detox -r ../../data/kinetics400/videos_val/
23 | ```
24 | or by running
25 | ```shell
26 | bash rename_classnames.sh
27 | ```
28 |
29 | ### Extract frames
30 | Now it is time to extract frames from videos.
31 | Before extraction, please refer to `DATASET.md` for installing [dense_flow](https://github.com/yjxiong/dense_flow).
32 | If you have an SSD, then we strongly recommend extracting frames there for better I/O performance.
33 | ```shell
34 | # execute these lines (assuming the SSD is mounted at "/mnt/SSD/")
35 | mkdir /mnt/SSD/kinetics400_extracted_train/
36 | ln -s /mnt/SSD/kinetics400_extracted_train/ ../data/kinetics400/rawframes_train/
37 | mkdir /mnt/SSD/kinetics400_extracted_val/
38 | ln -s /mnt/SSD/kinetics400_extracted_val/ ../data/kinetics400/rawframes_val/
39 | ```
40 | Afterwards, run the following script to extract frames.
41 | ```shell
42 | bash extract_frames.sh
43 | ```
44 | If you only want to play with RGB frames (since extracting optical flow can be both time-consuming and space-hogging), consider running the following script to extract **RGB-only** frames.
45 | ```shell
46 | bash extract_rgb_frames.sh
47 | ```
48 |
49 |
50 | ### Generate filelist
51 | Run the following scripts to generate filelists in the videos and rawframes formats, respectively.
52 | ```shell
53 | bash generate_video_filelist.sh
54 | # execute the command below when rawframes are ready
55 | bash generate_rawframes_filelist.sh
56 | ```
57 |
58 | ### Folder structure
59 | In the context of the whole project (for kinetics400 only), the *minimal* folder structure will look like the following (*minimal* means that some data are not strictly necessary; for example, you may only want to evaluate kinetics-400 using the original video format):
60 |
61 | ```
62 | mmaction
63 | ├── mmaction
64 | ├── tools
65 | ├── configs
66 | ├── data
67 | │ ├── kinetics400
68 | │ │ ├── kinetics400_train_list_videos.txt
69 | │ │ ├── kinetics400_val_list_videos.txt
70 | │ │ ├── annotations
71 | │ │ ├── videos_train
72 | │ │ ├── videos_val
73 | │ │ │ ├── abseiling
74 | │ │ │ │ ├── 0wR5jVB-WPk_000417_000427.mp4
75 | │ │ │ │ ├── ...
76 | │ │ │ ├── ...
77 | │ │ │ ├── wrapping_present
78 | │ │ │ ├── ...
79 | │ │ │ ├── zumba
80 | │ │ ├── rawframes_train
81 | │ │ ├── rawframes_val
82 |
83 | ```
84 |
85 | For training and evaluating on Kinetics-400, please refer to [GETTING_STARTED.md](https://github.com/open-mmlab/mmaction/blob/master/GETTING_STARTED.md).
--------------------------------------------------------------------------------
/data_tools/kinetics400/download_annotations.sh:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 |
3 | DATA_DIR="../../data/kinetics400/annotations"
4 |
5 | if [[ ! -d "${DATA_DIR}" ]]; then
6 | echo "${DATA_DIR} does not exist. Creating";
7 | mkdir -p ${DATA_DIR}
8 | fi
9 |
10 | wget https://storage.googleapis.com/deepmind-media/Datasets/kinetics400.tar.gz
11 | tar -xf kinetics400.tar.gz -C ${DATA_DIR}/
12 | rm kinetics400.tar.gz
13 |
--------------------------------------------------------------------------------
/data_tools/kinetics400/download_videos.sh:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 |
3 | cd ../../mmaction/third_party/ActivityNet/Crawler/Kinetics
4 |
5 | # set up environment
6 | conda env create -f environment.yml
7 | source activate kinetics
8 | pip install --upgrade youtube-dl
9 |
10 | DATA_DIR="../../../../../data/kinetics400"
11 | ANNO_DIR="../../../../../data/kinetics400/annotations"
12 | python download.py ${ANNO_DIR}/kinetics400/train.csv ${DATA_DIR}/videos_train
13 | python download.py ${ANNO_DIR}/kinetics400/val.csv ${DATA_DIR}/videos_val
14 |
15 | cd ../../../../../data_tools/kinetics400
16 |
--------------------------------------------------------------------------------
/data_tools/kinetics400/extract_frames.sh:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 |
3 | cd ../
4 | python build_rawframes.py ../data/kinetics400/videos_train/ ../data/kinetics400/rawframes_train/ --level 2 --flow_type tvl1 --ext mp4
5 | echo "Raw frames (RGB and tv-l1) Generated for train set"
6 |
7 | python build_rawframes.py ../data/kinetics400/videos_val/ ../data/kinetics400/rawframes_val/ --level 2 --flow_type tvl1 --ext mp4
8 | echo "Raw frames (RGB and tv-l1) Generated for val set"
9 |
10 | cd kinetics400/
11 |
--------------------------------------------------------------------------------
/data_tools/kinetics400/extract_rgb_frames.sh:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 |
3 | cd ../
4 | python build_rawframes.py ../data/kinetics400/videos_train/ ../data/kinetics400/rawframes_train/ --level 2 --ext mp4
5 | echo "Raw frames (RGB only) generated for train set"
6 |
7 | python build_rawframes.py ../data/kinetics400/videos_val/ ../data/kinetics400/rawframes_val/ --level 2 --ext mp4
8 | echo "Raw frames (RGB only) generated for val set"
9 |
10 | cd kinetics400/
11 |
--------------------------------------------------------------------------------
/data_tools/kinetics400/generate_rawframes_filelist.sh:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 |
3 | cd ../../
4 | PYTHONPATH=. python data_tools/build_file_list.py kinetics400 data/kinetics400/rawframes_train/ --level 2 --format rawframes --num_split 1 --subset train --shuffle
5 | echo "Train filelist for rawframes generated."
6 |
7 | PYTHONPATH=. python data_tools/build_file_list.py kinetics400 data/kinetics400/rawframes_val/ --level 2 --format rawframes --num_split 1 --subset val --shuffle
8 | echo "Val filelist for rawframes generated."
9 | cd data_tools/kinetics400/
--------------------------------------------------------------------------------
/data_tools/kinetics400/generate_video_filelist.sh:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 |
3 | cd ../../
4 | PYTHONPATH=. python data_tools/build_file_list.py kinetics400 data/kinetics400/videos_train/ --level 2 --format videos --num_split 1 --subset train --shuffle
5 | echo "Train filelist for video generated."
6 |
7 | PYTHONPATH=. python data_tools/build_file_list.py kinetics400 data/kinetics400/videos_val/ --level 2 --format videos --num_split 1 --subset val --shuffle
8 | echo "Val filelist for video generated."
9 | cd data_tools/kinetics400/
--------------------------------------------------------------------------------
/data_tools/kinetics400/rename_classnames.sh:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 |
3 | # Rename classname for convenience
4 | cd ../../data/kinetics400/
5 | ls ./videos_train | while read class; do \
6 | newclass=`echo $class | tr " " "_" | tr "(" "-" | tr ")" "-" `;
7 | if [ "${class}" != "${newclass}" ]
8 | then
9 | mv "videos_train/${class}" "videos_train/${newclass}";
10 | fi
11 | done
12 |
13 | ls ./videos_val | while read class; do \
14 | newclass=`echo $class | tr " " "_" | tr "(" "-" | tr ")" "-" `;
15 | if [ "${class}" != "${newclass}" ]
16 | then
17 | mv "videos_val/${class}" "videos_val/${newclass}";
18 | fi
19 | done
20 |
21 | cd ../../data_tools/kinetics400/
22 |
--------------------------------------------------------------------------------
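The shell script above only replaces whitespace and parentheses in the Kinetics class directory names. For convenience, a minimal Python sketch of the same renaming is given below; it mirrors the `tr` substitutions of `rename_classnames.sh` and uses the same directory layout, which may need adjusting for your setup.

```python
# Minimal sketch: replace whitespace and parentheses in Kinetics class
# directory names, mirroring rename_classnames.sh.
import os


def rename_classnames(root):
    for name in sorted(os.listdir(root)):
        new_name = name.replace(' ', '_').replace('(', '-').replace(')', '-')
        if new_name != name:
            os.rename(os.path.join(root, name), os.path.join(root, new_name))


if __name__ == '__main__':
    for split_dir in ('data/kinetics400/videos_train',
                      'data/kinetics400/videos_val'):
        rename_classnames(split_dir)
```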
/data_tools/thumos14/PREPARING_TH14.md:
--------------------------------------------------------------------------------
1 | ## Preparing THUMOS-14
2 |
3 | For more details, please refer to the [official website](https://www.crcv.ucf.edu/THUMOS14/download.html). We provide scripts with documentation. Before we start, please make sure that the current directory is `$MMACTION/data_tools/thumos14/`.
4 |
5 | ### Prepare annotations
6 | First of all, run the following script to prepare annotations.
7 | ```shell
8 | bash download_annotations.sh
9 | ```
10 |
11 | ### Prepare videos
12 | Then, use the following script to prepare videos.
13 | ```shell
14 | bash download_videos.sh
15 | ```
16 |
17 | ### Extract frames
18 | Now it is time to extract frames from videos.
19 | Before extraction, please refer to `DATASET.md` for installing [dense_flow](https://github.com/yjxiong/dense_flow).
20 | If you have an SSD, then we strongly recommend extracting frames there for better I/O performance.
21 | ```shell
22 | # execute these two lines (assuming the SSD is mounted at "/mnt/SSD/")
23 | mkdir /mnt/SSD/thumos14_extracted/
24 | ln -s /mnt/SSD/thumos14_extracted/ ../data/thumos14/rawframes/
25 | ```
26 | Afterwards, run the following script to extract frames.
27 | ```shell
28 | bash extracted_frames.sh
29 | ```
30 |
31 | ### Fetching proposal files
32 | Run the following script to fetch the pre-computed TAG proposals.
33 | ```shell
34 | bash fetch_tag_proposals.sh
35 | ```
36 |
37 | ### Folder structure
38 | In the context of the whole project (for thumos14 only), the folder structure will look like:
39 |
40 | ```
41 | mmaction
42 | ├── mmaction
43 | ├── tools
44 | ├── configs
45 | ├── data
46 | │ ├── thumos14
47 | │ │ ├── thumos14_tag_val_normalized_proposal_list.txt
48 | │ │ ├── thumos14_tag_test_normalized_proposal_list.txt
49 | │ │ ├── annotations
50 | │ │ ├── videos_val
51 | │ │ │ ├── video_validation_0000001.mp4
52 | │ │ │ ├── ...
53 | │ │ ├── videos_test
54 | │ │ │ ├── video_test_0000001.mp4
55 | │ │ ├── rawframes
56 | │ │ │ ├── video_validation_0000001
57 | | │ │ │ ├── img_00001.jpg
58 | | │ │ │ ├── img_00002.jpg
59 | | │ │ │ ├── ...
60 | | │ │ │ ├── flow_x_00001.jpg
61 | | │ │ │ ├── flow_x_00002.jpg
62 | | │ │ │ ├── ...
63 | | │ │ │ ├── flow_y_00001.jpg
64 | | │ │ │ ├── flow_y_00002.jpg
65 | | │ │ │ ├── ...
66 | │ │ │ ├── ...
67 | │ │ │ ├── video_test_0000001
68 | ```
69 |
70 | For training and evaluating on THUMOS-14, please refer to [GETTING_STARTED.md](https://github.com/open-mmlab/mmaction/blob/master/GETTING_STARTED.md).
--------------------------------------------------------------------------------
/data_tools/thumos14/download_annotations.sh:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 |
3 | DATA_DIR="../../data/thumos14/"
4 |
5 | if [[ ! -d "${DATA_DIR}" ]]; then
6 | echo "${DATA_DIR} does not exist. Creating";
7 | mkdir -p ${DATA_DIR}
8 | fi
9 |
10 | wget http://crcv.ucf.edu/THUMOS14/Validation_set/TH14_Temporal_annotations_validation.zip
11 | wget http://crcv.ucf.edu/THUMOS14/test_set/TH14_Temporal_annotations_test.zip
12 |
13 | unzip -j TH14_Temporal_annotations_validation.zip -d $DATA_DIR/annotations_val
14 | unzip -j TH14_Temporal_annotations_test.zip -d $DATA_DIR/annotations_test
--------------------------------------------------------------------------------
/data_tools/thumos14/download_videos.sh:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 |
3 | DATA_DIR="../../data/thumos14/"
4 |
5 | cd ${DATA_DIR}
6 |
7 | wget https://storage.googleapis.com/thumos14_files/TH14_validation_set_mp4.zip
8 | wget https://storage.googleapis.com/thumos14_files/TH14_Test_set_mp4.zip
9 |
10 | unzip -j TH14_validation_set_mp4.zip -d videos_val/
11 |
12 | unzip -P "THUMOS14_REGISTERED" TH14_Test_set_mp4.zip -d videos_test/
13 |
14 | cd ../../data_tools/thumos14/
--------------------------------------------------------------------------------
/data_tools/thumos14/extracted_frames.sh:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 |
3 | cd ../
4 | python build_rawframes.py ../data/thumos14/videos_val/ ../data/thumos14/rawframes/ --level 1 --flow_type tvl1 --ext mp4
5 | echo "Raw frames (RGB and tv-l1) Generated for val set"
6 |
7 | python build_rawframes.py ../data/thumos14/videos_test/ ../data/thumos14/rawframes/ --level 1 --flow_type tvl1 --ext mp4
8 | echo "Raw frames (RGB and tv-l1) Generated for test set"
9 |
10 | cd thumos14/
11 |
--------------------------------------------------------------------------------
/data_tools/thumos14/fetch_tag_proposals.sh:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 |
3 | wget https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmaction/filelist/thumos14_tag_val_normalized_proposal_list.txt -P ../../data/thumos14/
4 | wget https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmaction/filelist/thumos14_tag_test_normalized_proposal_list.txt -P ../../data/thumos14/
5 |
--------------------------------------------------------------------------------
/data_tools/ucf101/PREPARING_UCF101.md:
--------------------------------------------------------------------------------
1 | ## Preparing UCF-101
2 |
3 | For more details, please refer to the official [website](https://www.crcv.ucf.edu/data/UCF101.php). We provide scripts with documentation. Before we start, please make sure that the current directory is `$MMACTION/data_tools/ucf101/`.
4 |
5 | ### Prepare annotations
6 | First of all, run the following script to prepare annotations.
7 | ```shell
8 | bash download_annotations.sh
9 | ```
10 |
11 | ### Prepare videos
12 | Then, use the following script to prepare videos.
13 | ```shell
14 | bash download_videos.sh
15 | ```
16 |
17 | ### Extract frames
18 | Now it is time to extract frames from videos.
19 | Before extraction, please refer to `DATASET.md` for installing [dense_flow](https://github.com/yjxiong/dense_flow).
20 | If you have an SSD, then we recommend extracting frames there for better I/O performance. The extracted frames (RGB + Flow) will take up ~100GB.
21 | ```shell
22 | # execute these two lines (assuming the SSD is mounted at "/mnt/SSD/")
23 | mkdir /mnt/SSD/ucf101_extracted/
24 | ln -s /mnt/SSD/ucf101_extracted/ ../data/ucf101/rawframes
25 | ```
26 |
27 | If you did not install dense_flow during installation, or only want to play with RGB frames (since extracting optical flow can be both time-consuming and space-hogging), consider running the following script to extract **RGB-only** frames.
28 | ```shell
29 | bash extract_rgb_frames.sh
30 | ```
31 |
32 | If both RGB and optical flow frames are required, run the following script instead.
33 | ```shell
34 | bash extract_frames.sh
35 | ```
36 |
37 | ### Generate filelist
38 | Run the following script to generate filelists in both the rawframes and videos formats.
39 | ```shell
40 | bash generate_filelist.sh
41 | ```
42 |
43 | ### Folder structure
44 | In the context of the whole project (for ucf101 only), the folder structure will look like:
45 | ```
46 | mmaction
47 | ├── mmaction
48 | ├── tools
49 | ├── configs
50 | ├── data
51 | │ ├── ucf101
52 | │ │ ├── ucf101_{train,val}_split_{1,2,3}_rawframes.txt
53 | │ │ ├── ucf101_{train,val}_split_{1,2,3}_videos.txt
54 | │ │ ├── annotations
55 | │ │ ├── videos
56 | │ │ │ ├── ApplyEyeMakeup
57 | │ │ │ │ ├── v_ApplyEyeMakeup_g01_c01.avi
58 |
59 | │ │ │ ├── YoYo
60 | │ │ │ │ ├── v_YoYo_g25_c05.avi
61 | │ │ ├── rawframes
62 | │ │ │ ├── ApplyEyeMakeup
63 | │ │ │ │ ├── v_ApplyEyeMakeup_g01_c01
64 | │ │ │ │ │ ├── img_00001.jpg
65 | │ │ │ │ │ ├── img_00002.jpg
66 | │ │ │ │ │ ├── ...
67 | │ │ │ │ │ ├── flow_x_00001.jpg
68 | │ │ │ │ │ ├── flow_x_00002.jpg
69 | │ │ │ │ │ ├── ...
70 | │ │ │ │ │ ├── flow_y_00001.jpg
71 | │ │ │ │ │ ├── flow_y_00002.jpg
72 | │ │ │ ├── ...
73 | │ │ │ ├── YoYo
74 | │ │ │ │ ├── v_YoYo_g01_c01
75 | │ │ │ │ ├── ...
76 | │ │ │ │ ├── v_YoYo_g25_c05
77 |
78 | ```
79 |
80 | For training and evaluating on UCF101, please refer to [GETTING_STARTED.md](https://github.com/open-mmlab/mmaction/blob/master/GETTING_STARTED.md).
--------------------------------------------------------------------------------
/data_tools/ucf101/download_annotations.sh:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 |
3 | DATA_DIR="../../data/ucf101/annotations"
4 |
5 | if [[ ! -d "${DATA_DIR}" ]]; then
6 | echo "${DATA_DIR} does not exist. Creating";
7 | mkdir -p ${DATA_DIR}
8 | fi
9 |
10 | wget "https://www.crcv.ucf.edu/data/UCF101/UCF101TrainTestSplits-RecognitionTask.zip"
11 |
12 | unzip -j UCF101TrainTestSplits-RecognitionTask.zip -d ${DATA_DIR}/
13 | rm UCF101TrainTestSplits-RecognitionTask.zip
14 |
--------------------------------------------------------------------------------
/data_tools/ucf101/download_videos.sh:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 |
3 | DATA_DIR="../../data/ucf101/"
4 |
5 | cd ${DATA_DIR}
6 |
7 | wget https://www.crcv.ucf.edu/data/UCF101/UCF101.rar
8 | unrar x UCF101.rar
9 | mv ./UCF-101 ./videos
10 |
11 | cd "../../data_tools/ucf101"
12 |
--------------------------------------------------------------------------------
/data_tools/ucf101/extract_frames.sh:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 |
3 | cd ../
4 | python build_rawframes.py ../data/ucf101/videos/ ../data/ucf101/rawframes/ --level 2 --flow_type tvl1
5 | echo "Raw frames (RGB and tv-l1) Generated"
6 | cd ucf101/
7 |
--------------------------------------------------------------------------------
/data_tools/ucf101/generate_filelist.sh:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 |
3 | cd ../../
4 | PYTHONPATH=. python data_tools/build_file_list.py ucf101 data/ucf101/rawframes/ --level 2 --format rawframes --shuffle
5 | echo "Filelist for rawframes generated."
6 |
7 | PYTHONPATH=. python data_tools/build_file_list.py ucf101 data/ucf101/videos/ --level 2 --format videos --shuffle
8 | echo "Filelist for videos generated."
9 |
10 | cd data_tools/ucf101/
--------------------------------------------------------------------------------
/mmaction/__init__.py:
--------------------------------------------------------------------------------
1 | from .version import __version__, short_version
2 |
3 | __all__ = ['__version__', 'short_version']
4 |
--------------------------------------------------------------------------------
/mmaction/apis/__init__.py:
--------------------------------------------------------------------------------
1 | from .env import init_dist, get_root_logger, set_random_seed
2 | from .train import train_network
3 |
4 | __all__ = [
5 | 'init_dist', 'get_root_logger', 'set_random_seed',
6 | 'train_network',
7 | ]
8 |
--------------------------------------------------------------------------------
/mmaction/apis/env.py:
--------------------------------------------------------------------------------
1 | import logging
2 | import os
3 | import random
4 |
5 | import numpy as np
6 | import torch
7 | import torch.distributed as dist
8 | import torch.multiprocessing as mp
9 | from mmcv.runner import get_dist_info
10 |
11 |
12 | def init_dist(launcher, backend='nccl', **kwargs):
13 | if mp.get_start_method(allow_none=True) is None:
14 | mp.set_start_method('spawn')
15 | if launcher == 'pytorch':
16 | _init_dist_pytorch(backend, **kwargs)
17 | elif launcher == 'mpi':
18 | _init_dist_mpi(backend, **kwargs)
19 | elif launcher == 'slurm':
20 | _init_dist_slurm(backend, **kwargs)
21 | else:
22 | raise ValueError('Invalid launcher type: {}'.format(launcher))
23 |
24 |
25 | def _init_dist_pytorch(backend, **kwargs):
26 | # TODO: use local_rank instead of rank % num_gpus
27 | rank = int(os.environ['RANK'])
28 | num_gpus = torch.cuda.device_count()
29 | torch.cuda.set_device(rank % num_gpus)
30 | dist.init_process_group(backend=backend, **kwargs)
31 |
32 |
33 | def _init_dist_mpi(backend, **kwargs):
34 | raise NotImplementedError
35 |
36 |
37 | def _init_dist_slurm(backend, **kwargs):
38 | raise NotImplementedError
39 |
40 |
41 | def set_random_seed(seed):
42 | random.seed(seed)
43 | np.random.seed(seed)
44 | torch.manual_seed(seed)
45 | torch.cuda.manual_seed_all(seed)
46 |
47 |
48 | def get_root_logger(log_level=logging.INFO):
49 | logger = logging.getLogger()
50 | if not logger.hasHandlers():
51 | logging.basicConfig(
52 | format='%(asctime)s - %(levelname)s - %(message)s',
53 | level=log_level)
54 | rank, _ = get_dist_info()
55 | if rank != 0:
56 | logger.setLevel('ERROR')
57 | return logger
58 |
--------------------------------------------------------------------------------
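A minimal sketch of how the helpers in `env.py` are typically called at the start of a run (the scripts under `tools/` do something similar); it assumes mmaction is installed and importable.

```python
# Minimal sketch: seed all RNGs and obtain the root logger via mmaction.apis.
import logging

from mmaction.apis import get_root_logger, set_random_seed

logger = get_root_logger(log_level=logging.INFO)
set_random_seed(0)
logger.info('Random seed set to 0')
```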
/mmaction/core/__init__.py:
--------------------------------------------------------------------------------
1 | from .evaluation import *
2 | from .utils import *
3 |
--------------------------------------------------------------------------------
/mmaction/core/anchor2d/__init__.py:
--------------------------------------------------------------------------------
1 | from .anchor_generator import AnchorGenerator
2 | from .anchor_target import anchor_target
3 |
4 | __all__ = ['AnchorGenerator', 'anchor_target']
--------------------------------------------------------------------------------
/mmaction/core/anchor2d/anchor_generator.py:
--------------------------------------------------------------------------------
1 | import torch
2 |
3 |
4 | class AnchorGenerator(object):
5 |
6 | def __init__(self, base_size, scales, ratios, scale_major=True, ctr=None):
7 | self.base_size = base_size
8 | self.scales = torch.Tensor(scales)
9 | self.ratios = torch.Tensor(ratios)
10 | self.scale_major = scale_major
11 | self.ctr = ctr
12 | self.base_anchors = self.gen_base_anchors()
13 |
14 | @property
15 | def num_base_anchors(self):
16 | return self.base_anchors.size(0)
17 |
18 | def gen_base_anchors(self):
19 | w = self.base_size
20 | h = self.base_size
21 | if self.ctr is None:
22 | x_ctr = 0.5 * (w - 1)
23 | y_ctr = 0.5 * (h - 1)
24 | else:
25 | x_ctr, y_ctr = self.ctr
26 |
27 | h_ratios = torch.sqrt(self.ratios)
28 | w_ratios = 1 / h_ratios
29 | if self.scale_major:
30 | ws = (w * w_ratios[:, None] * self.scales[None, :]).view(-1)
31 | hs = (h * h_ratios[:, None] * self.scales[None, :]).view(-1)
32 | else:
33 | ws = (w * self.scales[:, None] * w_ratios[None, :]).view(-1)
34 | hs = (h * self.scales[:, None] * h_ratios[None, :]).view(-1)
35 |
36 | base_anchors = torch.stack(
37 | [
38 | x_ctr - 0.5 * (ws - 1), y_ctr - 0.5 * (hs - 1),
39 | x_ctr + 0.5 * (ws - 1), y_ctr + 0.5 * (hs - 1)
40 | ],
41 | dim=-1).round()
42 |
43 | return base_anchors
44 |
45 | def _meshgrid(self, x, y, row_major=True):
46 | xx = x.repeat(len(y))
47 | yy = y.view(-1, 1).repeat(1, len(x)).view(-1)
48 | if row_major:
49 | return xx, yy
50 | else:
51 | return yy, xx
52 |
53 | def grid_anchors(self, featmap_size, stride=16, device='cuda'):
54 | base_anchors = self.base_anchors.to(device)
55 |
56 | feat_h, feat_w = featmap_size
57 | shift_x = torch.arange(0, feat_w, device=device) * stride
58 | shift_y = torch.arange(0, feat_h, device=device) * stride
59 | shift_xx, shift_yy = self._meshgrid(shift_x, shift_y)
60 | shifts = torch.stack([shift_xx, shift_yy, shift_xx, shift_yy], dim=-1)
61 | shifts = shifts.type_as(base_anchors)
62 | # first feat_w elements correspond to the first row of shifts
63 | # add A anchors (1, A, 4) to K shifts (K, 1, 4) to get
64 | # shifted anchors (K, A, 4), reshape to (K*A, 4)
65 |
66 | all_anchors = base_anchors[None, :, :] + shifts[:, None, :]
67 | all_anchors = all_anchors.view(-1, 4)
68 | # first A rows correspond to A anchors of (0, 0) in feature map,
69 | # then (0, 1), (0, 2), ...
70 | return all_anchors
71 |
72 | def valid_flags(self, featmap_size, valid_size, device='cuda'):
73 | feat_h, feat_w = featmap_size
74 | valid_h, valid_w = valid_size
75 | assert valid_h <= feat_h and valid_w <= feat_w
76 | valid_x = torch.zeros(feat_w, dtype=torch.uint8, device=device)
77 | valid_y = torch.zeros(feat_h, dtype=torch.uint8, device=device)
78 | valid_x[:valid_w] = 1
79 | valid_y[:valid_h] = 1
80 | valid_xx, valid_yy = self._meshgrid(valid_x, valid_y)
81 | valid = valid_xx & valid_yy
82 | valid = valid[:, None].expand(
83 | valid.size(0), self.num_base_anchors).contiguous().view(-1)
84 | return valid
85 |
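
The comments in grid_anchors describe how base anchors are tiled over the feature map; a small CPU-only sketch (assuming mmaction is importable) makes the shapes concrete:

from mmaction.core.anchor2d import AnchorGenerator

gen = AnchorGenerator(base_size=16, scales=[8], ratios=[0.5, 1.0, 2.0])
print(gen.num_base_anchors)                            # 3 base anchors, one per ratio
anchors = gen.grid_anchors((2, 3), stride=16, device='cpu')
print(anchors.shape)                                   # torch.Size([18, 4]): 2*3 locations x 3 anchors
flags = gen.valid_flags((2, 3), (2, 2), device='cpu')
print(flags.shape)                                     # torch.Size([18]); the rightmost feature column is flagged invalid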
--------------------------------------------------------------------------------
/mmaction/core/bbox1d/__init__.py:
--------------------------------------------------------------------------------
1 | from .geometry import temporal_iou
2 |
3 | __all__ = [
4 | 'temporal_iou'
5 | ]
6 |
--------------------------------------------------------------------------------
/mmaction/core/bbox1d/geometry.py:
--------------------------------------------------------------------------------
1 |
2 | def temporal_iou(span_A, span_B):
3 | """
4 | Calculates the intersection over union of two temporal "bounding boxes"
5 | span_A: (start, end)
6 | span_B: (start, end)
7 | """
8 | union = min(span_A[0], span_B[0]), max(span_A[1], span_B[1])
9 | inter = max(span_A[0], span_B[0]), min(span_A[1], span_B[1])
10 |
11 | if inter[0] >= inter[1]:
12 | return 0
13 | else:
14 | return float(inter[1] - inter[0]) / float(union[1] - union[0])
15 |
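
A quick numeric check of temporal_iou; the values follow directly from the definition above:

from mmaction.core.bbox1d import temporal_iou

print(temporal_iou((0.0, 10.0), (5.0, 15.0)))   # overlap 5 / union 15 = 0.333...
print(temporal_iou((0.0, 10.0), (12.0, 20.0)))  # disjoint spans -> 0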
--------------------------------------------------------------------------------
/mmaction/core/bbox2d/__init__.py:
--------------------------------------------------------------------------------
1 | from .geometry import bbox_overlaps
2 | from .assigners import BaseAssigner, MaxIoUAssigner, AssignResult
3 | from .samplers import BaseSampler, PseudoSampler, RandomSampler
4 | from .assign_sampling import build_assigner, build_sampler, assign_and_sample
5 | from .transforms import (bbox2delta, delta2bbox, bbox_flip, bbox_mapping,
6 |                          bbox_mapping_back, bbox2roi, roi2bbox, bbox2result)
7 | from .bbox_target import bbox_target
8 |
9 | __all__ = [
10 | 'bbox_overlaps', 'BaseAssigner', 'MaxIoUAssigner', 'AssignResult',
11 | 'BaseSampler', 'PseudoSampler', 'RandomSampler',
12 | 'build_assigner', 'build_sampler', 'assign_and_sample',
13 | 'bbox2delta', 'delta2bbox', 'bbox_flip', 'bbox_mapping',
14 | 'bbox_mapping_back', 'bbox2roi', 'roi2bbox', 'bbox2result', 'bbox_target'
15 | ]
16 |
--------------------------------------------------------------------------------
/mmaction/core/bbox2d/assign_sampling.py:
--------------------------------------------------------------------------------
1 | import mmcv
2 |
3 | from . import assigners, samplers
4 |
5 |
6 | def build_assigner(cfg, **kwargs):
7 | if isinstance(cfg, assigners.BaseAssigner):
8 | return cfg
9 | elif isinstance(cfg, dict):
10 | return mmcv.runner.obj_from_dict(
11 | cfg, assigners, default_args=kwargs)
12 | else:
13 |         raise TypeError('Invalid type {} for building an assigner'.format(
14 | type(cfg)))
15 |
16 |
17 | def build_sampler(cfg, **kwargs):
18 | if isinstance(cfg, samplers.BaseSampler):
19 | return cfg
20 | elif isinstance(cfg, dict):
21 | return mmcv.runner.obj_from_dict(
22 | cfg, samplers, default_args=kwargs)
23 | else:
24 | raise TypeError('Invalid type {} for building a sampler'.format(
25 | type(cfg)))
26 |
27 |
28 | def assign_and_sample(bboxes, gt_bboxes, gt_bboxes_ignore, gt_labels, cfg):
29 | bbox_assigner = build_assigner(cfg.assigner)
30 | bbox_sampler = build_sampler(cfg.sampler)
31 | assign_result = bbox_assigner.assign(bboxes, gt_bboxes, gt_bboxes_ignore,
32 | gt_labels)
33 | sampling_result = bbox_sampler.sample(assign_result, bboxes, gt_bboxes,
34 | gt_labels)
35 | return assign_result, sampling_result
36 |
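
A hedged sketch of the config dicts these builders expect. The RandomSampler arguments match its constructor in this repo; the MaxIoUAssigner thresholds are assumptions based on the usual mmdetection-style signature and should be checked against max_iou_assigner.py:

from mmaction.core.bbox2d import build_assigner, build_sampler

# fields besides 'type' for the assigner are assumed, not taken from this repo
assigner_cfg = dict(type='MaxIoUAssigner', pos_iou_thr=0.5, neg_iou_thr=0.5)
sampler_cfg = dict(type='RandomSampler', num=256, pos_fraction=0.5)

bbox_assigner = build_assigner(assigner_cfg)   # resolved via obj_from_dict in the assigners module
bbox_sampler = build_sampler(sampler_cfg)      # resolved via obj_from_dict in the samplers module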
--------------------------------------------------------------------------------
/mmaction/core/bbox2d/assigners/__init__.py:
--------------------------------------------------------------------------------
1 | from .base_assigner import BaseAssigner
2 | from .max_iou_assigner import MaxIoUAssigner
3 | from .assign_result import AssignResult
4 |
5 | __all__ = ['BaseAssigner', 'MaxIoUAssigner', 'AssignResult']
--------------------------------------------------------------------------------
/mmaction/core/bbox2d/assigners/assign_result.py:
--------------------------------------------------------------------------------
1 | import torch
2 |
3 |
4 | class AssignResult(object):
5 |
6 | def __init__(self, num_gts, gt_inds, max_overlaps, labels=None):
7 | self.num_gts = num_gts
8 | self.gt_inds = gt_inds
9 | self.max_overlaps = max_overlaps
10 | self.labels = labels
11 |
12 | def add_gt_(self, gt_labels):
13 | self_inds = torch.arange(
14 | 1, len(gt_labels) + 1, dtype=torch.long, device=gt_labels.device)
15 | self.gt_inds = torch.cat([self_inds, self.gt_inds])
16 | self.max_overlaps = torch.cat(
17 | [self.max_overlaps.new_ones(self.num_gts), self.max_overlaps])
18 | if self.labels is not None:
19 | self.labels = torch.cat([gt_labels, self.labels])
20 |
--------------------------------------------------------------------------------
/mmaction/core/bbox2d/assigners/base_assigner.py:
--------------------------------------------------------------------------------
1 | from abc import ABCMeta, abstractmethod
2 |
3 |
4 | class BaseAssigner(metaclass=ABCMeta):
5 |
6 | @abstractmethod
7 | def assign(self, bboxes, gt_bboxes, gt_bboxes_ignore=None, gt_labels=None):
8 | pass
9 |
--------------------------------------------------------------------------------
/mmaction/core/bbox2d/bbox_target.py:
--------------------------------------------------------------------------------
1 | import torch
2 |
3 | from .transforms import bbox2delta
4 | from mmaction.utils.misc import multi_apply
5 |
6 |
7 | def bbox_target(pos_bboxes_list,
8 | neg_bboxes_list,
9 | pos_gt_bboxes_list,
10 | pos_gt_labels_list,
11 | cfg,
12 | reg_classes=1,
13 | target_means=[.0, .0, .0, .0],
14 | target_stds=[1.0, 1.0, 1.0, 1.0],
15 | concat=True):
16 | (labels, label_weights, bbox_targets,
17 | bbox_weights, class_weights) = multi_apply(
18 | bbox_target_single,
19 | pos_bboxes_list,
20 | neg_bboxes_list,
21 | pos_gt_bboxes_list,
22 | pos_gt_labels_list,
23 | cfg=cfg,
24 | reg_classes=reg_classes,
25 | target_means=target_means,
26 | target_stds=target_stds)
27 |
28 | if concat:
29 | labels = torch.cat(labels, 0)
30 | label_weights = torch.cat(label_weights, 0)
31 | bbox_targets = torch.cat(bbox_targets, 0)
32 | bbox_weights = torch.cat(bbox_weights, 0)
33 | class_weights = torch.cat(class_weights, 0)
34 | return labels, label_weights, bbox_targets, bbox_weights, class_weights
35 |
36 |
37 | def bbox_target_single(pos_bboxes,
38 | neg_bboxes,
39 | pos_gt_bboxes,
40 | pos_gt_labels,
41 | cfg,
42 | reg_classes=1,
43 | target_means=[.0, .0, .0, .0],
44 | target_stds=[1.0, 1.0, 1.0, 1.0]):
45 | num_pos = pos_bboxes.size(0)
46 | num_neg = neg_bboxes.size(0)
47 | num_samples = num_pos + num_neg
48 | if len(pos_gt_labels[0]) == 1:
49 | labels = pos_bboxes.new_zeros(num_samples, dtype=torch.long)
50 | else:
51 | labels = pos_bboxes.new_zeros(
52 | (num_samples, len(pos_gt_labels[0])), dtype=torch.long)
53 | label_weights = pos_bboxes.new_zeros(num_samples)
54 | if len(pos_gt_labels[0]) == 1:
55 | class_weights = pos_bboxes.new_zeros(num_samples)
56 | else:
57 | class_weights = pos_bboxes.new_zeros(
58 | num_samples, len(pos_gt_labels[0]))
59 | bbox_targets = pos_bboxes.new_zeros(num_samples, 4)
60 | bbox_weights = pos_bboxes.new_zeros(num_samples, 4)
61 | if num_pos > 0:
62 | labels[:num_pos] = pos_gt_labels
63 | pos_weight = 1.0 if cfg.pos_weight <= 0 else cfg.pos_weight
64 | label_weights[:num_pos] = pos_weight
65 | class_weight = 1.0 if not hasattr(
66 | cfg, 'cls_weight') or cfg.cls_weight <= 0 else cfg.cls_weight
67 | class_weights[:num_pos] = class_weight
68 | pos_bbox_targets = bbox2delta(pos_bboxes, pos_gt_bboxes, target_means,
69 | target_stds)
70 | bbox_targets[:num_pos, :] = pos_bbox_targets
71 | bbox_weights[:num_pos, :] = 1
72 | if num_neg > 0:
73 | label_weights[-num_neg:] = 1.0
74 | class_weights[-num_neg:] = 0.0
75 |
76 | return labels, label_weights, bbox_targets, bbox_weights, class_weights
77 |
78 |
79 | def expand_target(bbox_targets, bbox_weights, labels, num_classes):
80 | bbox_targets_expand = bbox_targets.new_zeros((bbox_targets.size(0),
81 | 4 * num_classes))
82 | bbox_weights_expand = bbox_weights.new_zeros((bbox_weights.size(0),
83 | 4 * num_classes))
84 | for i in torch.nonzero(labels > 0).squeeze(-1):
85 | start, end = labels[i] * 4, (labels[i] + 1) * 4
86 | bbox_targets_expand[i, start:end] = bbox_targets[i, :]
87 | bbox_weights_expand[i, start:end] = bbox_weights[i, :]
88 | return bbox_targets_expand, bbox_weights_expand
89 |
--------------------------------------------------------------------------------
/mmaction/core/bbox2d/geometry.py:
--------------------------------------------------------------------------------
1 | import torch
2 |
3 |
4 | def bbox_overlaps(bboxes1, bboxes2, mode='iou', is_aligned=False):
5 |     """Calculate overlap between two sets of bboxes.
6 |
7 | If ``is_aligned`` is ``False``, then calculate the ious between each bbox
8 | of bboxes1 and bboxes2, otherwise the ious between each aligned pair of
9 | bboxes1 and bboxes2.
10 |
11 | Args:
12 | bboxes1 (Tensor): shape (m, 4)
13 | bboxes2 (Tensor): shape (n, 4), if is_aligned is ``True``, then m and n
14 | must be equal.
15 | mode (str): "iou" (intersection over union) or iof (intersection over
16 | foreground).
17 |
18 | Returns:
19 | ious(Tensor): shape (m, n) if is_aligned == False else shape (m, 1)
20 | """
21 |
22 | assert mode in ['iou', 'iof']
23 |
24 | rows = bboxes1.size(0)
25 | cols = bboxes2.size(0)
26 | if is_aligned:
27 | assert rows == cols
28 |
29 | if rows * cols == 0:
30 | return bboxes1.new(rows, 1) if is_aligned else bboxes1.new(rows, cols)
31 |
32 | if is_aligned:
33 | lt = torch.max(bboxes1[:, :2], bboxes2[:, :2]) # [rows, 2]
34 | rb = torch.min(bboxes1[:, 2:], bboxes2[:, 2:]) # [rows, 2]
35 |
36 | wh = (rb - lt + 1).clamp(min=0) # [rows, 2]
37 | overlap = wh[:, 0] * wh[:, 1]
38 | area1 = (bboxes1[:, 2] - bboxes1[:, 0] + 1) * (
39 | bboxes1[:, 3] - bboxes1[:, 1] + 1)
40 |
41 | if mode == 'iou':
42 | area2 = (bboxes2[:, 2] - bboxes2[:, 0] + 1) * (
43 | bboxes2[:, 3] - bboxes2[:, 1] + 1)
44 | ious = overlap / (area1 + area2 - overlap)
45 | else:
46 | ious = overlap / area1
47 | else:
48 | lt = torch.max(bboxes1[:, None, :2], bboxes2[:, :2]) # [rows, cols, 2]
49 | rb = torch.min(bboxes1[:, None, 2:], bboxes2[:, 2:]) # [rows, cols, 2]
50 |
51 | wh = (rb - lt + 1).clamp(min=0) # [rows, cols, 2]
52 | overlap = wh[:, :, 0] * wh[:, :, 1]
53 | area1 = (bboxes1[:, 2] - bboxes1[:, 0] + 1) * (
54 | bboxes1[:, 3] - bboxes1[:, 1] + 1)
55 |
56 | if mode == 'iou':
57 | area2 = (bboxes2[:, 2] - bboxes2[:, 0] + 1) * (
58 | bboxes2[:, 3] - bboxes2[:, 1] + 1)
59 | ious = overlap / (area1[:, None] + area2 - overlap)
60 | else:
61 | ious = overlap / (area1[:, None])
62 |
63 | return ious
64 |
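
A small numeric check of bbox_overlaps in both pairwise and aligned modes (box widths and heights include the +1 pixel convention used above):

import torch
from mmaction.core.bbox2d import bbox_overlaps

b1 = torch.tensor([[0., 0., 9., 9.]])           # a 10x10 box
b2 = torch.tensor([[0., 0., 9., 9.],
                   [5., 5., 14., 14.]])         # the same box and a shifted copy
print(bbox_overlaps(b1, b2))                    # [[1.0000, 0.1429]]  (25 / 175)
print(bbox_overlaps(b2, b2, is_aligned=True))   # [1., 1.]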
--------------------------------------------------------------------------------
/mmaction/core/bbox2d/samplers/__init__.py:
--------------------------------------------------------------------------------
1 | from .base_sampler import BaseSampler
2 | from .pseudo_sampler import PseudoSampler
3 | from .random_sampler import RandomSampler
4 | from .sampling_result import SamplingResult
5 |
6 | __all__ = [
7 |     'BaseSampler', 'PseudoSampler', 'RandomSampler',
8 |     'SamplingResult'
9 | ]
--------------------------------------------------------------------------------
/mmaction/core/bbox2d/samplers/base_sampler.py:
--------------------------------------------------------------------------------
1 | from abc import ABCMeta, abstractmethod
2 |
3 | import torch
4 |
5 | from .sampling_result import SamplingResult
6 |
7 |
8 | class BaseSampler(metaclass=ABCMeta):
9 |
10 | def __init__(self,
11 | num,
12 | pos_fraction,
13 | neg_pos_ub=-1,
14 | add_gt_as_proposals=True,
15 | **kwargs):
16 | self.num = num
17 | self.pos_fraction = pos_fraction
18 | self.neg_pos_ub = neg_pos_ub
19 | self.add_gt_as_proposals = add_gt_as_proposals
20 | self.pos_sampler = self
21 | self.neg_sampler = self
22 |
23 | @abstractmethod
24 | def _sample_pos(self, assign_result, num_expected, **kwargs):
25 | pass
26 |
27 | @abstractmethod
28 | def _sample_neg(self, assign_result, num_expected, **kwargs):
29 | pass
30 |
31 | def sample(self,
32 | assign_result,
33 | bboxes,
34 | gt_bboxes,
35 | gt_labels=None,
36 | **kwargs):
37 | """Sample positive and negative bboxes.
38 | This is a simple implementation of bbox sampling given candidates,
39 |         assignment results and ground truth bboxes.
40 | Args:
41 | assign_result (:obj:`AssignResult`): Bbox assigning results.
42 | bboxes (Tensor): Boxes to be sampled from.
43 | gt_bboxes (Tensor): Ground truth bboxes.
44 | gt_labels (Tensor, optional): Class labels of ground truth bboxes.
45 | Returns:
46 | :obj:`SamplingResult`: Sampling result.
47 | """
48 | bboxes = bboxes[:, :4]
49 |
50 | gt_flags = bboxes.new_zeros((bboxes.shape[0], ), dtype=torch.uint8)
51 | if self.add_gt_as_proposals:
52 | bboxes = torch.cat([gt_bboxes, bboxes], dim=0)
53 | assign_result.add_gt_(gt_labels)
54 | gt_ones = bboxes.new_ones(gt_bboxes.shape[0], dtype=torch.uint8)
55 | gt_flags = torch.cat([gt_ones, gt_flags])
56 |
57 | num_expected_pos = int(self.num * self.pos_fraction)
58 | pos_inds = self.pos_sampler._sample_pos(
59 | assign_result, num_expected_pos, bboxes=bboxes, **kwargs)
60 | # We found that sampled indices have duplicated items occasionally.
61 | # (may be a bug of PyTorch)
62 | pos_inds = pos_inds.unique()
63 | num_sampled_pos = pos_inds.numel()
64 | num_expected_neg = self.num - num_sampled_pos
65 | if self.neg_pos_ub >= 0:
66 | _pos = max(1, num_sampled_pos)
67 | neg_upper_bound = int(self.neg_pos_ub * _pos)
68 | if num_expected_neg > neg_upper_bound:
69 | num_expected_neg = neg_upper_bound
70 | neg_inds = self.neg_sampler._sample_neg(
71 | assign_result, num_expected_neg, bboxes=bboxes, **kwargs)
72 | neg_inds = neg_inds.unique()
73 |
74 | return SamplingResult(pos_inds, neg_inds, bboxes, gt_bboxes,
75 | assign_result, gt_flags)
76 |
--------------------------------------------------------------------------------
/mmaction/core/bbox2d/samplers/pseudo_sampler.py:
--------------------------------------------------------------------------------
1 | import torch
2 |
3 | from .base_sampler import BaseSampler
4 | from .sampling_result import SamplingResult
5 |
6 |
7 | class PseudoSampler(BaseSampler):
8 |
9 | def __init__(self, **kwargs):
10 | pass
11 |
12 | def _sample_pos(self, **kwargs):
13 | raise NotImplementedError
14 |
15 | def _sample_neg(self, **kwargs):
16 | raise NotImplementedError
17 |
18 | def sample(self, assign_result, bboxes, gt_bboxes, **kwargs):
19 | pos_inds = torch.nonzero(
20 | assign_result.gt_inds > 0).squeeze(-1).unique()
21 | neg_inds = torch.nonzero(
22 | assign_result.gt_inds == 0).squeeze(-1).unique()
23 | gt_flags = bboxes.new_zeros(bboxes.shape[0], dtype=torch.uint8)
24 | sampling_result = SamplingResult(pos_inds, neg_inds, bboxes, gt_bboxes,
25 | assign_result, gt_flags)
26 | return sampling_result
27 |
--------------------------------------------------------------------------------
/mmaction/core/bbox2d/samplers/random_sampler.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import torch
3 |
4 | from .base_sampler import BaseSampler
5 |
6 |
7 | class RandomSampler(BaseSampler):
8 |
9 | def __init__(self,
10 | num,
11 | pos_fraction,
12 | neg_pos_ub=-1,
13 | add_gt_as_proposals=True,
14 | **kwargs):
15 | super(RandomSampler, self).__init__(num, pos_fraction, neg_pos_ub,
16 | add_gt_as_proposals)
17 |
18 | @staticmethod
19 | def random_choice(gallery, num):
20 |         """Randomly select some elements from the gallery.
21 |         It seems that PyTorch's implementation is slower than numpy, so we
22 |         use numpy to randperm the indices.
23 | """
24 | assert len(gallery) >= num
25 | if isinstance(gallery, list):
26 | gallery = np.array(gallery)
27 | cands = np.arange(len(gallery))
28 | np.random.shuffle(cands)
29 | rand_inds = cands[:num]
30 | if not isinstance(gallery, np.ndarray):
31 | rand_inds = torch.from_numpy(rand_inds).long().to(gallery.device)
32 | return gallery[rand_inds]
33 |
34 | def _sample_pos(self, assign_result, num_expected, **kwargs):
35 | """Randomly sample some positive samples."""
36 | pos_inds = torch.nonzero(assign_result.gt_inds > 0)
37 | if pos_inds.numel() != 0:
38 | pos_inds = pos_inds.squeeze(1)
39 | if pos_inds.numel() <= num_expected:
40 | return pos_inds
41 | else:
42 | return self.random_choice(pos_inds, num_expected)
43 |
44 | def _sample_neg(self, assign_result, num_expected, **kwargs):
45 | """Randomly sample some negative samples."""
46 | neg_inds = torch.nonzero(assign_result.gt_inds == 0)
47 | if neg_inds.numel() != 0:
48 | neg_inds = neg_inds.squeeze(1)
49 | if len(neg_inds) <= num_expected:
50 | return neg_inds
51 | else:
52 | return self.random_choice(neg_inds, num_expected)
53 |
--------------------------------------------------------------------------------
/mmaction/core/bbox2d/samplers/sampling_result.py:
--------------------------------------------------------------------------------
1 | import torch
2 |
3 |
4 | class SamplingResult(object):
5 |
6 | def __init__(self, pos_inds, neg_inds, bboxes, gt_bboxes, assign_result,
7 | gt_flags):
8 | self.pos_inds = pos_inds
9 | self.neg_inds = neg_inds
10 | self.pos_bboxes = bboxes[pos_inds]
11 | self.neg_bboxes = bboxes[neg_inds]
12 | self.pos_is_gt = gt_flags[pos_inds]
13 |
14 | self.num_gts = gt_bboxes.shape[0]
15 | self.pos_assigned_gt_inds = assign_result.gt_inds[pos_inds] - 1
16 | self.pos_gt_bboxes = gt_bboxes[self.pos_assigned_gt_inds, :]
17 | if assign_result.labels is not None:
18 | self.pos_gt_labels = assign_result.labels[pos_inds]
19 | else:
20 | self.pos_gt_labels = None
21 |
22 | @property
23 | def bboxes(self):
24 | return torch.cat([self.pos_bboxes, self.neg_bboxes])
25 |
--------------------------------------------------------------------------------
/mmaction/core/evaluation/__init__.py:
--------------------------------------------------------------------------------
1 | from .class_names import (get_classes)
2 | from .eval_hooks import (DistEvalHook, DistEvalTopKAccuracyHook,
3 | AVADistEvalmAPHook)
4 |
5 | __all__ = [
6 | 'get_classes',
7 | 'DistEvalHook', 'DistEvalTopKAccuracyHook',
8 | 'AVADistEvalmAPHook'
9 | ]
10 |
--------------------------------------------------------------------------------
/mmaction/core/evaluation/accuracy.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | from sklearn.metrics import confusion_matrix
3 |
4 |
5 | def softmax(x, dim=1):
6 |     """Compute softmax values for each set of scores in x."""
7 | e_x = np.exp(x - np.max(x, axis=dim, keepdims=True))
8 | return e_x / e_x.sum(axis=dim, keepdims=True)
9 |
10 |
11 | def mean_class_accuracy(scores, labels):
12 | pred = np.argmax(scores, axis=1)
13 | cf = confusion_matrix(labels, pred).astype(float)
14 |
15 | cls_cnt = cf.sum(axis=1)
16 | cls_hit = np.diag(cf)
17 |
18 | return np.mean(cls_hit/cls_cnt)
19 |
20 |
21 | def top_k_acc(score, lb_set, k=3):
22 | idx = np.argsort(score)[-k:]
23 | return len(lb_set.intersection(idx)), len(lb_set)
24 |
25 |
26 | def top_k_hit(score, lb_set, k=3):
27 | idx = np.argsort(score)[-k:]
28 | return len(lb_set.intersection(idx)) > 0, 1
29 |
30 |
31 | def top_k_accuracy(scores, labels, k=(1,)):
32 | res = []
33 | for kk in k:
34 | hits = []
35 | for x, y in zip(scores, labels):
36 | y = [y] if isinstance(y, int) else y
37 | hits.append(top_k_hit(x, set(y), k=kk)[0])
38 | res.append(np.mean(hits))
39 | return res
40 |
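
A toy check of the evaluation helpers on three samples and three classes (assuming mmaction and scikit-learn are installed):

import numpy as np
from mmaction.core.evaluation.accuracy import top_k_accuracy, mean_class_accuracy

scores = np.array([[0.8, 0.1, 0.1],
                   [0.2, 0.6, 0.2],
                   [0.1, 0.6, 0.3]])
labels = [0, 1, 2]

print(top_k_accuracy(scores, labels, k=(1, 2)))   # [0.666..., 1.0]
print(mean_class_accuracy(scores, labels))        # (1 + 1 + 0) / 3 = 0.666...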
--------------------------------------------------------------------------------
/mmaction/core/evaluation/bbox_overlaps.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 |
3 |
4 | def bbox_overlaps(bboxes1, bboxes2, mode='iou'):
5 | """Calculate the ious between each bbox of bboxes1 and bboxes2.
6 | Args:
7 | bboxes1(ndarray): shape (n, 4)
8 | bboxes2(ndarray): shape (k, 4)
9 | mode(str): iou (intersection over union) or iof (intersection
10 | over foreground)
11 | Returns:
12 | ious(ndarray): shape (n, k)
13 | """
14 |
15 | assert mode in ['iou', 'iof']
16 |
17 | bboxes1 = bboxes1.astype(np.float32)
18 | bboxes2 = bboxes2.astype(np.float32)
19 | rows = bboxes1.shape[0]
20 | cols = bboxes2.shape[0]
21 | ious = np.zeros((rows, cols), dtype=np.float32)
22 | if rows * cols == 0:
23 | return ious
24 | exchange = False
25 | if bboxes1.shape[0] > bboxes2.shape[0]:
26 | bboxes1, bboxes2 = bboxes2, bboxes1
27 | ious = np.zeros((cols, rows), dtype=np.float32)
28 | exchange = True
29 | area1 = (bboxes1[:, 2] - bboxes1[:, 0] + 1) * (
30 | bboxes1[:, 3] - bboxes1[:, 1] + 1)
31 | area2 = (bboxes2[:, 2] - bboxes2[:, 0] + 1) * (
32 | bboxes2[:, 3] - bboxes2[:, 1] + 1)
33 | for i in range(bboxes1.shape[0]):
34 | x_start = np.maximum(bboxes1[i, 0], bboxes2[:, 0])
35 | y_start = np.maximum(bboxes1[i, 1], bboxes2[:, 1])
36 | x_end = np.minimum(bboxes1[i, 2], bboxes2[:, 2])
37 | y_end = np.minimum(bboxes1[i, 3], bboxes2[:, 3])
38 | overlap = np.maximum(x_end - x_start + 1, 0) * np.maximum(
39 | y_end - y_start + 1, 0)
40 | if mode == 'iou':
41 | union = area1[i] + area2 - overlap
42 | else:
43 | union = area1[i] if not exchange else area2
44 | ious[i, :] = overlap / union
45 | if exchange:
46 | ious = ious.T
47 | return ious
48 |
--------------------------------------------------------------------------------
/mmaction/core/evaluation/class_names.py:
--------------------------------------------------------------------------------
1 | import mmcv
2 |
3 |
4 | def ava_classes():
5 | return [
6 | 'bend/bow (at the waist)', 'crawl', 'crouch/kneel', 'dance',
7 | 'fall down', 'get up', 'jump/leap', 'lie/sleep', 'martial art',
8 | 'run/jog', 'sit', 'stand', 'swim', 'walk', 'answer phone',
9 | 'brush teeth', 'carry/hold (an object)', 'catch (an object)', 'chop',
10 | 'climb (e.g., a mountain)',
11 | 'clink glass', 'close (e.g., a door, a box)', 'cook', 'cut', 'dig',
12 | 'dress/put on clothing', 'drink', 'driving (e.g., a car, a truck)',
13 | 'eat', 'enter', 'exit', 'extract', 'fishing', 'hit (an object)',
14 | 'kick (an object)', 'lift/pick up', 'listen (e.g., to music)',
15 | 'open (e.g., a window, a car door)', 'paint', 'play board game',
16 | 'play musical instrument', 'play with pets', 'point to (an object)',
17 | 'press', 'pull (an object)', 'push (an object)', 'put down', 'read',
18 | 'ride (e.g., a bike, a car, a horse)', 'row boat', 'sail boat',
19 | 'shoot', 'shovel', 'smoke', 'stir', 'take a photo',
20 | 'text on/look at a cellphone', 'throw', 'touch (an object)',
21 |         'turn (e.g., a screwdriver)', 'watch (e.g., TV)', 'work on a computer',
22 | 'write', 'fight/hit (a person)',
23 | 'give/serve (an object) to (a person)',
24 | 'grab (a person)', 'hand clap', 'hand shake', 'hand wave',
25 | 'hug (a person)',
26 | 'kick (a person)', 'kiss (a person)', 'lift (a person)',
27 | 'listen to (a person)', 'play with kids', 'push (another person)',
28 | 'sing to (e.g., self, a person, a group)',
29 | 'take (an object) from (a person)',
30 | 'talk to (e.g., self, a person, a group)', 'watch (a person)'
31 | ]
32 |
33 |
34 | dataset_aliases = {
35 | 'ava': ['ava', 'ava2.1', 'ava2.2'],
36 | }
37 |
38 |
39 | def get_classes(dataset):
40 | """Get class names of a dataset."""
41 | alias2name = {}
42 | for name, aliases in dataset_aliases.items():
43 | for alias in aliases:
44 | alias2name[alias] = name
45 |
46 | if mmcv.is_str(dataset):
47 | if dataset in alias2name:
48 | labels = eval(alias2name[dataset] + '_classes()')
49 | else:
50 | raise ValueError('Unrecognized dataset: {}'.format(dataset))
51 | else:
52 |         raise TypeError('dataset must be a str, but got {}'.format(type(dataset)))
53 | return labels
54 |
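
A quick usage sketch: class names are looked up by dataset alias.

from mmaction.core.evaluation import get_classes

classes = get_classes('ava2.1')   # 'ava', 'ava2.1' and 'ava2.2' all resolve to ava_classes()
print(classes[0])                 # 'bend/bow (at the waist)'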
--------------------------------------------------------------------------------
/mmaction/core/post_processing/__init__.py:
--------------------------------------------------------------------------------
1 | from .bbox_nms import multiclass_nms, singleclass_nms
2 | from .merge_augs import (merge_aug_proposals, merge_aug_bboxes,
3 | merge_aug_scores)
4 |
5 | __all__ = [
6 | 'multiclass_nms', 'singleclass_nms', 'merge_aug_proposals', 'merge_aug_bboxes',
7 | 'merge_aug_scores'
8 | ]
9 |
--------------------------------------------------------------------------------
/mmaction/core/post_processing/merge_augs.py:
--------------------------------------------------------------------------------
1 | import torch
2 |
3 | import numpy as np
4 |
5 | from mmaction.ops import nms
6 | from ..bbox2d import bbox_mapping_back
7 |
8 |
9 | def merge_aug_proposals(aug_proposals, img_metas, rpn_test_cfg):
10 | """Merge augmented proposals (multiscale, flip, etc.)
11 |
12 | Args:
13 | aug_proposals (list[Tensor]): proposals from different testing
14 | schemes, shape (n, 5). Note that they are not rescaled to the
15 | original image size.
16 |         img_metas (list[dict]): image info including "img_shape", "scale_factor" and "flip".
17 | rpn_test_cfg (dict): rpn test config.
18 |
19 | Returns:
20 |         Tensor: shape (k, 5), (x1, y1, x2, y2, score) proposals at the original image scale.
21 | """
22 | recovered_proposals = []
23 | for proposals, img_info in zip(aug_proposals, img_metas):
24 | img_shape = img_info['img_shape']
25 | scale_factor = img_info['scale_factor']
26 | flip = img_info['flip']
27 | _proposals = proposals.clone()
28 | _proposals[:, :4] = bbox_mapping_back(_proposals[:, :4], img_shape,
29 | scale_factor, flip)
30 | recovered_proposals.append(_proposals)
31 | aug_proposals = torch.cat(recovered_proposals, dim=0)
32 | merged_proposals, _ = nms(aug_proposals, rpn_test_cfg.nms_thr)
33 | scores = merged_proposals[:, 4]
34 | _, order = scores.sort(0, descending=True)
35 | num = min(rpn_test_cfg.max_num, merged_proposals.shape[0])
36 | order = order[:num]
37 | merged_proposals = merged_proposals[order, :]
38 | return merged_proposals
39 |
40 |
41 | def merge_aug_bboxes(aug_bboxes, aug_scores, img_metas, rcnn_test_cfg):
42 | """Merge augmented detection bboxes and scores.
43 |
44 | Args:
45 | aug_bboxes (list[Tensor]): shape (n, 4*#class)
46 | aug_scores (list[Tensor] or None): shape (n, #class)
47 |         img_metas (list[list[dict]]): image info for each augmentation.
48 | rcnn_test_cfg (dict): rcnn test config.
49 |
50 | Returns:
51 | tuple: (bboxes, scores)
52 | """
53 | recovered_bboxes = []
54 | for bboxes, img_info in zip(aug_bboxes, img_metas):
55 | img_shape = img_info[0]['img_shape']
56 | scale_factor = img_info[0]['scale_factor']
57 | flip = img_info[0]['flip']
58 | bboxes = bbox_mapping_back(bboxes, img_shape, scale_factor, flip)
59 | recovered_bboxes.append(bboxes)
60 | bboxes = torch.stack(recovered_bboxes).mean(dim=0)
61 | if aug_scores is None:
62 | return bboxes
63 | else:
64 | scores = torch.stack(aug_scores).mean(dim=0)
65 | return bboxes, scores
66 |
67 |
68 | def merge_aug_scores(aug_scores):
69 | """Merge augmented bbox scores."""
70 | if isinstance(aug_scores[0], torch.Tensor):
71 | return torch.mean(torch.stack(aug_scores), dim=0)
72 | else:
73 | return np.mean(aug_scores, axis=0)
74 |
--------------------------------------------------------------------------------
/mmaction/core/utils/__init__.py:
--------------------------------------------------------------------------------
1 | from .dist_utils import allreduce_grads, DistOptimizerHook
2 |
3 | __all__ = [
4 | 'allreduce_grads', 'DistOptimizerHook',
5 | ]
--------------------------------------------------------------------------------
/mmaction/core/utils/dist_utils.py:
--------------------------------------------------------------------------------
1 | from collections import OrderedDict
2 |
3 | import torch.distributed as dist
4 | from torch._utils import (_flatten_dense_tensors, _unflatten_dense_tensors,
5 | _take_tensors)
6 | from mmcv.runner import OptimizerHook
7 |
8 |
9 | def _allreduce_coalesced(tensors, world_size, bucket_size_mb=-1):
10 | if bucket_size_mb > 0:
11 | bucket_size_bytes = bucket_size_mb * 1024 * 1024
12 | buckets = _take_tensors(tensors, bucket_size_bytes)
13 | else:
14 | buckets = OrderedDict()
15 | for tensor in tensors:
16 | tp = tensor.type()
17 | if tp not in buckets:
18 | buckets[tp] = []
19 | buckets[tp].append(tensor)
20 | buckets = buckets.values()
21 |
22 | for bucket in buckets:
23 | flat_tensors = _flatten_dense_tensors(bucket)
24 | dist.all_reduce(flat_tensors)
25 | flat_tensors.div_(world_size)
26 | for tensor, synced in zip(
27 | bucket, _unflatten_dense_tensors(flat_tensors, bucket)):
28 | tensor.copy_(synced)
29 |
30 |
31 | def allreduce_grads(model, coalesce=True, bucket_size_mb=-1):
32 | grads = [
33 | param.grad.data for param in model.parameters()
34 | if param.requires_grad and param.grad is not None
35 | ]
36 | world_size = dist.get_world_size()
37 | if coalesce:
38 | _allreduce_coalesced(grads, world_size, bucket_size_mb)
39 | else:
40 | for tensor in grads:
41 | dist.all_reduce(tensor.div_(world_size))
42 |
43 |
44 | class DistOptimizerHook(OptimizerHook):
45 |
46 | def __init__(self, grad_clip=None, coalesce=True, bucket_size_mb=-1):
47 | self.grad_clip = grad_clip
48 | self.coalesce = coalesce
49 | self.bucket_size_mb = bucket_size_mb
50 |
51 | def after_train_iter(self, runner):
52 | runner.optimizer.zero_grad()
53 | runner.outputs['loss'].backward()
54 | allreduce_grads(runner.model, self.coalesce, self.bucket_size_mb)
55 | if self.grad_clip is not None:
56 | self.clip_grads(runner.model.parameters())
57 | runner.optimizer.step()
58 |
--------------------------------------------------------------------------------
/mmaction/datasets/__init__.py:
--------------------------------------------------------------------------------
1 | from .rawframes_dataset import RawFramesDataset
2 | from .lmdbframes_dataset import LMDBFramesDataset
3 | from .video_dataset import VideoDataset
4 | from .ssn_dataset import SSNDataset
5 | from .ava_dataset import AVADataset
6 | from .utils import get_untrimmed_dataset, get_trimmed_dataset
7 | from .loader import GroupSampler, DistributedGroupSampler, build_dataloader
8 |
9 | __all__ = [
10 | 'RawFramesDataset', 'LMDBFramesDataset',
11 | 'VideoDataset', 'SSNDataset', 'AVADataset',
12 | 'get_trimmed_dataset', 'get_untrimmed_dataset',
13 | 'GroupSampler', 'DistributedGroupSampler', 'build_dataloader'
14 | ]
15 |
--------------------------------------------------------------------------------
/mmaction/datasets/feature_dataset.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/open-mmlab/mmaction/c7e3b7c11fb94131be9b48a8e3d510589addc3ce/mmaction/datasets/feature_dataset.py
--------------------------------------------------------------------------------
/mmaction/datasets/loader/__init__.py:
--------------------------------------------------------------------------------
1 | from .build_loader import build_dataloader
2 | from .sampler import GroupSampler, DistributedGroupSampler
3 |
4 | __all__ = [
5 | 'GroupSampler', 'DistributedGroupSampler', 'build_dataloader'
6 | ]
7 |
--------------------------------------------------------------------------------
/mmaction/datasets/loader/build_loader.py:
--------------------------------------------------------------------------------
1 | from functools import partial
2 |
3 | from mmcv.runner import get_dist_info
4 | from mmcv.parallel import collate
5 | from torch.utils.data import DataLoader
6 |
7 | from .sampler import GroupSampler, DistributedGroupSampler, DistributedSampler
8 |
9 | # https://github.com/pytorch/pytorch/issues/973
10 | import resource
11 | rlimit = resource.getrlimit(resource.RLIMIT_NOFILE)
12 | resource.setrlimit(resource.RLIMIT_NOFILE, (4096, rlimit[1]))
13 |
14 |
15 | def build_dataloader(dataset,
16 | imgs_per_gpu,
17 | workers_per_gpu,
18 | num_gpus=1,
19 | dist=True,
20 | **kwargs):
21 | shuffle = kwargs.get('shuffle', True)
22 | if dist:
23 | rank, world_size = get_dist_info()
24 | if shuffle:
25 | sampler = DistributedGroupSampler(dataset, imgs_per_gpu, world_size, rank)
26 | else:
27 | sampler = DistributedSampler(dataset, world_size, rank, shuffle=False)
28 | batch_size = imgs_per_gpu
29 | num_workers = workers_per_gpu
30 | else:
31 | if not kwargs.get('shuffle', True):
32 | sampler = None
33 | else:
34 | sampler = GroupSampler(dataset, imgs_per_gpu)
35 | batch_size = num_gpus * imgs_per_gpu
36 | num_workers = num_gpus * workers_per_gpu
37 |
38 | data_loader = DataLoader(
39 | dataset,
40 | batch_size=batch_size,
41 | sampler=sampler,
42 | num_workers=num_workers,
43 | collate_fn=partial(collate, samples_per_gpu=imgs_per_gpu),
44 | pin_memory=False,
45 | **kwargs)
46 |
47 | return data_loader
48 |
--------------------------------------------------------------------------------
/mmaction/losses/__init__.py:
--------------------------------------------------------------------------------
1 | from .flow_losses import charbonnier_loss, SSIM_loss
2 | from .losses import (
3 | weighted_nll_loss, weighted_cross_entropy, weighted_binary_cross_entropy,
4 | weighted_smoothl1, accuracy,
5 | weighted_multilabel_binary_cross_entropy,
6 | multilabel_accuracy)
7 | from .ssn_losses import (OHEMHingeLoss, completeness_loss,
8 | classwise_regression_loss)
9 |
10 | __all__ = [
11 | 'charbonnier_loss', 'SSIM_loss',
12 | 'weighted_nll_loss', 'weighted_cross_entropy',
13 | 'weighted_binary_cross_entropy',
14 | 'weighted_smoothl1', 'accuracy',
15 | 'weighted_multilabel_binary_cross_entropy',
16 | 'multilabel_accuracy',
17 | 'OHEMHingeLoss', 'completeness_loss',
18 | 'classwise_regression_loss'
19 | ]
20 |
--------------------------------------------------------------------------------
/mmaction/losses/flow_losses.py:
--------------------------------------------------------------------------------
1 | import math
2 | import torch
3 | import torch.nn as nn
4 |
5 |
6 | def charbonnier_loss(difference, mask, alpha=1, beta=1., epsilon=0.001):
7 | '''
8 |     Charbonnier penalty: sum(((x * beta)^2 + epsilon^2)^alpha) / num_valid_pixels
9 | '''
10 | if mask is not None:
11 | assert difference.size(0) == mask.size(0)
12 | assert difference.size(2) == mask.size(2)
13 | assert difference.size(3) == mask.size(3)
14 | res = torch.pow(torch.pow(difference * beta, 2) + epsilon ** 2, alpha)
15 | if mask is not None:
16 | batch_pixels = torch.sum(mask)
17 | return torch.sum(res * mask) / batch_pixels
18 | else:
19 | batch_pixels = torch.numel(res)
20 | return torch.sum(res) / batch_pixels
21 |
22 |
23 | def SSIM_loss(img1, img2, kernel_size=8, stride=8, c1=0.00001, c2=0.00001):
24 | num = img1.size(0)
25 | channels = img1.size(1)
26 |
27 | kernel_h = kernel_w = kernel_size
28 | sigma = (kernel_w + kernel_h) / 12.
29 | gauss_kernel = torch.zeros((1, 1, kernel_h, kernel_w)).type(img1.type())
30 | for h in range(kernel_h):
31 | for w in range(kernel_w):
32 | gauss_kernel[0, 0, h, w] = math.exp(
33 |                 -(math.pow(h - kernel_h/2.0, 2) + math.pow(w - kernel_w/2.0, 2))
34 | / (2.0 * sigma ** 2)) / (2 * 3.14159 * sigma ** 2)
35 | gauss_kernel = gauss_kernel / torch.sum(gauss_kernel)
36 | gauss_kernel = gauss_kernel.repeat(channels, 1, 1, 1)
37 |
38 | gauss_filter = nn.Conv2d(channels, channels, kernel_size,
39 | stride=stride, padding=0,
40 | groups=channels, bias=False)
41 | gauss_filter.weight.data = gauss_kernel
42 | gauss_filter.weight.requires_grad = False
43 |
44 | ux = gauss_filter(img1)
45 | uy = gauss_filter(img2)
46 | sx2 = gauss_filter(img1 ** 2)
47 | sy2 = gauss_filter(img2 ** 2)
48 | sxy = gauss_filter(img1 * img2)
49 |
50 | ux2 = ux ** 2
51 | uy2 = uy ** 2
52 | sx2 = sx2 - ux2
53 | sy2 = sy2 - uy2
54 | sxy = sxy - ux * uy
55 |
56 | lp = (2 * ux * uy + c1) / (ux2 + uy2 + c1)
57 | sc = (2 * sxy + c2) / (sx2 + sy2 + c2)
58 |
59 | ssim = lp * sc
60 | return (lp.numel() - torch.sum(ssim)) / num
61 |
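
A minimal sketch of charbonnier_loss on a random difference map, with and without a validity mask (only dims 0, 2 and 3 of the mask must match the difference tensor):

import torch
from mmaction.losses import charbonnier_loss

diff = torch.randn(2, 2, 8, 8)        # (N, C, H, W) flow / photometric difference
mask = torch.ones(2, 1, 8, 8)         # all pixels counted as valid
print(charbonnier_loss(diff, mask))   # sum of penalties / number of valid pixels
print(charbonnier_loss(diff, None))   # unmasked: mean over all elements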
--------------------------------------------------------------------------------
/mmaction/losses/ssn_losses.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import torch.nn.functional as F
3 |
4 |
5 | class OHEMHingeLoss(torch.autograd.Function):
6 | """
7 |     This class is the core implementation of the completeness loss in the
8 |     paper. It computes class-wise hinge loss and performs online hard
9 |     example mining (OHEM).
10 | """
11 |
12 | @staticmethod
13 | def forward(ctx, pred, labels, is_positive, ohem_ratio, group_size):
14 | n_sample = pred.size()[0]
15 | assert n_sample == len(
16 | labels), "mismatch between sample size and label size"
17 | losses = torch.zeros(n_sample)
18 | slopes = torch.zeros(n_sample)
19 | for i in range(n_sample):
20 | losses[i] = max(0, 1 - is_positive * pred[i, labels[i] - 1])
21 | slopes[i] = -is_positive if losses[i] != 0 else 0
22 |
23 | losses = losses.view(-1, group_size).contiguous()
24 | sorted_losses, indices = torch.sort(losses, dim=1, descending=True)
25 | keep_num = int(group_size * ohem_ratio)
26 | loss = torch.zeros(1).cuda()
27 | for i in range(losses.size(0)):
28 | loss += sorted_losses[i, :keep_num].sum()
29 | ctx.loss_ind = indices[:, :keep_num]
30 | ctx.labels = labels
31 | ctx.slopes = slopes
32 | ctx.shape = pred.size()
33 | ctx.group_size = group_size
34 | ctx.num_group = losses.size(0)
35 | return loss
36 |
37 | @staticmethod
38 | def backward(ctx, grad_output):
39 | labels = ctx.labels
40 | slopes = ctx.slopes
41 |
42 | grad_in = torch.zeros(ctx.shape)
43 | for group in range(ctx.num_group):
44 | for idx in ctx.loss_ind[group]:
45 | loc = idx + group * ctx.group_size
46 | grad_in[loc, labels[loc] - 1] = slopes[loc] * \
47 | grad_output.data[0]
48 | return torch.autograd.Variable(grad_in.cuda()), None, None, None, None
49 |
50 |
51 | def completeness_loss(pred, labels, sample_split,
52 | sample_group_size, ohem_ratio=0.17):
53 | pred_dim = pred.size()[1]
54 | pred = pred.view(-1, sample_group_size, pred_dim)
55 | labels = labels.view(-1, sample_group_size)
56 |
57 | pos_group_size = sample_split
58 | neg_group_size = sample_group_size - sample_split
59 | pos_prob = pred[:, :sample_split, :].contiguous().view(-1, pred_dim)
60 | neg_prob = pred[:, sample_split:, :].contiguous().view(-1, pred_dim)
61 | pos_ls = OHEMHingeLoss.apply(pos_prob,
62 | labels[:, :sample_split].contiguous(
63 | ).view(-1), 1,
64 | 1.0, pos_group_size)
65 | neg_ls = OHEMHingeLoss.apply(neg_prob,
66 | labels[:, sample_split:].contiguous(
67 | ).view(-1), -1,
68 | ohem_ratio, neg_group_size)
69 | pos_cnt = pos_prob.size(0)
70 | neg_cnt = int(neg_prob.size()[0] * ohem_ratio)
71 |
72 | return pos_ls / float(pos_cnt + neg_cnt) + \
73 | neg_ls / float(pos_cnt + neg_cnt)
74 |
75 |
76 | def classwise_regression_loss(pred, labels, targets):
77 | indexer = labels.data - 1
78 | prep = pred[:, indexer, :]
79 | class_pred = torch.cat((torch.diag(prep[:, :, 0]).view(-1, 1),
80 | torch.diag(prep[:, :, 1]).view(-1, 1)),
81 | dim=1)
82 | loss = F.smooth_l1_loss(class_pred.view(-1), targets.view(-1)) * 2
83 | return loss
84 |
--------------------------------------------------------------------------------
/mmaction/models/__init__.py:
--------------------------------------------------------------------------------
1 | from .tenons.backbones import *
2 | from .tenons.spatial_temporal_modules import *
3 | from .tenons.segmental_consensuses import *
4 | from .tenons.cls_heads import *
5 | from .recognizers import *
6 | from .tenons.necks import *
7 | from .tenons.roi_extractors import *
8 | from .tenons.anchor_heads import *
9 | from .tenons.shared_heads import *
10 | from .tenons.bbox_heads import *
11 | from .detectors import *
12 | from .localizers import *
13 |
14 |
15 | from .registry import (BACKBONES, SPATIAL_TEMPORAL_MODULES, SEGMENTAL_CONSENSUSES, HEADS,
16 | RECOGNIZERS, LOCALIZERS, DETECTORS, ARCHITECTURES,
17 | NECKS, ROI_EXTRACTORS)
18 | from .builder import (build_backbone, build_spatial_temporal_module, build_segmental_consensus,
19 | build_head, build_recognizer, build_detector,
20 | build_localizer, build_architecture,
21 | build_neck, build_roi_extractor)
22 |
23 | __all__ = [
24 | 'BACKBONES', 'SPATIAL_TEMPORAL_MODULES', 'SEGMENTAL_CONSENSUSES', 'HEADS',
25 | 'RECOGNIZERS', 'LOCALIZERS', 'DETECTORS', 'ARCHITECTURES',
26 | 'NECKS', 'ROI_EXTRACTORS',
27 | 'build_backbone', 'build_spatial_temporal_module', 'build_segmental_consensus',
28 | 'build_head', 'build_recognizer', 'build_detector',
29 | 'build_localizer', 'build_architecture',
30 | 'build_neck', 'build_roi_extractor'
31 | ]
32 |
--------------------------------------------------------------------------------
/mmaction/models/builder.py:
--------------------------------------------------------------------------------
1 | import mmcv
2 | from torch import nn
3 |
4 | from .registry import (BACKBONES, FLOWNETS, SPATIAL_TEMPORAL_MODULES,
5 | SEGMENTAL_CONSENSUSES, HEADS,
6 | RECOGNIZERS, DETECTORS, LOCALIZERS, ARCHITECTURES,
7 | NECKS, ROI_EXTRACTORS)
8 |
9 |
10 | def _build_module(cfg, registry, default_args):
11 | assert isinstance(cfg, dict) and 'type' in cfg
12 | assert isinstance(default_args, dict) or default_args is None
13 | args = cfg.copy()
14 | obj_type = args.pop('type')
15 | if mmcv.is_str(obj_type):
16 | if obj_type not in registry.module_dict:
17 | raise KeyError('{} is not in the {} registry'.format(
18 | obj_type, registry.name))
19 | obj_type = registry.module_dict[obj_type]
20 | elif not isinstance(obj_type, type):
21 | raise TypeError('type must be a str or valid type, but got {}'.format(
22 | type(obj_type)))
23 | if default_args is not None:
24 | for name, value in default_args.items():
25 | args.setdefault(name, value)
26 | return obj_type(**args)
27 |
28 |
29 | def build(cfg, registry, default_args=None):
30 | if isinstance(cfg, list):
31 | modules = [_build_module(cfg_, registry, default_args) for cfg_ in cfg]
32 | return nn.Sequential(*modules)
33 | else:
34 | return _build_module(cfg, registry, default_args)
35 |
36 |
37 | def build_backbone(cfg):
38 | return build(cfg, BACKBONES)
39 |
40 |
41 | def build_flownet(cfg):
42 | return build(cfg, FLOWNETS)
43 |
44 |
45 | def build_spatial_temporal_module(cfg):
46 | return build(cfg, SPATIAL_TEMPORAL_MODULES)
47 |
48 |
49 | def build_segmental_consensus(cfg):
50 | return build(cfg, SEGMENTAL_CONSENSUSES)
51 |
52 |
53 | def build_head(cfg):
54 | return build(cfg, HEADS)
55 |
56 |
57 | def build_recognizer(cfg, train_cfg=None, test_cfg=None):
58 | return build(cfg, RECOGNIZERS,
59 | dict(train_cfg=train_cfg, test_cfg=test_cfg))
60 |
61 |
62 | def build_localizer(cfg, train_cfg=None, test_cfg=None):
63 | return build(cfg, LOCALIZERS, dict(train_cfg=train_cfg, test_cfg=test_cfg))
64 |
65 |
66 | def build_detector(cfg, train_cfg=None, test_cfg=None):
67 | return build(cfg, DETECTORS, dict(train_cfg=train_cfg, test_cfg=test_cfg))
68 |
69 |
70 | def build_architecture(cfg, train_cfg=None, test_cfg=None):
71 | return build(cfg, ARCHITECTURES,
72 | dict(train_cfg=train_cfg, test_cfg=test_cfg))
73 |
74 |
75 | def build_neck(cfg):
76 | return build(cfg, NECKS)
77 |
78 |
79 | def build_roi_extractor(cfg):
80 | return build(cfg, ROI_EXTRACTORS)
81 |
--------------------------------------------------------------------------------
/mmaction/models/detectors/__init__.py:
--------------------------------------------------------------------------------
1 | from .base import BaseDetector
2 | from .two_stage import TwoStageDetector
3 | from .fast_rcnn import FastRCNN
4 | from .faster_rcnn import FasterRCNN
5 |
6 | __all__ = [
7 | 'BaseDetector', 'TwoStageDetector',
8 | 'FastRCNN', 'FasterRCNN',
9 | ]
10 |
--------------------------------------------------------------------------------
/mmaction/models/detectors/fast_rcnn.py:
--------------------------------------------------------------------------------
1 | from .two_stage import TwoStageDetector
2 | from ..registry import DETECTORS
3 |
4 |
5 | @DETECTORS.register_module
6 | class FastRCNN(TwoStageDetector):
7 |
8 | def __init__(self,
9 | backbone,
10 | bbox_roi_extractor,
11 | bbox_head,
12 | train_cfg,
13 | test_cfg,
14 | dropout_ratio=0,
15 | neck=None,
16 | shared_head=None,
17 | pretrained=None):
18 | super(FastRCNN, self).__init__(
19 | backbone=backbone,
20 | neck=neck,
21 | shared_head=shared_head,
22 | bbox_roi_extractor=bbox_roi_extractor,
23 | dropout_ratio=dropout_ratio,
24 | bbox_head=bbox_head,
25 | train_cfg=train_cfg,
26 | test_cfg=test_cfg,
27 | pretrained=pretrained)
28 |
--------------------------------------------------------------------------------
/mmaction/models/detectors/faster_rcnn.py:
--------------------------------------------------------------------------------
1 | from .two_stage import TwoStageDetector
2 | from ..registry import DETECTORS
3 |
4 |
5 | @DETECTORS.register_module
6 | class FasterRCNN(TwoStageDetector):
7 |
8 | def __init__(self,
9 | backbone,
10 | rpn_head,
11 | bbox_roi_extractor,
12 | bbox_head,
13 | train_cfg,
14 | test_cfg,
15 | dropout_ratio=0,
16 | neck=None,
17 | shared_head=None,
18 | pretrained=None):
19 | super(FasterRCNN, self).__init__(
20 | backbone=backbone,
21 | neck=neck,
22 | shared_head=shared_head,
23 | rpn_head=rpn_head,
24 | bbox_roi_extractor=bbox_roi_extractor,
25 | dropout_ratio=dropout_ratio,
26 | bbox_head=bbox_head,
27 | train_cfg=train_cfg,
28 | test_cfg=test_cfg,
29 | pretrained=pretrained)
30 |
--------------------------------------------------------------------------------
/mmaction/models/localizers/__init__.py:
--------------------------------------------------------------------------------
1 | from .base import BaseLocalizer
2 | from .SSN2D import SSN2D
3 |
4 | __all__ = [
5 | 'BaseLocalizer', 'SSN2D'
6 | ]
7 |
--------------------------------------------------------------------------------
/mmaction/models/localizers/base.py:
--------------------------------------------------------------------------------
1 | import logging
2 | from abc import ABCMeta, abstractmethod
3 |
4 | import torch.nn as nn
5 |
6 | class BaseLocalizer(nn.Module, metaclass=ABCMeta):
7 |     """Base class for localizers"""
8 |
9 |     # abstract interface: subclasses must implement forward_train / forward_test
10 |
11 | def __init__(self):
12 | super(BaseLocalizer, self).__init__()
13 |
14 | @abstractmethod
15 | def forward_train(self, num_modalities, **kwargs):
16 | pass
17 |
18 | @abstractmethod
19 | def forward_test(self, num_modalities, **kwargs):
20 | pass
21 |
22 | def init_weights(self, pretrained=None):
23 | if pretrained is not None:
24 | logger = logging.getLogger()
25 | logger.info("load model from: {}".format(pretrained))
26 |
27 | def forward(self, num_modalities, img_meta, return_loss=True, **kwargs):
28 | num_modalities = int(num_modalities[0])
29 | if return_loss:
30 | return self.forward_train(num_modalities, img_meta, **kwargs)
31 | else:
32 | return self.forward_test(num_modalities, img_meta, **kwargs)
33 |
--------------------------------------------------------------------------------
/mmaction/models/recognizers/__init__.py:
--------------------------------------------------------------------------------
1 | from .base import BaseRecognizer
2 | from .TSN2D import TSN2D
3 | from .TSN3D import TSN3D
4 |
5 | __all__ = [
6 | 'BaseRecognizer', 'TSN2D', 'TSN3D'
7 | ]
--------------------------------------------------------------------------------
/mmaction/models/recognizers/base.py:
--------------------------------------------------------------------------------
1 | import logging
2 | from abc import ABCMeta, abstractmethod
3 |
4 | import torch.nn as nn
5 |
6 |
7 | class BaseRecognizer(nn.Module, metaclass=ABCMeta):
8 |     """Base class for recognizers"""
9 |
10 |     # abstract interface: subclasses must implement forward_train / forward_test
11 |
12 | def __init__(self):
13 | super(BaseRecognizer, self).__init__()
14 |
15 | @property
16 | def with_tenon_list(self):
17 | return hasattr(self, 'tenon_list') and self.tenon_list is not None
18 |
19 | @property
20 | def with_cls(self):
21 | return hasattr(self, 'cls_head') and self.cls_head is not None
22 |
23 | @abstractmethod
24 | def forward_train(self, num_modalities, **kwargs):
25 | pass
26 |
27 | @abstractmethod
28 | def forward_test(self, num_modalities, **kwargs):
29 | pass
30 |
31 | def init_weights(self, pretrained=None):
32 | if pretrained is not None:
33 | logger = logging.getLogger()
34 | logger.info("load model from: {}".format(pretrained))
35 |
36 | def forward(self, num_modalities, img_meta, return_loss=True, **kwargs):
37 | num_modalities = int(num_modalities[0])
38 | if return_loss:
39 | return self.forward_train(num_modalities, img_meta, **kwargs)
40 | else:
41 | return self.forward_test(num_modalities, img_meta, **kwargs)
42 |
--------------------------------------------------------------------------------
/mmaction/models/registry.py:
--------------------------------------------------------------------------------
1 | import torch.nn as nn
2 |
3 |
4 | class Registry(object):
5 |
6 | def __init__(self, name):
7 | self._name = name
8 | self._module_dict = dict()
9 |
10 | @property
11 | def name(self):
12 | return self._name
13 |
14 | @property
15 | def module_dict(self):
16 | return self._module_dict
17 |
18 | def _register_module(self, module_class):
19 | """Register a module
20 |
21 | Args:
22 |             module_class (type): Class to be registered (must subclass nn.Module).
23 | """
24 | if not issubclass(module_class, nn.Module):
25 | raise TypeError(
26 | 'module must be a child of nn.Module, but got {}'.format(
27 | module_class))
28 | module_name = module_class.__name__
29 | if module_name in self._module_dict:
30 | raise KeyError('{} is already registered in {}'.format(
31 | module_name, self.name))
32 | self._module_dict[module_name] = module_class
33 |
34 | def register_module(self, cls):
35 | self._register_module(cls)
36 | return cls
37 |
38 |
39 | BACKBONES = Registry('backbone')
40 | FLOWNETS = Registry('flownet')
41 | SPATIAL_TEMPORAL_MODULES = Registry('spatial_temporal_module')
42 | SEGMENTAL_CONSENSUSES = Registry('segmental_consensus')
43 | HEADS = Registry('head')
44 | RECOGNIZERS = Registry('recognizer')
45 | LOCALIZERS = Registry('localizer')
46 | DETECTORS = Registry('detector')
47 | ARCHITECTURES = Registry('architecture')
48 | NECKS = Registry('neck')
49 | ROI_EXTRACTORS = Registry('roi_extractor')
50 |
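
A hedged sketch of the registry/builder pattern (assuming the package, including its compiled ops, imports cleanly): register an nn.Module in a fresh registry, then build it from a config dict with models.builder.build. The ToyHead class is purely illustrative and not part of the repo:

import torch.nn as nn
from mmaction.models.registry import Registry
from mmaction.models.builder import build

TOY_MODULES = Registry('toy_module')

@TOY_MODULES.register_module
class ToyHead(nn.Module):                  # hypothetical module, for illustration only
    def __init__(self, num_classes=10):
        super(ToyHead, self).__init__()
        self.fc = nn.Linear(16, num_classes)

head = build(dict(type='ToyHead', num_classes=5), TOY_MODULES)
print(type(head).__name__, head.fc.out_features)   # ToyHead 5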
--------------------------------------------------------------------------------
/mmaction/models/tenons/anchor_heads/__init__.py:
--------------------------------------------------------------------------------
1 | from .anchor_head import AnchorHead
2 | from .rpn_head import RPNHead
3 |
4 | __all__ = ['AnchorHead', 'RPNHead']
--------------------------------------------------------------------------------
/mmaction/models/tenons/backbones/__init__.py:
--------------------------------------------------------------------------------
1 | from .bninception import BNInception
2 | from .resnet import ResNet
3 |
4 | from .inception_v1_i3d import InceptionV1_I3D
5 | from .resnet_i3d import ResNet_I3D
6 | from .resnet_s3d import ResNet_S3D
7 | from .resnet_i3d_slowfast import ResNet_I3D_SlowFast
8 | from .resnet_r3d import ResNet_R3D
9 | from .c3d import C3D
10 |
11 | __all__ = [
12 | 'BNInception',
13 | 'ResNet',
14 | 'InceptionV1_I3D',
15 | 'ResNet_I3D',
16 | 'ResNet_S3D',
17 | 'ResNet_I3D_SlowFast',
18 | 'ResNet_R3D',
19 | 'C3D'
20 | ]
21 |
--------------------------------------------------------------------------------
/mmaction/models/tenons/bbox_heads/__init__.py:
--------------------------------------------------------------------------------
1 | from .bbox_head import BBoxHead
2 |
3 | __all__ = [
4 | 'BBoxHead'
5 | ]
--------------------------------------------------------------------------------
/mmaction/models/tenons/cls_heads/__init__.py:
--------------------------------------------------------------------------------
1 | from .cls_head import ClsHead
2 | from .ssn_head import SSNHead
3 |
4 | __all__ = [
5 | 'ClsHead', 'SSNHead'
6 | ]
7 |
--------------------------------------------------------------------------------
/mmaction/models/tenons/cls_heads/cls_head.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import torch.nn as nn
3 | import torch.nn.functional as F
4 | from ...registry import HEADS
5 |
6 | @HEADS.register_module
7 | class ClsHead(nn.Module):
8 | """Simplest classification head"""
9 |
10 | def __init__(self,
11 | with_avg_pool=True,
12 | temporal_feature_size=1,
13 | spatial_feature_size=7,
14 | dropout_ratio=0.8,
15 | in_channels=2048,
16 | num_classes=101,
17 | init_std=0.01,
18 | fcn_testing=False):
19 |
20 | super(ClsHead, self).__init__()
21 |
22 | self.with_avg_pool = with_avg_pool
23 | self.dropout_ratio = dropout_ratio
24 | self.in_channels = in_channels
25 | self.dropout_ratio = dropout_ratio
26 | self.temporal_feature_size = temporal_feature_size
27 | self.spatial_feature_size = spatial_feature_size
28 | self.init_std = init_std
29 | self.fcn_testing = fcn_testing
30 | self.num_classes = num_classes
31 |
32 | if self.dropout_ratio != 0:
33 | self.dropout = nn.Dropout(p=self.dropout_ratio)
34 | else:
35 | self.dropout = None
36 | if self.with_avg_pool:
37 | self.avg_pool = nn.AvgPool3d((temporal_feature_size, spatial_feature_size, spatial_feature_size))
38 |
39 | self.fc_cls = nn.Linear(in_channels, num_classes)
40 | self.new_cls = None
41 |
42 | def init_weights(self):
43 | nn.init.normal_(self.fc_cls.weight, 0, self.init_std)
44 | nn.init.constant_(self.fc_cls.bias, 0)
45 |
46 | def forward(self, x):
47 | if not self.fcn_testing:
48 | if x.ndimension() == 4:
49 | x = x.unsqueeze(2)
50 | assert x.shape[1] == self.in_channels
51 | assert x.shape[2] == self.temporal_feature_size
52 | assert x.shape[3] == self.spatial_feature_size
53 | assert x.shape[4] == self.spatial_feature_size
54 | if self.with_avg_pool:
55 | x = self.avg_pool(x)
56 | if self.dropout is not None:
57 | x = self.dropout(x)
58 | x = x.view(x.size(0), -1)
59 |
60 | cls_score = self.fc_cls(x)
61 | return cls_score
62 | else:
63 | if x.ndimension() == 4:
64 | x = x.unsqueeze(2)
65 | if self.with_avg_pool:
66 | x = self.avg_pool(x)
67 | if self.new_cls is None:
68 | self.new_cls = nn.Conv3d(self.in_channels, self.num_classes, 1,1,0).cuda()
69 | self.new_cls.load_state_dict({'weight': self.fc_cls.weight.unsqueeze(-1).unsqueeze(-1).unsqueeze(-1),
70 | 'bias': self.fc_cls.bias})
71 | class_map = self.new_cls(x)
72 | return class_map
73 |
74 | def loss(self,
75 | cls_score,
76 | labels):
77 | losses = dict()
78 | losses['loss_cls'] = F.cross_entropy(cls_score, labels)
79 |
80 | return losses
81 |
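A minimal usage sketch of ClsHead (not repository code; it assumes the mmaction package and its compiled ops are installed, and the feature shape is illustrative):

    import torch
    from mmaction.models.tenons.cls_heads import ClsHead

    head = ClsHead(with_avg_pool=True, temporal_feature_size=1,
                   spatial_feature_size=7, in_channels=2048, num_classes=101)
    head.init_weights()

    feat = torch.randn(4, 2048, 1, 7, 7)             # pooled backbone output (illustrative)
    scores = head(feat)                              # (4, 101) class logits
    losses = head.loss(scores, torch.randint(0, 101, (4,)))
    print(scores.shape, losses['loss_cls'].item())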
--------------------------------------------------------------------------------
/mmaction/models/tenons/flownets/__init__.py:
--------------------------------------------------------------------------------
1 | from .motionnet import MotionNet
2 |
3 | __all__ = [
4 | "MotionNet",
5 | ]
6 |
--------------------------------------------------------------------------------
/mmaction/models/tenons/necks/__init__.py:
--------------------------------------------------------------------------------
1 | from .fpn import FPN
2 |
3 | __all__ = ['FPN']
--------------------------------------------------------------------------------
/mmaction/models/tenons/roi_extractors/__init__.py:
--------------------------------------------------------------------------------
1 | from .single_level import SingleRoIExtractor
2 | from .single_level_straight3d import SingleRoIStraight3DExtractor
3 |
4 | __all__ = [
5 | 'SingleRoIExtractor', 'SingleRoIStraight3DExtractor'
6 | ]
--------------------------------------------------------------------------------
/mmaction/models/tenons/roi_extractors/single_level.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import torch.nn as nn
3 |
4 | from mmaction import ops
5 |
6 | from ...registry import ROI_EXTRACTORS
7 |
8 | @ROI_EXTRACTORS.register_module
9 | class SingleRoIExtractor(nn.Module):
10 | """Extract RoI features from a single level feature map.
11 |
12 | If there are multiple input feature levels, each RoI is mapped to a level
13 | according to its scale.
14 |
15 | Args:
16 | roi_layer (dict): Specify RoI layer type and arguments.
17 | out_channels (int): Output channels of RoI layers.
18 |         featmap_strides (list[int]): Strides of the input feature maps.
19 | finest_scale (int): Scale threshold of mapping to level 0.
20 | """
21 |
22 | def __init__(self,
23 | roi_layer,
24 | out_channels,
25 | featmap_strides,
26 | finest_scale=56):
27 | super(SingleRoIExtractor, self).__init__()
28 | self.roi_layers = self.build_roi_layers(roi_layer, featmap_strides)
29 | self.out_channels = out_channels
30 | self.featmap_strides = featmap_strides
31 | self.finest_scale = finest_scale
32 |
33 | @property
34 | def num_inputs(self):
35 | """int: Input feature map levels."""
36 | return len(self.featmap_strides)
37 |
38 | def init_weights(self):
39 | pass
40 |
41 | def build_roi_layers(self, layer_cfg, featmap_strides):
42 | cfg = layer_cfg.copy()
43 | layer_type = cfg.pop('type')
44 | assert hasattr(ops, layer_type)
45 | layer_cls = getattr(ops, layer_type)
46 | roi_layers = nn.ModuleList(
47 | [layer_cls(spatial_scale=1 / s, **cfg) for s in featmap_strides])
48 | return roi_layers
49 |
50 | def map_roi_levels(self, rois, num_levels):
51 | """Map rois to corresponding feature levels by scales.
52 |
53 |         - scale < finest_scale * 2: level 0
54 |         - finest_scale * 2 <= scale < finest_scale * 4: level 1
55 |         - finest_scale * 4 <= scale < finest_scale * 8: level 2
56 |         - scale >= finest_scale * 8: level 3
57 |
58 | Args:
59 | rois (Tensor): Input RoIs, shape (k, 5).
60 | num_levels (int): Total level number.
61 |
62 | Returns:
63 | Tensor: Level index (0-based) of each RoI, shape (k, )
64 | """
65 | scale = torch.sqrt(
66 | (rois[:, 3] - rois[:, 1] + 1) * (rois[:, 4] - rois[:, 2] + 1))
67 | target_lvls = torch.floor(torch.log2(scale / self.finest_scale + 1e-6))
68 | target_lvls = target_lvls.clamp(min=0, max=num_levels - 1).long()
69 | return target_lvls
70 |
71 | def forward(self, feats, rois):
72 | if len(feats) == 1:
73 | return self.roi_layers[0](feats[0], rois)
74 |
75 | out_size = self.roi_layers[0].out_size
76 | num_levels = len(feats)
77 | target_lvls = self.map_roi_levels(rois, num_levels)
78 | roi_feats = torch.cuda.FloatTensor(rois.size()[0], self.out_channels,
79 | out_size, out_size).fill_(0)
80 | for i in range(num_levels):
81 | inds = target_lvls == i
82 | if inds.any():
83 | rois_ = rois[inds, :]
84 | roi_feats_t = self.roi_layers[i](feats[i], rois_)
85 | roi_feats[inds] += roi_feats_t
86 | return roi_feats
87 |
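The level-assignment rule in map_roi_levels can be reproduced standalone (a sketch, not repository code); with the default finest_scale=56, RoI scales below 112 go to level 0 and each further doubling of the scale moves up one level. The scale values below are illustrative:

    import torch

    # same formula as SingleRoIExtractor.map_roi_levels, finest_scale = 56
    scale = torch.tensor([41., 151., 301., 601.])
    lvls = torch.floor(torch.log2(scale / 56 + 1e-6)).clamp(min=0, max=3).long()
    print(lvls)  # tensor([0, 1, 2, 3])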
--------------------------------------------------------------------------------
/mmaction/models/tenons/segmental_consensuses/TODO.md:
--------------------------------------------------------------------------------
1 | ### TODO
2 |
3 | - [x] SimpleConsensus
4 | 
5 | - [ ] STPP
6 | 
7 | - [ ] TRN
8 |
--------------------------------------------------------------------------------
/mmaction/models/tenons/segmental_consensuses/__init__.py:
--------------------------------------------------------------------------------
1 | from .simple_consensus import SimpleConsensus
2 | from .stpp import parse_stage_config
3 | from .stpp import StructuredTemporalPyramidPooling
4 |
5 | __all__ = [
6 | 'SimpleConsensus',
7 | 'StructuredTemporalPyramidPooling',
8 | 'parse_stage_config'
9 | ]
10 |
--------------------------------------------------------------------------------
/mmaction/models/tenons/segmental_consensuses/simple_consensus.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import torch.nn as nn
3 | from ...registry import SEGMENTAL_CONSENSUSES
4 |
5 | @SEGMENTAL_CONSENSUSES.register_module
6 | class SimpleConsensus(nn.Module):
7 | def __init__(self, consensus_type, dim=1):
8 | super(SimpleConsensus, self).__init__()
9 | assert consensus_type in ['avg']
10 | self.consensus_type = consensus_type
11 | self.dim = dim
12 |
13 | def init_weights(self):
14 | pass
15 |
16 | def forward(self, input):
17 | if self.consensus_type == 'avg':
18 | output = input.mean(dim=self.dim, keepdim=True)
19 | else:
20 | return None
21 | return output
22 |
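A usage sketch (not repository code, assuming the mmaction package is installed): SimpleConsensus simply averages per-segment scores over the segment dimension. The tensor shape is illustrative:

    import torch
    from mmaction.models.tenons.segmental_consensuses import SimpleConsensus

    consensus = SimpleConsensus(consensus_type='avg', dim=1)
    seg_scores = torch.randn(2, 3, 101)        # (batch, segments, classes)
    print(consensus(seg_scores).shape)         # torch.Size([2, 1, 101])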
--------------------------------------------------------------------------------
/mmaction/models/tenons/shared_heads/__init__.py:
--------------------------------------------------------------------------------
1 | from .res_layer import ResLayer
2 | from .res_i3d_layer import ResI3DLayer
3 |
4 | __all__ = [
5 | 'ResLayer', 'ResI3DLayer'
6 | ]
--------------------------------------------------------------------------------
/mmaction/models/tenons/shared_heads/res_layer.py:
--------------------------------------------------------------------------------
1 | import logging
2 |
3 | import torch.nn as nn
4 | from mmcv.cnn import constant_init, kaiming_init
5 | from mmcv.runner import load_checkpoint
6 |
7 | from ..backbones import ResNet
8 | from ..backbones.resnet import make_res_layer
9 | from ...registry import HEADS
10 | from ..spatial_temporal_modules.non_local import NonLocalModule
11 |
12 |
13 | @HEADS.register_module
14 | class ResLayer(nn.Module):
15 |
16 | def __init__(self,
17 | depth,
18 | pretrained=None,
19 | stage=3,
20 | stride=2,
21 | dilation=1,
22 | style='pytorch',
23 | bn_eval=True,
24 | bn_frozen=True,
25 | all_frozen=False,
26 | with_cp=False):
27 | super(ResLayer, self).__init__()
28 | self.bn_eval = bn_eval
29 | self.bn_frozen = bn_frozen
30 | self.all_frozen = all_frozen
31 | self.stage = stage
32 | block, stage_blocks = ResNet.arch_settings[depth]
33 | self.pretrained = pretrained
34 | stage_block = stage_blocks[stage]
35 | planes = 64 * 2**stage
36 | inplanes = 64 * 2**(stage - 1) * block.expansion
37 |
38 | res_layer = make_res_layer(
39 | block,
40 | inplanes,
41 | planes,
42 | stage_block,
43 | stride=stride,
44 | dilation=dilation,
45 | style=style,
46 | with_cp=with_cp)
47 | self.add_module('layer{}'.format(stage + 1), res_layer)
48 |
49 | def init_weights(self):
50 | if isinstance(self.pretrained, str):
51 | logger = logging.getLogger()
52 | load_checkpoint(self, self.pretrained, strict=False, logger=logger)
53 | elif self.pretrained is None:
54 | for m in self.modules():
55 | if isinstance(m, nn.Conv2d):
56 | kaiming_init(m)
57 | elif isinstance(m, nn.BatchNorm2d):
58 | constant_init(m, 1)
59 | else:
60 | raise TypeError('pretrained must be a str or None')
61 |
62 | def forward(self, x):
63 | res_layer = getattr(self, 'layer{}'.format(self.stage + 1))
64 | out = res_layer(x)
65 | return out
66 |
67 | def train(self, mode=True):
68 | super(ResLayer, self).train(mode)
69 | if self.bn_eval:
70 | for m in self.modules():
71 | if isinstance(m, nn.BatchNorm2d):
72 | m.eval()
73 | if self.bn_frozen:
74 | for params in m.parameters():
75 | params.requires_grad = False
76 | if self.bn_frozen:
77 | res_layer = getattr(self, 'layer{}'.format(self.stage + 1))
78 | for m in res_layer:
79 | if isinstance(m, nn.BatchNorm2d):
80 | m.eval()
81 | m.weight.requires_grad = False
82 | m.bias.requires_grad = False
83 | if self.all_frozen:
84 | res_layer = getattr(self, 'layer{}'.format(self.stage + 1))
85 | res_layer.eval()
86 |             for param in res_layer.parameters():
87 | param.requires_grad = False
88 |
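A usage sketch (not repository code; assumes mmaction and its compiled ops are installed): with depth=50 and stage=3 the module rebuilds ResNet-50's res5 stage, mapping 1024-channel RoI features to 2048 channels at half resolution. The feature shape is illustrative:

    import torch
    from mmaction.models.tenons.shared_heads import ResLayer

    head = ResLayer(depth=50, stage=3, stride=2)
    head.init_weights()
    feat = torch.randn(2, 1024, 14, 14)        # illustrative RoI feature
    print(head(feat).shape)                    # torch.Size([2, 2048, 7, 7])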
--------------------------------------------------------------------------------
/mmaction/models/tenons/spatial_temporal_modules/__init__.py:
--------------------------------------------------------------------------------
1 | from .simple_spatial_module import SimpleSpatialModule
2 | from .simple_spatial_temporal_module import SimpleSpatialTemporalModule
3 | from .slowfast_spatial_temporal_module import SlowFastSpatialTemporalModule
4 |
5 | __all__ = [
6 | 'SimpleSpatialModule',
7 | 'SimpleSpatialTemporalModule',
8 | 'SlowFastSpatialTemporalModule'
9 | ]
10 |
--------------------------------------------------------------------------------
/mmaction/models/tenons/spatial_temporal_modules/simple_spatial_module.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import torch.nn as nn
3 | import torch.nn.functional as F
4 | from ...registry import SPATIAL_TEMPORAL_MODULES
5 |
6 |
7 | @SPATIAL_TEMPORAL_MODULES.register_module
8 | class SimpleSpatialModule(nn.Module):
9 | def __init__(self, spatial_type='avg', spatial_size=7):
10 | super(SimpleSpatialModule, self).__init__()
11 |
12 | assert spatial_type in ['avg']
13 | self.spatial_type = spatial_type
14 |
15 | self.spatial_size = spatial_size if not isinstance(spatial_size, int) else (spatial_size, spatial_size)
16 |
17 | if self.spatial_type == 'avg':
18 | self.op = nn.AvgPool2d(self.spatial_size, stride=1, padding=0)
19 |
20 |
21 | def init_weights(self):
22 | pass
23 |
24 | def forward(self, input):
25 | return self.op(input)
--------------------------------------------------------------------------------
/mmaction/models/tenons/spatial_temporal_modules/simple_spatial_temporal_module.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import torch.nn as nn
3 | import torch.nn.functional as F
4 | from ...registry import SPATIAL_TEMPORAL_MODULES
5 |
6 |
7 | @SPATIAL_TEMPORAL_MODULES.register_module
8 | class SimpleSpatialTemporalModule(nn.Module):
9 | def __init__(self, spatial_type='avg', spatial_size=7, temporal_size=1):
10 | super(SimpleSpatialTemporalModule, self).__init__()
11 |
12 | assert spatial_type in ['identity', 'avg', 'max']
13 | self.spatial_type = spatial_type
14 |
15 | self.spatial_size = spatial_size
16 | if spatial_size != -1:
17 | self.spatial_size = (spatial_size, spatial_size)
18 |
19 | self.temporal_size = temporal_size
20 |
21 | assert not (self.spatial_size == -1) ^ (self.temporal_size == -1)
22 |
23 | if self.temporal_size == -1 and self.spatial_size == -1:
24 | self.pool_size = (1, 1, 1)
25 | if self.spatial_type == 'avg':
26 | self.pool_func = nn.AdaptiveAvgPool3d(self.pool_size)
27 | if self.spatial_type == 'max':
28 | self.pool_func = nn.AdaptiveMaxPool3d(self.pool_size)
29 | else:
30 | self.pool_size = (self.temporal_size, ) + self.spatial_size
31 | if self.spatial_type == 'avg':
32 | self.pool_func = nn.AvgPool3d(self.pool_size, stride=1, padding=0)
33 | if self.spatial_type == 'max':
34 | self.pool_func = nn.MaxPool3d(self.pool_size, stride=1, padding=0)
35 |
36 |
37 | def init_weights(self):
38 | pass
39 |
40 | def forward(self, input):
41 | if self.spatial_type == 'identity':
42 | return input
43 | else:
44 | return self.pool_func(input)
45 |
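A usage sketch (not repository code, assuming the mmaction package is installed): passing -1 for both sizes selects adaptive pooling, so any (N, C, T, H, W) input collapses to (N, C, 1, 1, 1). The input shape is illustrative:

    import torch
    from mmaction.models.tenons.spatial_temporal_modules import SimpleSpatialTemporalModule

    stm = SimpleSpatialTemporalModule(spatial_type='avg', spatial_size=-1, temporal_size=-1)
    feat = torch.randn(2, 2048, 4, 7, 7)       # illustrative backbone output
    print(stm(feat).shape)                     # torch.Size([2, 2048, 1, 1, 1])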
--------------------------------------------------------------------------------
/mmaction/models/tenons/spatial_temporal_modules/slowfast_spatial_temporal_module.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import torch.nn as nn
3 | import torch.nn.functional as F
4 | from ...registry import SPATIAL_TEMPORAL_MODULES
5 |
6 |
7 | @SPATIAL_TEMPORAL_MODULES.register_module
8 | class SlowFastSpatialTemporalModule(nn.Module):
9 | def __init__(self, adaptive_pool=True, spatial_type='avg', spatial_size=1, temporal_size=1):
10 | super(SlowFastSpatialTemporalModule, self).__init__()
11 |
12 | self.adaptive_pool = adaptive_pool
13 | assert spatial_type in ['avg']
14 | self.spatial_type = spatial_type
15 |
16 | self.spatial_size = spatial_size if not isinstance(spatial_size, int) else (spatial_size, spatial_size)
17 | self.temporal_size = temporal_size
18 | self.pool_size = (self.temporal_size, ) + self.spatial_size
19 |
20 | if self.adaptive_pool:
21 | if self.spatial_type == 'avg':
22 | self.op = nn.AdaptiveAvgPool3d(self.pool_size)
23 | else:
24 | raise NotImplementedError
25 |
26 |
27 | def init_weights(self):
28 | pass
29 |
30 | def forward(self, input):
31 | x_slow, x_fast = input
32 | x_slow = self.op(x_slow)
33 | x_fast = self.op(x_fast)
34 | return torch.cat((x_slow, x_fast), dim=1)
35 |
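A usage sketch (not repository code, assuming the mmaction package is installed): the slow and fast pathway features are pooled separately and concatenated along channels; the pathway shapes below are illustrative:

    import torch
    from mmaction.models.tenons.spatial_temporal_modules import SlowFastSpatialTemporalModule

    stm = SlowFastSpatialTemporalModule()      # adaptive average pool to (1, 1, 1)
    x_slow = torch.randn(2, 2048, 4, 7, 7)
    x_fast = torch.randn(2, 256, 32, 7, 7)
    print(stm((x_slow, x_fast)).shape)         # torch.Size([2, 2304, 1, 1, 1])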
--------------------------------------------------------------------------------
/mmaction/models/tenons/utils/__init__.py:
--------------------------------------------------------------------------------
1 | from .conv_module import ConvModule
2 | from .norm import build_norm_layer
3 |
4 | __all__ = [
5 | 'ConvModule', 'build_norm_layer'
6 | ]
--------------------------------------------------------------------------------
/mmaction/models/tenons/utils/conv_module.py:
--------------------------------------------------------------------------------
1 | import warnings
2 |
3 | import torch.nn as nn
4 | from mmcv.cnn import kaiming_init, constant_init
5 |
6 | from .norm import build_norm_layer
7 |
8 |
9 | class ConvModule(nn.Module):
10 |
11 | def __init__(self,
12 | in_channels,
13 | out_channels,
14 | kernel_size,
15 | stride=1,
16 | padding=0,
17 | dilation=1,
18 | groups=1,
19 | bias=True,
20 | normalize=None,
21 | activation='relu',
22 | inplace=True,
23 | activate_last=True):
24 | super(ConvModule, self).__init__()
25 | self.with_norm = normalize is not None
26 | self.with_activatation = activation is not None
27 | self.with_bias = bias
28 | self.activation = activation
29 | self.activate_last = activate_last
30 |
31 | if self.with_norm and self.with_bias:
32 | warnings.warn('ConvModule has norm and bias at the same time')
33 |
34 | self.conv = nn.Conv2d(
35 | in_channels,
36 | out_channels,
37 | kernel_size,
38 | stride,
39 | padding,
40 | dilation,
41 | groups,
42 | bias=bias)
43 |
44 | self.in_channels = self.conv.in_channels
45 | self.out_channels = self.conv.out_channels
46 | self.kernel_size = self.conv.kernel_size
47 | self.stride = self.conv.stride
48 | self.padding = self.conv.padding
49 | self.dilation = self.conv.dilation
50 | self.transposed = self.conv.transposed
51 | self.output_padding = self.conv.output_padding
52 | self.groups = self.conv.groups
53 |
54 | if self.with_norm:
55 | norm_channels = out_channels if self.activate_last else in_channels
56 | self.norm_name, norm = build_norm_layer(normalize, norm_channels)
57 | self.add_module(self.norm_name, norm)
58 |
59 | if self.with_activatation:
60 | assert activation in ['relu'], 'Only ReLU supported.'
61 | if self.activation == 'relu':
62 | self.activate = nn.ReLU(inplace=inplace)
63 |
64 | # Default using msra init
65 | self.init_weights()
66 |
67 | @property
68 | def norm(self):
69 | return getattr(self, self.norm_name)
70 |
71 | def init_weights(self):
72 | nonlinearity = 'relu' if self.activation is None else self.activation
73 | kaiming_init(self.conv, nonlinearity=nonlinearity)
74 | if self.with_norm:
75 | constant_init(self.norm, 1, bias=0)
76 |
77 | def forward(self, x, activate=True, norm=True):
78 | if self.activate_last:
79 | x = self.conv(x)
80 | if norm and self.with_norm:
81 | x = self.norm(x)
82 | if activate and self.with_activatation:
83 | x = self.activate(x)
84 | else:
85 | if norm and self.with_norm:
86 | x = self.norm(x)
87 | if activate and self.with_activatation:
88 | x = self.activate(x)
89 | x = self.conv(x)
90 | return x
91 |
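A usage sketch (not repository code, assuming the mmaction package is installed): ConvModule bundles a convolution, an optional norm layer (picked via normalize=), and a ReLU into one call. The shapes are illustrative:

    import torch
    from mmaction.models.tenons.utils import ConvModule

    m = ConvModule(3, 16, kernel_size=3, padding=1, bias=False, normalize=dict(type='BN'))
    x = torch.randn(2, 3, 32, 32)              # illustrative input
    print(m(x).shape)                          # torch.Size([2, 16, 32, 32])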
--------------------------------------------------------------------------------
/mmaction/models/tenons/utils/nonlocal_block.py:
--------------------------------------------------------------------------------
1 | from ..spatial_temporal_modules.non_local import NonLocalModule
2 |
3 |
4 | def build_nonlocal_block(cfg):
5 |     """Build a nonlocal block.
6 | 
7 |     Args:
8 |         cfg (dict): config dict used to construct the NonLocalModule.
9 |     """
10 |     assert isinstance(cfg, dict)
11 |     cfg_ = cfg.copy()
12 |     return NonLocalModule(**cfg_)
13 | 
--------------------------------------------------------------------------------
/mmaction/models/tenons/utils/norm.py:
--------------------------------------------------------------------------------
1 | import torch.nn as nn
2 |
3 |
4 | norm_cfg = {
5 | # format: layer_type: (abbreviation, module)
6 | 'BN': ('bn', nn.BatchNorm2d),
7 | 'SyncBN': ('bn', None),
8 | 'GN': ('gn', nn.GroupNorm),
9 | # and potentially 'SN'
10 | }
11 |
12 |
13 | def build_norm_layer(cfg, num_features, postfix=''):
14 | """ Build normalization layer
15 | Args:
16 | cfg (dict): cfg should contain:
17 | type (str): identify norm layer type.
18 | layer args: args needed to instantiate a norm layer.
19 | frozen (bool): [optional] whether stop gradient updates
20 | of norm layer, it is helpful to set frozen mode
21 | in backbone's norms.
22 | num_features (int): number of channels from input
23 |         postfix (int, str): appended into norm abbreviation to
24 | create named layer.
25 | Returns:
26 |         name (str): abbreviation + postfix
27 | layer (nn.Module): created norm layer
28 | """
29 | assert isinstance(cfg, dict) and 'type' in cfg
30 | cfg_ = cfg.copy()
31 |
32 | layer_type = cfg_.pop('type')
33 | if layer_type not in norm_cfg:
34 | raise KeyError('Unrecognized norm type {}'.format(layer_type))
35 | else:
36 | abbr, norm_layer = norm_cfg[layer_type]
37 | if norm_layer is None:
38 | raise NotImplementedError
39 |
40 | assert isinstance(postfix, (int, str))
41 | name = abbr + str(postfix)
42 |
43 | frozen = cfg_.pop('frozen', False)
44 | cfg_.setdefault('eps', 1e-5)
45 | if layer_type != 'GN':
46 | layer = norm_layer(num_features, **cfg_)
47 | else:
48 | assert 'num_groups' in cfg_
49 | layer = norm_layer(num_channels=num_features, **cfg_)
50 |
51 | if frozen:
52 | for param in layer.parameters():
53 | param.requires_grad = False
54 |
55 | return name, layer
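A usage sketch (not repository code, assuming the mmaction package is installed): build_norm_layer returns a (name, module) pair, and frozen=True stops gradient updates on the created layer:

    from mmaction.models.tenons.utils import build_norm_layer

    name, bn = build_norm_layer(dict(type='BN'), 64, postfix=1)
    print(name)                                # 'bn1'

    name, gn = build_norm_layer(dict(type='GN', num_groups=32, frozen=True), 64)
    print(name, all(not p.requires_grad for p in gn.parameters()))   # gn True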
--------------------------------------------------------------------------------
/mmaction/ops/__init__.py:
--------------------------------------------------------------------------------
1 | from .nms import nms, soft_nms
2 | from .roi_align import RoIAlign, roi_align
3 | from .roi_pool import RoIPool, roi_pool
4 |
5 | __all__ = [
6 | 'nms', 'soft_nms', 'RoIAlign', 'roi_align', 'RoIPool', 'roi_pool'
7 | ]
8 |
--------------------------------------------------------------------------------
/mmaction/ops/nms/__init__.py:
--------------------------------------------------------------------------------
1 | from .nms_wrapper import nms, soft_nms
2 |
3 | __all__ = ['nms', 'soft_nms']
4 |
--------------------------------------------------------------------------------
/mmaction/ops/nms/nms_wrapper.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import torch
3 |
4 | from . import nms_cuda, nms_cpu
5 | from .soft_nms_cpu import soft_nms_cpu
6 |
7 |
8 | def nms(dets, iou_thr, device_id=None):
9 | """Dispatch to either CPU or GPU NMS implementations.
10 |
11 | The input can be either a torch tensor or numpy array. GPU NMS will be used
12 | if the input is a gpu tensor or device_id is specified, otherwise CPU NMS
13 | will be used. The returned type will always be the same as inputs.
14 |
15 | Arguments:
16 | dets (torch.Tensor or np.ndarray): bboxes with scores.
17 | iou_thr (float): IoU threshold for NMS.
18 | device_id (int, optional): when `dets` is a numpy array, if `device_id`
19 | is None, then cpu nms is used, otherwise gpu_nms will be used.
20 |
21 | Returns:
22 |         tuple: kept bboxes and indices, which are always of the same data
23 |             type as the input.
24 | """
25 | # convert dets (tensor or numpy array) to tensor
26 | if isinstance(dets, torch.Tensor):
27 | is_numpy = False
28 | dets_th = dets
29 | elif isinstance(dets, np.ndarray):
30 | is_numpy = True
31 | device = 'cpu' if device_id is None else 'cuda:{}'.format(device_id)
32 | dets_th = torch.from_numpy(dets).to(device)
33 | else:
34 | raise TypeError(
35 | 'dets must be either a Tensor or numpy array, but got {}'.format(
36 | type(dets)))
37 |
38 | # execute cpu or cuda nms
39 | if dets_th.shape[0] == 0:
40 | inds = dets_th.new_zeros(0, dtype=torch.long)
41 | else:
42 | if dets_th.is_cuda:
43 | inds = nms_cuda.nms(dets_th, iou_thr)
44 | else:
45 | inds = nms_cpu.nms(dets_th, iou_thr)
46 |
47 | if is_numpy:
48 | inds = inds.cpu().numpy()
49 | return dets[inds, :], inds
50 |
51 |
52 | def soft_nms(dets, iou_thr, method='linear', sigma=0.5, min_score=1e-3):
53 | if isinstance(dets, torch.Tensor):
54 | is_tensor = True
55 | dets_np = dets.detach().cpu().numpy()
56 | elif isinstance(dets, np.ndarray):
57 | is_tensor = False
58 | dets_np = dets
59 | else:
60 | raise TypeError(
61 | 'dets must be either a Tensor or numpy array, but got {}'.format(
62 | type(dets)))
63 |
64 | method_codes = {'linear': 1, 'gaussian': 2}
65 | if method not in method_codes:
66 | raise ValueError('Invalid method for SoftNMS: {}'.format(method))
67 | new_dets, inds = soft_nms_cpu(
68 | dets_np,
69 | iou_thr,
70 | method=method_codes[method],
71 | sigma=sigma,
72 | min_score=min_score)
73 |
74 | if is_tensor:
75 | return dets.new_tensor(new_dets), dets.new_tensor(
76 | inds, dtype=torch.long)
77 | else:
78 | return new_dets.astype(np.float32), inds.astype(np.int64)
79 |
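A usage sketch (not repository code; it assumes the nms extensions under mmaction/ops have been compiled): dets rows are (x1, y1, x2, y2, score), and the two heavily overlapping boxes below collapse to one:

    import numpy as np
    from mmaction.ops import nms, soft_nms

    dets = np.array([[10, 10, 60, 60, 0.9],
                     [12, 12, 62, 62, 0.8],
                     [100, 100, 150, 150, 0.7]], dtype=np.float32)
    kept, inds = nms(dets, iou_thr=0.5)        # CPU NMS for numpy input
    print(inds)                                # [0 2]
    new_dets, keep = soft_nms(dets, iou_thr=0.5, method='gaussian')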
--------------------------------------------------------------------------------
/mmaction/ops/nms/setup.py:
--------------------------------------------------------------------------------
1 | import os.path as osp
2 | from setuptools import setup, Extension
3 |
4 | import numpy as np
5 | from Cython.Build import cythonize
6 | from Cython.Distutils import build_ext
7 | from torch.utils.cpp_extension import BuildExtension, CUDAExtension
8 |
9 | ext_args = dict(
10 | include_dirs=[np.get_include()],
11 | language='c++',
12 | extra_compile_args={
13 | 'cc': ['-Wno-unused-function', '-Wno-write-strings'],
14 | 'nvcc': ['-c', '--compiler-options', '-fPIC'],
15 | },
16 | )
17 |
18 | extensions = [
19 | Extension('soft_nms_cpu', ['src/soft_nms_cpu.pyx'], **ext_args),
20 | ]
21 |
22 |
23 | def customize_compiler_for_nvcc(self):
24 | """inject deep into distutils to customize how the dispatch
25 | to cc/nvcc works.
26 | If you subclass UnixCCompiler, it's not trivial to get your subclass
27 | injected in, and still have the right customizations (i.e.
28 | distutils.sysconfig.customize_compiler) run on it. So instead of going
29 | the OO route, I have this. Note, it's kind of like a weird functional
30 | subclassing going on."""
31 |
32 |     # tell the compiler it can process .cu files
33 | self.src_extensions.append('.cu')
34 |
35 |     # save references to the default compiler_so and _compile methods
36 | default_compiler_so = self.compiler_so
37 | super = self._compile
38 |
39 | # now redefine the _compile method. This gets executed for each
40 | # object but distutils doesn't have the ability to change compilers
41 | # based on source extension: we add it.
42 | def _compile(obj, src, ext, cc_args, extra_postargs, pp_opts):
43 | if osp.splitext(src)[1] == '.cu':
44 | # use the cuda for .cu files
45 | self.set_executable('compiler_so', 'nvcc')
46 | # use only a subset of the extra_postargs, which are 1-1 translated
47 | # from the extra_compile_args in the Extension class
48 | postargs = extra_postargs['nvcc']
49 | else:
50 | postargs = extra_postargs['cc']
51 |
52 | super(obj, src, ext, cc_args, postargs, pp_opts)
53 | # reset the default compiler_so, which we might have changed for cuda
54 | self.compiler_so = default_compiler_so
55 |
56 | # inject our redefined _compile method into the class
57 | self._compile = _compile
58 |
59 |
60 | class custom_build_ext(build_ext):
61 |
62 | def build_extensions(self):
63 | customize_compiler_for_nvcc(self.compiler)
64 | build_ext.build_extensions(self)
65 |
66 |
67 | setup(
68 | name='soft_nms',
69 | cmdclass={'build_ext': custom_build_ext},
70 | ext_modules=cythonize(extensions),
71 | )
72 |
73 | setup(
74 | name='nms_cuda',
75 | ext_modules=[
76 | CUDAExtension('nms_cuda', [
77 | 'src/nms_cuda.cpp',
78 | 'src/nms_kernel.cu',
79 | ]),
80 | CUDAExtension('nms_cpu', [
81 | 'src/nms_cpu.cpp',
82 | ]),
83 | ],
84 | cmdclass={'build_ext': BuildExtension})
85 |
--------------------------------------------------------------------------------
/mmaction/ops/nms/src/nms_cpu.cpp:
--------------------------------------------------------------------------------
1 | // Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
2 | #include <torch/extension.h>
3 |
4 | template <typename scalar_t>
5 | at::Tensor nms_cpu_kernel(const at::Tensor& dets, const float threshold) {
6 | AT_ASSERTM(!dets.type().is_cuda(), "dets must be a CPU tensor");
7 |
8 | if (dets.numel() == 0) {
9 | return at::empty({0}, dets.options().dtype(at::kLong).device(at::kCPU));
10 | }
11 |
12 | auto x1_t = dets.select(1, 0).contiguous();
13 | auto y1_t = dets.select(1, 1).contiguous();
14 | auto x2_t = dets.select(1, 2).contiguous();
15 | auto y2_t = dets.select(1, 3).contiguous();
16 | auto scores = dets.select(1, 4).contiguous();
17 |
18 | at::Tensor areas_t = (x2_t - x1_t + 1) * (y2_t - y1_t + 1);
19 |
20 | auto order_t = std::get<1>(scores.sort(0, /* descending=*/true));
21 |
22 | auto ndets = dets.size(0);
23 | at::Tensor suppressed_t =
24 | at::zeros({ndets}, dets.options().dtype(at::kByte).device(at::kCPU));
25 |
26 |   auto suppressed = suppressed_t.data<uint8_t>();
27 |   auto order = order_t.data<int64_t>();
28 |   auto x1 = x1_t.data<scalar_t>();
29 |   auto y1 = y1_t.data<scalar_t>();
30 |   auto x2 = x2_t.data<scalar_t>();
31 |   auto y2 = y2_t.data<scalar_t>();
32 |   auto areas = areas_t.data<scalar_t>();
33 |
34 | for (int64_t _i = 0; _i < ndets; _i++) {
35 | auto i = order[_i];
36 | if (suppressed[i] == 1) continue;
37 | auto ix1 = x1[i];
38 | auto iy1 = y1[i];
39 | auto ix2 = x2[i];
40 | auto iy2 = y2[i];
41 | auto iarea = areas[i];
42 |
43 | for (int64_t _j = _i + 1; _j < ndets; _j++) {
44 | auto j = order[_j];
45 | if (suppressed[j] == 1) continue;
46 | auto xx1 = std::max(ix1, x1[j]);
47 | auto yy1 = std::max(iy1, y1[j]);
48 | auto xx2 = std::min(ix2, x2[j]);
49 | auto yy2 = std::min(iy2, y2[j]);
50 |
51 |       auto w = std::max(static_cast<scalar_t>(0), xx2 - xx1 + 1);
52 |       auto h = std::max(static_cast<scalar_t>(0), yy2 - yy1 + 1);
53 | auto inter = w * h;
54 | auto ovr = inter / (iarea + areas[j] - inter);
55 | if (ovr >= threshold) suppressed[j] = 1;
56 | }
57 | }
58 | return at::nonzero(suppressed_t == 0).squeeze(1);
59 | }
60 |
61 | at::Tensor nms(const at::Tensor& dets, const float threshold) {
62 | at::Tensor result;
63 | AT_DISPATCH_FLOATING_TYPES(dets.type(), "nms", [&] {
64 |     result = nms_cpu_kernel<scalar_t>(dets, threshold);
65 | });
66 | return result;
67 | }
68 |
69 | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {
70 | m.def("nms", &nms, "non-maximum suppression");
71 | }
--------------------------------------------------------------------------------
/mmaction/ops/nms/src/nms_cuda.cpp:
--------------------------------------------------------------------------------
1 | // Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
2 | #include <torch/extension.h>
3 |
4 | #define CHECK_CUDA(x) TORCH_CHECK(x.device().is_cuda(), #x, " must be a CUDAtensor ")
5 |
6 | at::Tensor nms_cuda(const at::Tensor boxes, float nms_overlap_thresh);
7 |
8 | at::Tensor nms(const at::Tensor& dets, const float threshold) {
9 | CHECK_CUDA(dets);
10 | if (dets.numel() == 0)
11 | return at::empty({0}, dets.options().dtype(at::kLong).device(at::kCPU));
12 | return nms_cuda(dets, threshold);
13 | }
14 |
15 | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {
16 | m.def("nms", &nms, "non-maximum suppression");
17 | }
--------------------------------------------------------------------------------
/mmaction/ops/resample2d_package/__init__.py:
--------------------------------------------------------------------------------
1 |
2 |
--------------------------------------------------------------------------------
/mmaction/ops/resample2d_package/resample2d.py:
--------------------------------------------------------------------------------
1 | from torch.nn.modules.module import Module
2 | from torch.autograd import Function, Variable
3 | import resample2d_cuda
4 |
5 |
6 | class Resample2dFunction(Function):
7 |
8 | @staticmethod
9 | def forward(ctx, input1, input2, kernel_size=1):
10 | assert input1.is_contiguous()
11 | assert input2.is_contiguous()
12 |
13 | ctx.save_for_backward(input1, input2)
14 | ctx.kernel_size = kernel_size
15 |
16 | _, d, _, _ = input1.size()
17 | b, _, h, w = input2.size()
18 | output = input1.new(b, d, h, w).zero_()
19 |
20 | resample2d_cuda.forward(input1, input2, output, kernel_size)
21 |
22 | return output
23 |
24 | @staticmethod
25 | def backward(ctx, grad_output):
26 | grad_output = grad_output.contiguous()
27 | assert grad_output.is_contiguous()
28 |
29 | input1, input2 = ctx.saved_tensors
30 |
31 | grad_input1 = Variable(input1.new(input1.size()).zero_())
32 | grad_input2 = Variable(input1.new(input2.size()).zero_())
33 |
34 | resample2d_cuda.backward(input1, input2, grad_output.data,
35 | grad_input1.data, grad_input2.data,
36 | ctx.kernel_size)
37 |
38 | return grad_input1, grad_input2, None
39 |
40 |
41 | class Resample2d(Module):
42 |
43 | def __init__(self, kernel_size=1):
44 | super(Resample2d, self).__init__()
45 | self.kernel_size = kernel_size
46 |
47 | def forward(self, input1, input2):
48 | input1_c = input1.contiguous()
49 | return Resample2dFunction.apply(input1_c, input2, self.kernel_size)
50 |
--------------------------------------------------------------------------------
/mmaction/ops/resample2d_package/resample2d_cuda.cc:
--------------------------------------------------------------------------------
1 | #include <ATen/ATen.h>
2 | #include <torch/extension.h>
3 |
4 | #include "resample2d_kernel.cuh"
5 |
6 | int resample2d_cuda_forward(
7 | at::Tensor& input1,
8 | at::Tensor& input2,
9 | at::Tensor& output,
10 | int kernel_size) {
11 | resample2d_kernel_forward(input1, input2, output, kernel_size);
12 | return 1;
13 | }
14 |
15 | int resample2d_cuda_backward(
16 | at::Tensor& input1,
17 | at::Tensor& input2,
18 | at::Tensor& gradOutput,
19 | at::Tensor& gradInput1,
20 | at::Tensor& gradInput2,
21 | int kernel_size) {
22 | resample2d_kernel_backward(input1, input2, gradOutput, gradInput1, gradInput2, kernel_size);
23 | return 1;
24 | }
25 |
26 |
27 |
28 | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {
29 | m.def("forward", &resample2d_cuda_forward, "Resample2D forward (CUDA)");
30 | m.def("backward", &resample2d_cuda_backward, "Resample2D backward (CUDA)");
31 | }
32 |
33 |
--------------------------------------------------------------------------------
/mmaction/ops/resample2d_package/resample2d_kernel.cuh:
--------------------------------------------------------------------------------
1 | #pragma once
2 |
3 | #include <ATen/ATen.h>
4 |
5 | void resample2d_kernel_forward(
6 | at::Tensor& input1,
7 | at::Tensor& input2,
8 | at::Tensor& output,
9 | int kernel_size);
10 |
11 | void resample2d_kernel_backward(
12 | at::Tensor& input1,
13 | at::Tensor& input2,
14 | at::Tensor& gradOutput,
15 | at::Tensor& gradInput1,
16 | at::Tensor& gradInput2,
17 | int kernel_size);
18 |
--------------------------------------------------------------------------------
/mmaction/ops/resample2d_package/setup.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | import os
3 | import torch
4 |
5 | from setuptools import setup
6 | from torch.utils.cpp_extension import BuildExtension, CUDAExtension
7 |
8 | cxx_args = ['-std=c++14']
9 |
10 | nvcc_args = [
11 | '-gencode', 'arch=compute_50,code=sm_50',
12 | '-gencode', 'arch=compute_52,code=sm_52',
13 | '-gencode', 'arch=compute_60,code=sm_60',
14 | '-gencode', 'arch=compute_61,code=sm_61',
15 | '-gencode', 'arch=compute_70,code=sm_70',
16 | '-gencode', 'arch=compute_70,code=compute_70'
17 | ]
18 |
19 | setup(
20 | name='resample2d_cuda',
21 | ext_modules=[
22 | CUDAExtension('resample2d_cuda', [
23 | 'resample2d_cuda.cc',
24 | 'resample2d_kernel.cu'
25 | ], extra_compile_args={'cxx': cxx_args, 'nvcc': nvcc_args})
26 | ],
27 | cmdclass={
28 | 'build_ext': BuildExtension
29 | })
30 |
--------------------------------------------------------------------------------
/mmaction/ops/roi_align/__init__.py:
--------------------------------------------------------------------------------
1 | from .functions.roi_align import roi_align
2 | from .modules.roi_align import RoIAlign
3 |
4 | __all__ = ['roi_align', 'RoIAlign']
5 |
--------------------------------------------------------------------------------
/mmaction/ops/roi_align/functions/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/open-mmlab/mmaction/c7e3b7c11fb94131be9b48a8e3d510589addc3ce/mmaction/ops/roi_align/functions/__init__.py
--------------------------------------------------------------------------------
/mmaction/ops/roi_align/functions/roi_align.py:
--------------------------------------------------------------------------------
1 | from torch.autograd import Function
2 |
3 | from .. import roi_align_cuda
4 |
5 |
6 | class RoIAlignFunction(Function):
7 |
8 | @staticmethod
9 | def forward(ctx, features, rois, out_size, spatial_scale, sample_num=0):
10 | if isinstance(out_size, int):
11 | out_h = out_size
12 | out_w = out_size
13 | elif isinstance(out_size, tuple):
14 | assert len(out_size) == 2
15 | assert isinstance(out_size[0], int)
16 | assert isinstance(out_size[1], int)
17 | out_h, out_w = out_size
18 | else:
19 | raise TypeError(
20 | '"out_size" must be an integer or tuple of integers')
21 | ctx.spatial_scale = spatial_scale
22 | ctx.sample_num = sample_num
23 | ctx.save_for_backward(rois)
24 | ctx.feature_size = features.size()
25 |
26 | batch_size, num_channels, data_height, data_width = features.size()
27 | num_rois = rois.size(0)
28 |
29 | output = features.new_zeros(num_rois, num_channels, out_h, out_w)
30 | if features.is_cuda:
31 | roi_align_cuda.forward(features, rois, out_h, out_w, spatial_scale,
32 | sample_num, output)
33 | else:
34 | raise NotImplementedError
35 |
36 | return output
37 |
38 | @staticmethod
39 | def backward(ctx, grad_output):
40 | feature_size = ctx.feature_size
41 | spatial_scale = ctx.spatial_scale
42 | sample_num = ctx.sample_num
43 | rois = ctx.saved_tensors[0]
44 | assert (feature_size is not None and grad_output.is_cuda)
45 |
46 | batch_size, num_channels, data_height, data_width = feature_size
47 | out_w = grad_output.size(3)
48 | out_h = grad_output.size(2)
49 |
50 | grad_input = grad_rois = None
51 | if ctx.needs_input_grad[0]:
52 | grad_input = rois.new_zeros(batch_size, num_channels, data_height,
53 | data_width)
54 | roi_align_cuda.backward(grad_output.contiguous(), rois, out_h,
55 | out_w, spatial_scale, sample_num,
56 | grad_input)
57 |
58 | return grad_input, grad_rois, None, None, None
59 |
60 |
61 | roi_align = RoIAlignFunction.apply
62 |
--------------------------------------------------------------------------------
/mmaction/ops/roi_align/gradcheck.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import torch
3 | from torch.autograd import gradcheck
4 |
5 | import os.path as osp
6 | import sys
7 | sys.path.append(osp.abspath(osp.join(__file__, '../../')))
8 | from roi_align import RoIAlign # noqa: E402
9 |
10 | feat_size = 15
11 | spatial_scale = 1.0 / 8
12 | img_size = feat_size / spatial_scale
13 | num_imgs = 2
14 | num_rois = 20
15 |
16 | batch_ind = np.random.randint(num_imgs, size=(num_rois, 1))
17 | rois = np.random.rand(num_rois, 4) * img_size * 0.5
18 | rois[:, 2:] += img_size * 0.5
19 | rois = np.hstack((batch_ind, rois))
20 |
21 | feat = torch.randn(
22 | num_imgs, 16, feat_size, feat_size, requires_grad=True, device='cuda:0')
23 | rois = torch.from_numpy(rois).float().cuda()
24 | inputs = (feat, rois)
25 | print('Gradcheck for roi align...')
26 | test = gradcheck(RoIAlign(3, spatial_scale), inputs, atol=1e-3, eps=1e-3)
27 | print(test)
28 | test = gradcheck(RoIAlign(3, spatial_scale, 2), inputs, atol=1e-3, eps=1e-3)
29 | print(test)
30 |
--------------------------------------------------------------------------------
/mmaction/ops/roi_align/modules/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/open-mmlab/mmaction/c7e3b7c11fb94131be9b48a8e3d510589addc3ce/mmaction/ops/roi_align/modules/__init__.py
--------------------------------------------------------------------------------
/mmaction/ops/roi_align/modules/roi_align.py:
--------------------------------------------------------------------------------
1 | from torch.nn.modules.module import Module
2 | from ..functions.roi_align import RoIAlignFunction
3 |
4 |
5 | class RoIAlign(Module):
6 |
7 | def __init__(self, out_size, spatial_scale, sample_num=0):
8 | super(RoIAlign, self).__init__()
9 |
10 | self.out_size = out_size
11 | self.spatial_scale = float(spatial_scale)
12 | self.sample_num = int(sample_num)
13 |
14 | def forward(self, features, rois):
15 | return RoIAlignFunction.apply(features, rois, self.out_size,
16 | self.spatial_scale, self.sample_num)
17 |
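A usage sketch (not repository code; it requires a GPU and the compiled roi_align_cuda extension): rois are (batch_idx, x1, y1, x2, y2) in image coordinates, and spatial_scale maps them onto the feature map (an illustrative stride of 16 below):

    import torch
    from mmaction.ops import RoIAlign

    align = RoIAlign(out_size=7, spatial_scale=1.0 / 16, sample_num=2)
    feat = torch.randn(1, 256, 38, 50, device='cuda')
    rois = torch.tensor([[0., 32., 32., 160., 160.]], device='cuda')
    print(align(feat, rois).shape)             # torch.Size([1, 256, 7, 7])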
--------------------------------------------------------------------------------
/mmaction/ops/roi_align/setup.py:
--------------------------------------------------------------------------------
1 | from setuptools import setup
2 | from torch.utils.cpp_extension import BuildExtension, CUDAExtension
3 |
4 | setup(
5 | name='roi_align_cuda',
6 | ext_modules=[
7 | CUDAExtension('roi_align_cuda', [
8 | 'src/roi_align_cuda.cpp',
9 | 'src/roi_align_kernel.cu',
10 | ]),
11 | ],
12 | cmdclass={'build_ext': BuildExtension})
13 |
--------------------------------------------------------------------------------
/mmaction/ops/roi_align/src/roi_align_cuda.cpp:
--------------------------------------------------------------------------------
1 | #include <torch/extension.h>
2 | 
3 | #include <cmath>
4 | #include <vector>
5 |
6 | int ROIAlignForwardLaucher(const at::Tensor features, const at::Tensor rois,
7 | const float spatial_scale, const int sample_num,
8 | const int channels, const int height,
9 | const int width, const int num_rois,
10 | const int pooled_height, const int pooled_width,
11 | at::Tensor output);
12 |
13 | int ROIAlignBackwardLaucher(const at::Tensor top_grad, const at::Tensor rois,
14 | const float spatial_scale, const int sample_num,
15 | const int channels, const int height,
16 | const int width, const int num_rois,
17 | const int pooled_height, const int pooled_width,
18 | at::Tensor bottom_grad);
19 |
20 | #define CHECK_CUDA(x) TORCH_CHECK(x.device().is_cuda(), #x, " must be a CUDAtensor ")
21 | #define CHECK_CONTIGUOUS(x) \
22 | TORCH_CHECK(x.is_contiguous(), #x, " must be contiguous ")
23 | #define CHECK_INPUT(x) \
24 | CHECK_CUDA(x); \
25 | CHECK_CONTIGUOUS(x)
26 |
27 | int roi_align_forward_cuda(at::Tensor features, at::Tensor rois,
28 | int pooled_height, int pooled_width,
29 | float spatial_scale, int sample_num,
30 | at::Tensor output) {
31 | CHECK_INPUT(features);
32 | CHECK_INPUT(rois);
33 | CHECK_INPUT(output);
34 |
35 | // Number of ROIs
36 | int num_rois = rois.size(0);
37 | int size_rois = rois.size(1);
38 |
39 | if (size_rois != 5) {
40 | printf("wrong roi size\n");
41 | return 0;
42 | }
43 |
44 | int num_channels = features.size(1);
45 | int data_height = features.size(2);
46 | int data_width = features.size(3);
47 |
48 | ROIAlignForwardLaucher(features, rois, spatial_scale, sample_num,
49 | num_channels, data_height, data_width, num_rois,
50 | pooled_height, pooled_width, output);
51 |
52 | return 1;
53 | }
54 |
55 | int roi_align_backward_cuda(at::Tensor top_grad, at::Tensor rois,
56 | int pooled_height, int pooled_width,
57 | float spatial_scale, int sample_num,
58 | at::Tensor bottom_grad) {
59 | CHECK_INPUT(top_grad);
60 | CHECK_INPUT(rois);
61 | CHECK_INPUT(bottom_grad);
62 |
63 | // Number of ROIs
64 | int num_rois = rois.size(0);
65 | int size_rois = rois.size(1);
66 | if (size_rois != 5) {
67 | printf("wrong roi size\n");
68 | return 0;
69 | }
70 |
71 | int num_channels = bottom_grad.size(1);
72 | int data_height = bottom_grad.size(2);
73 | int data_width = bottom_grad.size(3);
74 |
75 | ROIAlignBackwardLaucher(top_grad, rois, spatial_scale, sample_num,
76 | num_channels, data_height, data_width, num_rois,
77 | pooled_height, pooled_width, bottom_grad);
78 |
79 | return 1;
80 | }
81 |
82 | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {
83 | m.def("forward", &roi_align_forward_cuda, "Roi_Align forward (CUDA)");
84 | m.def("backward", &roi_align_backward_cuda, "Roi_Align backward (CUDA)");
85 | }
86 |
--------------------------------------------------------------------------------
/mmaction/ops/roi_pool/__init__.py:
--------------------------------------------------------------------------------
1 | from .functions.roi_pool import roi_pool
2 | from .modules.roi_pool import RoIPool
3 |
4 | __all__ = ['roi_pool', 'RoIPool']
5 |
--------------------------------------------------------------------------------
/mmaction/ops/roi_pool/functions/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/open-mmlab/mmaction/c7e3b7c11fb94131be9b48a8e3d510589addc3ce/mmaction/ops/roi_pool/functions/__init__.py
--------------------------------------------------------------------------------
/mmaction/ops/roi_pool/functions/roi_pool.py:
--------------------------------------------------------------------------------
1 | import torch
2 | from torch.autograd import Function
3 |
4 | from .. import roi_pool_cuda
5 |
6 |
7 | class RoIPoolFunction(Function):
8 |
9 | @staticmethod
10 | def forward(ctx, features, rois, out_size, spatial_scale):
11 | if isinstance(out_size, int):
12 | out_h = out_size
13 | out_w = out_size
14 | elif isinstance(out_size, tuple):
15 | assert len(out_size) == 2
16 | assert isinstance(out_size[0], int)
17 | assert isinstance(out_size[1], int)
18 | out_h, out_w = out_size
19 | else:
20 | raise TypeError(
21 | '"out_size" must be an integer or tuple of integers')
22 | assert features.is_cuda
23 | ctx.save_for_backward(rois)
24 | num_channels = features.size(1)
25 | num_rois = rois.size(0)
26 | out_size = (num_rois, num_channels, out_h, out_w)
27 | output = features.new_zeros(out_size)
28 | argmax = features.new_zeros(out_size, dtype=torch.int)
29 | roi_pool_cuda.forward(features, rois, out_h, out_w, spatial_scale,
30 | output, argmax)
31 | ctx.spatial_scale = spatial_scale
32 | ctx.feature_size = features.size()
33 | ctx.argmax = argmax
34 |
35 | return output
36 |
37 | @staticmethod
38 | def backward(ctx, grad_output):
39 | assert grad_output.is_cuda
40 | spatial_scale = ctx.spatial_scale
41 | feature_size = ctx.feature_size
42 | argmax = ctx.argmax
43 | rois = ctx.saved_tensors[0]
44 | assert feature_size is not None
45 |
46 | grad_input = grad_rois = None
47 | if ctx.needs_input_grad[0]:
48 | grad_input = grad_output.new_zeros(feature_size)
49 | roi_pool_cuda.backward(grad_output.contiguous(), rois, argmax,
50 | spatial_scale, grad_input)
51 |
52 | return grad_input, grad_rois, None, None
53 |
54 |
55 | roi_pool = RoIPoolFunction.apply
56 |
--------------------------------------------------------------------------------
/mmaction/ops/roi_pool/gradcheck.py:
--------------------------------------------------------------------------------
1 | import torch
2 | from torch.autograd import gradcheck
3 |
4 | import os.path as osp
5 | import sys
6 | sys.path.append(osp.abspath(osp.join(__file__, '../../')))
7 | from roi_pool import RoIPool # noqa: E402
8 |
9 | feat = torch.randn(4, 16, 15, 15, requires_grad=True).cuda()
10 | rois = torch.Tensor([[0, 0, 0, 50, 50], [0, 10, 30, 43, 55],
11 | [1, 67, 40, 110, 120]]).cuda()
12 | inputs = (feat, rois)
13 | print('Gradcheck for roi pooling...')
14 | test = gradcheck(RoIPool(4, 1.0 / 8), inputs, eps=1e-5, atol=1e-3)
15 | print(test)
16 |
--------------------------------------------------------------------------------
/mmaction/ops/roi_pool/modules/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/open-mmlab/mmaction/c7e3b7c11fb94131be9b48a8e3d510589addc3ce/mmaction/ops/roi_pool/modules/__init__.py
--------------------------------------------------------------------------------
/mmaction/ops/roi_pool/modules/roi_pool.py:
--------------------------------------------------------------------------------
1 | from torch.nn.modules.module import Module
2 | from ..functions.roi_pool import roi_pool
3 |
4 |
5 | class RoIPool(Module):
6 |
7 | def __init__(self, out_size, spatial_scale):
8 | super(RoIPool, self).__init__()
9 |
10 | self.out_size = out_size
11 | self.spatial_scale = float(spatial_scale)
12 |
13 | def forward(self, features, rois):
14 | return roi_pool(features, rois, self.out_size, self.spatial_scale)
15 |
--------------------------------------------------------------------------------
/mmaction/ops/roi_pool/setup.py:
--------------------------------------------------------------------------------
1 | from setuptools import setup
2 | from torch.utils.cpp_extension import BuildExtension, CUDAExtension
3 |
4 | setup(
5 | name='roi_pool',
6 | ext_modules=[
7 | CUDAExtension('roi_pool_cuda', [
8 | 'src/roi_pool_cuda.cpp',
9 | 'src/roi_pool_kernel.cu',
10 | ])
11 | ],
12 | cmdclass={'build_ext': BuildExtension})
13 |
--------------------------------------------------------------------------------
/mmaction/ops/roi_pool/src/roi_pool_cuda.cpp:
--------------------------------------------------------------------------------
1 | #include <torch/extension.h>
2 | 
3 | #include <cmath>
4 | #include <vector>
5 |
6 | int ROIPoolForwardLaucher(const at::Tensor features, const at::Tensor rois,
7 | const float spatial_scale, const int channels,
8 | const int height, const int width, const int num_rois,
9 | const int pooled_h, const int pooled_w,
10 | at::Tensor output, at::Tensor argmax);
11 |
12 | int ROIPoolBackwardLaucher(const at::Tensor top_grad, const at::Tensor rois,
13 | const at::Tensor argmax, const float spatial_scale,
14 | const int batch_size, const int channels,
15 | const int height, const int width,
16 | const int num_rois, const int pooled_h,
17 | const int pooled_w, at::Tensor bottom_grad);
18 |
19 | #define CHECK_CUDA(x) TORCH_CHECK(x.device().is_cuda(), #x, " must be a CUDAtensor ")
20 | #define CHECK_CONTIGUOUS(x) \
21 | TORCH_CHECK(x.is_contiguous(), #x, " must be contiguous ")
22 | #define CHECK_INPUT(x) \
23 | CHECK_CUDA(x); \
24 | CHECK_CONTIGUOUS(x)
25 |
26 | int roi_pooling_forward_cuda(at::Tensor features, at::Tensor rois,
27 | int pooled_height, int pooled_width,
28 | float spatial_scale, at::Tensor output,
29 | at::Tensor argmax) {
30 | CHECK_INPUT(features);
31 | CHECK_INPUT(rois);
32 | CHECK_INPUT(output);
33 | CHECK_INPUT(argmax);
34 |
35 | // Number of ROIs
36 | int num_rois = rois.size(0);
37 | int size_rois = rois.size(1);
38 |
39 | if (size_rois != 5) {
40 | printf("wrong roi size\n");
41 | return 0;
42 | }
43 |
44 | int channels = features.size(1);
45 | int height = features.size(2);
46 | int width = features.size(3);
47 |
48 | ROIPoolForwardLaucher(features, rois, spatial_scale, channels, height, width,
49 | num_rois, pooled_height, pooled_width, output, argmax);
50 |
51 | return 1;
52 | }
53 |
54 | int roi_pooling_backward_cuda(at::Tensor top_grad, at::Tensor rois,
55 | at::Tensor argmax, float spatial_scale,
56 | at::Tensor bottom_grad) {
57 | CHECK_INPUT(top_grad);
58 | CHECK_INPUT(rois);
59 | CHECK_INPUT(argmax);
60 | CHECK_INPUT(bottom_grad);
61 |
62 | int pooled_height = top_grad.size(2);
63 | int pooled_width = top_grad.size(3);
64 | int num_rois = rois.size(0);
65 | int size_rois = rois.size(1);
66 |
67 | if (size_rois != 5) {
68 | printf("wrong roi size\n");
69 | return 0;
70 | }
71 | int batch_size = bottom_grad.size(0);
72 | int channels = bottom_grad.size(1);
73 | int height = bottom_grad.size(2);
74 | int width = bottom_grad.size(3);
75 |
76 | ROIPoolBackwardLaucher(top_grad, rois, argmax, spatial_scale, batch_size,
77 | channels, height, width, num_rois, pooled_height,
78 | pooled_width, bottom_grad);
79 |
80 | return 1;
81 | }
82 |
83 | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {
84 | m.def("forward", &roi_pooling_forward_cuda, "Roi_Pooling forward (CUDA)");
85 | m.def("backward", &roi_pooling_backward_cuda, "Roi_Pooling backward (CUDA)");
86 | }
87 |
--------------------------------------------------------------------------------
/mmaction/ops/trajectory_conv_package/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/open-mmlab/mmaction/c7e3b7c11fb94131be9b48a8e3d510589addc3ce/mmaction/ops/trajectory_conv_package/__init__.py
--------------------------------------------------------------------------------
/mmaction/ops/trajectory_conv_package/deform_3d_conv_cuda_kernel.h:
--------------------------------------------------------------------------------
1 | void deformable_im2col(const at::Tensor data_im,
2 | const at::Tensor data_offset, const int channels,
3 | const int time, const int height, const int width,
4 | const int ksize_t, const int ksize_h, const int ksize_w,
5 | const int pad_t, const int pad_h, const int pad_w,
6 | const int stride_t, const int stride_h, const int stride_w,
7 | const int dilation_t, const int dilation_h, const int dilation_w,
8 | const int parallel_imgs,
9 | const int deformable_groups,
10 | at::Tensor data_col);
11 |
12 | void deformable_col2im(const at::Tensor data_col,
13 | const at::Tensor data_offset, const int channels,
14 | const int time, const int height, const int width,
15 | const int ksize_t, const int ksize_h, const int ksize_w,
16 | const int pad_t, const int pad_h, const int pad_w,
17 | const int stride_t, const int stride_h, const int stride_w,
18 | const int dilation_t, const int dilation_h, const int dilation_w,
19 | const int parallel_imgs,
20 | const int deformable_groups,
21 | at::Tensor grad_im);
22 |
23 | void deformable_col2im_coord(const at::Tensor data_col,
24 | const at::Tensor data_im, const at::Tensor data_offset, const int channels,
25 | const int time, const int height, const int width,
26 | const int ksize_t, const int ksize_h, const int ksize_w,
27 | const int pad_t, const int pad_h, const int pad_w,
28 | const int stride_t, const int stride_h, const int stride_w,
29 | const int dilation_t, const int dilation_h, const int dilation_w,
30 | const int parallel_imgs,
31 | const int deformable_groups,
32 | at::Tensor grad_offset);
33 |
34 |
--------------------------------------------------------------------------------
/mmaction/ops/trajectory_conv_package/gradcheck.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import torch.nn as nn
3 | import torch.nn.functional as F
4 | from torch.autograd import gradcheck
5 |
6 | from traj_conv import TrajConv
7 |
8 | num_deformable_groups = 2
9 |
10 | N, inC, inT, inH, inW = 2, 8, 8, 4, 4
11 | outC, outT, outH, outW = 4, 8, 4, 4
12 | kT, kH, kW = 3, 3, 3
13 |
14 | conv = nn.Conv3d(inC, num_deformable_groups * 3 * kT * kH * kW,
15 | kernel_size=(kT, kH, kW),
16 | stride=(1,1,1),
17 | padding=(1,1,1),
18 | bias=False)
19 |
20 | conv_offset3d = TrajConv(inC, outC, (kT, kH, kW),
21 | stride=(1,1,1), padding=(1,1,1),
22 | num_deformable_groups=num_deformable_groups).double().cuda()
23 |
24 | input = torch.randn(N, inC, inT, inH, inW, requires_grad=True).double().cuda()
25 | offset = torch.rand(N, num_deformable_groups * 2 * kT * kH * kW, inT, inH, inW, requires_grad=True) * 1 - 0.5
26 | offset = offset.double().cuda()
27 | test = gradcheck(conv_offset3d, (input, offset), eps=1e-5, atol=1e-1, rtol=1e-5)
28 | print(test)
29 |
--------------------------------------------------------------------------------
/mmaction/ops/trajectory_conv_package/setup.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | import os
3 | import torch
4 |
5 | from setuptools import setup, find_packages
6 | from torch.utils.cpp_extension import BuildExtension, CUDAExtension
7 |
8 | cxx_args = ['-std=c++14']
9 |
10 | nvcc_args = [
11 | '-gencode', 'arch=compute_50,code=sm_50',
12 | '-gencode', 'arch=compute_52,code=sm_52',
13 | '-gencode', 'arch=compute_60,code=sm_60',
14 | '-gencode', 'arch=compute_61,code=sm_61',
15 | '-gencode', 'arch=compute_70,code=sm_70',
16 | '-gencode', 'arch=compute_70,code=compute_70',
17 | ]
18 |
19 | setup(
20 | name='traj_conv_cuda',
21 | ext_modules=[
22 | CUDAExtension('traj_conv_cuda', [
23 | 'traj_conv_cuda.cpp',
24 | 'deform_3d_conv_cuda_kernel.cu',
25 | ], extra_compile_args={'cxx': cxx_args, 'nvcc': nvcc_args})
26 | ],
27 | cmdclass={
28 | 'build_ext': BuildExtension
29 | })
30 |
--------------------------------------------------------------------------------
/mmaction/utils/misc.py:
--------------------------------------------------------------------------------
1 | import functools
2 | import numpy as np
3 | import mmcv
4 |
5 |
6 | def rsetattr(obj, attr, val):
7 | '''
8 | See:
9 | https://stackoverflow.com/questions/31174295/getattr-and-setattr-on-nested-objects
10 | '''
11 | pre, _, post = attr.rpartition('.')
12 | return setattr(rgetattr(obj, pre) if pre else obj, post, val)
13 |
14 |
15 | def rgetattr(obj, attr, *args):
16 | def _getattr(obj, attr):
17 | return getattr(obj, attr, *args)
18 | return functools.reduce(_getattr, [obj] + attr.split('.'))
19 |
20 |
21 | def rhasattr(obj, attr, *args):
22 | def _hasattr(obj, attr):
23 | if hasattr(obj, attr):
24 | return getattr(obj, attr)
25 | else:
26 | return None
27 | return functools.reduce(_hasattr, [obj] + attr.split('.')) is not None
28 |
29 |
30 | def tensor2video_snaps(tensor, mean=(0, 0, 0), std=(1, 1, 1), to_rgb=True):
31 | num_videos = tensor.size(0)
32 | num_frames = tensor.size(2)
33 | mean = np.array(mean, dtype=np.float32)
34 | std = np.array(std, dtype=np.float32)
35 | video_snaps = []
36 | for vid_id in range(num_videos):
37 | img = tensor[vid_id, :, num_frames //
38 | 2, ...].cpu().numpy().transpose(1, 2, 0)
39 | img = mmcv.imdenormalize(
40 | img, mean, std, to_bgr=to_rgb).astype(np.uint8)
41 | video_snaps.append(np.ascontiguousarray(img))
42 | return video_snaps
43 |
44 |
45 | def multi_apply(func, *args, **kwargs):
46 | pfunc = functools.partial(func, **kwargs) if kwargs else func
47 | map_results = map(pfunc, *args)
48 | return tuple(map(list, zip(*map_results)))
49 |
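A usage sketch (not repository code, assuming mmaction and mmcv are installed): rgetattr/rsetattr/rhasattr walk dotted attribute paths, and multi_apply maps a function over parallel lists and transposes the per-call results:

    import torch.nn as nn
    from mmaction.utils.misc import rgetattr, rsetattr, rhasattr, multi_apply

    model = nn.Sequential(nn.Conv2d(3, 8, 3), nn.BatchNorm2d(8))
    print(rhasattr(model, '1.weight'))         # True
    rsetattr(model, '1.momentum', 0.05)
    print(rgetattr(model, '1.momentum'))       # 0.05

    sums, diffs = multi_apply(lambda a, b: (a + b, a - b), [1, 2, 3], [4, 5, 6])
    print(sums, diffs)                         # [5, 7, 9] [-3, -3, -3]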
--------------------------------------------------------------------------------
/modelzoo/.placeholder:
--------------------------------------------------------------------------------
1 |
2 |
--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
1 | import os
2 | import subprocess
3 | import time
4 | from setuptools import find_packages, setup
5 |
6 |
7 | def readme():
8 | with open('README.md', encoding='utf-8') as f:
9 | content = f.read()
10 | return content
11 |
12 |
13 | MAJOR = 0
14 | MINOR = 1
15 | PATCH = 'rc0'
16 | SUFFIX = ''
17 | SHORT_VERSION = '{}.{}.{}{}'.format(MAJOR, MINOR, PATCH, SUFFIX)
18 |
19 | version_file = 'mmaction/version.py'
20 |
21 |
22 | def get_git_hash():
23 |
24 | def _minimal_ext_cmd(cmd):
25 | # construct minimal environment
26 | env = {}
27 | for k in ['SYSTEMROOT', 'PATH', 'HOME']:
28 | v = os.environ.get(k)
29 | if v is not None:
30 | env[k] = v
31 | # LANGUAGE is used on win32
32 | env['LANGUAGE'] = 'C'
33 | env['LANG'] = 'C'
34 | env['LC_ALL'] = 'C'
35 | out = subprocess.Popen(
36 | cmd, stdout=subprocess.PIPE, env=env).communicate()[0]
37 | return out
38 |
39 | try:
40 | out = _minimal_ext_cmd(['git', 'rev-parse', 'HEAD'])
41 | sha = out.strip().decode('ascii')
42 | except OSError:
43 | sha = 'unknown'
44 |
45 | return sha
46 |
47 |
48 | def get_hash():
49 | if os.path.exists('.git'):
50 | sha = get_git_hash()[:7]
51 | elif os.path.exists(version_file):
52 | try:
53 | from mmaction.version import __version__
54 | sha = __version__.split('+')[-1]
55 | except ImportError:
56 | raise ImportError('Unable to get git version')
57 | else:
58 | sha = 'unknown'
59 |
60 | return sha
61 |
62 |
63 | def write_version_py():
64 | content = """# GENERATED VERSION FILE
65 | # TIME: {}
66 |
67 | __version__ = '{}'
68 | short_version = '{}'
69 | """
70 | sha = get_hash()
71 | VERSION = SHORT_VERSION + '+' + sha
72 |
73 | with open(version_file, 'w') as f:
74 | f.write(content.format(time.asctime(), VERSION, SHORT_VERSION))
75 |
76 |
77 | def get_version():
78 | with open(version_file, 'r') as f:
79 | exec(compile(f.read(), version_file, 'exec'))
80 | return locals()['__version__']
81 |
82 |
83 | if __name__ == '__main__':
84 | write_version_py()
85 | setup(
86 | name='mmaction',
87 | version=get_version(),
88 | description='Open MMLab Action Toolbox',
89 | long_description=readme(),
90 | keywords='computer vision, action recognition',
91 | url='https://github.com/open-mmlab/mmaction',
92 | packages=find_packages(exclude=('configs', 'tools', 'demo')),
93 | package_data={'mmaction.ops': ['*/*.so']},
94 | classifiers=[
95 | 'Development Status :: 4 - Beta',
96 | 'License :: OSI Approved :: Apache Software License',
97 | 'Operating System :: OS Independent',
98 | 'Programming Language :: Python :: 2',
99 | 'Programming Language :: Python :: 2.7',
100 | 'Programming Language :: Python :: 3',
101 | 'Programming Language :: Python :: 3.4',
102 | 'Programming Language :: Python :: 3.5',
103 | 'Programming Language :: Python :: 3.6',
104 | ],
105 | license='Apache License 2.0',
106 | setup_requires=['pytest-runner'],
107 | tests_require=['pytest'],
108 | install_requires=[
109 | 'mmcv', 'numpy', 'scipy', 'scikit-learn', 'terminaltables', 'lmdb', 'joblib'
110 | ],
111 | zip_safe=False)
112 |
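For reference, the file that write_version_py() generates at mmaction/version.py looks like the sketch below; the timestamp and the 7-character git hash are illustrative placeholders:

# GENERATED VERSION FILE
# TIME: Mon Jan  1 00:00:00 2024

__version__ = '0.1.rc0+0a1b2c3'
short_version = '0.1.rc0'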
--------------------------------------------------------------------------------
/test_configs/CSN/ipcsn_kinetics400_se_rgb_r152_seg1_32x2.py:
--------------------------------------------------------------------------------
1 | # model settings
2 | model = dict(
3 | type='TSN3D',
4 | backbone=dict(
5 | type='ResNet_3D',
6 | pretrained=None,
7 | depth=152,
8 | use_pool1=True,
9 | block_type='0.3d'),
10 | spatial_temporal_module=dict(
11 | type='SimpleSpatialTemporalModule',
12 | spatial_type='avg',
13 | temporal_size=-1,
14 | spatial_size=-1),
15 | segmental_consensus=dict(
16 | type='SimpleConsensus',
17 | consensus_type='avg'),
18 | cls_head=dict(
19 | type='ClsHead',
20 | with_avg_pool=False,
21 | temporal_feature_size=1,
22 | spatial_feature_size=1,
23 | dropout_ratio=0.5,
24 | in_channels=2048,
25 | num_classes=400))
26 |
27 | train_cfg = None
28 | test_cfg = None
29 | # dataset settings
30 | dataset_type = 'RawFramesDataset'
31 | data_root_val = 'data/kinetics400/rawframes_val/'
32 | img_norm_cfg = dict(
33 | mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
34 |
35 | data = dict(
36 | test=dict(
37 | type=dataset_type,
38 |         ann_file='data/kinetics400/kinetics400_val_list_rawframes.txt',
39 | img_prefix=data_root_val,
40 | img_norm_cfg=img_norm_cfg,
41 | input_format="NCTHW",
42 | num_segments=10,
43 | new_length=32,
44 | new_step=2,
45 | random_shift=True,
46 | modality='RGB',
47 | image_tmpl='img_{:05d}.jpg',
48 | img_scale=256,
49 | input_size=256,
50 | div_255=False,
51 | flip_ratio=0,
52 | resize_keep_ratio=True,
53 | oversample='three_crop',
54 | random_crop=False,
55 | more_fix_crop=False,
56 | multiscale_crop=False,
57 | test_mode=True))
58 |
59 | dist_params = dict(backend='nccl', port=16187)
60 |
--------------------------------------------------------------------------------
/test_configs/CSN/ircsn_kinetics400_se_rgb_r152_seg1_32x2.py:
--------------------------------------------------------------------------------
1 | # model settings
2 | model = dict(
3 | type='TSN3D',
4 | backbone=dict(
5 | type='ResNet_3D',
6 | pretrained=None,
7 | depth=152,
8 | use_pool1=True,
9 | block_type='3d-sep'),
10 | spatial_temporal_module=dict(
11 | type='SimpleSpatialTemporalModule',
12 | spatial_type='avg',
13 | temporal_size=-1,
14 | spatial_size=-1),
15 | segmental_consensus=dict(
16 | type='SimpleConsensus',
17 | consensus_type='avg'),
18 | cls_head=dict(
19 | type='ClsHead',
20 | with_avg_pool=False,
21 | temporal_feature_size=1,
22 | spatial_feature_size=1,
23 | dropout_ratio=0.5,
24 | in_channels=2048,
25 | num_classes=400))
26 |
27 | train_cfg = None
28 | test_cfg = None
29 | # dataset settings
30 | dataset_type = 'RawFramesDataset'
31 | data_root_val = 'data/kinetics400/rawframes_val/'
32 | img_norm_cfg = dict(
33 | mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
34 |
35 | data = dict(
36 | test=dict(
37 | type=dataset_type,
38 |         ann_file='data/kinetics400/kinetics400_val_list_rawframes.txt',
39 | img_prefix=data_root_val,
40 | img_norm_cfg=img_norm_cfg,
41 | input_format="NCTHW",
42 | num_segments=10,
43 | new_length=32,
44 | new_step=2,
45 | random_shift=True,
46 | modality='RGB',
47 | image_tmpl='img_{:05d}.jpg',
48 | img_scale=256,
49 | input_size=256,
50 | div_255=False,
51 | flip_ratio=0,
52 | resize_keep_ratio=True,
53 | oversample='three_crop',
54 | random_crop=False,
55 | more_fix_crop=False,
56 | multiscale_crop=False,
57 | test_mode=True))
58 |
59 | dist_params = dict(backend='nccl', port=16187)
60 |
--------------------------------------------------------------------------------
/test_configs/I3D_Flow/i3d_hmdb51_3d_tvl1_inception_v1_seg1_f64s1.py:
--------------------------------------------------------------------------------
1 | # model settings
2 | model = dict(
3 | type='TSN3D',
4 | backbone=dict(
5 | type='InceptionV1_I3D',
6 | pretrained=None,
7 | modality='Flow'),
8 | spatial_temporal_module=dict(
9 | type='SimpleSpatialTemporalModule',
10 | spatial_type='avg',
11 | temporal_size=-1,
12 | spatial_size=-1),
13 | segmental_consensus=dict(
14 | type='SimpleConsensus',
15 | consensus_type='avg'),
16 | cls_head=dict(
17 | type='ClsHead',
18 | with_avg_pool=False,
19 | temporal_feature_size=1,
20 | spatial_feature_size=1,
21 | dropout_ratio=0.5,
22 | in_channels=2048,
23 | num_classes=51))
24 | train_cfg = None
25 | test_cfg = None
26 | # dataset settings
27 | dataset_type = 'RawFramesDataset'
28 | data_root_val = 'data/hmdb51/rawframes/'
29 | img_norm_cfg = dict(
30 | mean=[128, 128], std=[128, 128])
31 | data = dict(
32 | test=dict(
33 | type=dataset_type,
34 |         ann_file='data/hmdb51/hmdb51_val_split_1_rawframes.txt',
35 | img_prefix=data_root_val,
36 | img_norm_cfg=img_norm_cfg,
37 | input_format="NCTHW",
38 | num_segments=10,
39 | new_length=64,
40 | new_step=1,
41 | random_shift=True,
42 | modality='Flow',
43 | image_tmpl='{}_{:05d}.jpg',
44 | img_scale=256,
45 | input_size=256,
46 | div_255=False,
47 | flip_ratio=0,
48 | resize_keep_ratio=True,
49 | oversample='three_crop',
50 | random_crop=False,
51 | more_fix_crop=False,
52 | multiscale_crop=False,
53 | test_mode=True))
54 |
55 | dist_params = dict(backend='nccl')
56 |
--------------------------------------------------------------------------------
/test_configs/I3D_Flow/i3d_kinetics400_3d_tvl1_inception_v1_seg1_f64s1.py:
--------------------------------------------------------------------------------
1 | # model settings
2 | model = dict(
3 | type='TSN3D',
4 | backbone=dict(
5 | type='InceptionV1_I3D',
6 | pretrained=None,
7 | modality='Flow'),
8 | spatial_temporal_module=dict(
9 | type='SimpleSpatialTemporalModule',
10 | spatial_type='avg',
11 | temporal_size=-1,
12 | spatial_size=-1),
13 | segmental_consensus=dict(
14 | type='SimpleConsensus',
15 | consensus_type='avg'),
16 | cls_head=dict(
17 | type='ClsHead',
18 | with_avg_pool=False,
19 | temporal_feature_size=1,
20 | spatial_feature_size=1,
21 | dropout_ratio=0.5,
22 | in_channels=2048,
23 | num_classes=400))
24 | train_cfg = None
25 | test_cfg = None
26 | # dataset settings
27 | dataset_type = 'RawFramesDataset'
28 | data_root_val = 'data/kinetics400/rawframes_val/'
29 | img_norm_cfg = dict(
30 | mean=[128, 128], std=[128, 128])
31 | data = dict(
32 | test=dict(
33 | type=dataset_type,
34 | ann_file='data/kinetics400/kinetics400_val_list_rawframes.txt',
35 | img_prefix=data_root_val,
36 | img_norm_cfg=img_norm_cfg,
37 | input_format="NCTHW",
38 | num_segments=10,
39 | new_length=64,
40 | new_step=1,
41 | random_shift=True,
42 | modality='Flow',
43 | image_tmpl='{}_{:05d}.jpg',
44 | img_scale=256,
45 | input_size=256,
46 | div_255=False,
47 | flip_ratio=0,
48 | resize_keep_ratio=True,
49 | oversample='three_crop',
50 | random_crop=False,
51 | more_fix_crop=False,
52 | multiscale_crop=False,
53 | test_mode=True))
54 |
55 | dist_params = dict(backend='nccl')
56 |
--------------------------------------------------------------------------------
/test_configs/I3D_Flow/i3d_ucf101_3d_tvl1_inception_v1_seg1_f64s1.py:
--------------------------------------------------------------------------------
1 | # model settings
2 | model = dict(
3 | type='TSN3D',
4 | backbone=dict(
5 | type='InceptionV1_I3D',
6 | pretrained=None,
7 | modality='Flow'),
8 | spatial_temporal_module=dict(
9 | type='SimpleSpatialTemporalModule',
10 | spatial_type='avg',
11 | temporal_size=-1,
12 | spatial_size=-1),
13 | segmental_consensus=dict(
14 | type='SimpleConsensus',
15 | consensus_type='avg'),
16 | cls_head=dict(
17 | type='ClsHead',
18 | with_avg_pool=False,
19 | temporal_feature_size=1,
20 | spatial_feature_size=1,
21 | dropout_ratio=0.5,
22 | in_channels=2048,
23 | num_classes=101))
24 | train_cfg = None
25 | test_cfg = None
26 | # dataset settings
27 | dataset_type = 'RawFramesDataset'
28 | data_root_val = 'data/ucf101/rawframes/'
29 | img_norm_cfg = dict(
30 | mean=[128, 128], std=[128, 128])
31 | data = dict(
32 | test=dict(
33 | type=dataset_type,
34 |         ann_file='data/ucf101/ucf101_val_split_1_rawframes.txt',
35 | img_prefix=data_root_val,
36 | img_norm_cfg=img_norm_cfg,
37 | input_format="NCTHW",
38 | num_segments=10,
39 | new_length=64,
40 | new_step=1,
41 | random_shift=True,
42 | modality='Flow',
43 | image_tmpl='{}_{:05d}.jpg',
44 | img_scale=256,
45 | input_size=256,
46 | div_255=False,
47 | flip_ratio=0,
48 | resize_keep_ratio=True,
49 | oversample='three_crop',
50 | random_crop=False,
51 | more_fix_crop=False,
52 | multiscale_crop=False,
53 | test_mode=True))
54 |
55 | dist_params = dict(backend='nccl')
56 |
--------------------------------------------------------------------------------
/test_configs/I3D_RGB/i3d_hmdb51_3d_rgb_inception_v1_seg1_f64s1.py:
--------------------------------------------------------------------------------
1 | # model settings
2 | model = dict(
3 | type='TSN3D',
4 | backbone=dict(
5 | type='InceptionV1_I3D',
6 | pretrained=None,
7 | modality='RGB'),
8 | spatial_temporal_module=dict(
9 | type='SimpleSpatialTemporalModule',
10 | spatial_type='avg',
11 | temporal_size=-1,
12 | spatial_size=-1),
13 | segmental_consensus=dict(
14 | type='SimpleConsensus',
15 | consensus_type='avg'),
16 | cls_head=dict(
17 | type='ClsHead',
18 | with_avg_pool=False,
19 | temporal_feature_size=1,
20 | spatial_feature_size=1,
21 | dropout_ratio=0.5,
22 | in_channels=2048,
23 | num_classes=51))
24 | train_cfg = None
25 | test_cfg = None
26 | # dataset settings
27 | dataset_type = 'RawFramesDataset'
28 | data_root_val = 'data/hmdb51/rawframes/'
29 | img_norm_cfg = dict(
30 | mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
31 | data = dict(
32 | test=dict(
33 | type=dataset_type,
34 |         ann_file='data/hmdb51/hmdb51_val_split_1_rawframes.txt',
35 | img_prefix=data_root_val,
36 | img_norm_cfg=img_norm_cfg,
37 | input_format="NCTHW",
38 | num_segments=10,
39 | new_length=64,
40 | new_step=1,
41 | random_shift=True,
42 | modality='RGB',
43 | image_tmpl='img_{:05d}.jpg',
44 | img_scale=256,
45 | input_size=256,
46 | div_255=False,
47 | flip_ratio=0,
48 | resize_keep_ratio=True,
49 | oversample='three_crop',
50 | random_crop=False,
51 | more_fix_crop=False,
52 | multiscale_crop=False,
53 | test_mode=True))
54 |
55 | dist_params = dict(backend='nccl')
56 |
--------------------------------------------------------------------------------
/test_configs/I3D_RGB/i3d_kinetics400_3d_rgb_inception_v1_seg1_f64s1.py:
--------------------------------------------------------------------------------
1 | # model settings
2 | model = dict(
3 | type='TSN3D',
4 | backbone=dict(
5 | type='InceptionV1_I3D',
6 | pretrained=None,
7 | modality='RGB'),
8 | spatial_temporal_module=dict(
9 | type='SimpleSpatialTemporalModule',
10 | spatial_type='avg',
11 | temporal_size=-1,
12 | spatial_size=-1),
13 | segmental_consensus=dict(
14 | type='SimpleConsensus',
15 | consensus_type='avg'),
16 | cls_head=dict(
17 | type='ClsHead',
18 | with_avg_pool=False,
19 | temporal_feature_size=1,
20 | spatial_feature_size=1,
21 | dropout_ratio=0.5,
22 | in_channels=2048,
23 | num_classes=400))
24 | train_cfg = None
25 | test_cfg = None
26 | # dataset settings
27 | dataset_type = 'RawFramesDataset'
28 | data_root_val = 'data/kinetics400/rawframes_val/'
29 | img_norm_cfg = dict(
30 | mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
31 | data = dict(
32 | test=dict(
33 | type=dataset_type,
34 | ann_file='data/kinetics400/kinetics400_val_list_rawframes.txt',
35 | img_prefix=data_root_val,
36 | img_norm_cfg=img_norm_cfg,
37 | input_format="NCTHW",
38 | num_segments=10,
39 | new_length=64,
40 | new_step=1,
41 | random_shift=True,
42 | modality='RGB',
43 | image_tmpl='img_{:05d}.jpg',
44 | img_scale=256,
45 | input_size=256,
46 | div_255=False,
47 | flip_ratio=0,
48 | resize_keep_ratio=True,
49 | oversample='three_crop',
50 | random_crop=False,
51 | more_fix_crop=False,
52 | multiscale_crop=False,
53 | test_mode=True))
54 |
55 | dist_params = dict(backend='nccl')
56 |
--------------------------------------------------------------------------------
/test_configs/I3D_RGB/i3d_kinetics400_3d_rgb_r50_c3d_inflate3x1x1_seg1_f32s2.py:
--------------------------------------------------------------------------------
1 | # model settings
2 | model = dict(
3 | type='TSN3D',
4 | backbone=dict(
5 | type='ResNet_I3D',
6 | pretrained=None,
7 | depth=50,
8 | num_stages=4,
9 | out_indices=[3],
10 | frozen_stages=-1,
11 | inflate_freq=((1,1,1), (1,0,1,0), (1,0,1,0,1,0), (0,1,0)),
12 | inflate_style='3x1x1',
13 | conv1_kernel_t=5,
14 | conv1_stride_t=2,
15 | pool1_kernel_t=1,
16 | pool1_stride_t=2,
17 | bn_eval=False,
18 | partial_bn=False,
19 | style='pytorch'),
20 | spatial_temporal_module=dict(
21 | type='SimpleSpatialTemporalModule',
22 | spatial_type='avg',
23 | temporal_size=4,
24 | spatial_size=7),
25 | segmental_consensus=dict(
26 | type='SimpleConsensus',
27 | consensus_type='avg'),
28 | cls_head=dict(
29 | type='ClsHead',
30 | with_avg_pool=False,
31 | temporal_feature_size=1,
32 | spatial_feature_size=1,
33 | dropout_ratio=0.5,
34 | in_channels=2048,
35 | num_classes=400))
36 | train_cfg = None
37 | test_cfg = None
38 | # dataset settings
39 | dataset_type = 'RawFramesDataset'
40 | data_root_val = 'data/kinetics400/rawframes_val/'
41 | img_norm_cfg = dict(
42 | mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
43 | data = dict(
44 | test=dict(
45 | type=dataset_type,
46 | ann_file='data/kinetics400/kinetics400_val_list_rawframes.txt',
47 | img_prefix=data_root_val,
48 | img_norm_cfg=img_norm_cfg,
49 | input_format="NCTHW",
50 | num_segments=10,
51 | new_length=32,
52 | new_step=2,
53 | random_shift=True,
54 | modality='RGB',
55 | image_tmpl='img_{:05d}.jpg',
56 | img_scale=256,
57 | input_size=256,
58 | div_255=False,
59 | flip_ratio=0,
60 | resize_keep_ratio=True,
61 | oversample='three_crop',
62 | random_crop=False,
63 | more_fix_crop=False,
64 | multiscale_crop=False,
65 | test_mode=True))
66 |
67 | dist_params = dict(backend='nccl')
68 |
--------------------------------------------------------------------------------
/test_configs/I3D_RGB/i3d_ucf101_3d_rgb_inception_v1_seg1_f64s1.py:
--------------------------------------------------------------------------------
1 | # model settings
2 | model = dict(
3 | type='TSN3D',
4 | backbone=dict(
5 | type='InceptionV1_I3D',
6 | pretrained=None,
7 | modality='RGB'),
8 | spatial_temporal_module=dict(
9 | type='SimpleSpatialTemporalModule',
10 | spatial_type='avg',
11 | temporal_size=-1,
12 | spatial_size=-1),
13 | segmental_consensus=dict(
14 | type='SimpleConsensus',
15 | consensus_type='avg'),
16 | cls_head=dict(
17 | type='ClsHead',
18 | with_avg_pool=False,
19 | temporal_feature_size=1,
20 | spatial_feature_size=1,
21 | dropout_ratio=0.5,
22 | in_channels=2048,
23 | num_classes=101))
24 | train_cfg = None
25 | test_cfg = None
26 | # dataset settings
27 | dataset_type = 'RawFramesDataset'
28 | data_root_val = 'data/ucf101/rawframes/'
29 | img_norm_cfg = dict(
30 | mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
31 | data = dict(
32 | test=dict(
33 | type=dataset_type,
34 |         ann_file='data/ucf101/ucf101_val_split_1_rawframes.txt',
35 | img_prefix=data_root_val,
36 | img_norm_cfg=img_norm_cfg,
37 | input_format="NCTHW",
38 | num_segments=10,
39 | new_length=64,
40 | new_step=1,
41 | random_shift=True,
42 | modality='RGB',
43 | image_tmpl='img_{:05d}.jpg',
44 | img_scale=256,
45 | input_size=256,
46 | div_255=False,
47 | flip_ratio=0,
48 | resize_keep_ratio=True,
49 | oversample='three_crop',
50 | random_crop=False,
51 | more_fix_crop=False,
52 | multiscale_crop=False,
53 | test_mode=True))
54 |
55 | dist_params = dict(backend='nccl')
56 |
--------------------------------------------------------------------------------
/test_configs/R2plus1D/r2plus1d_kinetics400_se_rgb_r34_seg1_32x2.py:
--------------------------------------------------------------------------------
1 | # model settings
2 | model = dict(
3 | type='TSN3D',
4 | backbone=dict(
5 | type='ResNet_3D',
6 | pretrained=None,
7 | depth=34,
8 | use_pool1=True,
9 | block_type='2.5d',
10 | use_syncbn=True),
11 | spatial_temporal_module=dict(
12 | type='SimpleSpatialTemporalModule',
13 | spatial_type='avg',
14 | temporal_size=-1,
15 | spatial_size=-1),
16 | segmental_consensus=dict(
17 | type='SimpleConsensus',
18 | consensus_type='avg'),
19 | cls_head=dict(
20 | type='ClsHead',
21 | with_avg_pool=False,
22 | temporal_feature_size=1,
23 | spatial_feature_size=1,
24 | dropout_ratio=0.5,
25 | in_channels=512,
26 | num_classes=400))
27 | train_cfg = None
28 | test_cfg = None
29 | # dataset settings
30 | dataset_type = 'RawFramesDataset'
31 | data_root_val = 'data/kinetics400/rawframes_val/'
32 | img_norm_cfg = dict(
33 | mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
34 | data = dict(
35 | test=dict(
36 | type=dataset_type,
37 |         ann_file='data/kinetics400/kinetics400_val_list_rawframes.txt',
38 | img_prefix=data_root_val,
39 | img_norm_cfg=img_norm_cfg,
40 | input_format="NCTHW",
41 | num_segments=10,
42 | new_length=32,
43 | new_step=2,
44 | random_shift=True,
45 | modality='RGB',
46 | image_tmpl='img_{:05d}.jpg',
47 | img_scale=256,
48 | input_size=256,
49 | div_255=False,
50 | flip_ratio=0,
51 | resize_keep_ratio=True,
52 | oversample='three_crop',
53 | random_crop=False,
54 | more_fix_crop=False,
55 | multiscale_crop=False,
56 | test_mode=True))
57 |
58 | dist_params = dict(backend='nccl', port=16187)
59 |
--------------------------------------------------------------------------------
/test_configs/R2plus1D/r2plus1d_kinetics400_se_rgb_r34_seg1_8x8.py:
--------------------------------------------------------------------------------
1 | # model settings
2 | model = dict(
3 | type='TSN3D',
4 | backbone=dict(
5 | type='ResNet_3D',
6 | pretrained=None,
7 | depth=34,
8 | use_pool1=True,
9 | block_type='2.5d',
10 | use_syncbn=True),
11 | spatial_temporal_module=dict(
12 | type='SimpleSpatialTemporalModule',
13 | spatial_type='avg',
14 | temporal_size=-1,
15 | spatial_size=-1),
16 | segmental_consensus=dict(
17 | type='SimpleConsensus',
18 | consensus_type='avg'),
19 | cls_head=dict(
20 | type='ClsHead',
21 | with_avg_pool=False,
22 | temporal_feature_size=1,
23 | spatial_feature_size=1,
24 | dropout_ratio=0.5,
25 | in_channels=512,
26 | num_classes=400))
27 | train_cfg = None
28 | test_cfg = None
29 | # dataset settings
30 | dataset_type = 'RawFramesDataset'
31 | data_root_val = 'data/kinetics400/rawframes_val/'
32 | img_norm_cfg = dict(
33 | mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
34 | data = dict(
35 | test=dict(
36 | type=dataset_type,
37 |         ann_file='data/kinetics400/kinetics400_val_list_rawframes.txt',
38 | img_prefix=data_root_val,
39 | img_norm_cfg=img_norm_cfg,
40 | input_format="NCTHW",
41 | num_segments=10,
42 | new_length=8,
43 | new_step=8,
44 | random_shift=True,
45 | modality='RGB',
46 | image_tmpl='img_{:05d}.jpg',
47 | img_scale=256,
48 | input_size=256,
49 | div_255=False,
50 | flip_ratio=0,
51 | resize_keep_ratio=True,
52 | oversample='three_crop',
53 | random_crop=False,
54 | more_fix_crop=False,
55 | multiscale_crop=False,
56 | test_mode=True))
57 |
58 | dist_params = dict(backend='nccl', port=16187)
59 |
--------------------------------------------------------------------------------
/test_configs/SlowFast/slowfast_kinetics400_se_rgb_r50_seg1_4x16.py:
--------------------------------------------------------------------------------
1 | # model settings
2 | model = dict(
3 | type='TSN3D',
4 | backbone=dict(
5 | type='ResNet_I3D_SlowFast',
6 | pretrained_slow=None,
7 | pretrained_fast=None,
8 | depth=50,
9 | alpha=8,
10 | beta_inv=8,
11 | num_stages=4,
12 | out_indices=[3],
13 | frozen_stages=-1,
14 | slow_inflate_freq=(0, 0, 1, 1),
15 | fast_inflate_freq=(1, 1, 1, 1),
16 | inflate_style='3x1x1',
17 | bn_eval=False,
18 | partial_bn=False,
19 | style='pytorch'),
20 | spatial_temporal_module=dict(
21 | type='SlowFastSpatialTemporalModule',
22 | adaptive_pool=True,
23 | spatial_type='avg',
24 | temporal_size=1,
25 | spatial_size=1),
26 | segmental_consensus=dict(
27 | type='SimpleConsensus',
28 | consensus_type='avg'),
29 | cls_head=dict(
30 | type='ClsHead',
31 | with_avg_pool=False,
32 | temporal_feature_size=1,
33 | spatial_feature_size=1,
34 | dropout_ratio=0.5,
35 | in_channels=2304, # 2048+256
36 | num_classes=400))
37 | train_cfg = None
38 | test_cfg = None
39 | # dataset settings
40 | dataset_type = 'RawFramesDataset'
41 | data_root_val = 'data/kinetics400/rawframes_val/'
42 | img_norm_cfg = dict(
43 | mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
44 |
45 | data = dict(
46 | test=dict(
47 | type=dataset_type,
48 | ann_file='data/kinetics400/kinetics400_val_list_rawframes.txt',
49 | img_prefix=data_root_val,
50 | img_norm_cfg=img_norm_cfg,
51 | input_format="NCTHW",
52 | num_segments=10,
53 | new_length=32,
54 | new_step=2,
55 | modality='RGB',
56 | image_tmpl='img_{:05d}.jpg',
57 | img_scale=256,
58 | input_size=256,
59 | div_255=False,
60 | flip_ratio=0,
61 | resize_keep_ratio=True,
62 | oversample='three_crop',
63 | random_crop=False,
64 | more_fix_crop=False,
65 | multiscale_crop=False,
66 | test_mode=True)
67 | )
68 |
69 | dist_params = dict(backend='nccl', port=16187)
70 |
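A short editorial sketch of where the 2304-channel head input in this config comes from, assuming the fast pathway width equals the slow pathway width divided by beta_inv:

slow_channels = 2048                           # slow pathway output width (ResNet-50, stage 4)
fast_channels = slow_channels // 8             # beta_inv = 8  ->  256
in_channels = slow_channels + fast_channels    # 2304, matching cls_head.in_channels above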
--------------------------------------------------------------------------------
/test_configs/SlowOnly/slowonly_kinetics400_se_rgb_r101_seg1_8x8.py:
--------------------------------------------------------------------------------
1 | model = dict(
2 | type='TSN3D',
3 | backbone=dict(
4 | type='ResNet_I3D',
5 | pretrained=None,
6 | depth=101,
7 | num_stages=4,
8 | out_indices=[3],
9 | frozen_stages=-1,
10 | inflate_freq=(0, 0, 1, 1),
11 | conv1_kernel_t=1,
12 | conv1_stride_t=1,
13 | pool1_kernel_t=1,
14 | pool1_stride_t=1,
15 | inflate_style='3x1x1',
16 | bn_eval=False,
17 | no_pool2=True,
18 | partial_bn=False,
19 | style='pytorch'),
20 | spatial_temporal_module=dict(
21 | type='SimpleSpatialTemporalModule',
22 | spatial_type='avg',
23 | temporal_size=-1,
24 | spatial_size=-1),
25 | segmental_consensus=dict(
26 | type='SimpleConsensus',
27 | consensus_type='avg'),
28 | cls_head=dict(
29 | type='ClsHead',
30 | with_avg_pool=False,
31 | temporal_feature_size=1,
32 | spatial_feature_size=1,
33 | dropout_ratio=0.5,
34 | in_channels=2048,
35 | num_classes=400))
36 | train_cfg = None
37 | test_cfg = None
38 | # dataset settings
39 | dataset_type = 'RawFramesDataset'
40 | data_root_val = 'data/kinetics400/rawframes_val/'
41 | img_norm_cfg = dict(
42 | mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
43 | data = dict(
44 | test=dict(
45 | type=dataset_type,
46 | ann_file='data/kinetics400/kinetics400_val_list_rawframes.txt',
47 | img_prefix=data_root_val,
48 | img_norm_cfg=img_norm_cfg,
49 | input_format="NCTHW",
50 | num_segments=10,
51 | new_length=8,
52 | new_step=8,
53 | random_shift=False,
54 | modality='RGB',
55 | image_tmpl='img_{:05d}.jpg',
56 | img_scale=256,
57 | input_size=256,
58 | div_255=False,
59 | flip_ratio=0,
60 | resize_keep_ratio=True,
61 | oversample='three_crop',
62 | random_crop=False,
63 | more_fix_crop=False,
64 | multiscale_crop=False,
65 | test_mode=True))
66 |
67 | dist_params = dict(backend='nccl')
68 |
--------------------------------------------------------------------------------
/test_configs/SlowOnly/slowonly_kinetics400_se_rgb_r50_seg1_4x16.py:
--------------------------------------------------------------------------------
1 | model = dict(
2 | type='TSN3D',
3 | backbone=dict(
4 | type='ResNet_I3D',
5 | pretrained=None,
6 | depth=50,
7 | num_stages=4,
8 | out_indices=[3],
9 | frozen_stages=-1,
10 | inflate_freq=(0, 0, 1, 1),
11 | conv1_kernel_t=1,
12 | conv1_stride_t=1,
13 | pool1_kernel_t=1,
14 | pool1_stride_t=1,
15 | inflate_style='3x1x1',
16 | bn_eval=False,
17 | no_pool2=True,
18 | partial_bn=False,
19 | style='pytorch'),
20 | spatial_temporal_module=dict(
21 | type='SimpleSpatialTemporalModule',
22 | spatial_type='avg',
23 | temporal_size=-1,
24 | spatial_size=-1),
25 | segmental_consensus=dict(
26 | type='SimpleConsensus',
27 | consensus_type='avg'),
28 | cls_head=dict(
29 | type='ClsHead',
30 | with_avg_pool=False,
31 | temporal_feature_size=1,
32 | spatial_feature_size=1,
33 | dropout_ratio=0.5,
34 | in_channels=2048,
35 | num_classes=400))
36 | train_cfg = None
37 | test_cfg = None
38 | # dataset settings
39 | dataset_type = 'RawFramesDataset'
40 | data_root_val = 'data/kinetics400/rawframes_val/'
41 | img_norm_cfg = dict(
42 | mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
43 | data = dict(
44 | test=dict(
45 | type=dataset_type,
46 | ann_file='data/kinetics400/kinetics400_val_list_rawframes.txt',
47 | img_prefix=data_root_val,
48 | img_norm_cfg=img_norm_cfg,
49 | input_format="NCTHW",
50 | num_segments=10,
51 | new_length=4,
52 | new_step=16,
53 | random_shift=False,
54 | modality='RGB',
55 | image_tmpl='img_{:05d}.jpg',
56 | img_scale=256,
57 | input_size=256,
58 | div_255=False,
59 | flip_ratio=0,
60 | resize_keep_ratio=True,
61 | oversample='three_crop',
62 | random_crop=False,
63 | more_fix_crop=False,
64 | multiscale_crop=False,
65 | test_mode=True))
66 |
67 | dist_params = dict(backend='nccl')
68 |
--------------------------------------------------------------------------------
/test_configs/SlowOnly/slowonly_kinetics400_se_rgb_r50_seg1_8x8.py:
--------------------------------------------------------------------------------
1 | model = dict(
2 | type='TSN3D',
3 | backbone=dict(
4 | type='ResNet_I3D',
5 | pretrained=None,
6 | depth=50,
7 | num_stages=4,
8 | out_indices=[3],
9 | frozen_stages=-1,
10 | inflate_freq=(0, 0, 1, 1),
11 | conv1_kernel_t=1,
12 | conv1_stride_t=1,
13 | pool1_kernel_t=1,
14 | pool1_stride_t=1,
15 | inflate_style='3x1x1',
16 | bn_eval=False,
17 | no_pool2=True,
18 | partial_bn=False,
19 | style='pytorch'),
20 | spatial_temporal_module=dict(
21 | type='SimpleSpatialTemporalModule',
22 | spatial_type='avg',
23 | temporal_size=-1,
24 | spatial_size=-1),
25 | segmental_consensus=dict(
26 | type='SimpleConsensus',
27 | consensus_type='avg'),
28 | cls_head=dict(
29 | type='ClsHead',
30 | with_avg_pool=False,
31 | temporal_feature_size=1,
32 | spatial_feature_size=1,
33 | dropout_ratio=0.5,
34 | in_channels=2048,
35 | num_classes=400))
36 | train_cfg = None
37 | test_cfg = None
38 | # dataset settings
39 | dataset_type = 'RawFramesDataset'
40 | data_root_val = 'data/kinetics400/rawframes_val/'
41 | img_norm_cfg = dict(
42 | mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
43 | data = dict(
44 | test=dict(
45 | type=dataset_type,
46 | ann_file='data/kinetics400/kinetics400_val_list_rawframes.txt',
47 | img_prefix=data_root_val,
48 | img_norm_cfg=img_norm_cfg,
49 | input_format="NCTHW",
50 | num_segments=10,
51 | new_length=8,
52 | new_step=8,
53 | random_shift=False,
54 | modality='RGB',
55 | image_tmpl='img_{:05d}.jpg',
56 | img_scale=256,
57 | input_size=256,
58 | div_255=False,
59 | flip_ratio=0,
60 | resize_keep_ratio=True,
61 | oversample='three_crop',
62 | random_crop=False,
63 | more_fix_crop=False,
64 | multiscale_crop=False,
65 | test_mode=True))
66 |
67 | dist_params = dict(backend='nccl')
68 |
--------------------------------------------------------------------------------
/test_configs/TSN/tsn_kinetics400_2d_rgb_r50_seg3_f1s1.py:
--------------------------------------------------------------------------------
1 | model = dict(
2 | type='TSN2D',
3 | backbone=dict(
4 | type='ResNet',
5 | pretrained=None,
6 | depth=50,
7 | out_indices=(3,),
8 | bn_eval=False,
9 | partial_bn=False),
10 | spatial_temporal_module=dict(
11 | type='SimpleSpatialModule',
12 | spatial_type='avg',
13 | spatial_size=7),
14 | segmental_consensus=dict(
15 | type='SimpleConsensus',
16 | consensus_type='avg'),
17 | cls_head=dict(
18 | type='ClsHead',
19 | with_avg_pool=False,
20 | temporal_feature_size=1,
21 | spatial_feature_size=1,
22 | dropout_ratio=0.4,
23 | in_channels=2048,
24 | num_classes=400))
25 | train_cfg = None
26 | test_cfg = None
27 | # dataset settings
28 | dataset_type = 'RawFramesDataset'
29 | data_root_val = 'data/kinetics400/rawframes_val'
30 | img_norm_cfg = dict(
31 | mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
32 |
33 | data = dict(
34 | test=dict(
35 | type=dataset_type,
36 | ann_file='data/kinetics400/kinetics400_val_list_rawframes.txt',
37 | img_prefix=data_root_val,
38 | img_norm_cfg=img_norm_cfg,
39 | num_segments=25,
40 | new_length=1,
41 | new_step=1,
42 | random_shift=False,
43 | modality='RGB',
44 | image_tmpl='img_{:05d}.jpg',
45 | img_scale=256,
46 | input_size=224,
47 | div_255=False,
48 | flip_ratio=0,
49 | resize_keep_ratio=True,
50 | oversample="ten_crop",
51 | random_crop=False,
52 | more_fix_crop=False,
53 | multiscale_crop=False,
54 | test_mode=True))
55 |
56 | dist_params = dict(backend='nccl')
57 |
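A minimal sketch (editorial, not part of the repo) of loading a test config like the one above and building the model it describes, mirroring the calls made in tools/train_recognizer.py:

from mmcv import Config
from mmaction.models import build_recognizer

cfg = Config.fromfile('test_configs/TSN/tsn_kinetics400_2d_rgb_r50_seg3_f1s1.py')
model = build_recognizer(cfg.model, train_cfg=cfg.train_cfg, test_cfg=cfg.test_cfg)
model.eval()  # test configs set train_cfg/test_cfg to None and are meant for inference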
--------------------------------------------------------------------------------
/test_configs/TSN/ucf101/tsn_flow_bninception.py:
--------------------------------------------------------------------------------
1 | # model settings
2 | model = dict(
3 | type='TSN2D',
4 | modality='Flow',
5 | in_channels=10,
6 | backbone=dict(
7 | type='BNInception',
8 | pretrained=None,
9 | bn_eval=False,
10 | partial_bn=True),
11 | spatial_temporal_module=dict(
12 | type='SimpleSpatialModule',
13 | spatial_type='avg',
14 | spatial_size=7),
15 | segmental_consensus=dict(
16 | type='SimpleConsensus',
17 | consensus_type='avg'),
18 | cls_head=dict(
19 | type='ClsHead',
20 | with_avg_pool=False,
21 | temporal_feature_size=1,
22 | spatial_feature_size=1,
23 | dropout_ratio=0.7,
24 | in_channels=1024,
25 | num_classes=101))
26 | train_cfg = None
27 | test_cfg = None
28 | # dataset settings
29 | dataset_type = 'RawFramesDataset'
30 | data_root = 'data/ucf101/rawframes'
31 | img_norm_cfg = dict(
32 | mean=[128], std=[1], to_rgb=False)
33 | data = dict(
34 | test=dict(
35 | type=dataset_type,
36 | ann_file='data/ucf101/ucf101_val_split_1_rawframes.txt',
37 | img_prefix=data_root,
38 | img_norm_cfg=img_norm_cfg,
39 | num_segments=25,
40 | new_length=5,
41 | new_step=1,
42 | random_shift=False,
43 | modality='Flow',
44 | image_tmpl='flow_{}_{:05d}.jpg',
45 | img_scale=256,
46 | input_size=224,
47 | div_255=False,
48 | flip_ratio=0,
49 | resize_keep_ratio=True,
50 | oversample='ten_crop',
51 | random_crop=False,
52 | more_fix_crop=False,
53 | multiscale_crop=False,
54 | test_mode=True))
55 |
56 | dist_params = dict(backend='nccl')
57 |
--------------------------------------------------------------------------------
/test_configs/TSN/ucf101/tsn_rgb_bninception.py:
--------------------------------------------------------------------------------
1 | # model settings
2 | model = dict(
3 | type='TSN2D',
4 | backbone=dict(
5 | type='BNInception',
6 | pretrained=None,
7 | bn_eval=False,
8 | partial_bn=True),
9 | spatial_temporal_module=dict(
10 | type='SimpleSpatialModule',
11 | spatial_type='avg',
12 | spatial_size=7),
13 | segmental_consensus=dict(
14 | type='SimpleConsensus',
15 | consensus_type='avg'),
16 | cls_head=dict(
17 | type='ClsHead',
18 | with_avg_pool=False,
19 | temporal_feature_size=1,
20 | spatial_feature_size=1,
21 | dropout_ratio=0.8,
22 | in_channels=1024,
23 | init_std=0.001,
24 | num_classes=101))
25 | train_cfg = None
26 | test_cfg = None
27 | # dataset settings
28 | dataset_type = 'RawFramesDataset'
29 | data_root = 'data/ucf101/rawframes'
30 | img_norm_cfg = dict(
31 | mean=[104, 117, 128], std=[1, 1, 1], to_rgb=False)
32 |
33 | data = dict(
34 | test=dict(
35 | type=dataset_type,
36 | ann_file='data/ucf101/ucf101_val_split_1_rawframes.txt',
37 | img_prefix=data_root,
38 | img_norm_cfg=img_norm_cfg,
39 | num_segments=25,
40 | new_length=1,
41 | new_step=1,
42 | random_shift=False,
43 | modality='RGB',
44 | image_tmpl='img_{:05d}.jpg',
45 | img_scale=256,
46 | input_size=224,
47 | div_255=False,
48 | flip_ratio=0,
49 | resize_keep_ratio=True,
50 | oversample='ten_crop',
51 | random_crop=False,
52 | more_fix_crop=False,
53 | multiscale_crop=False,
54 | test_mode=True))
55 |
56 | dist_params = dict(backend='nccl')
57 |
--------------------------------------------------------------------------------
/test_configs/ava/ava_fast_rcnn_nl_r50_c4_1x_kinetics_pretrain_crop.py:
--------------------------------------------------------------------------------
1 | # model settings
2 | model = dict(
3 | type='FastRCNN',
4 | backbone=dict(
5 | type='ResNet_I3D',
6 | pretrained=None,
7 | pretrained2d=False,
8 | depth=50,
9 | num_stages=3,
10 | spatial_strides=(1, 2, 2),
11 | temporal_strides=(1, 1, 1),
12 | dilations=(1, 1, 1),
13 | out_indices=(2,),
14 | frozen_stages=-1,
15 | inflate_freq=((1,1,1), (1,0,1,0), (1,0,1,0,1,0)),
16 | inflate_style='3x1x1',
17 | nonlocal_stages=(1, 2),
18 | # nonlocal_freq=((0,0,0), (0,1,0,1), (0,1,0,1,0,1)),
19 | nonlocal_cfg=dict(nonlocal_type="gaussian"),
20 | nonlocal_freq=((0,0,0), (0,1,0,1), (0,1,0,1,0,1), (0,0,0)),
21 | conv1_kernel_t=5,
22 | conv1_stride_t=1,
23 | pool1_kernel_t=1,
24 | pool1_stride_t=1,
25 | bn_eval=False,
26 | partial_bn=False,
27 | bn_frozen=True,
28 | style='pytorch'),
29 | shared_head=dict(
30 | type='ResI3DLayer',
31 | pretrained=None,
32 | pretrained2d=False,
33 | depth=50,
34 | stage=3,
35 | spatial_stride=2,
36 | temporal_stride=1,
37 | dilation=1,
38 | style='pytorch',
39 | inflate_freq=(0, 1, 0),
40 | inflate_style='3x1x1',
41 | bn_eval=False,
42 | bn_frozen=True),
43 | bbox_roi_extractor=dict(
44 | type='SingleRoIStraight3DExtractor',
45 | roi_layer=dict(type='RoIAlign', out_size=16, sample_num=2),
46 | out_channels=1024,
47 | featmap_strides=[16],
48 | with_temporal_pool=True),
49 | dropout_ratio=0.3,
50 | bbox_head=dict(
51 | type='BBoxHead',
52 | with_reg=False,
53 | with_temporal_pool=False,
54 | with_spatial_pool=True,
55 | spatial_pool_type='max',
56 | roi_feat_size=(1, 8, 8),
57 | in_channels=2048,
58 | num_classes=81,
59 | target_means=[0., 0., 0., 0.],
60 | target_stds=[0.1, 0.1, 0.2, 0.2],
61 | multilabel_classification=True,
62 | reg_class_agnostic=True,
63 | nms_class_agnostic=True))
64 | # model training and testing settings
65 | test_cfg = dict(
66 | train_detector=False,
67 | person_det_score_thr=0.85,
68 | rcnn=dict(
69 | score_thr=0.00, nms=dict(type='nms', iou_thr=1.0), max_per_img=100,
70 | action_thr=0.00))
71 | # dataset settings
72 | dataset_type = 'AVADataset'
73 | data_root = 'data/ava/rawframes/'
74 | img_norm_cfg = dict(
75 | mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
76 | data = dict(
77 | test=dict(
78 | type=dataset_type,
79 | ann_file='data/ava/annotations/ava_val_v2.1.csv',
80 | exclude_file='data/ava/annotations/ava_val_excluded_timestamps_v2.1.csv',
81 | label_file='data/ava/annotations/ava_action_list_v2.1_for_activitynet_2018.pbtxt',
82 | video_stat_file='data/ava/ava_video_resolution_stats.csv',
83 | proposal_file='data/ava/ava_dense_proposals_val.FAIR.recall_93.9.pkl',
84 | img_prefix=data_root,
85 | img_norm_cfg=img_norm_cfg,
86 | input_format='NCTHW',
87 | new_length=32,
88 | new_step=2,
89 | random_shift=False,
90 | modality='RGB',
91 | image_tmpl='img_{:05d}.jpg',
92 | img_scale=[(800, 256), ],
93 | input_size=None,
94 | div_255=False,
95 | size_divisor=32,
96 | flip_ratio=0,
97 | resize_keep_ratio=True,
98 | with_label=False,
99 | test_mode=True))
100 |
101 | dist_params = dict(backend='nccl')
102 |
--------------------------------------------------------------------------------
/test_configs/thumos14/ssn_thumos14_rgb_bn_inception.py:
--------------------------------------------------------------------------------
1 | # model settings
2 | model = dict(
3 | type='SSN2D',
4 | backbone=dict(
5 | type='BNInception',
6 | pretrained=None,
7 | bn_eval=False,
8 | partial_bn=True),
9 | spatial_temporal_module=dict(
10 | type='SimpleSpatialModule',
11 | spatial_type='avg',
12 | spatial_size=7),
13 | dropout_ratio=0.8,
14 | segmental_consensus=dict(
15 | type='StructuredTemporalPyramidPooling',
16 | standalong_classifier=True,
17 | stpp_cfg=(1, 1, 1),
18 | num_seg=(2, 5, 2)),
19 | cls_head=dict(
20 | type='SSNHead',
21 | dropout_ratio=0.,
22 | in_channels_activity=1024,
23 | in_channels_complete=3072,
24 | num_classes=20,
25 | with_bg=False,
26 | with_reg=True))
27 | # model training and testing settings
28 | test_cfg=dict(
29 | ssn=dict(
30 | sampler=dict(
31 | test_interval=6,
32 | batch_size=16),
33 | evaluater=dict(
34 | top_k=2000,
35 | nms=0.2,
36 | softmax_before_filter=True,
37 | cls_score_dict=None,
38 | cls_top_k=2)))
39 | # dataset settings
40 | dataset_type = 'SSNDataset'
41 | data_root = './data/thumos14/rawframes/'
42 | img_norm_cfg = dict(
43 | mean=[104, 117, 128], std=[1, 1, 1], to_rgb=False)
44 | data = dict(
45 | test=dict(
46 | type=dataset_type,
47 | ann_file='data/thumos14/thumos14_tag_test_normalized_proposal_list.txt',
48 | img_prefix=data_root,
49 | img_norm_cfg=img_norm_cfg,
50 |         train_cfg=None,  # this test config defines no train_cfg
51 | test_cfg=test_cfg,
52 | input_format='NCHW',
53 | aug_ratio=0.5,
54 | new_length=1,
55 | new_step=1,
56 | random_shift=False,
57 | modality='RGB',
58 | image_tmpl='img_{:05d}.jpg',
59 | img_scale=(340, 256),
60 | input_size=224,
61 | oversample=None,
62 | div_255=False,
63 | size_divisor=32,
64 | flip_ratio=0,
65 | resize_keep_ratio=True,
66 | test_mode=True))
67 |
68 | dist_params = dict(backend='nccl')
69 |
--------------------------------------------------------------------------------
/tools/dist_test_detector.sh:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 |
3 | PYTHON=${PYTHON:-"python"}
4 |
5 | $PYTHON -m torch.distributed.launch --nproc_per_node=$3 $(dirname "$0")/test_detector.py $1 $2 --launcher pytorch --eval bbox ${@:4}
6 |
7 |
--------------------------------------------------------------------------------
/tools/dist_test_recognizer.sh:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 |
3 | PYTHON=${PYTHON:-"python"}
4 |
5 | $PYTHON -m torch.distributed.launch --nproc_per_node=$3 $(dirname "$0")/test_recognizer.py $1 $2 --launcher pytorch ${@:4}
6 |
--------------------------------------------------------------------------------
/tools/dist_test_recognizer_heavy.sh:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 |
3 | PYTHON=${PYTHON:-"python"}
4 |
5 | $PYTHON -m torch.distributed.launch --nproc_per_node=$3 $(dirname "$0")/test_recognizer_heavy.py $1 $2 --launcher pytorch ${@:4}
6 |
--------------------------------------------------------------------------------
/tools/dist_train_detector.sh:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 |
3 | PYTHON=${PYTHON:-"python"}
4 |
5 | $PYTHON -m torch.distributed.launch --nproc_per_node=$2 $(dirname "$0")/train_detector.py $1 --launcher pytorch ${@:3}
6 |
--------------------------------------------------------------------------------
/tools/dist_train_localizer.sh:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 |
3 | PYTHON=${PYTHON:-"python"}
4 |
5 | $PYTHON -m torch.distributed.launch --nproc_per_node=$2 $(dirname "$0")/train_localizer.py $1 --launcher pytorch ${@:3}
6 |
--------------------------------------------------------------------------------
/tools/dist_train_recognizer.sh:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 |
3 | PYTHON=${PYTHON:-"python"}
4 |
5 | $PYTHON -m torch.distributed.launch --nproc_per_node=$2 $(dirname "$0")/train_recognizer.py $1 --launcher pytorch ${@:3}
6 |
--------------------------------------------------------------------------------
/tools/generate_lmdb.py:
--------------------------------------------------------------------------------
1 | import argparse
2 | import glob
3 | import os.path as osp
4 | from mmcv.lmdb.io import create_rawimage_dataset
5 |
6 |
7 | def parse_args():
8 | parser = argparse.ArgumentParser(
9 | description='generate lmdb datasets from raw frames')
10 | parser.add_argument(
11 | 'root_dir', help='root directory to store the raw frames')
12 | parser.add_argument(
13 |         'target_dir', help='target directory to store the generated lmdbs')
14 | parser.add_argument('--image_format', nargs='+',
15 | help='format of the images to be stored',
16 | default=['img*.jpg'])
17 | parser.add_argument('--lmdb_tmpl', type=str,
18 | help='template for the lmdb to be generated',
19 | default='{}_img_lmdb')
20 | parser.add_argument('--image_tmpl', type=str,
21 | help='template for the lmdb key', default=None)
22 | parser.add_argument('--modality', type=str, help='modality',
23 | choices=['RGB', 'Flow'], default='RGB')
24 | args = parser.parse_args()
25 |
26 | return args
27 |
28 |
29 | def main():
30 | args = parse_args()
31 | video_path_list = glob.glob(osp.join(args.root_dir, '*'))
32 | for i, vv in enumerate(video_path_list):
33 | if not osp.isdir(vv):
34 | continue
35 | image_file_list = []
36 | for image_format in args.image_format:
37 | image_file_list += glob.glob(osp.join(vv, image_format))
38 | vid = vv.split('/')[-1]
39 | output_path = osp.join(args.target_dir, args.lmdb_tmpl.format(vid))
40 | create_rawimage_dataset(output_path, image_file_list,
41 | image_tmpl=args.image_tmpl,
42 | flag='color' if args.modality == 'RGB'
43 | else 'grayscale',
44 | check_valid=True)
45 |
46 |
47 | if __name__ == '__main__':
48 | main()
49 |
--------------------------------------------------------------------------------
/tools/train_detector.py:
--------------------------------------------------------------------------------
1 | from __future__ import division
2 |
3 | import argparse
4 | from mmcv import Config
5 |
6 | from mmaction import __version__
7 | from mmaction.datasets import get_trimmed_dataset
8 | from mmaction.apis import (train_network, init_dist, get_root_logger,
9 | set_random_seed)
10 | from mmaction.models import build_detector
11 | import torch
12 |
13 |
14 | def parse_args():
15 |     parser = argparse.ArgumentParser(description='Train an action detector')
16 | parser.add_argument('config', help='train config file path')
17 | parser.add_argument('--work_dir', help='the dir to save logs and models')
18 | parser.add_argument(
19 | '--resume_from', help='the checkpoint file to resume from')
20 | parser.add_argument(
21 | '--validate',
22 | action='store_true',
23 | help='whether to evaluate the checkpoint during training')
24 | parser.add_argument(
25 | '--gpus',
26 | type=int,
27 | default=1,
28 | help='number of gpus to use '
29 | '(only applicable to non-distributed training)')
30 | parser.add_argument('--seed', type=int, default=None, help='random seed')
31 | parser.add_argument(
32 | '--launcher',
33 | choices=['none', 'pytorch', 'slurm', 'mpi'],
34 | default='none',
35 | help='job launcher')
36 | parser.add_argument('--local_rank', type=int, default=0)
37 | args = parser.parse_args()
38 |
39 | return args
40 |
41 |
42 | def main():
43 | args = parse_args()
44 |
45 | cfg = Config.fromfile(args.config)
46 | # set cudnn_benchmark
47 | if cfg.get('cudnn_benchmark', False):
48 | torch.backends.cudnn.benchmark = True
49 | # update configs according to CLI args
50 | if args.work_dir is not None:
51 | cfg.work_dir = args.work_dir
52 | if args.resume_from is not None:
53 | cfg.resume_from = args.resume_from
54 | cfg.gpus = args.gpus
55 | if cfg.checkpoint_config is not None:
56 | # save mmaction version in checkpoints as meta data
57 | cfg.checkpoint_config.meta = dict(
58 | mmact_version=__version__, config=cfg.text)
59 |
60 | # init distributed env first, since logger depends on the dist info.
61 | if args.launcher == 'none':
62 | distributed = False
63 | else:
64 | distributed = True
65 | init_dist(args.launcher, **cfg.dist_params)
66 |
67 | # init logger before other steps
68 | logger = get_root_logger(cfg.log_level)
69 | logger.info('Distributed training: {}'.format(distributed))
70 |
71 | # set random seeds
72 | if args.seed is not None:
73 | logger.info('Set random seed to {}'.format(args.seed))
74 | set_random_seed(args.seed)
75 |
76 | model = build_detector(
77 | cfg.model, train_cfg=cfg.train_cfg, test_cfg=cfg.test_cfg)
78 |
79 | train_dataset = get_trimmed_dataset(cfg.data.train)
80 | train_network(
81 | model,
82 | train_dataset,
83 | cfg,
84 | distributed=distributed,
85 | validate=args.validate,
86 | logger=logger)
87 |
88 |
89 | if __name__ == '__main__':
90 | main()
91 |
--------------------------------------------------------------------------------
/tools/train_localizer.py:
--------------------------------------------------------------------------------
1 | from __future__ import division
2 |
3 | import argparse
4 | from mmcv import Config
5 |
6 | from mmaction import __version__
7 | from mmaction.datasets import get_trimmed_dataset
8 | from mmaction.apis import (train_network, init_dist, get_root_logger,
9 | set_random_seed)
10 | from mmaction.models import build_localizer
11 | import torch
12 |
13 |
14 | def parse_args():
15 | parser = argparse.ArgumentParser(description='Train an action localizer')
16 | parser.add_argument('config', help='train config file path')
17 | parser.add_argument('--work_dir', help='the dir to save logs and models')
18 | parser.add_argument(
19 | '--resume_from', help='the checkpoint file to resume from')
20 | parser.add_argument(
21 | '--validate',
22 | action='store_true',
23 | help='whether to evaluate the checkpoint during training')
24 | parser.add_argument(
25 | '--gpus',
26 | type=int,
27 | default=1,
28 | help='number of gpus to use '
29 | '(only applicable to non-distributed training)')
30 | parser.add_argument('--seed', type=int, default=None, help='random seed')
31 | parser.add_argument(
32 | '--launcher',
33 | choices=['none', 'pytorch', 'slurm', 'mpi'],
34 | default='none',
35 | help='job launcher')
36 | parser.add_argument('--local_rank', type=int, default=0)
37 | args = parser.parse_args()
38 |
39 | return args
40 |
41 |
42 | def main():
43 | args = parse_args()
44 |
45 | cfg = Config.fromfile(args.config)
46 | # set cudnn_benchmark
47 | if cfg.get('cudnn_benchmark', False):
48 | torch.backends.cudnn.benchmark = True
49 | # update configs according to CLI args
50 | if args.work_dir is not None:
51 | cfg.work_dir = args.work_dir
52 | if args.resume_from is not None:
53 | cfg.resume_from = args.resume_from
54 | cfg.gpus = args.gpus
55 | if cfg.checkpoint_config is not None:
56 | # save mmaction version in checkpoints as meta data
57 | cfg.checkpoint_config.meta = dict(
58 | mmact_version=__version__, config=cfg.text)
59 |
60 | # init distributed env first, since logger depends on the dist info.
61 | if args.launcher == 'none':
62 | distributed = False
63 | else:
64 | distributed = True
65 | init_dist(args.launcher, **cfg.dist_params)
66 |
67 | # init logger before other steps
68 | logger = get_root_logger(cfg.log_level)
69 | logger.info('Distributed training: {}'.format(distributed))
70 |
71 | # set random seeds
72 | if args.seed is not None:
73 | logger.info('Set random seed to {}'.format(args.seed))
74 | set_random_seed(args.seed)
75 |
76 | model = build_localizer(
77 | cfg.model, train_cfg=cfg.train_cfg, test_cfg=cfg.test_cfg)
78 |
79 | train_dataset = get_trimmed_dataset(cfg.data.train)
80 | train_network(
81 | model,
82 | train_dataset,
83 | cfg,
84 | distributed=distributed,
85 | validate=args.validate,
86 | logger=logger)
87 |
88 |
89 | if __name__ == '__main__':
90 | main()
91 |
--------------------------------------------------------------------------------
/tools/train_recognizer.py:
--------------------------------------------------------------------------------
1 | from __future__ import division
2 |
3 | import argparse
4 | from mmcv import Config
5 |
6 | from mmaction import __version__
7 | from mmaction.datasets import get_trimmed_dataset
8 | from mmaction.apis import (train_network, init_dist, get_root_logger,
9 | set_random_seed)
10 | from mmaction.models import build_recognizer
11 | import torch
12 |
13 |
14 | def parse_args():
15 | parser = argparse.ArgumentParser(description='Train an action recognizer')
16 | parser.add_argument('config', help='train config file path')
17 | parser.add_argument('--work_dir', help='the dir to save logs and models')
18 | parser.add_argument(
19 | '--resume_from', help='the checkpoint file to resume from')
20 | parser.add_argument(
21 | '--validate',
22 | action='store_true',
23 | help='whether to evaluate the checkpoint during training')
24 | parser.add_argument(
25 | '--gpus',
26 | type=int,
27 | default=1,
28 | help='number of gpus to use '
29 | '(only applicable to non-distributed training)')
30 | parser.add_argument('--seed', type=int, default=None, help='random seed')
31 | parser.add_argument(
32 | '--launcher',
33 | choices=['none', 'pytorch', 'slurm', 'mpi'],
34 | default='none',
35 | help='job launcher')
36 | parser.add_argument('--local_rank', type=int, default=0)
37 | args = parser.parse_args()
38 |
39 | return args
40 |
41 |
42 | def main():
43 | args = parse_args()
44 |
45 | cfg = Config.fromfile(args.config)
46 | # set cudnn_benchmark
47 | if cfg.get('cudnn_benchmark', False):
48 | torch.backends.cudnn.benchmark = True
49 | # update configs according to CLI args
50 | if args.work_dir is not None:
51 | cfg.work_dir = args.work_dir
52 | if args.resume_from is not None:
53 | cfg.resume_from = args.resume_from
54 | cfg.gpus = args.gpus
55 | if cfg.checkpoint_config is not None:
56 | # save mmaction version in checkpoints as meta data
57 | cfg.checkpoint_config.meta = dict(
58 | mmact_version=__version__, config=cfg.text)
59 |
60 | # init distributed env first, since logger depends on the dist info.
61 | if args.launcher == 'none':
62 | distributed = False
63 | else:
64 | distributed = True
65 | init_dist(args.launcher, **cfg.dist_params)
66 |
67 | # init logger before other steps
68 | logger = get_root_logger(cfg.log_level)
69 | logger.info('Distributed training: {}'.format(distributed))
70 |
71 | # set random seeds
72 | if args.seed is not None:
73 | logger.info('Set random seed to {}'.format(args.seed))
74 | set_random_seed(args.seed)
75 |
76 | model = build_recognizer(
77 | cfg.model, train_cfg=cfg.train_cfg, test_cfg=cfg.test_cfg)
78 |
79 | train_dataset = get_trimmed_dataset(cfg.data.train)
80 | train_network(
81 | model,
82 | train_dataset,
83 | cfg,
84 | distributed=distributed,
85 | validate=args.validate,
86 | logger=logger)
87 |
88 |
89 | if __name__ == '__main__':
90 | main()
91 |
--------------------------------------------------------------------------------